1*b411b363SPhilipp Reisner /* 2*b411b363SPhilipp Reisner drbd_bitmap.c 3*b411b363SPhilipp Reisner 4*b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5*b411b363SPhilipp Reisner 6*b411b363SPhilipp Reisner Copyright (C) 2004-2008, LINBIT Information Technologies GmbH. 7*b411b363SPhilipp Reisner Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8*b411b363SPhilipp Reisner Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9*b411b363SPhilipp Reisner 10*b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11*b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12*b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13*b411b363SPhilipp Reisner any later version. 14*b411b363SPhilipp Reisner 15*b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16*b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17*b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*b411b363SPhilipp Reisner GNU General Public License for more details. 19*b411b363SPhilipp Reisner 20*b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21*b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22*b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23*b411b363SPhilipp Reisner */ 24*b411b363SPhilipp Reisner 25*b411b363SPhilipp Reisner #include <linux/bitops.h> 26*b411b363SPhilipp Reisner #include <linux/vmalloc.h> 27*b411b363SPhilipp Reisner #include <linux/string.h> 28*b411b363SPhilipp Reisner #include <linux/drbd.h> 29*b411b363SPhilipp Reisner #include <asm/kmap_types.h> 30*b411b363SPhilipp Reisner #include "drbd_int.h" 31*b411b363SPhilipp Reisner 32*b411b363SPhilipp Reisner /* OPAQUE outside this file! 
33*b411b363SPhilipp Reisner * interface defined in drbd_int.h 34*b411b363SPhilipp Reisner 35*b411b363SPhilipp Reisner * convention: 36*b411b363SPhilipp Reisner * function name drbd_bm_... => used elsewhere, "public". 37*b411b363SPhilipp Reisner * function name bm_... => internal to implementation, "private". 38*b411b363SPhilipp Reisner 39*b411b363SPhilipp Reisner * Note that since find_first_bit returns int, at the current granularity of 40*b411b363SPhilipp Reisner * the bitmap (4KB per byte), this implementation "only" supports up to 41*b411b363SPhilipp Reisner * 1<<(32+12) == 16 TB... 42*b411b363SPhilipp Reisner */ 43*b411b363SPhilipp Reisner 44*b411b363SPhilipp Reisner /* 45*b411b363SPhilipp Reisner * NOTE 46*b411b363SPhilipp Reisner * Access to the *bm_pages is protected by bm_lock. 47*b411b363SPhilipp Reisner * It is safe to read the other members within the lock. 48*b411b363SPhilipp Reisner * 49*b411b363SPhilipp Reisner * drbd_bm_set_bits is called from bio_endio callbacks, 50*b411b363SPhilipp Reisner * We may be called with irq already disabled, 51*b411b363SPhilipp Reisner * so we need spin_lock_irqsave(). 52*b411b363SPhilipp Reisner * And we need the kmap_atomic. 53*b411b363SPhilipp Reisner */ 54*b411b363SPhilipp Reisner struct drbd_bitmap { 55*b411b363SPhilipp Reisner struct page **bm_pages; 56*b411b363SPhilipp Reisner spinlock_t bm_lock; 57*b411b363SPhilipp Reisner /* WARNING unsigned long bm_*: 58*b411b363SPhilipp Reisner * 32bit number of bit offset is just enough for 512 MB bitmap. 59*b411b363SPhilipp Reisner * it will blow up if we make the bitmap bigger... 60*b411b363SPhilipp Reisner * not that it makes much sense to have a bitmap that large, 61*b411b363SPhilipp Reisner * rather change the granularity to 16k or 64k or something. 62*b411b363SPhilipp Reisner * (that implies other problems, however...) 63*b411b363SPhilipp Reisner */ 64*b411b363SPhilipp Reisner unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? 
*/ 65*b411b363SPhilipp Reisner unsigned long bm_bits; 66*b411b363SPhilipp Reisner size_t bm_words; 67*b411b363SPhilipp Reisner size_t bm_number_of_pages; 68*b411b363SPhilipp Reisner sector_t bm_dev_capacity; 69*b411b363SPhilipp Reisner struct semaphore bm_change; /* serializes resize operations */ 70*b411b363SPhilipp Reisner 71*b411b363SPhilipp Reisner atomic_t bm_async_io; 72*b411b363SPhilipp Reisner wait_queue_head_t bm_io_wait; 73*b411b363SPhilipp Reisner 74*b411b363SPhilipp Reisner unsigned long bm_flags; 75*b411b363SPhilipp Reisner 76*b411b363SPhilipp Reisner /* debugging aid, in case we are still racy somewhere */ 77*b411b363SPhilipp Reisner char *bm_why; 78*b411b363SPhilipp Reisner struct task_struct *bm_task; 79*b411b363SPhilipp Reisner }; 80*b411b363SPhilipp Reisner 81*b411b363SPhilipp Reisner /* definition of bits in bm_flags */ 82*b411b363SPhilipp Reisner #define BM_LOCKED 0 83*b411b363SPhilipp Reisner #define BM_MD_IO_ERROR 1 84*b411b363SPhilipp Reisner #define BM_P_VMALLOCED 2 85*b411b363SPhilipp Reisner 86*b411b363SPhilipp Reisner static int bm_is_locked(struct drbd_bitmap *b) 87*b411b363SPhilipp Reisner { 88*b411b363SPhilipp Reisner return test_bit(BM_LOCKED, &b->bm_flags); 89*b411b363SPhilipp Reisner } 90*b411b363SPhilipp Reisner 91*b411b363SPhilipp Reisner #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) 92*b411b363SPhilipp Reisner static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) 93*b411b363SPhilipp Reisner { 94*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 95*b411b363SPhilipp Reisner if (!__ratelimit(&drbd_ratelimit_state)) 96*b411b363SPhilipp Reisner return; 97*b411b363SPhilipp Reisner dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", 98*b411b363SPhilipp Reisner current == mdev->receiver.task ? "receiver" : 99*b411b363SPhilipp Reisner current == mdev->asender.task ? "asender" : 100*b411b363SPhilipp Reisner current == mdev->worker.task ? 
"worker" : current->comm, 101*b411b363SPhilipp Reisner func, b->bm_why ?: "?", 102*b411b363SPhilipp Reisner b->bm_task == mdev->receiver.task ? "receiver" : 103*b411b363SPhilipp Reisner b->bm_task == mdev->asender.task ? "asender" : 104*b411b363SPhilipp Reisner b->bm_task == mdev->worker.task ? "worker" : "?"); 105*b411b363SPhilipp Reisner } 106*b411b363SPhilipp Reisner 107*b411b363SPhilipp Reisner void drbd_bm_lock(struct drbd_conf *mdev, char *why) 108*b411b363SPhilipp Reisner { 109*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 110*b411b363SPhilipp Reisner int trylock_failed; 111*b411b363SPhilipp Reisner 112*b411b363SPhilipp Reisner if (!b) { 113*b411b363SPhilipp Reisner dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n"); 114*b411b363SPhilipp Reisner return; 115*b411b363SPhilipp Reisner } 116*b411b363SPhilipp Reisner 117*b411b363SPhilipp Reisner trylock_failed = down_trylock(&b->bm_change); 118*b411b363SPhilipp Reisner 119*b411b363SPhilipp Reisner if (trylock_failed) { 120*b411b363SPhilipp Reisner dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", 121*b411b363SPhilipp Reisner current == mdev->receiver.task ? "receiver" : 122*b411b363SPhilipp Reisner current == mdev->asender.task ? "asender" : 123*b411b363SPhilipp Reisner current == mdev->worker.task ? "worker" : current->comm, 124*b411b363SPhilipp Reisner why, b->bm_why ?: "?", 125*b411b363SPhilipp Reisner b->bm_task == mdev->receiver.task ? "receiver" : 126*b411b363SPhilipp Reisner b->bm_task == mdev->asender.task ? "asender" : 127*b411b363SPhilipp Reisner b->bm_task == mdev->worker.task ? 
"worker" : "?"); 128*b411b363SPhilipp Reisner down(&b->bm_change); 129*b411b363SPhilipp Reisner } 130*b411b363SPhilipp Reisner if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) 131*b411b363SPhilipp Reisner dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); 132*b411b363SPhilipp Reisner 133*b411b363SPhilipp Reisner b->bm_why = why; 134*b411b363SPhilipp Reisner b->bm_task = current; 135*b411b363SPhilipp Reisner } 136*b411b363SPhilipp Reisner 137*b411b363SPhilipp Reisner void drbd_bm_unlock(struct drbd_conf *mdev) 138*b411b363SPhilipp Reisner { 139*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 140*b411b363SPhilipp Reisner if (!b) { 141*b411b363SPhilipp Reisner dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n"); 142*b411b363SPhilipp Reisner return; 143*b411b363SPhilipp Reisner } 144*b411b363SPhilipp Reisner 145*b411b363SPhilipp Reisner if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) 146*b411b363SPhilipp Reisner dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); 147*b411b363SPhilipp Reisner 148*b411b363SPhilipp Reisner b->bm_why = NULL; 149*b411b363SPhilipp Reisner b->bm_task = NULL; 150*b411b363SPhilipp Reisner up(&b->bm_change); 151*b411b363SPhilipp Reisner } 152*b411b363SPhilipp Reisner 153*b411b363SPhilipp Reisner /* word offset to long pointer */ 154*b411b363SPhilipp Reisner static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) 155*b411b363SPhilipp Reisner { 156*b411b363SPhilipp Reisner struct page *page; 157*b411b363SPhilipp Reisner unsigned long page_nr; 158*b411b363SPhilipp Reisner 159*b411b363SPhilipp Reisner /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ 160*b411b363SPhilipp Reisner page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); 161*b411b363SPhilipp Reisner BUG_ON(page_nr >= b->bm_number_of_pages); 162*b411b363SPhilipp Reisner page = b->bm_pages[page_nr]; 163*b411b363SPhilipp Reisner 164*b411b363SPhilipp Reisner return (unsigned long *) kmap_atomic(page, km); 
165*b411b363SPhilipp Reisner } 166*b411b363SPhilipp Reisner 167*b411b363SPhilipp Reisner static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) 168*b411b363SPhilipp Reisner { 169*b411b363SPhilipp Reisner return __bm_map_paddr(b, offset, KM_IRQ1); 170*b411b363SPhilipp Reisner } 171*b411b363SPhilipp Reisner 172*b411b363SPhilipp Reisner static void __bm_unmap(unsigned long *p_addr, const enum km_type km) 173*b411b363SPhilipp Reisner { 174*b411b363SPhilipp Reisner kunmap_atomic(p_addr, km); 175*b411b363SPhilipp Reisner }; 176*b411b363SPhilipp Reisner 177*b411b363SPhilipp Reisner static void bm_unmap(unsigned long *p_addr) 178*b411b363SPhilipp Reisner { 179*b411b363SPhilipp Reisner return __bm_unmap(p_addr, KM_IRQ1); 180*b411b363SPhilipp Reisner } 181*b411b363SPhilipp Reisner 182*b411b363SPhilipp Reisner /* long word offset of _bitmap_ sector */ 183*b411b363SPhilipp Reisner #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) 184*b411b363SPhilipp Reisner /* word offset from start of bitmap to word number _in_page_ 185*b411b363SPhilipp Reisner * modulo longs per page 186*b411b363SPhilipp Reisner #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) 187*b411b363SPhilipp Reisner hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) 188*b411b363SPhilipp Reisner so do it explicitly: 189*b411b363SPhilipp Reisner */ 190*b411b363SPhilipp Reisner #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) 191*b411b363SPhilipp Reisner 192*b411b363SPhilipp Reisner /* Long words per page */ 193*b411b363SPhilipp Reisner #define LWPP (PAGE_SIZE/sizeof(long)) 194*b411b363SPhilipp Reisner 195*b411b363SPhilipp Reisner /* 196*b411b363SPhilipp Reisner * actually most functions herein should take a struct drbd_bitmap*, not a 197*b411b363SPhilipp Reisner * struct drbd_conf*, but for the debug macros I like to have the mdev around 198*b411b363SPhilipp Reisner * to be able to report device specific. 
199*b411b363SPhilipp Reisner */ 200*b411b363SPhilipp Reisner 201*b411b363SPhilipp Reisner static void bm_free_pages(struct page **pages, unsigned long number) 202*b411b363SPhilipp Reisner { 203*b411b363SPhilipp Reisner unsigned long i; 204*b411b363SPhilipp Reisner if (!pages) 205*b411b363SPhilipp Reisner return; 206*b411b363SPhilipp Reisner 207*b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 208*b411b363SPhilipp Reisner if (!pages[i]) { 209*b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: bm_free_pages tried to free " 210*b411b363SPhilipp Reisner "a NULL pointer; i=%lu n=%lu\n", 211*b411b363SPhilipp Reisner i, number); 212*b411b363SPhilipp Reisner continue; 213*b411b363SPhilipp Reisner } 214*b411b363SPhilipp Reisner __free_page(pages[i]); 215*b411b363SPhilipp Reisner pages[i] = NULL; 216*b411b363SPhilipp Reisner } 217*b411b363SPhilipp Reisner } 218*b411b363SPhilipp Reisner 219*b411b363SPhilipp Reisner static void bm_vk_free(void *ptr, int v) 220*b411b363SPhilipp Reisner { 221*b411b363SPhilipp Reisner if (v) 222*b411b363SPhilipp Reisner vfree(ptr); 223*b411b363SPhilipp Reisner else 224*b411b363SPhilipp Reisner kfree(ptr); 225*b411b363SPhilipp Reisner } 226*b411b363SPhilipp Reisner 227*b411b363SPhilipp Reisner /* 228*b411b363SPhilipp Reisner * "have" and "want" are NUMBER OF PAGES. 
229*b411b363SPhilipp Reisner */ 230*b411b363SPhilipp Reisner static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) 231*b411b363SPhilipp Reisner { 232*b411b363SPhilipp Reisner struct page **old_pages = b->bm_pages; 233*b411b363SPhilipp Reisner struct page **new_pages, *page; 234*b411b363SPhilipp Reisner unsigned int i, bytes, vmalloced = 0; 235*b411b363SPhilipp Reisner unsigned long have = b->bm_number_of_pages; 236*b411b363SPhilipp Reisner 237*b411b363SPhilipp Reisner BUG_ON(have == 0 && old_pages != NULL); 238*b411b363SPhilipp Reisner BUG_ON(have != 0 && old_pages == NULL); 239*b411b363SPhilipp Reisner 240*b411b363SPhilipp Reisner if (have == want) 241*b411b363SPhilipp Reisner return old_pages; 242*b411b363SPhilipp Reisner 243*b411b363SPhilipp Reisner /* Trying kmalloc first, falling back to vmalloc. 244*b411b363SPhilipp Reisner * GFP_KERNEL is ok, as this is done when a lower level disk is 245*b411b363SPhilipp Reisner * "attached" to the drbd. Context is receiver thread or cqueue 246*b411b363SPhilipp Reisner * thread. As we have no disk yet, we are not in the IO path, 247*b411b363SPhilipp Reisner * not even the IO path of the peer. 
*/ 248*b411b363SPhilipp Reisner bytes = sizeof(struct page *)*want; 249*b411b363SPhilipp Reisner new_pages = kmalloc(bytes, GFP_KERNEL); 250*b411b363SPhilipp Reisner if (!new_pages) { 251*b411b363SPhilipp Reisner new_pages = vmalloc(bytes); 252*b411b363SPhilipp Reisner if (!new_pages) 253*b411b363SPhilipp Reisner return NULL; 254*b411b363SPhilipp Reisner vmalloced = 1; 255*b411b363SPhilipp Reisner } 256*b411b363SPhilipp Reisner 257*b411b363SPhilipp Reisner memset(new_pages, 0, bytes); 258*b411b363SPhilipp Reisner if (want >= have) { 259*b411b363SPhilipp Reisner for (i = 0; i < have; i++) 260*b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 261*b411b363SPhilipp Reisner for (; i < want; i++) { 262*b411b363SPhilipp Reisner page = alloc_page(GFP_HIGHUSER); 263*b411b363SPhilipp Reisner if (!page) { 264*b411b363SPhilipp Reisner bm_free_pages(new_pages + have, i - have); 265*b411b363SPhilipp Reisner bm_vk_free(new_pages, vmalloced); 266*b411b363SPhilipp Reisner return NULL; 267*b411b363SPhilipp Reisner } 268*b411b363SPhilipp Reisner new_pages[i] = page; 269*b411b363SPhilipp Reisner } 270*b411b363SPhilipp Reisner } else { 271*b411b363SPhilipp Reisner for (i = 0; i < want; i++) 272*b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 273*b411b363SPhilipp Reisner /* NOT HERE, we are outside the spinlock! 274*b411b363SPhilipp Reisner bm_free_pages(old_pages + want, have - want); 275*b411b363SPhilipp Reisner */ 276*b411b363SPhilipp Reisner } 277*b411b363SPhilipp Reisner 278*b411b363SPhilipp Reisner if (vmalloced) 279*b411b363SPhilipp Reisner set_bit(BM_P_VMALLOCED, &b->bm_flags); 280*b411b363SPhilipp Reisner else 281*b411b363SPhilipp Reisner clear_bit(BM_P_VMALLOCED, &b->bm_flags); 282*b411b363SPhilipp Reisner 283*b411b363SPhilipp Reisner return new_pages; 284*b411b363SPhilipp Reisner } 285*b411b363SPhilipp Reisner 286*b411b363SPhilipp Reisner /* 287*b411b363SPhilipp Reisner * called on driver init only. TODO call when a device is created. 
288*b411b363SPhilipp Reisner * allocates the drbd_bitmap, and stores it in mdev->bitmap. 289*b411b363SPhilipp Reisner */ 290*b411b363SPhilipp Reisner int drbd_bm_init(struct drbd_conf *mdev) 291*b411b363SPhilipp Reisner { 292*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 293*b411b363SPhilipp Reisner WARN_ON(b != NULL); 294*b411b363SPhilipp Reisner b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL); 295*b411b363SPhilipp Reisner if (!b) 296*b411b363SPhilipp Reisner return -ENOMEM; 297*b411b363SPhilipp Reisner spin_lock_init(&b->bm_lock); 298*b411b363SPhilipp Reisner init_MUTEX(&b->bm_change); 299*b411b363SPhilipp Reisner init_waitqueue_head(&b->bm_io_wait); 300*b411b363SPhilipp Reisner 301*b411b363SPhilipp Reisner mdev->bitmap = b; 302*b411b363SPhilipp Reisner 303*b411b363SPhilipp Reisner return 0; 304*b411b363SPhilipp Reisner } 305*b411b363SPhilipp Reisner 306*b411b363SPhilipp Reisner sector_t drbd_bm_capacity(struct drbd_conf *mdev) 307*b411b363SPhilipp Reisner { 308*b411b363SPhilipp Reisner ERR_IF(!mdev->bitmap) return 0; 309*b411b363SPhilipp Reisner return mdev->bitmap->bm_dev_capacity; 310*b411b363SPhilipp Reisner } 311*b411b363SPhilipp Reisner 312*b411b363SPhilipp Reisner /* called on driver unload. TODO: call when a device is destroyed. 
313*b411b363SPhilipp Reisner */ 314*b411b363SPhilipp Reisner void drbd_bm_cleanup(struct drbd_conf *mdev) 315*b411b363SPhilipp Reisner { 316*b411b363SPhilipp Reisner ERR_IF (!mdev->bitmap) return; 317*b411b363SPhilipp Reisner bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); 318*b411b363SPhilipp Reisner bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); 319*b411b363SPhilipp Reisner kfree(mdev->bitmap); 320*b411b363SPhilipp Reisner mdev->bitmap = NULL; 321*b411b363SPhilipp Reisner } 322*b411b363SPhilipp Reisner 323*b411b363SPhilipp Reisner /* 324*b411b363SPhilipp Reisner * since (b->bm_bits % BITS_PER_LONG) != 0, 325*b411b363SPhilipp Reisner * this masks out the remaining bits. 326*b411b363SPhilipp Reisner * Returns the number of bits cleared. 327*b411b363SPhilipp Reisner */ 328*b411b363SPhilipp Reisner static int bm_clear_surplus(struct drbd_bitmap *b) 329*b411b363SPhilipp Reisner { 330*b411b363SPhilipp Reisner const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; 331*b411b363SPhilipp Reisner size_t w = b->bm_bits >> LN2_BPL; 332*b411b363SPhilipp Reisner int cleared = 0; 333*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 334*b411b363SPhilipp Reisner 335*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, w); 336*b411b363SPhilipp Reisner bm = p_addr + MLPP(w); 337*b411b363SPhilipp Reisner if (w < b->bm_words) { 338*b411b363SPhilipp Reisner cleared = hweight_long(*bm & ~mask); 339*b411b363SPhilipp Reisner *bm &= mask; 340*b411b363SPhilipp Reisner w++; bm++; 341*b411b363SPhilipp Reisner } 342*b411b363SPhilipp Reisner 343*b411b363SPhilipp Reisner if (w < b->bm_words) { 344*b411b363SPhilipp Reisner cleared += hweight_long(*bm); 345*b411b363SPhilipp Reisner *bm = 0; 346*b411b363SPhilipp Reisner } 347*b411b363SPhilipp Reisner bm_unmap(p_addr); 348*b411b363SPhilipp Reisner return cleared; 349*b411b363SPhilipp Reisner } 350*b411b363SPhilipp Reisner 351*b411b363SPhilipp Reisner static void 
bm_set_surplus(struct drbd_bitmap *b) 352*b411b363SPhilipp Reisner { 353*b411b363SPhilipp Reisner const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; 354*b411b363SPhilipp Reisner size_t w = b->bm_bits >> LN2_BPL; 355*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 356*b411b363SPhilipp Reisner 357*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, w); 358*b411b363SPhilipp Reisner bm = p_addr + MLPP(w); 359*b411b363SPhilipp Reisner if (w < b->bm_words) { 360*b411b363SPhilipp Reisner *bm |= ~mask; 361*b411b363SPhilipp Reisner bm++; w++; 362*b411b363SPhilipp Reisner } 363*b411b363SPhilipp Reisner 364*b411b363SPhilipp Reisner if (w < b->bm_words) { 365*b411b363SPhilipp Reisner *bm = ~(0UL); 366*b411b363SPhilipp Reisner } 367*b411b363SPhilipp Reisner bm_unmap(p_addr); 368*b411b363SPhilipp Reisner } 369*b411b363SPhilipp Reisner 370*b411b363SPhilipp Reisner static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) 371*b411b363SPhilipp Reisner { 372*b411b363SPhilipp Reisner unsigned long *p_addr, *bm, offset = 0; 373*b411b363SPhilipp Reisner unsigned long bits = 0; 374*b411b363SPhilipp Reisner unsigned long i, do_now; 375*b411b363SPhilipp Reisner 376*b411b363SPhilipp Reisner while (offset < b->bm_words) { 377*b411b363SPhilipp Reisner i = do_now = min_t(size_t, b->bm_words-offset, LWPP); 378*b411b363SPhilipp Reisner p_addr = __bm_map_paddr(b, offset, KM_USER0); 379*b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 380*b411b363SPhilipp Reisner while (i--) { 381*b411b363SPhilipp Reisner #ifndef __LITTLE_ENDIAN 382*b411b363SPhilipp Reisner if (swap_endian) 383*b411b363SPhilipp Reisner *bm = lel_to_cpu(*bm); 384*b411b363SPhilipp Reisner #endif 385*b411b363SPhilipp Reisner bits += hweight_long(*bm++); 386*b411b363SPhilipp Reisner } 387*b411b363SPhilipp Reisner __bm_unmap(p_addr, KM_USER0); 388*b411b363SPhilipp Reisner offset += do_now; 389*b411b363SPhilipp Reisner cond_resched(); 390*b411b363SPhilipp Reisner } 
391*b411b363SPhilipp Reisner 392*b411b363SPhilipp Reisner return bits; 393*b411b363SPhilipp Reisner } 394*b411b363SPhilipp Reisner 395*b411b363SPhilipp Reisner static unsigned long bm_count_bits(struct drbd_bitmap *b) 396*b411b363SPhilipp Reisner { 397*b411b363SPhilipp Reisner return __bm_count_bits(b, 0); 398*b411b363SPhilipp Reisner } 399*b411b363SPhilipp Reisner 400*b411b363SPhilipp Reisner static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) 401*b411b363SPhilipp Reisner { 402*b411b363SPhilipp Reisner return __bm_count_bits(b, 1); 403*b411b363SPhilipp Reisner } 404*b411b363SPhilipp Reisner 405*b411b363SPhilipp Reisner /* offset and len in long words.*/ 406*b411b363SPhilipp Reisner static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) 407*b411b363SPhilipp Reisner { 408*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 409*b411b363SPhilipp Reisner size_t do_now, end; 410*b411b363SPhilipp Reisner 411*b411b363SPhilipp Reisner #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) 412*b411b363SPhilipp Reisner 413*b411b363SPhilipp Reisner end = offset + len; 414*b411b363SPhilipp Reisner 415*b411b363SPhilipp Reisner if (end > b->bm_words) { 416*b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); 417*b411b363SPhilipp Reisner return; 418*b411b363SPhilipp Reisner } 419*b411b363SPhilipp Reisner 420*b411b363SPhilipp Reisner while (offset < end) { 421*b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; 422*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, offset); 423*b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 424*b411b363SPhilipp Reisner if (bm+do_now > p_addr + LWPP) { 425*b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", 426*b411b363SPhilipp Reisner p_addr, bm, (int)do_now); 427*b411b363SPhilipp Reisner break; /* breaks to after catch_oob_access_end() only! 
*/ 428*b411b363SPhilipp Reisner } 429*b411b363SPhilipp Reisner memset(bm, c, do_now * sizeof(long)); 430*b411b363SPhilipp Reisner bm_unmap(p_addr); 431*b411b363SPhilipp Reisner offset += do_now; 432*b411b363SPhilipp Reisner } 433*b411b363SPhilipp Reisner } 434*b411b363SPhilipp Reisner 435*b411b363SPhilipp Reisner /* 436*b411b363SPhilipp Reisner * make sure the bitmap has enough room for the attached storage, 437*b411b363SPhilipp Reisner * if necessary, resize. 438*b411b363SPhilipp Reisner * called whenever we may have changed the device size. 439*b411b363SPhilipp Reisner * returns -ENOMEM if we could not allocate enough memory, 0 on success. 440*b411b363SPhilipp Reisner * In case this is actually a resize, we copy the old bitmap into the new one. 441*b411b363SPhilipp Reisner * Otherwise, the bitmap is initialized to all bits set. 442*b411b363SPhilipp Reisner */ 443*b411b363SPhilipp Reisner int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) 444*b411b363SPhilipp Reisner { 445*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 446*b411b363SPhilipp Reisner unsigned long bits, words, owords, obits, *p_addr, *bm; 447*b411b363SPhilipp Reisner unsigned long want, have, onpages; /* number of pages */ 448*b411b363SPhilipp Reisner struct page **npages, **opages = NULL; 449*b411b363SPhilipp Reisner int err = 0, growing; 450*b411b363SPhilipp Reisner int opages_vmalloced; 451*b411b363SPhilipp Reisner 452*b411b363SPhilipp Reisner ERR_IF(!b) return -ENOMEM; 453*b411b363SPhilipp Reisner 454*b411b363SPhilipp Reisner drbd_bm_lock(mdev, "resize"); 455*b411b363SPhilipp Reisner 456*b411b363SPhilipp Reisner dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", 457*b411b363SPhilipp Reisner (unsigned long long)capacity); 458*b411b363SPhilipp Reisner 459*b411b363SPhilipp Reisner if (capacity == b->bm_dev_capacity) 460*b411b363SPhilipp Reisner goto out; 461*b411b363SPhilipp Reisner 462*b411b363SPhilipp Reisner opages_vmalloced = test_bit(BM_P_VMALLOCED, 
&b->bm_flags); 463*b411b363SPhilipp Reisner 464*b411b363SPhilipp Reisner if (capacity == 0) { 465*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 466*b411b363SPhilipp Reisner opages = b->bm_pages; 467*b411b363SPhilipp Reisner onpages = b->bm_number_of_pages; 468*b411b363SPhilipp Reisner owords = b->bm_words; 469*b411b363SPhilipp Reisner b->bm_pages = NULL; 470*b411b363SPhilipp Reisner b->bm_number_of_pages = 471*b411b363SPhilipp Reisner b->bm_set = 472*b411b363SPhilipp Reisner b->bm_bits = 473*b411b363SPhilipp Reisner b->bm_words = 474*b411b363SPhilipp Reisner b->bm_dev_capacity = 0; 475*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 476*b411b363SPhilipp Reisner bm_free_pages(opages, onpages); 477*b411b363SPhilipp Reisner bm_vk_free(opages, opages_vmalloced); 478*b411b363SPhilipp Reisner goto out; 479*b411b363SPhilipp Reisner } 480*b411b363SPhilipp Reisner bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); 481*b411b363SPhilipp Reisner 482*b411b363SPhilipp Reisner /* if we would use 483*b411b363SPhilipp Reisner words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL; 484*b411b363SPhilipp Reisner a 32bit host could present the wrong number of words 485*b411b363SPhilipp Reisner to a 64bit host. 
486*b411b363SPhilipp Reisner */ 487*b411b363SPhilipp Reisner words = ALIGN(bits, 64) >> LN2_BPL; 488*b411b363SPhilipp Reisner 489*b411b363SPhilipp Reisner if (get_ldev(mdev)) { 490*b411b363SPhilipp Reisner D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); 491*b411b363SPhilipp Reisner put_ldev(mdev); 492*b411b363SPhilipp Reisner } 493*b411b363SPhilipp Reisner 494*b411b363SPhilipp Reisner /* one extra long to catch off by one errors */ 495*b411b363SPhilipp Reisner want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; 496*b411b363SPhilipp Reisner have = b->bm_number_of_pages; 497*b411b363SPhilipp Reisner if (want == have) { 498*b411b363SPhilipp Reisner D_ASSERT(b->bm_pages != NULL); 499*b411b363SPhilipp Reisner npages = b->bm_pages; 500*b411b363SPhilipp Reisner } else { 501*b411b363SPhilipp Reisner if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) 502*b411b363SPhilipp Reisner npages = NULL; 503*b411b363SPhilipp Reisner else 504*b411b363SPhilipp Reisner npages = bm_realloc_pages(b, want); 505*b411b363SPhilipp Reisner } 506*b411b363SPhilipp Reisner 507*b411b363SPhilipp Reisner if (!npages) { 508*b411b363SPhilipp Reisner err = -ENOMEM; 509*b411b363SPhilipp Reisner goto out; 510*b411b363SPhilipp Reisner } 511*b411b363SPhilipp Reisner 512*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 513*b411b363SPhilipp Reisner opages = b->bm_pages; 514*b411b363SPhilipp Reisner owords = b->bm_words; 515*b411b363SPhilipp Reisner obits = b->bm_bits; 516*b411b363SPhilipp Reisner 517*b411b363SPhilipp Reisner growing = bits > obits; 518*b411b363SPhilipp Reisner if (opages) 519*b411b363SPhilipp Reisner bm_set_surplus(b); 520*b411b363SPhilipp Reisner 521*b411b363SPhilipp Reisner b->bm_pages = npages; 522*b411b363SPhilipp Reisner b->bm_number_of_pages = want; 523*b411b363SPhilipp Reisner b->bm_bits = bits; 524*b411b363SPhilipp Reisner b->bm_words = words; 525*b411b363SPhilipp Reisner b->bm_dev_capacity = capacity; 526*b411b363SPhilipp Reisner 
527*b411b363SPhilipp Reisner if (growing) { 528*b411b363SPhilipp Reisner bm_memset(b, owords, 0xff, words-owords); 529*b411b363SPhilipp Reisner b->bm_set += bits - obits; 530*b411b363SPhilipp Reisner } 531*b411b363SPhilipp Reisner 532*b411b363SPhilipp Reisner if (want < have) { 533*b411b363SPhilipp Reisner /* implicit: (opages != NULL) && (opages != npages) */ 534*b411b363SPhilipp Reisner bm_free_pages(opages + want, have - want); 535*b411b363SPhilipp Reisner } 536*b411b363SPhilipp Reisner 537*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, words); 538*b411b363SPhilipp Reisner bm = p_addr + MLPP(words); 539*b411b363SPhilipp Reisner *bm = DRBD_MAGIC; 540*b411b363SPhilipp Reisner bm_unmap(p_addr); 541*b411b363SPhilipp Reisner 542*b411b363SPhilipp Reisner (void)bm_clear_surplus(b); 543*b411b363SPhilipp Reisner 544*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 545*b411b363SPhilipp Reisner if (opages != npages) 546*b411b363SPhilipp Reisner bm_vk_free(opages, opages_vmalloced); 547*b411b363SPhilipp Reisner if (!growing) 548*b411b363SPhilipp Reisner b->bm_set = bm_count_bits(b); 549*b411b363SPhilipp Reisner dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); 550*b411b363SPhilipp Reisner 551*b411b363SPhilipp Reisner out: 552*b411b363SPhilipp Reisner drbd_bm_unlock(mdev); 553*b411b363SPhilipp Reisner return err; 554*b411b363SPhilipp Reisner } 555*b411b363SPhilipp Reisner 556*b411b363SPhilipp Reisner /* inherently racy: 557*b411b363SPhilipp Reisner * if not protected by other means, return value may be out of date when 558*b411b363SPhilipp Reisner * leaving this function... 559*b411b363SPhilipp Reisner * we still need to lock it, since it is important that this returns 560*b411b363SPhilipp Reisner * bm_set == 0 precisely. 561*b411b363SPhilipp Reisner * 562*b411b363SPhilipp Reisner * maybe bm_set should be atomic_t ? 
563*b411b363SPhilipp Reisner */ 564*b411b363SPhilipp Reisner static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) 565*b411b363SPhilipp Reisner { 566*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 567*b411b363SPhilipp Reisner unsigned long s; 568*b411b363SPhilipp Reisner unsigned long flags; 569*b411b363SPhilipp Reisner 570*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 571*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 572*b411b363SPhilipp Reisner 573*b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 574*b411b363SPhilipp Reisner s = b->bm_set; 575*b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 576*b411b363SPhilipp Reisner 577*b411b363SPhilipp Reisner return s; 578*b411b363SPhilipp Reisner } 579*b411b363SPhilipp Reisner 580*b411b363SPhilipp Reisner unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) 581*b411b363SPhilipp Reisner { 582*b411b363SPhilipp Reisner unsigned long s; 583*b411b363SPhilipp Reisner /* if I don't have a disk, I don't know about out-of-sync status */ 584*b411b363SPhilipp Reisner if (!get_ldev_if_state(mdev, D_NEGOTIATING)) 585*b411b363SPhilipp Reisner return 0; 586*b411b363SPhilipp Reisner s = _drbd_bm_total_weight(mdev); 587*b411b363SPhilipp Reisner put_ldev(mdev); 588*b411b363SPhilipp Reisner return s; 589*b411b363SPhilipp Reisner } 590*b411b363SPhilipp Reisner 591*b411b363SPhilipp Reisner size_t drbd_bm_words(struct drbd_conf *mdev) 592*b411b363SPhilipp Reisner { 593*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 594*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 595*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 596*b411b363SPhilipp Reisner 597*b411b363SPhilipp Reisner return b->bm_words; 598*b411b363SPhilipp Reisner } 599*b411b363SPhilipp Reisner 600*b411b363SPhilipp Reisner unsigned long drbd_bm_bits(struct drbd_conf *mdev) 601*b411b363SPhilipp Reisner { 602*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 
603*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 604*b411b363SPhilipp Reisner 605*b411b363SPhilipp Reisner return b->bm_bits; 606*b411b363SPhilipp Reisner } 607*b411b363SPhilipp Reisner 608*b411b363SPhilipp Reisner /* merge number words from buffer into the bitmap starting at offset. 609*b411b363SPhilipp Reisner * buffer[i] is expected to be little endian unsigned long. 610*b411b363SPhilipp Reisner * bitmap must be locked by drbd_bm_lock. 611*b411b363SPhilipp Reisner * currently only used from receive_bitmap. 612*b411b363SPhilipp Reisner */ 613*b411b363SPhilipp Reisner void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, 614*b411b363SPhilipp Reisner unsigned long *buffer) 615*b411b363SPhilipp Reisner { 616*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 617*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 618*b411b363SPhilipp Reisner unsigned long word, bits; 619*b411b363SPhilipp Reisner size_t end, do_now; 620*b411b363SPhilipp Reisner 621*b411b363SPhilipp Reisner end = offset + number; 622*b411b363SPhilipp Reisner 623*b411b363SPhilipp Reisner ERR_IF(!b) return; 624*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 625*b411b363SPhilipp Reisner if (number == 0) 626*b411b363SPhilipp Reisner return; 627*b411b363SPhilipp Reisner WARN_ON(offset >= b->bm_words); 628*b411b363SPhilipp Reisner WARN_ON(end > b->bm_words); 629*b411b363SPhilipp Reisner 630*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 631*b411b363SPhilipp Reisner while (offset < end) { 632*b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 633*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, offset); 634*b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 635*b411b363SPhilipp Reisner offset += do_now; 636*b411b363SPhilipp Reisner while (do_now--) { 637*b411b363SPhilipp Reisner bits = hweight_long(*bm); 638*b411b363SPhilipp Reisner word = *bm | lel_to_cpu(*buffer++); 639*b411b363SPhilipp Reisner *bm++ = word; 
640*b411b363SPhilipp Reisner b->bm_set += hweight_long(word) - bits; 641*b411b363SPhilipp Reisner } 642*b411b363SPhilipp Reisner bm_unmap(p_addr); 643*b411b363SPhilipp Reisner } 644*b411b363SPhilipp Reisner /* with 32bit <-> 64bit cross-platform connect 645*b411b363SPhilipp Reisner * this is only correct for current usage, 646*b411b363SPhilipp Reisner * where we _know_ that we are 64 bit aligned, 647*b411b363SPhilipp Reisner * and know that this function is used in this way, too... 648*b411b363SPhilipp Reisner */ 649*b411b363SPhilipp Reisner if (end == b->bm_words) 650*b411b363SPhilipp Reisner b->bm_set -= bm_clear_surplus(b); 651*b411b363SPhilipp Reisner 652*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 653*b411b363SPhilipp Reisner } 654*b411b363SPhilipp Reisner 655*b411b363SPhilipp Reisner /* copy number words from the bitmap starting at offset into the buffer. 656*b411b363SPhilipp Reisner * buffer[i] will be little endian unsigned long. 657*b411b363SPhilipp Reisner */ 658*b411b363SPhilipp Reisner void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, 659*b411b363SPhilipp Reisner unsigned long *buffer) 660*b411b363SPhilipp Reisner { 661*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 662*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 663*b411b363SPhilipp Reisner size_t end, do_now; 664*b411b363SPhilipp Reisner 665*b411b363SPhilipp Reisner end = offset + number; 666*b411b363SPhilipp Reisner 667*b411b363SPhilipp Reisner ERR_IF(!b) return; 668*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 669*b411b363SPhilipp Reisner 670*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 671*b411b363SPhilipp Reisner if ((offset >= b->bm_words) || 672*b411b363SPhilipp Reisner (end > b->bm_words) || 673*b411b363SPhilipp Reisner (number <= 0)) 674*b411b363SPhilipp Reisner dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n", 675*b411b363SPhilipp Reisner (unsigned long) offset, 676*b411b363SPhilipp Reisner (unsigned long) 
number, 677*b411b363SPhilipp Reisner (unsigned long) b->bm_words); 678*b411b363SPhilipp Reisner else { 679*b411b363SPhilipp Reisner while (offset < end) { 680*b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 681*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, offset); 682*b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 683*b411b363SPhilipp Reisner offset += do_now; 684*b411b363SPhilipp Reisner while (do_now--) 685*b411b363SPhilipp Reisner *buffer++ = cpu_to_lel(*bm++); 686*b411b363SPhilipp Reisner bm_unmap(p_addr); 687*b411b363SPhilipp Reisner } 688*b411b363SPhilipp Reisner } 689*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 690*b411b363SPhilipp Reisner } 691*b411b363SPhilipp Reisner 692*b411b363SPhilipp Reisner /* set all bits in the bitmap */ 693*b411b363SPhilipp Reisner void drbd_bm_set_all(struct drbd_conf *mdev) 694*b411b363SPhilipp Reisner { 695*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 696*b411b363SPhilipp Reisner ERR_IF(!b) return; 697*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 698*b411b363SPhilipp Reisner 699*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 700*b411b363SPhilipp Reisner bm_memset(b, 0, 0xff, b->bm_words); 701*b411b363SPhilipp Reisner (void)bm_clear_surplus(b); 702*b411b363SPhilipp Reisner b->bm_set = b->bm_bits; 703*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 704*b411b363SPhilipp Reisner } 705*b411b363SPhilipp Reisner 706*b411b363SPhilipp Reisner /* clear all bits in the bitmap */ 707*b411b363SPhilipp Reisner void drbd_bm_clear_all(struct drbd_conf *mdev) 708*b411b363SPhilipp Reisner { 709*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 710*b411b363SPhilipp Reisner ERR_IF(!b) return; 711*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 712*b411b363SPhilipp Reisner 713*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 714*b411b363SPhilipp Reisner bm_memset(b, 0, 0, b->bm_words); 715*b411b363SPhilipp Reisner b->bm_set = 
0; 716*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 717*b411b363SPhilipp Reisner } 718*b411b363SPhilipp Reisner 719*b411b363SPhilipp Reisner static void bm_async_io_complete(struct bio *bio, int error) 720*b411b363SPhilipp Reisner { 721*b411b363SPhilipp Reisner struct drbd_bitmap *b = bio->bi_private; 722*b411b363SPhilipp Reisner int uptodate = bio_flagged(bio, BIO_UPTODATE); 723*b411b363SPhilipp Reisner 724*b411b363SPhilipp Reisner 725*b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 726*b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 727*b411b363SPhilipp Reisner * but do not return any error?! 728*b411b363SPhilipp Reisner * do we want to WARN() on this? */ 729*b411b363SPhilipp Reisner if (!error && !uptodate) 730*b411b363SPhilipp Reisner error = -EIO; 731*b411b363SPhilipp Reisner 732*b411b363SPhilipp Reisner if (error) { 733*b411b363SPhilipp Reisner /* doh. what now? 734*b411b363SPhilipp Reisner * for now, set all bits, and flag MD_IO_ERROR */ 735*b411b363SPhilipp Reisner __set_bit(BM_MD_IO_ERROR, &b->bm_flags); 736*b411b363SPhilipp Reisner } 737*b411b363SPhilipp Reisner if (atomic_dec_and_test(&b->bm_async_io)) 738*b411b363SPhilipp Reisner wake_up(&b->bm_io_wait); 739*b411b363SPhilipp Reisner 740*b411b363SPhilipp Reisner bio_put(bio); 741*b411b363SPhilipp Reisner } 742*b411b363SPhilipp Reisner 743*b411b363SPhilipp Reisner static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) 744*b411b363SPhilipp Reisner { 745*b411b363SPhilipp Reisner /* we are process context. 
we always get a bio */ 746*b411b363SPhilipp Reisner struct bio *bio = bio_alloc(GFP_KERNEL, 1); 747*b411b363SPhilipp Reisner unsigned int len; 748*b411b363SPhilipp Reisner sector_t on_disk_sector = 749*b411b363SPhilipp Reisner mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; 750*b411b363SPhilipp Reisner on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); 751*b411b363SPhilipp Reisner 752*b411b363SPhilipp Reisner /* this might happen with very small 753*b411b363SPhilipp Reisner * flexible external meta data device */ 754*b411b363SPhilipp Reisner len = min_t(unsigned int, PAGE_SIZE, 755*b411b363SPhilipp Reisner (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); 756*b411b363SPhilipp Reisner 757*b411b363SPhilipp Reisner bio->bi_bdev = mdev->ldev->md_bdev; 758*b411b363SPhilipp Reisner bio->bi_sector = on_disk_sector; 759*b411b363SPhilipp Reisner bio_add_page(bio, b->bm_pages[page_nr], len, 0); 760*b411b363SPhilipp Reisner bio->bi_private = b; 761*b411b363SPhilipp Reisner bio->bi_end_io = bm_async_io_complete; 762*b411b363SPhilipp Reisner 763*b411b363SPhilipp Reisner if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { 764*b411b363SPhilipp Reisner bio->bi_rw |= rw; 765*b411b363SPhilipp Reisner bio_endio(bio, -EIO); 766*b411b363SPhilipp Reisner } else { 767*b411b363SPhilipp Reisner submit_bio(rw, bio); 768*b411b363SPhilipp Reisner } 769*b411b363SPhilipp Reisner } 770*b411b363SPhilipp Reisner 771*b411b363SPhilipp Reisner # if defined(__LITTLE_ENDIAN) 772*b411b363SPhilipp Reisner /* nothing to do, on disk == in memory */ 773*b411b363SPhilipp Reisner # define bm_cpu_to_lel(x) ((void)0) 774*b411b363SPhilipp Reisner # else 775*b411b363SPhilipp Reisner void bm_cpu_to_lel(struct drbd_bitmap *b) 776*b411b363SPhilipp Reisner { 777*b411b363SPhilipp Reisner /* need to cpu_to_lel all the pages ... 
778*b411b363SPhilipp Reisner * this may be optimized by using 779*b411b363SPhilipp Reisner * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; 780*b411b363SPhilipp Reisner * the following is still not optimal, but better than nothing */ 781*b411b363SPhilipp Reisner unsigned int i; 782*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 783*b411b363SPhilipp Reisner if (b->bm_set == 0) { 784*b411b363SPhilipp Reisner /* no page at all; avoid swap if all is 0 */ 785*b411b363SPhilipp Reisner i = b->bm_number_of_pages; 786*b411b363SPhilipp Reisner } else if (b->bm_set == b->bm_bits) { 787*b411b363SPhilipp Reisner /* only the last page */ 788*b411b363SPhilipp Reisner i = b->bm_number_of_pages - 1; 789*b411b363SPhilipp Reisner } else { 790*b411b363SPhilipp Reisner /* all pages */ 791*b411b363SPhilipp Reisner i = 0; 792*b411b363SPhilipp Reisner } 793*b411b363SPhilipp Reisner for (; i < b->bm_number_of_pages; i++) { 794*b411b363SPhilipp Reisner p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); 795*b411b363SPhilipp Reisner for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) 796*b411b363SPhilipp Reisner *bm = cpu_to_lel(*bm); 797*b411b363SPhilipp Reisner kunmap_atomic(p_addr, KM_USER0); 798*b411b363SPhilipp Reisner } 799*b411b363SPhilipp Reisner } 800*b411b363SPhilipp Reisner # endif 801*b411b363SPhilipp Reisner /* lel_to_cpu == cpu_to_lel */ 802*b411b363SPhilipp Reisner # define bm_lel_to_cpu(x) bm_cpu_to_lel(x) 803*b411b363SPhilipp Reisner 804*b411b363SPhilipp Reisner /* 805*b411b363SPhilipp Reisner * bm_rw: read/write the whole bitmap from/to its on disk location. 
806*b411b363SPhilipp Reisner */ 807*b411b363SPhilipp Reisner static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) 808*b411b363SPhilipp Reisner { 809*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 810*b411b363SPhilipp Reisner /* sector_t sector; */ 811*b411b363SPhilipp Reisner int bm_words, num_pages, i; 812*b411b363SPhilipp Reisner unsigned long now; 813*b411b363SPhilipp Reisner char ppb[10]; 814*b411b363SPhilipp Reisner int err = 0; 815*b411b363SPhilipp Reisner 816*b411b363SPhilipp Reisner WARN_ON(!bm_is_locked(b)); 817*b411b363SPhilipp Reisner 818*b411b363SPhilipp Reisner /* no spinlock here, the drbd_bm_lock should be enough! */ 819*b411b363SPhilipp Reisner 820*b411b363SPhilipp Reisner bm_words = drbd_bm_words(mdev); 821*b411b363SPhilipp Reisner num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; 822*b411b363SPhilipp Reisner 823*b411b363SPhilipp Reisner /* on disk bitmap is little endian */ 824*b411b363SPhilipp Reisner if (rw == WRITE) 825*b411b363SPhilipp Reisner bm_cpu_to_lel(b); 826*b411b363SPhilipp Reisner 827*b411b363SPhilipp Reisner now = jiffies; 828*b411b363SPhilipp Reisner atomic_set(&b->bm_async_io, num_pages); 829*b411b363SPhilipp Reisner __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); 830*b411b363SPhilipp Reisner 831*b411b363SPhilipp Reisner /* let the layers below us try to merge these bios... 
*/ 832*b411b363SPhilipp Reisner for (i = 0; i < num_pages; i++) 833*b411b363SPhilipp Reisner bm_page_io_async(mdev, b, i, rw); 834*b411b363SPhilipp Reisner 835*b411b363SPhilipp Reisner drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); 836*b411b363SPhilipp Reisner wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); 837*b411b363SPhilipp Reisner 838*b411b363SPhilipp Reisner if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { 839*b411b363SPhilipp Reisner dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); 840*b411b363SPhilipp Reisner drbd_chk_io_error(mdev, 1, TRUE); 841*b411b363SPhilipp Reisner err = -EIO; 842*b411b363SPhilipp Reisner } 843*b411b363SPhilipp Reisner 844*b411b363SPhilipp Reisner now = jiffies; 845*b411b363SPhilipp Reisner if (rw == WRITE) { 846*b411b363SPhilipp Reisner /* swap back endianness */ 847*b411b363SPhilipp Reisner bm_lel_to_cpu(b); 848*b411b363SPhilipp Reisner /* flush bitmap to stable storage */ 849*b411b363SPhilipp Reisner drbd_md_flush(mdev); 850*b411b363SPhilipp Reisner } else /* rw == READ */ { 851*b411b363SPhilipp Reisner /* just read, if necessary adjust endianness */ 852*b411b363SPhilipp Reisner b->bm_set = bm_count_bits_swap_endian(b); 853*b411b363SPhilipp Reisner dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", 854*b411b363SPhilipp Reisner jiffies - now); 855*b411b363SPhilipp Reisner } 856*b411b363SPhilipp Reisner now = b->bm_set; 857*b411b363SPhilipp Reisner 858*b411b363SPhilipp Reisner dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", 859*b411b363SPhilipp Reisner ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); 860*b411b363SPhilipp Reisner 861*b411b363SPhilipp Reisner return err; 862*b411b363SPhilipp Reisner } 863*b411b363SPhilipp Reisner 864*b411b363SPhilipp Reisner /** 865*b411b363SPhilipp Reisner * drbd_bm_read() - Read the whole bitmap from its on disk location. 866*b411b363SPhilipp Reisner * @mdev: DRBD device. 
867*b411b363SPhilipp Reisner */ 868*b411b363SPhilipp Reisner int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) 869*b411b363SPhilipp Reisner { 870*b411b363SPhilipp Reisner return bm_rw(mdev, READ); 871*b411b363SPhilipp Reisner } 872*b411b363SPhilipp Reisner 873*b411b363SPhilipp Reisner /** 874*b411b363SPhilipp Reisner * drbd_bm_write() - Write the whole bitmap to its on disk location. 875*b411b363SPhilipp Reisner * @mdev: DRBD device. 876*b411b363SPhilipp Reisner */ 877*b411b363SPhilipp Reisner int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) 878*b411b363SPhilipp Reisner { 879*b411b363SPhilipp Reisner return bm_rw(mdev, WRITE); 880*b411b363SPhilipp Reisner } 881*b411b363SPhilipp Reisner 882*b411b363SPhilipp Reisner /** 883*b411b363SPhilipp Reisner * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap 884*b411b363SPhilipp Reisner * @mdev: DRBD device. 885*b411b363SPhilipp Reisner * @enr: Extent number in the resync lru (happens to be sector offset) 886*b411b363SPhilipp Reisner * 887*b411b363SPhilipp Reisner * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered 888*b411b363SPhilipp Reisner * by a single sector write. Therefore enr == sector offset from the 889*b411b363SPhilipp Reisner * start of the bitmap. 
890*b411b363SPhilipp Reisner */ 891*b411b363SPhilipp Reisner int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) 892*b411b363SPhilipp Reisner { 893*b411b363SPhilipp Reisner sector_t on_disk_sector = enr + mdev->ldev->md.md_offset 894*b411b363SPhilipp Reisner + mdev->ldev->md.bm_offset; 895*b411b363SPhilipp Reisner int bm_words, num_words, offset; 896*b411b363SPhilipp Reisner int err = 0; 897*b411b363SPhilipp Reisner 898*b411b363SPhilipp Reisner mutex_lock(&mdev->md_io_mutex); 899*b411b363SPhilipp Reisner bm_words = drbd_bm_words(mdev); 900*b411b363SPhilipp Reisner offset = S2W(enr); /* word offset into bitmap */ 901*b411b363SPhilipp Reisner num_words = min(S2W(1), bm_words - offset); 902*b411b363SPhilipp Reisner if (num_words < S2W(1)) 903*b411b363SPhilipp Reisner memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); 904*b411b363SPhilipp Reisner drbd_bm_get_lel(mdev, offset, num_words, 905*b411b363SPhilipp Reisner page_address(mdev->md_io_page)); 906*b411b363SPhilipp Reisner if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { 907*b411b363SPhilipp Reisner int i; 908*b411b363SPhilipp Reisner err = -EIO; 909*b411b363SPhilipp Reisner dev_err(DEV, "IO ERROR writing bitmap sector %lu " 910*b411b363SPhilipp Reisner "(meta-disk sector %llus)\n", 911*b411b363SPhilipp Reisner enr, (unsigned long long)on_disk_sector); 912*b411b363SPhilipp Reisner drbd_chk_io_error(mdev, 1, TRUE); 913*b411b363SPhilipp Reisner for (i = 0; i < AL_EXT_PER_BM_SECT; i++) 914*b411b363SPhilipp Reisner drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); 915*b411b363SPhilipp Reisner } 916*b411b363SPhilipp Reisner mdev->bm_writ_cnt++; 917*b411b363SPhilipp Reisner mutex_unlock(&mdev->md_io_mutex); 918*b411b363SPhilipp Reisner return err; 919*b411b363SPhilipp Reisner } 920*b411b363SPhilipp Reisner 921*b411b363SPhilipp Reisner /* NOTE 922*b411b363SPhilipp Reisner * find_first_bit returns int, we return unsigned long. 
923*b411b363SPhilipp Reisner * should not make much difference anyways, but ... 924*b411b363SPhilipp Reisner * 925*b411b363SPhilipp Reisner * this returns a bit number, NOT a sector! 926*b411b363SPhilipp Reisner */ 927*b411b363SPhilipp Reisner #define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) 928*b411b363SPhilipp Reisner static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, 929*b411b363SPhilipp Reisner const int find_zero_bit, const enum km_type km) 930*b411b363SPhilipp Reisner { 931*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 932*b411b363SPhilipp Reisner unsigned long i = -1UL; 933*b411b363SPhilipp Reisner unsigned long *p_addr; 934*b411b363SPhilipp Reisner unsigned long bit_offset; /* bit offset of the mapped page. */ 935*b411b363SPhilipp Reisner 936*b411b363SPhilipp Reisner if (bm_fo > b->bm_bits) { 937*b411b363SPhilipp Reisner dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 938*b411b363SPhilipp Reisner } else { 939*b411b363SPhilipp Reisner while (bm_fo < b->bm_bits) { 940*b411b363SPhilipp Reisner unsigned long offset; 941*b411b363SPhilipp Reisner bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ 942*b411b363SPhilipp Reisner offset = bit_offset >> LN2_BPL; /* word offset of the page */ 943*b411b363SPhilipp Reisner p_addr = __bm_map_paddr(b, offset, km); 944*b411b363SPhilipp Reisner 945*b411b363SPhilipp Reisner if (find_zero_bit) 946*b411b363SPhilipp Reisner i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 947*b411b363SPhilipp Reisner else 948*b411b363SPhilipp Reisner i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 949*b411b363SPhilipp Reisner 950*b411b363SPhilipp Reisner __bm_unmap(p_addr, km); 951*b411b363SPhilipp Reisner if (i < PAGE_SIZE*8) { 952*b411b363SPhilipp Reisner i = bit_offset + i; 953*b411b363SPhilipp Reisner if (i >= b->bm_bits) 954*b411b363SPhilipp Reisner break; 955*b411b363SPhilipp Reisner goto found; 956*b411b363SPhilipp Reisner } 957*b411b363SPhilipp 
Reisner bm_fo = bit_offset + PAGE_SIZE*8; 958*b411b363SPhilipp Reisner } 959*b411b363SPhilipp Reisner i = -1UL; 960*b411b363SPhilipp Reisner } 961*b411b363SPhilipp Reisner found: 962*b411b363SPhilipp Reisner return i; 963*b411b363SPhilipp Reisner } 964*b411b363SPhilipp Reisner 965*b411b363SPhilipp Reisner static unsigned long bm_find_next(struct drbd_conf *mdev, 966*b411b363SPhilipp Reisner unsigned long bm_fo, const int find_zero_bit) 967*b411b363SPhilipp Reisner { 968*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 969*b411b363SPhilipp Reisner unsigned long i = -1UL; 970*b411b363SPhilipp Reisner 971*b411b363SPhilipp Reisner ERR_IF(!b) return i; 972*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return i; 973*b411b363SPhilipp Reisner 974*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 975*b411b363SPhilipp Reisner if (bm_is_locked(b)) 976*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 977*b411b363SPhilipp Reisner 978*b411b363SPhilipp Reisner i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); 979*b411b363SPhilipp Reisner 980*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 981*b411b363SPhilipp Reisner return i; 982*b411b363SPhilipp Reisner } 983*b411b363SPhilipp Reisner 984*b411b363SPhilipp Reisner unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 985*b411b363SPhilipp Reisner { 986*b411b363SPhilipp Reisner return bm_find_next(mdev, bm_fo, 0); 987*b411b363SPhilipp Reisner } 988*b411b363SPhilipp Reisner 989*b411b363SPhilipp Reisner #if 0 990*b411b363SPhilipp Reisner /* not yet needed for anything. */ 991*b411b363SPhilipp Reisner unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 992*b411b363SPhilipp Reisner { 993*b411b363SPhilipp Reisner return bm_find_next(mdev, bm_fo, 1); 994*b411b363SPhilipp Reisner } 995*b411b363SPhilipp Reisner #endif 996*b411b363SPhilipp Reisner 997*b411b363SPhilipp Reisner /* does not spin_lock_irqsave. 
998*b411b363SPhilipp Reisner * you must take drbd_bm_lock() first */ 999*b411b363SPhilipp Reisner unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 1000*b411b363SPhilipp Reisner { 1001*b411b363SPhilipp Reisner /* WARN_ON(!bm_is_locked(mdev)); */ 1002*b411b363SPhilipp Reisner return __bm_find_next(mdev, bm_fo, 0, KM_USER1); 1003*b411b363SPhilipp Reisner } 1004*b411b363SPhilipp Reisner 1005*b411b363SPhilipp Reisner unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 1006*b411b363SPhilipp Reisner { 1007*b411b363SPhilipp Reisner /* WARN_ON(!bm_is_locked(mdev)); */ 1008*b411b363SPhilipp Reisner return __bm_find_next(mdev, bm_fo, 1, KM_USER1); 1009*b411b363SPhilipp Reisner } 1010*b411b363SPhilipp Reisner 1011*b411b363SPhilipp Reisner /* returns number of bits actually changed. 1012*b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1013*b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1014*b411b363SPhilipp Reisner * wants bitnr, not sector. 1015*b411b363SPhilipp Reisner * expected to be called for only a few bits (e - s about BITS_PER_LONG). 1016*b411b363SPhilipp Reisner * Must hold bitmap lock already. 
*/
int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
	unsigned long e, int val, const enum km_type km)
{
	struct drbd_bitmap *b = mdev->bitmap;
	unsigned long *mapped = NULL;		/* currently mapped page, if any */
	unsigned long mapped_page = -1UL;	/* its page number */
	unsigned long bit;
	int delta = 0;

	/* an end beyond the bitmap is reported and clamped to the last bit */
	if (e >= b->bm_bits) {
		dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
				s, e, b->bm_bits);
		e = b->bm_bits ? b->bm_bits -1 : 0;
	}

	for (bit = s; bit <= e; bit++) {
		const unsigned long page_nr = bit >> (PAGE_SHIFT + 3);

		/* remap only when we move on to a different page */
		if (page_nr != mapped_page) {
			if (mapped)
				__bm_unmap(mapped, km);
			mapped = __bm_map_paddr(b, bit >> LN2_BPL, km);
			mapped_page = page_nr;
		}

		/* count only the bits that actually change state */
		if (val) {
			if (!__test_and_set_bit(bit & BPP_MASK, mapped))
				delta++;
		} else {
			if (__test_and_clear_bit(bit & BPP_MASK, mapped))
				delta--;
		}
	}

	if (mapped)
		__bm_unmap(mapped, km);

	/* delta is negative when bits were cleared */
	b->bm_set += delta;
	return delta;
}

/* returns number of bits actually changed.
1052*b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1053*b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1054*b411b363SPhilipp Reisner * wants bitnr, not sector */ 1055*b411b363SPhilipp Reisner int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 1056*b411b363SPhilipp Reisner const unsigned long e, int val) 1057*b411b363SPhilipp Reisner { 1058*b411b363SPhilipp Reisner unsigned long flags; 1059*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1060*b411b363SPhilipp Reisner int c = 0; 1061*b411b363SPhilipp Reisner 1062*b411b363SPhilipp Reisner ERR_IF(!b) return 1; 1063*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1064*b411b363SPhilipp Reisner 1065*b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1066*b411b363SPhilipp Reisner if (bm_is_locked(b)) 1067*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1068*b411b363SPhilipp Reisner 1069*b411b363SPhilipp Reisner c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); 1070*b411b363SPhilipp Reisner 1071*b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1072*b411b363SPhilipp Reisner return c; 1073*b411b363SPhilipp Reisner } 1074*b411b363SPhilipp Reisner 1075*b411b363SPhilipp Reisner /* returns number of bits changed 0 -> 1 */ 1076*b411b363SPhilipp Reisner int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1077*b411b363SPhilipp Reisner { 1078*b411b363SPhilipp Reisner return bm_change_bits_to(mdev, s, e, 1); 1079*b411b363SPhilipp Reisner } 1080*b411b363SPhilipp Reisner 1081*b411b363SPhilipp Reisner /* returns number of bits changed 1 -> 0 */ 1082*b411b363SPhilipp Reisner int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1083*b411b363SPhilipp Reisner { 1084*b411b363SPhilipp Reisner return -bm_change_bits_to(mdev, s, e, 0); 1085*b411b363SPhilipp Reisner } 1086*b411b363SPhilipp Reisner 1087*b411b363SPhilipp Reisner 
/* sets all bits in full words, 1088*b411b363SPhilipp Reisner * from first_word up to, but not including, last_word */ 1089*b411b363SPhilipp Reisner static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, 1090*b411b363SPhilipp Reisner int page_nr, int first_word, int last_word) 1091*b411b363SPhilipp Reisner { 1092*b411b363SPhilipp Reisner int i; 1093*b411b363SPhilipp Reisner int bits; 1094*b411b363SPhilipp Reisner unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); 1095*b411b363SPhilipp Reisner for (i = first_word; i < last_word; i++) { 1096*b411b363SPhilipp Reisner bits = hweight_long(paddr[i]); 1097*b411b363SPhilipp Reisner paddr[i] = ~0UL; 1098*b411b363SPhilipp Reisner b->bm_set += BITS_PER_LONG - bits; 1099*b411b363SPhilipp Reisner } 1100*b411b363SPhilipp Reisner kunmap_atomic(paddr, KM_USER0); 1101*b411b363SPhilipp Reisner } 1102*b411b363SPhilipp Reisner 1103*b411b363SPhilipp Reisner /* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. 1104*b411b363SPhilipp Reisner * You must first drbd_bm_lock(). 1105*b411b363SPhilipp Reisner * Can be called to set the whole bitmap in one go. 1106*b411b363SPhilipp Reisner * Sets bits from s to e _inclusive_. */ 1107*b411b363SPhilipp Reisner void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1108*b411b363SPhilipp Reisner { 1109*b411b363SPhilipp Reisner /* First set_bit from the first bit (s) 1110*b411b363SPhilipp Reisner * up to the next long boundary (sl), 1111*b411b363SPhilipp Reisner * then assign full words up to the last long boundary (el), 1112*b411b363SPhilipp Reisner * then set_bit up to and including the last bit (e). 1113*b411b363SPhilipp Reisner * 1114*b411b363SPhilipp Reisner * Do not use memset, because we must account for changes, 1115*b411b363SPhilipp Reisner * so we need to loop over the words with hweight() anyways. 
1116*b411b363SPhilipp Reisner */ 1117*b411b363SPhilipp Reisner unsigned long sl = ALIGN(s,BITS_PER_LONG); 1118*b411b363SPhilipp Reisner unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); 1119*b411b363SPhilipp Reisner int first_page; 1120*b411b363SPhilipp Reisner int last_page; 1121*b411b363SPhilipp Reisner int page_nr; 1122*b411b363SPhilipp Reisner int first_word; 1123*b411b363SPhilipp Reisner int last_word; 1124*b411b363SPhilipp Reisner 1125*b411b363SPhilipp Reisner if (e - s <= 3*BITS_PER_LONG) { 1126*b411b363SPhilipp Reisner /* don't bother; el and sl may even be wrong. */ 1127*b411b363SPhilipp Reisner __bm_change_bits_to(mdev, s, e, 1, KM_USER0); 1128*b411b363SPhilipp Reisner return; 1129*b411b363SPhilipp Reisner } 1130*b411b363SPhilipp Reisner 1131*b411b363SPhilipp Reisner /* difference is large enough that we can trust sl and el */ 1132*b411b363SPhilipp Reisner 1133*b411b363SPhilipp Reisner /* bits filling the current long */ 1134*b411b363SPhilipp Reisner if (sl) 1135*b411b363SPhilipp Reisner __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); 1136*b411b363SPhilipp Reisner 1137*b411b363SPhilipp Reisner first_page = sl >> (3 + PAGE_SHIFT); 1138*b411b363SPhilipp Reisner last_page = el >> (3 + PAGE_SHIFT); 1139*b411b363SPhilipp Reisner 1140*b411b363SPhilipp Reisner /* MLPP: modulo longs per page */ 1141*b411b363SPhilipp Reisner /* LWPP: long words per page */ 1142*b411b363SPhilipp Reisner first_word = MLPP(sl >> LN2_BPL); 1143*b411b363SPhilipp Reisner last_word = LWPP; 1144*b411b363SPhilipp Reisner 1145*b411b363SPhilipp Reisner /* first and full pages, unless first page == last page */ 1146*b411b363SPhilipp Reisner for (page_nr = first_page; page_nr < last_page; page_nr++) { 1147*b411b363SPhilipp Reisner bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); 1148*b411b363SPhilipp Reisner cond_resched(); 1149*b411b363SPhilipp Reisner first_word = 0; 1150*b411b363SPhilipp Reisner } 1151*b411b363SPhilipp Reisner 
1152*b411b363SPhilipp Reisner /* last page (respectively only page, for first page == last page) */ 1153*b411b363SPhilipp Reisner last_word = MLPP(el >> LN2_BPL); 1154*b411b363SPhilipp Reisner bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); 1155*b411b363SPhilipp Reisner 1156*b411b363SPhilipp Reisner /* possibly trailing bits. 1157*b411b363SPhilipp Reisner * example: (e & 63) == 63, el will be e+1. 1158*b411b363SPhilipp Reisner * if that even was the very last bit, 1159*b411b363SPhilipp Reisner * it would trigger an assert in __bm_change_bits_to() 1160*b411b363SPhilipp Reisner */ 1161*b411b363SPhilipp Reisner if (el <= e) 1162*b411b363SPhilipp Reisner __bm_change_bits_to(mdev, el, e, 1, KM_USER0); 1163*b411b363SPhilipp Reisner } 1164*b411b363SPhilipp Reisner 1165*b411b363SPhilipp Reisner /* returns bit state 1166*b411b363SPhilipp Reisner * wants bitnr, NOT sector. 1167*b411b363SPhilipp Reisner * inherently racy... area needs to be locked by means of {al,rs}_lru 1168*b411b363SPhilipp Reisner * 1 ... bit set 1169*b411b363SPhilipp Reisner * 0 ... bit not set 1170*b411b363SPhilipp Reisner * -1 ... first out of bounds access, stop testing for bits! 
1171*b411b363SPhilipp Reisner */ 1172*b411b363SPhilipp Reisner int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) 1173*b411b363SPhilipp Reisner { 1174*b411b363SPhilipp Reisner unsigned long flags; 1175*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1176*b411b363SPhilipp Reisner unsigned long *p_addr; 1177*b411b363SPhilipp Reisner int i; 1178*b411b363SPhilipp Reisner 1179*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1180*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1181*b411b363SPhilipp Reisner 1182*b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1183*b411b363SPhilipp Reisner if (bm_is_locked(b)) 1184*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1185*b411b363SPhilipp Reisner if (bitnr < b->bm_bits) { 1186*b411b363SPhilipp Reisner unsigned long offset = bitnr>>LN2_BPL; 1187*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, offset); 1188*b411b363SPhilipp Reisner i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; 1189*b411b363SPhilipp Reisner bm_unmap(p_addr); 1190*b411b363SPhilipp Reisner } else if (bitnr == b->bm_bits) { 1191*b411b363SPhilipp Reisner i = -1; 1192*b411b363SPhilipp Reisner } else { /* (bitnr > b->bm_bits) */ 1193*b411b363SPhilipp Reisner dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); 1194*b411b363SPhilipp Reisner i = 0; 1195*b411b363SPhilipp Reisner } 1196*b411b363SPhilipp Reisner 1197*b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1198*b411b363SPhilipp Reisner return i; 1199*b411b363SPhilipp Reisner } 1200*b411b363SPhilipp Reisner 1201*b411b363SPhilipp Reisner /* returns number of bits set in the range [s, e] */ 1202*b411b363SPhilipp Reisner int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1203*b411b363SPhilipp Reisner { 1204*b411b363SPhilipp Reisner unsigned long flags; 1205*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1206*b411b363SPhilipp Reisner unsigned long *p_addr = NULL, page_nr 
= -1; 1207*b411b363SPhilipp Reisner unsigned long bitnr; 1208*b411b363SPhilipp Reisner int c = 0; 1209*b411b363SPhilipp Reisner size_t w; 1210*b411b363SPhilipp Reisner 1211*b411b363SPhilipp Reisner /* If this is called without a bitmap, that is a bug. But just to be 1212*b411b363SPhilipp Reisner * robust in case we screwed up elsewhere, in that case pretend there 1213*b411b363SPhilipp Reisner * was one dirty bit in the requested area, so we won't try to do a 1214*b411b363SPhilipp Reisner * local read there (no bitmap probably implies no disk) */ 1215*b411b363SPhilipp Reisner ERR_IF(!b) return 1; 1216*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 1; 1217*b411b363SPhilipp Reisner 1218*b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1219*b411b363SPhilipp Reisner if (bm_is_locked(b)) 1220*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1221*b411b363SPhilipp Reisner for (bitnr = s; bitnr <= e; bitnr++) { 1222*b411b363SPhilipp Reisner w = bitnr >> LN2_BPL; 1223*b411b363SPhilipp Reisner if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { 1224*b411b363SPhilipp Reisner page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); 1225*b411b363SPhilipp Reisner if (p_addr) 1226*b411b363SPhilipp Reisner bm_unmap(p_addr); 1227*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, w); 1228*b411b363SPhilipp Reisner } 1229*b411b363SPhilipp Reisner ERR_IF (bitnr >= b->bm_bits) { 1230*b411b363SPhilipp Reisner dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); 1231*b411b363SPhilipp Reisner } else { 1232*b411b363SPhilipp Reisner c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); 1233*b411b363SPhilipp Reisner } 1234*b411b363SPhilipp Reisner } 1235*b411b363SPhilipp Reisner if (p_addr) 1236*b411b363SPhilipp Reisner bm_unmap(p_addr); 1237*b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1238*b411b363SPhilipp Reisner return c; 1239*b411b363SPhilipp Reisner } 1240*b411b363SPhilipp Reisner 1241*b411b363SPhilipp Reisner 1242*b411b363SPhilipp 
Reisner /* inherently racy... 1243*b411b363SPhilipp Reisner * return value may be already out-of-date when this function returns. 1244*b411b363SPhilipp Reisner * but the general usage is that this is only use during a cstate when bits are 1245*b411b363SPhilipp Reisner * only cleared, not set, and typically only care for the case when the return 1246*b411b363SPhilipp Reisner * value is zero, or we already "locked" this "bitmap extent" by other means. 1247*b411b363SPhilipp Reisner * 1248*b411b363SPhilipp Reisner * enr is bm-extent number, since we chose to name one sector (512 bytes) 1249*b411b363SPhilipp Reisner * worth of the bitmap a "bitmap extent". 1250*b411b363SPhilipp Reisner * 1251*b411b363SPhilipp Reisner * TODO 1252*b411b363SPhilipp Reisner * I think since we use it like a reference count, we should use the real 1253*b411b363SPhilipp Reisner * reference count of some bitmap extent element from some lru instead... 1254*b411b363SPhilipp Reisner * 1255*b411b363SPhilipp Reisner */ 1256*b411b363SPhilipp Reisner int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) 1257*b411b363SPhilipp Reisner { 1258*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1259*b411b363SPhilipp Reisner int count, s, e; 1260*b411b363SPhilipp Reisner unsigned long flags; 1261*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 1262*b411b363SPhilipp Reisner 1263*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1264*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1265*b411b363SPhilipp Reisner 1266*b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1267*b411b363SPhilipp Reisner if (bm_is_locked(b)) 1268*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1269*b411b363SPhilipp Reisner 1270*b411b363SPhilipp Reisner s = S2W(enr); 1271*b411b363SPhilipp Reisner e = min((size_t)S2W(enr+1), b->bm_words); 1272*b411b363SPhilipp Reisner count = 0; 1273*b411b363SPhilipp Reisner if (s < b->bm_words) { 1274*b411b363SPhilipp Reisner int n = e-s; 
1275*b411b363SPhilipp Reisner p_addr = bm_map_paddr(b, s); 1276*b411b363SPhilipp Reisner bm = p_addr + MLPP(s); 1277*b411b363SPhilipp Reisner while (n--) 1278*b411b363SPhilipp Reisner count += hweight_long(*bm++); 1279*b411b363SPhilipp Reisner bm_unmap(p_addr); 1280*b411b363SPhilipp Reisner } else { 1281*b411b363SPhilipp Reisner dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); 1282*b411b363SPhilipp Reisner } 1283*b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1284*b411b363SPhilipp Reisner return count; 1285*b411b363SPhilipp Reisner } 1286*b411b363SPhilipp Reisner 1287*b411b363SPhilipp Reisner /* set all bits covered by the AL-extent al_enr */ 1288*b411b363SPhilipp Reisner unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) 1289*b411b363SPhilipp Reisner { 1290*b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1291*b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 1292*b411b363SPhilipp Reisner unsigned long weight; 1293*b411b363SPhilipp Reisner int count, s, e, i, do_now; 1294*b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1295*b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1296*b411b363SPhilipp Reisner 1297*b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 1298*b411b363SPhilipp Reisner if (bm_is_locked(b)) 1299*b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1300*b411b363SPhilipp Reisner weight = b->bm_set; 1301*b411b363SPhilipp Reisner 1302*b411b363SPhilipp Reisner s = al_enr * BM_WORDS_PER_AL_EXT; 1303*b411b363SPhilipp Reisner e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); 1304*b411b363SPhilipp Reisner /* assert that s and e are on the same page */ 1305*b411b363SPhilipp Reisner D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) 1306*b411b363SPhilipp Reisner == s >> (PAGE_SHIFT - LN2_BPL + 3)); 1307*b411b363SPhilipp Reisner count = 0; 1308*b411b363SPhilipp Reisner if (s < b->bm_words) { 1309*b411b363SPhilipp Reisner i = do_now = e-s; 1310*b411b363SPhilipp 
Reisner p_addr = bm_map_paddr(b, s); 1311*b411b363SPhilipp Reisner bm = p_addr + MLPP(s); 1312*b411b363SPhilipp Reisner while (i--) { 1313*b411b363SPhilipp Reisner count += hweight_long(*bm); 1314*b411b363SPhilipp Reisner *bm = -1UL; 1315*b411b363SPhilipp Reisner bm++; 1316*b411b363SPhilipp Reisner } 1317*b411b363SPhilipp Reisner bm_unmap(p_addr); 1318*b411b363SPhilipp Reisner b->bm_set += do_now*BITS_PER_LONG - count; 1319*b411b363SPhilipp Reisner if (e == b->bm_words) 1320*b411b363SPhilipp Reisner b->bm_set -= bm_clear_surplus(b); 1321*b411b363SPhilipp Reisner } else { 1322*b411b363SPhilipp Reisner dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); 1323*b411b363SPhilipp Reisner } 1324*b411b363SPhilipp Reisner weight = b->bm_set - weight; 1325*b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 1326*b411b363SPhilipp Reisner return weight; 1327*b411b363SPhilipp Reisner } 1328