1b411b363SPhilipp Reisner /* 2b411b363SPhilipp Reisner drbd_bitmap.c 3b411b363SPhilipp Reisner 4b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 5b411b363SPhilipp Reisner 6b411b363SPhilipp Reisner Copyright (C) 2004-2008, LINBIT Information Technologies GmbH. 7b411b363SPhilipp Reisner Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>. 8b411b363SPhilipp Reisner Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 9b411b363SPhilipp Reisner 10b411b363SPhilipp Reisner drbd is free software; you can redistribute it and/or modify 11b411b363SPhilipp Reisner it under the terms of the GNU General Public License as published by 12b411b363SPhilipp Reisner the Free Software Foundation; either version 2, or (at your option) 13b411b363SPhilipp Reisner any later version. 14b411b363SPhilipp Reisner 15b411b363SPhilipp Reisner drbd is distributed in the hope that it will be useful, 16b411b363SPhilipp Reisner but WITHOUT ANY WARRANTY; without even the implied warranty of 17b411b363SPhilipp Reisner MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18b411b363SPhilipp Reisner GNU General Public License for more details. 19b411b363SPhilipp Reisner 20b411b363SPhilipp Reisner You should have received a copy of the GNU General Public License 21b411b363SPhilipp Reisner along with drbd; see the file COPYING. If not, write to 22b411b363SPhilipp Reisner the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 23b411b363SPhilipp Reisner */ 24b411b363SPhilipp Reisner 25b411b363SPhilipp Reisner #include <linux/bitops.h> 26b411b363SPhilipp Reisner #include <linux/vmalloc.h> 27b411b363SPhilipp Reisner #include <linux/string.h> 28b411b363SPhilipp Reisner #include <linux/drbd.h> 295a0e3ad6STejun Heo #include <linux/slab.h> 30b411b363SPhilipp Reisner #include <asm/kmap_types.h> 31b411b363SPhilipp Reisner #include "drbd_int.h" 32b411b363SPhilipp Reisner 3395a0f10cSLars Ellenberg 34b411b363SPhilipp Reisner /* OPAQUE outside this file! 35b411b363SPhilipp Reisner * interface defined in drbd_int.h 36b411b363SPhilipp Reisner 37b411b363SPhilipp Reisner * convention: 38b411b363SPhilipp Reisner * function name drbd_bm_... => used elsewhere, "public". 39b411b363SPhilipp Reisner * function name bm_... => internal to implementation, "private". 40b411b363SPhilipp Reisner 41b411b363SPhilipp Reisner * Note that since find_first_bit returns int, at the current granularity of 42b411b363SPhilipp Reisner * the bitmap (4KB per byte), this implementation "only" supports up to 43b411b363SPhilipp Reisner * 1<<(32+12) == 16 TB... 44b411b363SPhilipp Reisner */ 45b411b363SPhilipp Reisner 46b411b363SPhilipp Reisner /* 47b411b363SPhilipp Reisner * NOTE 48b411b363SPhilipp Reisner * Access to the *bm_pages is protected by bm_lock. 49b411b363SPhilipp Reisner * It is safe to read the other members within the lock. 50b411b363SPhilipp Reisner * 51b411b363SPhilipp Reisner * drbd_bm_set_bits is called from bio_endio callbacks, 52b411b363SPhilipp Reisner * We may be called with irq already disabled, 53b411b363SPhilipp Reisner * so we need spin_lock_irqsave(). 54b411b363SPhilipp Reisner * And we need the kmap_atomic. 55b411b363SPhilipp Reisner */ 56b411b363SPhilipp Reisner struct drbd_bitmap { 57b411b363SPhilipp Reisner struct page **bm_pages; 58b411b363SPhilipp Reisner spinlock_t bm_lock; 59b411b363SPhilipp Reisner /* WARNING unsigned long bm_*: 60b411b363SPhilipp Reisner * 32bit number of bit offset is just enough for 512 MB bitmap. 61b411b363SPhilipp Reisner * it will blow up if we make the bitmap bigger... 62b411b363SPhilipp Reisner * not that it makes much sense to have a bitmap that large, 63b411b363SPhilipp Reisner * rather change the granularity to 16k or 64k or something. 64b411b363SPhilipp Reisner * (that implies other problems, however...) 65b411b363SPhilipp Reisner */ 66b411b363SPhilipp Reisner unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ 67b411b363SPhilipp Reisner unsigned long bm_bits; 68b411b363SPhilipp Reisner size_t bm_words; 69b411b363SPhilipp Reisner size_t bm_number_of_pages; 70b411b363SPhilipp Reisner sector_t bm_dev_capacity; 718a03ae2aSThomas Gleixner struct mutex bm_change; /* serializes resize operations */ 72b411b363SPhilipp Reisner 73*19f843aaSLars Ellenberg wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ 74b411b363SPhilipp Reisner 75b411b363SPhilipp Reisner unsigned long bm_flags; 76b411b363SPhilipp Reisner 77b411b363SPhilipp Reisner /* debugging aid, in case we are still racy somewhere */ 78b411b363SPhilipp Reisner char *bm_why; 79b411b363SPhilipp Reisner struct task_struct *bm_task; 80b411b363SPhilipp Reisner }; 81b411b363SPhilipp Reisner 82b411b363SPhilipp Reisner /* definition of bits in bm_flags */ 83b411b363SPhilipp Reisner #define BM_LOCKED 0 84*19f843aaSLars Ellenberg // #define BM_MD_IO_ERROR 1 unused now. 85b411b363SPhilipp Reisner #define BM_P_VMALLOCED 2 86b411b363SPhilipp Reisner 87b4ee79daSPhilipp Reisner static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 88fd76438cSPhilipp Reisner unsigned long e, int val, const enum km_type km); 89fd76438cSPhilipp Reisner 90b411b363SPhilipp Reisner static int bm_is_locked(struct drbd_bitmap *b) 91b411b363SPhilipp Reisner { 92b411b363SPhilipp Reisner return test_bit(BM_LOCKED, &b->bm_flags); 93b411b363SPhilipp Reisner } 94b411b363SPhilipp Reisner 95b411b363SPhilipp Reisner #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) 96b411b363SPhilipp Reisner static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) 97b411b363SPhilipp Reisner { 98b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 99b411b363SPhilipp Reisner if (!__ratelimit(&drbd_ratelimit_state)) 100b411b363SPhilipp Reisner return; 101b411b363SPhilipp Reisner dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", 102b411b363SPhilipp Reisner current == mdev->receiver.task ? "receiver" : 103b411b363SPhilipp Reisner current == mdev->asender.task ? "asender" : 104b411b363SPhilipp Reisner current == mdev->worker.task ? "worker" : current->comm, 105b411b363SPhilipp Reisner func, b->bm_why ?: "?", 106b411b363SPhilipp Reisner b->bm_task == mdev->receiver.task ? "receiver" : 107b411b363SPhilipp Reisner b->bm_task == mdev->asender.task ? "asender" : 108b411b363SPhilipp Reisner b->bm_task == mdev->worker.task ? "worker" : "?"); 109b411b363SPhilipp Reisner } 110b411b363SPhilipp Reisner 111b411b363SPhilipp Reisner void drbd_bm_lock(struct drbd_conf *mdev, char *why) 112b411b363SPhilipp Reisner { 113b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 114b411b363SPhilipp Reisner int trylock_failed; 115b411b363SPhilipp Reisner 116b411b363SPhilipp Reisner if (!b) { 117b411b363SPhilipp Reisner dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n"); 118b411b363SPhilipp Reisner return; 119b411b363SPhilipp Reisner } 120b411b363SPhilipp Reisner 1218a03ae2aSThomas Gleixner trylock_failed = !mutex_trylock(&b->bm_change); 122b411b363SPhilipp Reisner 123b411b363SPhilipp Reisner if (trylock_failed) { 124b411b363SPhilipp Reisner dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", 125b411b363SPhilipp Reisner current == mdev->receiver.task ? "receiver" : 126b411b363SPhilipp Reisner current == mdev->asender.task ? "asender" : 127b411b363SPhilipp Reisner current == mdev->worker.task ? "worker" : current->comm, 128b411b363SPhilipp Reisner why, b->bm_why ?: "?", 129b411b363SPhilipp Reisner b->bm_task == mdev->receiver.task ? "receiver" : 130b411b363SPhilipp Reisner b->bm_task == mdev->asender.task ? "asender" : 131b411b363SPhilipp Reisner b->bm_task == mdev->worker.task ? "worker" : "?"); 1328a03ae2aSThomas Gleixner mutex_lock(&b->bm_change); 133b411b363SPhilipp Reisner } 134b411b363SPhilipp Reisner if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) 135b411b363SPhilipp Reisner dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); 136b411b363SPhilipp Reisner 137b411b363SPhilipp Reisner b->bm_why = why; 138b411b363SPhilipp Reisner b->bm_task = current; 139b411b363SPhilipp Reisner } 140b411b363SPhilipp Reisner 141b411b363SPhilipp Reisner void drbd_bm_unlock(struct drbd_conf *mdev) 142b411b363SPhilipp Reisner { 143b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 144b411b363SPhilipp Reisner if (!b) { 145b411b363SPhilipp Reisner dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n"); 146b411b363SPhilipp Reisner return; 147b411b363SPhilipp Reisner } 148b411b363SPhilipp Reisner 149b411b363SPhilipp Reisner if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) 150b411b363SPhilipp Reisner dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); 151b411b363SPhilipp Reisner 152b411b363SPhilipp Reisner b->bm_why = NULL; 153b411b363SPhilipp Reisner b->bm_task = NULL; 1548a03ae2aSThomas Gleixner mutex_unlock(&b->bm_change); 155b411b363SPhilipp Reisner } 156b411b363SPhilipp Reisner 157*19f843aaSLars Ellenberg /* we store some "meta" info about our pages in page->private */ 158*19f843aaSLars Ellenberg /* at a granularity of 4k storage per bitmap bit: 159*19f843aaSLars Ellenberg * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks 160*19f843aaSLars Ellenberg * 1<<38 bits, 161*19f843aaSLars Ellenberg * 1<<23 4k bitmap pages. 162*19f843aaSLars Ellenberg * Use 24 bits as page index, covers 2 peta byte storage 163*19f843aaSLars Ellenberg * at a granularity of 4k per bit. 164*19f843aaSLars Ellenberg * Used to report the failed page idx on io error from the endio handlers. 165*19f843aaSLars Ellenberg */ 166*19f843aaSLars Ellenberg #define BM_PAGE_IDX_MASK ((1UL<<24)-1) 167*19f843aaSLars Ellenberg /* this page is currently read in, or written back */ 168*19f843aaSLars Ellenberg #define BM_PAGE_IO_LOCK 31 169*19f843aaSLars Ellenberg /* if there has been an IO error for this page */ 170*19f843aaSLars Ellenberg #define BM_PAGE_IO_ERROR 30 171*19f843aaSLars Ellenberg /* this is to be able to intelligently skip disk IO, 172*19f843aaSLars Ellenberg * set if bits have been set since last IO. */ 173*19f843aaSLars Ellenberg #define BM_PAGE_NEED_WRITEOUT 29 174*19f843aaSLars Ellenberg /* to mark for lazy writeout once syncer cleared all clearable bits, 175*19f843aaSLars Ellenberg * we if bits have been cleared since last IO. */ 176*19f843aaSLars Ellenberg #define BM_PAGE_LAZY_WRITEOUT 28 177*19f843aaSLars Ellenberg 178*19f843aaSLars Ellenberg /* store_page_idx uses non-atomic assingment. It is only used directly after 179*19f843aaSLars Ellenberg * allocating the page. All other bm_set_page_* and bm_clear_page_* need to 180*19f843aaSLars Ellenberg * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap 181*19f843aaSLars Ellenberg * changes) may happen from various contexts, and wait_on_bit/wake_up_bit 182*19f843aaSLars Ellenberg * requires it all to be atomic as well. */ 183*19f843aaSLars Ellenberg static void bm_store_page_idx(struct page *page, unsigned long idx) 184*19f843aaSLars Ellenberg { 185*19f843aaSLars Ellenberg BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); 186*19f843aaSLars Ellenberg page_private(page) |= idx; 187*19f843aaSLars Ellenberg } 188*19f843aaSLars Ellenberg 189*19f843aaSLars Ellenberg static unsigned long bm_page_to_idx(struct page *page) 190*19f843aaSLars Ellenberg { 191*19f843aaSLars Ellenberg return page_private(page) & BM_PAGE_IDX_MASK; 192*19f843aaSLars Ellenberg } 193*19f843aaSLars Ellenberg 194*19f843aaSLars Ellenberg /* As is very unlikely that the same page is under IO from more than one 195*19f843aaSLars Ellenberg * context, we can get away with a bit per page and one wait queue per bitmap. 196*19f843aaSLars Ellenberg */ 197*19f843aaSLars Ellenberg static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) 198*19f843aaSLars Ellenberg { 199*19f843aaSLars Ellenberg struct drbd_bitmap *b = mdev->bitmap; 200*19f843aaSLars Ellenberg void *addr = &page_private(b->bm_pages[page_nr]); 201*19f843aaSLars Ellenberg wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); 202*19f843aaSLars Ellenberg } 203*19f843aaSLars Ellenberg 204*19f843aaSLars Ellenberg static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) 205*19f843aaSLars Ellenberg { 206*19f843aaSLars Ellenberg struct drbd_bitmap *b = mdev->bitmap; 207*19f843aaSLars Ellenberg void *addr = &page_private(b->bm_pages[page_nr]); 208*19f843aaSLars Ellenberg clear_bit(BM_PAGE_IO_LOCK, addr); 209*19f843aaSLars Ellenberg smp_mb__after_clear_bit(); 210*19f843aaSLars Ellenberg wake_up(&mdev->bitmap->bm_io_wait); 211*19f843aaSLars Ellenberg } 212*19f843aaSLars Ellenberg 213*19f843aaSLars Ellenberg /* set _before_ submit_io, so it may be reset due to being changed 214*19f843aaSLars Ellenberg * while this page is in flight... will get submitted later again */ 215*19f843aaSLars Ellenberg static void bm_set_page_unchanged(struct page *page) 216*19f843aaSLars Ellenberg { 217*19f843aaSLars Ellenberg /* use cmpxchg? */ 218*19f843aaSLars Ellenberg clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); 219*19f843aaSLars Ellenberg clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); 220*19f843aaSLars Ellenberg } 221*19f843aaSLars Ellenberg 222*19f843aaSLars Ellenberg static void bm_set_page_need_writeout(struct page *page) 223*19f843aaSLars Ellenberg { 224*19f843aaSLars Ellenberg set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); 225*19f843aaSLars Ellenberg } 226*19f843aaSLars Ellenberg 227*19f843aaSLars Ellenberg static int bm_test_page_unchanged(struct page *page) 228*19f843aaSLars Ellenberg { 229*19f843aaSLars Ellenberg volatile const unsigned long *addr = &page_private(page); 230*19f843aaSLars Ellenberg return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0; 231*19f843aaSLars Ellenberg } 232*19f843aaSLars Ellenberg 233*19f843aaSLars Ellenberg static void bm_set_page_io_err(struct page *page) 234*19f843aaSLars Ellenberg { 235*19f843aaSLars Ellenberg set_bit(BM_PAGE_IO_ERROR, &page_private(page)); 236*19f843aaSLars Ellenberg } 237*19f843aaSLars Ellenberg 238*19f843aaSLars Ellenberg static void bm_clear_page_io_err(struct page *page) 239*19f843aaSLars Ellenberg { 240*19f843aaSLars Ellenberg clear_bit(BM_PAGE_IO_ERROR, &page_private(page)); 241*19f843aaSLars Ellenberg } 242*19f843aaSLars Ellenberg 243*19f843aaSLars Ellenberg static void bm_set_page_lazy_writeout(struct page *page) 244*19f843aaSLars Ellenberg { 245*19f843aaSLars Ellenberg set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); 246*19f843aaSLars Ellenberg } 247*19f843aaSLars Ellenberg 248*19f843aaSLars Ellenberg static int bm_test_page_lazy_writeout(struct page *page) 249*19f843aaSLars Ellenberg { 250*19f843aaSLars Ellenberg return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); 251*19f843aaSLars Ellenberg } 252*19f843aaSLars Ellenberg 253*19f843aaSLars Ellenberg /* on a 32bit box, this would allow for exactly (2<<38) bits. */ 254*19f843aaSLars Ellenberg static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr) 255*19f843aaSLars Ellenberg { 256*19f843aaSLars Ellenberg /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ 257*19f843aaSLars Ellenberg unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); 258*19f843aaSLars Ellenberg BUG_ON(page_nr >= b->bm_number_of_pages); 259*19f843aaSLars Ellenberg return page_nr; 260*19f843aaSLars Ellenberg } 261*19f843aaSLars Ellenberg 26295a0f10cSLars Ellenberg static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) 26395a0f10cSLars Ellenberg { 26495a0f10cSLars Ellenberg /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ 26595a0f10cSLars Ellenberg unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); 26695a0f10cSLars Ellenberg BUG_ON(page_nr >= b->bm_number_of_pages); 26795a0f10cSLars Ellenberg return page_nr; 26895a0f10cSLars Ellenberg } 26995a0f10cSLars Ellenberg 27095a0f10cSLars Ellenberg static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) 27195a0f10cSLars Ellenberg { 27295a0f10cSLars Ellenberg struct page *page = b->bm_pages[idx]; 27395a0f10cSLars Ellenberg return (unsigned long *) kmap_atomic(page, km); 27495a0f10cSLars Ellenberg } 27595a0f10cSLars Ellenberg 27695a0f10cSLars Ellenberg static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) 27795a0f10cSLars Ellenberg { 27895a0f10cSLars Ellenberg return __bm_map_pidx(b, idx, KM_IRQ1); 27995a0f10cSLars Ellenberg } 28095a0f10cSLars Ellenberg 281b411b363SPhilipp Reisner static void __bm_unmap(unsigned long *p_addr, const enum km_type km) 282b411b363SPhilipp Reisner { 283b411b363SPhilipp Reisner kunmap_atomic(p_addr, km); 284b411b363SPhilipp Reisner }; 285b411b363SPhilipp Reisner 286b411b363SPhilipp Reisner static void bm_unmap(unsigned long *p_addr) 287b411b363SPhilipp Reisner { 288b411b363SPhilipp Reisner return __bm_unmap(p_addr, KM_IRQ1); 289b411b363SPhilipp Reisner } 290b411b363SPhilipp Reisner 291b411b363SPhilipp Reisner /* long word offset of _bitmap_ sector */ 292b411b363SPhilipp Reisner #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) 293b411b363SPhilipp Reisner /* word offset from start of bitmap to word number _in_page_ 294b411b363SPhilipp Reisner * modulo longs per page 295b411b363SPhilipp Reisner #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) 296b411b363SPhilipp Reisner hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) 297b411b363SPhilipp Reisner so do it explicitly: 298b411b363SPhilipp Reisner */ 299b411b363SPhilipp Reisner #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) 300b411b363SPhilipp Reisner 301b411b363SPhilipp Reisner /* Long words per page */ 302b411b363SPhilipp Reisner #define LWPP (PAGE_SIZE/sizeof(long)) 303b411b363SPhilipp Reisner 304b411b363SPhilipp Reisner /* 305b411b363SPhilipp Reisner * actually most functions herein should take a struct drbd_bitmap*, not a 306b411b363SPhilipp Reisner * struct drbd_conf*, but for the debug macros I like to have the mdev around 307b411b363SPhilipp Reisner * to be able to report device specific. 308b411b363SPhilipp Reisner */ 309b411b363SPhilipp Reisner 310*19f843aaSLars Ellenberg 311b411b363SPhilipp Reisner static void bm_free_pages(struct page **pages, unsigned long number) 312b411b363SPhilipp Reisner { 313b411b363SPhilipp Reisner unsigned long i; 314b411b363SPhilipp Reisner if (!pages) 315b411b363SPhilipp Reisner return; 316b411b363SPhilipp Reisner 317b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 318b411b363SPhilipp Reisner if (!pages[i]) { 319b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: bm_free_pages tried to free " 320b411b363SPhilipp Reisner "a NULL pointer; i=%lu n=%lu\n", 321b411b363SPhilipp Reisner i, number); 322b411b363SPhilipp Reisner continue; 323b411b363SPhilipp Reisner } 324b411b363SPhilipp Reisner __free_page(pages[i]); 325b411b363SPhilipp Reisner pages[i] = NULL; 326b411b363SPhilipp Reisner } 327b411b363SPhilipp Reisner } 328b411b363SPhilipp Reisner 329b411b363SPhilipp Reisner static void bm_vk_free(void *ptr, int v) 330b411b363SPhilipp Reisner { 331b411b363SPhilipp Reisner if (v) 332b411b363SPhilipp Reisner vfree(ptr); 333b411b363SPhilipp Reisner else 334b411b363SPhilipp Reisner kfree(ptr); 335b411b363SPhilipp Reisner } 336b411b363SPhilipp Reisner 337b411b363SPhilipp Reisner /* 338b411b363SPhilipp Reisner * "have" and "want" are NUMBER OF PAGES. 339b411b363SPhilipp Reisner */ 340b411b363SPhilipp Reisner static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) 341b411b363SPhilipp Reisner { 342b411b363SPhilipp Reisner struct page **old_pages = b->bm_pages; 343b411b363SPhilipp Reisner struct page **new_pages, *page; 344b411b363SPhilipp Reisner unsigned int i, bytes, vmalloced = 0; 345b411b363SPhilipp Reisner unsigned long have = b->bm_number_of_pages; 346b411b363SPhilipp Reisner 347b411b363SPhilipp Reisner BUG_ON(have == 0 && old_pages != NULL); 348b411b363SPhilipp Reisner BUG_ON(have != 0 && old_pages == NULL); 349b411b363SPhilipp Reisner 350b411b363SPhilipp Reisner if (have == want) 351b411b363SPhilipp Reisner return old_pages; 352b411b363SPhilipp Reisner 353b411b363SPhilipp Reisner /* Trying kmalloc first, falling back to vmalloc. 354b411b363SPhilipp Reisner * GFP_KERNEL is ok, as this is done when a lower level disk is 355b411b363SPhilipp Reisner * "attached" to the drbd. Context is receiver thread or cqueue 356b411b363SPhilipp Reisner * thread. As we have no disk yet, we are not in the IO path, 357b411b363SPhilipp Reisner * not even the IO path of the peer. */ 358b411b363SPhilipp Reisner bytes = sizeof(struct page *)*want; 359b411b363SPhilipp Reisner new_pages = kmalloc(bytes, GFP_KERNEL); 360b411b363SPhilipp Reisner if (!new_pages) { 361b411b363SPhilipp Reisner new_pages = vmalloc(bytes); 362b411b363SPhilipp Reisner if (!new_pages) 363b411b363SPhilipp Reisner return NULL; 364b411b363SPhilipp Reisner vmalloced = 1; 365b411b363SPhilipp Reisner } 366b411b363SPhilipp Reisner 367b411b363SPhilipp Reisner memset(new_pages, 0, bytes); 368b411b363SPhilipp Reisner if (want >= have) { 369b411b363SPhilipp Reisner for (i = 0; i < have; i++) 370b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 371b411b363SPhilipp Reisner for (; i < want; i++) { 372b411b363SPhilipp Reisner page = alloc_page(GFP_HIGHUSER); 373b411b363SPhilipp Reisner if (!page) { 374b411b363SPhilipp Reisner bm_free_pages(new_pages + have, i - have); 375b411b363SPhilipp Reisner bm_vk_free(new_pages, vmalloced); 376b411b363SPhilipp Reisner return NULL; 377b411b363SPhilipp Reisner } 378*19f843aaSLars Ellenberg /* we want to know which page it is 379*19f843aaSLars Ellenberg * from the endio handlers */ 380*19f843aaSLars Ellenberg bm_store_page_idx(page, i); 381b411b363SPhilipp Reisner new_pages[i] = page; 382b411b363SPhilipp Reisner } 383b411b363SPhilipp Reisner } else { 384b411b363SPhilipp Reisner for (i = 0; i < want; i++) 385b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 386b411b363SPhilipp Reisner /* NOT HERE, we are outside the spinlock! 387b411b363SPhilipp Reisner bm_free_pages(old_pages + want, have - want); 388b411b363SPhilipp Reisner */ 389b411b363SPhilipp Reisner } 390b411b363SPhilipp Reisner 391b411b363SPhilipp Reisner if (vmalloced) 392b411b363SPhilipp Reisner set_bit(BM_P_VMALLOCED, &b->bm_flags); 393b411b363SPhilipp Reisner else 394b411b363SPhilipp Reisner clear_bit(BM_P_VMALLOCED, &b->bm_flags); 395b411b363SPhilipp Reisner 396b411b363SPhilipp Reisner return new_pages; 397b411b363SPhilipp Reisner } 398b411b363SPhilipp Reisner 399b411b363SPhilipp Reisner /* 400b411b363SPhilipp Reisner * called on driver init only. TODO call when a device is created. 401b411b363SPhilipp Reisner * allocates the drbd_bitmap, and stores it in mdev->bitmap. 402b411b363SPhilipp Reisner */ 403b411b363SPhilipp Reisner int drbd_bm_init(struct drbd_conf *mdev) 404b411b363SPhilipp Reisner { 405b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 406b411b363SPhilipp Reisner WARN_ON(b != NULL); 407b411b363SPhilipp Reisner b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL); 408b411b363SPhilipp Reisner if (!b) 409b411b363SPhilipp Reisner return -ENOMEM; 410b411b363SPhilipp Reisner spin_lock_init(&b->bm_lock); 4118a03ae2aSThomas Gleixner mutex_init(&b->bm_change); 412b411b363SPhilipp Reisner init_waitqueue_head(&b->bm_io_wait); 413b411b363SPhilipp Reisner 414b411b363SPhilipp Reisner mdev->bitmap = b; 415b411b363SPhilipp Reisner 416b411b363SPhilipp Reisner return 0; 417b411b363SPhilipp Reisner } 418b411b363SPhilipp Reisner 419b411b363SPhilipp Reisner sector_t drbd_bm_capacity(struct drbd_conf *mdev) 420b411b363SPhilipp Reisner { 421b411b363SPhilipp Reisner ERR_IF(!mdev->bitmap) return 0; 422b411b363SPhilipp Reisner return mdev->bitmap->bm_dev_capacity; 423b411b363SPhilipp Reisner } 424b411b363SPhilipp Reisner 425b411b363SPhilipp Reisner /* called on driver unload. TODO: call when a device is destroyed. 426b411b363SPhilipp Reisner */ 427b411b363SPhilipp Reisner void drbd_bm_cleanup(struct drbd_conf *mdev) 428b411b363SPhilipp Reisner { 429b411b363SPhilipp Reisner ERR_IF (!mdev->bitmap) return; 430b411b363SPhilipp Reisner bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); 431b411b363SPhilipp Reisner bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); 432b411b363SPhilipp Reisner kfree(mdev->bitmap); 433b411b363SPhilipp Reisner mdev->bitmap = NULL; 434b411b363SPhilipp Reisner } 435b411b363SPhilipp Reisner 436b411b363SPhilipp Reisner /* 437b411b363SPhilipp Reisner * since (b->bm_bits % BITS_PER_LONG) != 0, 438b411b363SPhilipp Reisner * this masks out the remaining bits. 439b411b363SPhilipp Reisner * Returns the number of bits cleared. 440b411b363SPhilipp Reisner */ 44195a0f10cSLars Ellenberg #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) 44295a0f10cSLars Ellenberg #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) 44395a0f10cSLars Ellenberg #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) 444b411b363SPhilipp Reisner static int bm_clear_surplus(struct drbd_bitmap *b) 445b411b363SPhilipp Reisner { 44695a0f10cSLars Ellenberg unsigned long mask; 447b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 44895a0f10cSLars Ellenberg int tmp; 44995a0f10cSLars Ellenberg int cleared = 0; 450b411b363SPhilipp Reisner 45195a0f10cSLars Ellenberg /* number of bits modulo bits per page */ 45295a0f10cSLars Ellenberg tmp = (b->bm_bits & BITS_PER_PAGE_MASK); 45395a0f10cSLars Ellenberg /* mask the used bits of the word containing the last bit */ 45495a0f10cSLars Ellenberg mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; 45595a0f10cSLars Ellenberg /* bitmap is always stored little endian, 45695a0f10cSLars Ellenberg * on disk and in core memory alike */ 45795a0f10cSLars Ellenberg mask = cpu_to_lel(mask); 45895a0f10cSLars Ellenberg 45995a0f10cSLars Ellenberg /* because of the "extra long to catch oob access" we allocate in 46095a0f10cSLars Ellenberg * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page 46195a0f10cSLars Ellenberg * containing the last _relevant_ bitmap word */ 46295a0f10cSLars Ellenberg p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, b->bm_bits - 1)); 46395a0f10cSLars Ellenberg bm = p_addr + (tmp/BITS_PER_LONG); 46495a0f10cSLars Ellenberg if (mask) { 46595a0f10cSLars Ellenberg /* If mask != 0, we are not exactly aligned, so bm now points 46695a0f10cSLars Ellenberg * to the long containing the last bit. 46795a0f10cSLars Ellenberg * If mask == 0, bm already points to the word immediately 46895a0f10cSLars Ellenberg * after the last (long word aligned) bit. */ 469b411b363SPhilipp Reisner cleared = hweight_long(*bm & ~mask); 470b411b363SPhilipp Reisner *bm &= mask; 47195a0f10cSLars Ellenberg bm++; 472b411b363SPhilipp Reisner } 473b411b363SPhilipp Reisner 47495a0f10cSLars Ellenberg if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { 47595a0f10cSLars Ellenberg /* on a 32bit arch, we may need to zero out 47695a0f10cSLars Ellenberg * a padding long to align with a 64bit remote */ 477b411b363SPhilipp Reisner cleared += hweight_long(*bm); 478b411b363SPhilipp Reisner *bm = 0; 479b411b363SPhilipp Reisner } 480b411b363SPhilipp Reisner bm_unmap(p_addr); 481b411b363SPhilipp Reisner return cleared; 482b411b363SPhilipp Reisner } 483b411b363SPhilipp Reisner 484b411b363SPhilipp Reisner static void bm_set_surplus(struct drbd_bitmap *b) 485b411b363SPhilipp Reisner { 48695a0f10cSLars Ellenberg unsigned long mask; 487b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 48895a0f10cSLars Ellenberg int tmp; 489b411b363SPhilipp Reisner 49095a0f10cSLars Ellenberg /* number of bits modulo bits per page */ 49195a0f10cSLars Ellenberg tmp = (b->bm_bits & BITS_PER_PAGE_MASK); 49295a0f10cSLars Ellenberg /* mask the used bits of the word containing the last bit */ 49395a0f10cSLars Ellenberg mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; 49495a0f10cSLars Ellenberg /* bitmap is always stored little endian, 49595a0f10cSLars Ellenberg * on disk and in core memory alike */ 49695a0f10cSLars Ellenberg mask = cpu_to_lel(mask); 49795a0f10cSLars Ellenberg 49895a0f10cSLars Ellenberg /* because of the "extra long to catch oob access" we allocate in 49995a0f10cSLars Ellenberg * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page 50095a0f10cSLars Ellenberg * containing the last _relevant_ bitmap word */ 50195a0f10cSLars Ellenberg p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, b->bm_bits - 1)); 50295a0f10cSLars Ellenberg bm = p_addr + (tmp/BITS_PER_LONG); 50395a0f10cSLars Ellenberg if (mask) { 50495a0f10cSLars Ellenberg /* If mask != 0, we are not exactly aligned, so bm now points 50595a0f10cSLars Ellenberg * to the long containing the last bit. 50695a0f10cSLars Ellenberg * If mask == 0, bm already points to the word immediately 50795a0f10cSLars Ellenberg * after the last (long word aligned) bit. */ 508b411b363SPhilipp Reisner *bm |= ~mask; 50995a0f10cSLars Ellenberg bm++; 510b411b363SPhilipp Reisner } 511b411b363SPhilipp Reisner 51295a0f10cSLars Ellenberg if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { 51395a0f10cSLars Ellenberg /* on a 32bit arch, we may need to zero out 51495a0f10cSLars Ellenberg * a padding long to align with a 64bit remote */ 51595a0f10cSLars Ellenberg *bm = ~0UL; 516b411b363SPhilipp Reisner } 517b411b363SPhilipp Reisner bm_unmap(p_addr); 518b411b363SPhilipp Reisner } 519b411b363SPhilipp Reisner 52095a0f10cSLars Ellenberg static unsigned long bm_count_bits(struct drbd_bitmap *b) 521b411b363SPhilipp Reisner { 522b411b363SPhilipp Reisner unsigned long *p_addr, *bm, offset = 0; 523b411b363SPhilipp Reisner unsigned long bits = 0; 524b411b363SPhilipp Reisner unsigned long i, do_now; 5257777a8baSLars Ellenberg unsigned long words; 526b411b363SPhilipp Reisner 5277777a8baSLars Ellenberg /* due to 64bit alignment, the last long on a 32bit arch 5287777a8baSLars Ellenberg * may be not used at all. The last used long will likely 5297777a8baSLars Ellenberg * be only partially used, always. Don't count those bits, 5307777a8baSLars Ellenberg * but mask them out. */ 5317777a8baSLars Ellenberg words = (b->bm_bits + BITS_PER_LONG - 1) >> LN2_BPL; 5327777a8baSLars Ellenberg 5337777a8baSLars Ellenberg while (offset < words) { 5347777a8baSLars Ellenberg i = do_now = min_t(size_t, words-offset, LWPP); 535*19f843aaSLars Ellenberg p_addr = __bm_map_pidx(b, bm_word_to_page_idx(b, offset), KM_USER0); 536b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 537b411b363SPhilipp Reisner while (i--) { 538b411b363SPhilipp Reisner bits += hweight_long(*bm++); 539b411b363SPhilipp Reisner } 540b411b363SPhilipp Reisner offset += do_now; 5417777a8baSLars Ellenberg if (offset == words) { 5427777a8baSLars Ellenberg /* last word may only be partially used, 5437777a8baSLars Ellenberg * see also bm_clear_surplus. */ 5447777a8baSLars Ellenberg i = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1; 5457777a8baSLars Ellenberg if (i) { 5467777a8baSLars Ellenberg bits -= hweight_long(p_addr[do_now-1] & ~i); 5477777a8baSLars Ellenberg p_addr[do_now-1] &= i; 5487777a8baSLars Ellenberg } 5497777a8baSLars Ellenberg /* 32bit arch, may have an unused padding long */ 5507777a8baSLars Ellenberg if (words != b->bm_words) 5517777a8baSLars Ellenberg p_addr[do_now] = 0; 5527777a8baSLars Ellenberg } 5537777a8baSLars Ellenberg __bm_unmap(p_addr, KM_USER0); 554b411b363SPhilipp Reisner cond_resched(); 555b411b363SPhilipp Reisner } 556b411b363SPhilipp Reisner 557b411b363SPhilipp Reisner return bits; 558b411b363SPhilipp Reisner } 559b411b363SPhilipp Reisner 560b411b363SPhilipp Reisner /* offset and len in long words.*/ 561b411b363SPhilipp Reisner static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) 562b411b363SPhilipp Reisner { 563b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 564*19f843aaSLars Ellenberg unsigned int idx; 565b411b363SPhilipp Reisner size_t do_now, end; 566b411b363SPhilipp Reisner 567b411b363SPhilipp Reisner #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) 568b411b363SPhilipp Reisner 569b411b363SPhilipp Reisner end = offset + len; 570b411b363SPhilipp Reisner 571b411b363SPhilipp Reisner if (end > b->bm_words) { 572b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); 573b411b363SPhilipp Reisner return; 574b411b363SPhilipp Reisner } 575b411b363SPhilipp Reisner 576b411b363SPhilipp Reisner while (offset < end) { 577b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; 578*19f843aaSLars Ellenberg idx = bm_word_to_page_idx(b, offset); 579*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, idx); 580b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 581b411b363SPhilipp Reisner if (bm+do_now > p_addr + LWPP) { 582b411b363SPhilipp Reisner printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", 583b411b363SPhilipp Reisner p_addr, bm, (int)do_now); 584b411b363SPhilipp Reisner break; /* breaks to after catch_oob_access_end() only! */ 585b411b363SPhilipp Reisner } 586b411b363SPhilipp Reisner memset(bm, c, do_now * sizeof(long)); 587b411b363SPhilipp Reisner bm_unmap(p_addr); 588*19f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[idx]); 589b411b363SPhilipp Reisner offset += do_now; 590b411b363SPhilipp Reisner } 591b411b363SPhilipp Reisner } 592b411b363SPhilipp Reisner 593b411b363SPhilipp Reisner /* 594b411b363SPhilipp Reisner * make sure the bitmap has enough room for the attached storage, 595b411b363SPhilipp Reisner * if necessary, resize. 596b411b363SPhilipp Reisner * called whenever we may have changed the device size. 597b411b363SPhilipp Reisner * returns -ENOMEM if we could not allocate enough memory, 0 on success. 598b411b363SPhilipp Reisner * In case this is actually a resize, we copy the old bitmap into the new one. 599b411b363SPhilipp Reisner * Otherwise, the bitmap is initialized to all bits set. 600b411b363SPhilipp Reisner */ 60102d9a94bSPhilipp Reisner int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) 602b411b363SPhilipp Reisner { 603b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 604b411b363SPhilipp Reisner unsigned long bits, words, owords, obits, *p_addr, *bm; 605b411b363SPhilipp Reisner unsigned long want, have, onpages; /* number of pages */ 606b411b363SPhilipp Reisner struct page **npages, **opages = NULL; 607b411b363SPhilipp Reisner int err = 0, growing; 608b411b363SPhilipp Reisner int opages_vmalloced; 609b411b363SPhilipp Reisner 610b411b363SPhilipp Reisner ERR_IF(!b) return -ENOMEM; 611b411b363SPhilipp Reisner 612b411b363SPhilipp Reisner drbd_bm_lock(mdev, "resize"); 613b411b363SPhilipp Reisner 614b411b363SPhilipp Reisner dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", 615b411b363SPhilipp Reisner (unsigned long long)capacity); 616b411b363SPhilipp Reisner 617b411b363SPhilipp Reisner if (capacity == b->bm_dev_capacity) 618b411b363SPhilipp Reisner goto out; 619b411b363SPhilipp Reisner 620b411b363SPhilipp Reisner opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); 621b411b363SPhilipp Reisner 622b411b363SPhilipp Reisner if (capacity == 0) { 623b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 624b411b363SPhilipp Reisner opages = b->bm_pages; 625b411b363SPhilipp Reisner onpages = b->bm_number_of_pages; 626b411b363SPhilipp Reisner owords = b->bm_words; 627b411b363SPhilipp Reisner b->bm_pages = NULL; 628b411b363SPhilipp Reisner b->bm_number_of_pages = 629b411b363SPhilipp Reisner b->bm_set = 630b411b363SPhilipp Reisner b->bm_bits = 631b411b363SPhilipp Reisner b->bm_words = 632b411b363SPhilipp Reisner b->bm_dev_capacity = 0; 633b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 634b411b363SPhilipp Reisner bm_free_pages(opages, onpages); 635b411b363SPhilipp Reisner bm_vk_free(opages, opages_vmalloced); 636b411b363SPhilipp Reisner goto out; 637b411b363SPhilipp Reisner } 638b411b363SPhilipp Reisner bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); 639b411b363SPhilipp Reisner 640b411b363SPhilipp Reisner /* if we would use 641b411b363SPhilipp Reisner words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL; 642b411b363SPhilipp Reisner a 32bit host could present the wrong number of words 643b411b363SPhilipp Reisner to a 64bit host. 644b411b363SPhilipp Reisner */ 645b411b363SPhilipp Reisner words = ALIGN(bits, 64) >> LN2_BPL; 646b411b363SPhilipp Reisner 647b411b363SPhilipp Reisner if (get_ldev(mdev)) { 648b411b363SPhilipp Reisner D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); 649b411b363SPhilipp Reisner put_ldev(mdev); 650b411b363SPhilipp Reisner } 651b411b363SPhilipp Reisner 652b411b363SPhilipp Reisner /* one extra long to catch off by one errors */ 653b411b363SPhilipp Reisner want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; 654b411b363SPhilipp Reisner have = b->bm_number_of_pages; 655b411b363SPhilipp Reisner if (want == have) { 656b411b363SPhilipp Reisner D_ASSERT(b->bm_pages != NULL); 657b411b363SPhilipp Reisner npages = b->bm_pages; 658b411b363SPhilipp Reisner } else { 6590cf9d27eSAndreas Gruenbacher if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC)) 660b411b363SPhilipp Reisner npages = NULL; 661b411b363SPhilipp Reisner else 662b411b363SPhilipp Reisner npages = bm_realloc_pages(b, want); 663b411b363SPhilipp Reisner } 664b411b363SPhilipp Reisner 665b411b363SPhilipp Reisner if (!npages) { 666b411b363SPhilipp Reisner err = -ENOMEM; 667b411b363SPhilipp Reisner goto out; 668b411b363SPhilipp Reisner } 669b411b363SPhilipp Reisner 670b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 671b411b363SPhilipp Reisner opages = b->bm_pages; 672b411b363SPhilipp Reisner owords = b->bm_words; 673b411b363SPhilipp Reisner obits = b->bm_bits; 674b411b363SPhilipp Reisner 675b411b363SPhilipp Reisner growing = bits > obits; 6765223671bSPhilipp Reisner if (opages && growing && set_new_bits) 677b411b363SPhilipp Reisner bm_set_surplus(b); 678b411b363SPhilipp Reisner 679b411b363SPhilipp Reisner b->bm_pages = npages; 680b411b363SPhilipp Reisner b->bm_number_of_pages = want; 681b411b363SPhilipp Reisner b->bm_bits = bits; 682b411b363SPhilipp Reisner b->bm_words = words; 683b411b363SPhilipp Reisner b->bm_dev_capacity = capacity; 684b411b363SPhilipp Reisner 685b411b363SPhilipp Reisner if (growing) { 68602d9a94bSPhilipp Reisner if (set_new_bits) { 687b411b363SPhilipp Reisner bm_memset(b, owords, 0xff, words-owords); 688b411b363SPhilipp Reisner b->bm_set += bits - obits; 68902d9a94bSPhilipp Reisner } else 69002d9a94bSPhilipp Reisner bm_memset(b, owords, 0x00, words-owords); 69102d9a94bSPhilipp Reisner 692b411b363SPhilipp Reisner } 693b411b363SPhilipp Reisner 694b411b363SPhilipp Reisner if (want < have) { 695b411b363SPhilipp Reisner /* implicit: (opages != NULL) && (opages != npages) */ 696b411b363SPhilipp Reisner bm_free_pages(opages + want, have - want); 697b411b363SPhilipp Reisner } 698b411b363SPhilipp Reisner 699*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, words)); 700b411b363SPhilipp Reisner bm = p_addr + MLPP(words); 701b411b363SPhilipp Reisner *bm = DRBD_MAGIC; 702b411b363SPhilipp Reisner bm_unmap(p_addr); 703b411b363SPhilipp Reisner 704b411b363SPhilipp Reisner (void)bm_clear_surplus(b); 705b411b363SPhilipp Reisner 706b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 707b411b363SPhilipp Reisner if (opages != npages) 708b411b363SPhilipp Reisner bm_vk_free(opages, opages_vmalloced); 709b411b363SPhilipp Reisner if (!growing) 710b411b363SPhilipp Reisner b->bm_set = bm_count_bits(b); 711*19f843aaSLars Ellenberg dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); 712b411b363SPhilipp Reisner 713b411b363SPhilipp Reisner out: 714b411b363SPhilipp Reisner drbd_bm_unlock(mdev); 715b411b363SPhilipp Reisner return err; 716b411b363SPhilipp Reisner } 717b411b363SPhilipp Reisner 718b411b363SPhilipp Reisner /* inherently racy: 719b411b363SPhilipp Reisner * if not protected by other means, return value may be out of date when 720b411b363SPhilipp Reisner * leaving this function... 721b411b363SPhilipp Reisner * we still need to lock it, since it is important that this returns 722b411b363SPhilipp Reisner * bm_set == 0 precisely. 723b411b363SPhilipp Reisner * 724b411b363SPhilipp Reisner * maybe bm_set should be atomic_t ? 725b411b363SPhilipp Reisner */ 7260778286aSPhilipp Reisner unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) 727b411b363SPhilipp Reisner { 728b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 729b411b363SPhilipp Reisner unsigned long s; 730b411b363SPhilipp Reisner unsigned long flags; 731b411b363SPhilipp Reisner 732b411b363SPhilipp Reisner ERR_IF(!b) return 0; 733b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 734b411b363SPhilipp Reisner 735b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 736b411b363SPhilipp Reisner s = b->bm_set; 737b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 738b411b363SPhilipp Reisner 739b411b363SPhilipp Reisner return s; 740b411b363SPhilipp Reisner } 741b411b363SPhilipp Reisner 742b411b363SPhilipp Reisner unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) 743b411b363SPhilipp Reisner { 744b411b363SPhilipp Reisner unsigned long s; 745b411b363SPhilipp Reisner /* if I don't have a disk, I don't know about out-of-sync status */ 746b411b363SPhilipp Reisner if (!get_ldev_if_state(mdev, D_NEGOTIATING)) 747b411b363SPhilipp Reisner return 0; 748b411b363SPhilipp Reisner s = _drbd_bm_total_weight(mdev); 749b411b363SPhilipp Reisner put_ldev(mdev); 750b411b363SPhilipp Reisner return s; 751b411b363SPhilipp Reisner } 752b411b363SPhilipp Reisner 753b411b363SPhilipp Reisner size_t drbd_bm_words(struct drbd_conf *mdev) 754b411b363SPhilipp Reisner { 755b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 756b411b363SPhilipp Reisner ERR_IF(!b) return 0; 757b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 758b411b363SPhilipp Reisner 759b411b363SPhilipp Reisner return b->bm_words; 760b411b363SPhilipp Reisner } 761b411b363SPhilipp Reisner 762b411b363SPhilipp Reisner unsigned long drbd_bm_bits(struct drbd_conf *mdev) 763b411b363SPhilipp Reisner { 764b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 765b411b363SPhilipp Reisner ERR_IF(!b) return 0; 766b411b363SPhilipp Reisner 767b411b363SPhilipp Reisner return b->bm_bits; 768b411b363SPhilipp Reisner } 769b411b363SPhilipp Reisner 770b411b363SPhilipp Reisner /* merge number words from buffer into the bitmap starting at offset. 771b411b363SPhilipp Reisner * buffer[i] is expected to be little endian unsigned long. 772b411b363SPhilipp Reisner * bitmap must be locked by drbd_bm_lock. 773b411b363SPhilipp Reisner * currently only used from receive_bitmap. 774b411b363SPhilipp Reisner */ 775b411b363SPhilipp Reisner void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, 776b411b363SPhilipp Reisner unsigned long *buffer) 777b411b363SPhilipp Reisner { 778b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 779b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 780b411b363SPhilipp Reisner unsigned long word, bits; 781*19f843aaSLars Ellenberg unsigned int idx; 782b411b363SPhilipp Reisner size_t end, do_now; 783b411b363SPhilipp Reisner 784b411b363SPhilipp Reisner end = offset + number; 785b411b363SPhilipp Reisner 786b411b363SPhilipp Reisner ERR_IF(!b) return; 787b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 788b411b363SPhilipp Reisner if (number == 0) 789b411b363SPhilipp Reisner return; 790b411b363SPhilipp Reisner WARN_ON(offset >= b->bm_words); 791b411b363SPhilipp Reisner WARN_ON(end > b->bm_words); 792b411b363SPhilipp Reisner 793b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 794b411b363SPhilipp Reisner while (offset < end) { 795b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 796*19f843aaSLars Ellenberg idx = bm_word_to_page_idx(b, offset); 797*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, idx); 798b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 799b411b363SPhilipp Reisner offset += do_now; 800b411b363SPhilipp Reisner while (do_now--) { 801b411b363SPhilipp Reisner bits = hweight_long(*bm); 80295a0f10cSLars Ellenberg word = *bm | *buffer++; 803b411b363SPhilipp Reisner *bm++ = word; 804b411b363SPhilipp Reisner b->bm_set += hweight_long(word) - bits; 805b411b363SPhilipp Reisner } 806b411b363SPhilipp Reisner bm_unmap(p_addr); 807*19f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[idx]); 808b411b363SPhilipp Reisner } 809b411b363SPhilipp Reisner /* with 32bit <-> 64bit cross-platform connect 810b411b363SPhilipp Reisner * this is only correct for current usage, 811b411b363SPhilipp Reisner * where we _know_ that we are 64 bit aligned, 812b411b363SPhilipp Reisner * and know that this function is used in this way, too... 813b411b363SPhilipp Reisner */ 814b411b363SPhilipp Reisner if (end == b->bm_words) 815b411b363SPhilipp Reisner b->bm_set -= bm_clear_surplus(b); 816b411b363SPhilipp Reisner 817b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 818b411b363SPhilipp Reisner } 819b411b363SPhilipp Reisner 820b411b363SPhilipp Reisner /* copy number words from the bitmap starting at offset into the buffer. 821b411b363SPhilipp Reisner * buffer[i] will be little endian unsigned long. 822b411b363SPhilipp Reisner */ 823b411b363SPhilipp Reisner void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, 824b411b363SPhilipp Reisner unsigned long *buffer) 825b411b363SPhilipp Reisner { 826b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 827b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 828b411b363SPhilipp Reisner size_t end, do_now; 829b411b363SPhilipp Reisner 830b411b363SPhilipp Reisner end = offset + number; 831b411b363SPhilipp Reisner 832b411b363SPhilipp Reisner ERR_IF(!b) return; 833b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 834b411b363SPhilipp Reisner 835b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 836b411b363SPhilipp Reisner if ((offset >= b->bm_words) || 837b411b363SPhilipp Reisner (end > b->bm_words) || 838b411b363SPhilipp Reisner (number <= 0)) 839b411b363SPhilipp Reisner dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n", 840b411b363SPhilipp Reisner (unsigned long) offset, 841b411b363SPhilipp Reisner (unsigned long) number, 842b411b363SPhilipp Reisner (unsigned long) b->bm_words); 843b411b363SPhilipp Reisner else { 844b411b363SPhilipp Reisner while (offset < end) { 845b411b363SPhilipp Reisner do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; 846*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset)); 847b411b363SPhilipp Reisner bm = p_addr + MLPP(offset); 848b411b363SPhilipp Reisner offset += do_now; 849b411b363SPhilipp Reisner while (do_now--) 85095a0f10cSLars Ellenberg *buffer++ = *bm++; 851b411b363SPhilipp Reisner bm_unmap(p_addr); 852b411b363SPhilipp Reisner } 853b411b363SPhilipp Reisner } 854b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 855b411b363SPhilipp Reisner } 856b411b363SPhilipp Reisner 857b411b363SPhilipp Reisner /* set all bits in the bitmap */ 858b411b363SPhilipp Reisner void drbd_bm_set_all(struct drbd_conf *mdev) 859b411b363SPhilipp Reisner { 860b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 861b411b363SPhilipp Reisner ERR_IF(!b) return; 862b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 863b411b363SPhilipp Reisner 864b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 865b411b363SPhilipp Reisner bm_memset(b, 0, 0xff, b->bm_words); 866b411b363SPhilipp Reisner (void)bm_clear_surplus(b); 867b411b363SPhilipp Reisner b->bm_set = b->bm_bits; 868b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 869b411b363SPhilipp Reisner } 870b411b363SPhilipp Reisner 871b411b363SPhilipp Reisner /* clear all bits in the bitmap */ 872b411b363SPhilipp Reisner void drbd_bm_clear_all(struct drbd_conf *mdev) 873b411b363SPhilipp Reisner { 874b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 875b411b363SPhilipp Reisner ERR_IF(!b) return; 876b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return; 877b411b363SPhilipp Reisner 878b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 879b411b363SPhilipp Reisner bm_memset(b, 0, 0, b->bm_words); 880b411b363SPhilipp Reisner b->bm_set = 0; 881b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 882b411b363SPhilipp Reisner } 883b411b363SPhilipp Reisner 884*19f843aaSLars Ellenberg struct bm_aio_ctx { 885*19f843aaSLars Ellenberg struct drbd_conf *mdev; 886*19f843aaSLars Ellenberg atomic_t in_flight; 887*19f843aaSLars Ellenberg wait_queue_head_t io_wait; 888*19f843aaSLars Ellenberg unsigned flags; 889*19f843aaSLars Ellenberg #define BM_AIO_COPY_PAGES 1 890*19f843aaSLars Ellenberg int error; 891*19f843aaSLars Ellenberg }; 892*19f843aaSLars Ellenberg 893*19f843aaSLars Ellenberg /* bv_page may be a copy, or may be the original */ 894b411b363SPhilipp Reisner static void bm_async_io_complete(struct bio *bio, int error) 895b411b363SPhilipp Reisner { 896*19f843aaSLars Ellenberg struct bm_aio_ctx *ctx = bio->bi_private; 897*19f843aaSLars Ellenberg struct drbd_conf *mdev = ctx->mdev; 898*19f843aaSLars Ellenberg struct drbd_bitmap *b = mdev->bitmap; 899*19f843aaSLars Ellenberg unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); 900b411b363SPhilipp Reisner int uptodate = bio_flagged(bio, BIO_UPTODATE); 901b411b363SPhilipp Reisner 902b411b363SPhilipp Reisner 903b411b363SPhilipp Reisner /* strange behavior of some lower level drivers... 904b411b363SPhilipp Reisner * fail the request by clearing the uptodate flag, 905b411b363SPhilipp Reisner * but do not return any error?! 906b411b363SPhilipp Reisner * do we want to WARN() on this? */ 907b411b363SPhilipp Reisner if (!error && !uptodate) 908b411b363SPhilipp Reisner error = -EIO; 909b411b363SPhilipp Reisner 910*19f843aaSLars Ellenberg if (!bm_test_page_unchanged(b->bm_pages[idx])) 911*19f843aaSLars Ellenberg dev_info(DEV, "bitmap page idx %u changed during IO!\n", idx); 912*19f843aaSLars Ellenberg 913b411b363SPhilipp Reisner if (error) { 914*19f843aaSLars Ellenberg /* ctx error will hold the completed-last non-zero error code, 915*19f843aaSLars Ellenberg * in case error codes differ. */ 916*19f843aaSLars Ellenberg ctx->error = error; 917*19f843aaSLars Ellenberg bm_set_page_io_err(b->bm_pages[idx]); 918*19f843aaSLars Ellenberg /* Not identical to on disk version of it. 919*19f843aaSLars Ellenberg * Is BM_PAGE_IO_ERROR enough? */ 920*19f843aaSLars Ellenberg if (__ratelimit(&drbd_ratelimit_state)) 921*19f843aaSLars Ellenberg dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", 922*19f843aaSLars Ellenberg error, idx); 923*19f843aaSLars Ellenberg } else { 924*19f843aaSLars Ellenberg bm_clear_page_io_err(b->bm_pages[idx]); 925*19f843aaSLars Ellenberg dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); 926b411b363SPhilipp Reisner } 927*19f843aaSLars Ellenberg 928*19f843aaSLars Ellenberg bm_page_unlock_io(mdev, idx); 929*19f843aaSLars Ellenberg 930*19f843aaSLars Ellenberg /* FIXME give back to page pool */ 931*19f843aaSLars Ellenberg if (ctx->flags & BM_AIO_COPY_PAGES) 932*19f843aaSLars Ellenberg put_page(bio->bi_io_vec[0].bv_page); 933b411b363SPhilipp Reisner 934b411b363SPhilipp Reisner bio_put(bio); 935*19f843aaSLars Ellenberg 936*19f843aaSLars Ellenberg if (atomic_dec_and_test(&ctx->in_flight)) 937*19f843aaSLars Ellenberg wake_up(&ctx->io_wait); 938b411b363SPhilipp Reisner } 939b411b363SPhilipp Reisner 940*19f843aaSLars Ellenberg static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) 941b411b363SPhilipp Reisner { 942b411b363SPhilipp Reisner /* we are process context. we always get a bio */ 943b411b363SPhilipp Reisner struct bio *bio = bio_alloc(GFP_KERNEL, 1); 944*19f843aaSLars Ellenberg struct drbd_conf *mdev = ctx->mdev; 945*19f843aaSLars Ellenberg struct drbd_bitmap *b = mdev->bitmap; 946*19f843aaSLars Ellenberg struct page *page; 947b411b363SPhilipp Reisner unsigned int len; 948*19f843aaSLars Ellenberg 949b411b363SPhilipp Reisner sector_t on_disk_sector = 950b411b363SPhilipp Reisner mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; 951b411b363SPhilipp Reisner on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); 952b411b363SPhilipp Reisner 953b411b363SPhilipp Reisner /* this might happen with very small 954*19f843aaSLars Ellenberg * flexible external meta data device, 955*19f843aaSLars Ellenberg * or with PAGE_SIZE > 4k */ 956b411b363SPhilipp Reisner len = min_t(unsigned int, PAGE_SIZE, 957b411b363SPhilipp Reisner (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); 958b411b363SPhilipp Reisner 959*19f843aaSLars Ellenberg /* serialize IO on this page */ 960*19f843aaSLars Ellenberg bm_page_lock_io(mdev, page_nr); 961*19f843aaSLars Ellenberg /* before memcpy and submit, 962*19f843aaSLars Ellenberg * so it can be redirtied any time */ 963*19f843aaSLars Ellenberg bm_set_page_unchanged(b->bm_pages[page_nr]); 964*19f843aaSLars Ellenberg 965*19f843aaSLars Ellenberg if (ctx->flags & BM_AIO_COPY_PAGES) { 966*19f843aaSLars Ellenberg /* FIXME alloc_page is good enough for now, but actually needs 967*19f843aaSLars Ellenberg * to use pre-allocated page pool */ 968*19f843aaSLars Ellenberg void *src, *dest; 969*19f843aaSLars Ellenberg page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); 970*19f843aaSLars Ellenberg dest = kmap_atomic(page, KM_USER0); 971*19f843aaSLars Ellenberg src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); 972*19f843aaSLars Ellenberg memcpy(dest, src, PAGE_SIZE); 973*19f843aaSLars Ellenberg kunmap_atomic(src, KM_USER1); 974*19f843aaSLars Ellenberg kunmap_atomic(dest, KM_USER0); 975*19f843aaSLars Ellenberg bm_store_page_idx(page, page_nr); 976*19f843aaSLars Ellenberg } else 977*19f843aaSLars Ellenberg page = b->bm_pages[page_nr]; 978*19f843aaSLars Ellenberg 979b411b363SPhilipp Reisner bio->bi_bdev = mdev->ldev->md_bdev; 980b411b363SPhilipp Reisner bio->bi_sector = on_disk_sector; 981*19f843aaSLars Ellenberg bio_add_page(bio, page, len, 0); 982*19f843aaSLars Ellenberg bio->bi_private = ctx; 983b411b363SPhilipp Reisner bio->bi_end_io = bm_async_io_complete; 984b411b363SPhilipp Reisner 9850cf9d27eSAndreas Gruenbacher if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { 986b411b363SPhilipp Reisner bio->bi_rw |= rw; 987b411b363SPhilipp Reisner bio_endio(bio, -EIO); 988b411b363SPhilipp Reisner } else { 989b411b363SPhilipp Reisner submit_bio(rw, bio); 990b411b363SPhilipp Reisner } 991b411b363SPhilipp Reisner } 992b411b363SPhilipp Reisner 993b411b363SPhilipp Reisner /* 994b411b363SPhilipp Reisner * bm_rw: read/write the whole bitmap from/to its on disk location. 995b411b363SPhilipp Reisner */ 996*19f843aaSLars Ellenberg static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) 997b411b363SPhilipp Reisner { 998*19f843aaSLars Ellenberg struct bm_aio_ctx ctx = 999*19f843aaSLars Ellenberg { .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0 }; 1000b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1001*19f843aaSLars Ellenberg int last_page, i, count = 0; 1002b411b363SPhilipp Reisner unsigned long now; 1003b411b363SPhilipp Reisner char ppb[10]; 1004b411b363SPhilipp Reisner int err = 0; 1005b411b363SPhilipp Reisner 1006*19f843aaSLars Ellenberg /* 1007*19f843aaSLars Ellenberg * We are protected against bitmap disappearing/resizing by holding an 1008*19f843aaSLars Ellenberg * ldev reference (caller must have called get_ldev()). 1009*19f843aaSLars Ellenberg * For read/write, we are protected against changes to the bitmap by 1010*19f843aaSLars Ellenberg * the bitmap lock (see drbd_bitmap_io). 1011*19f843aaSLars Ellenberg * For lazy writeout, we don't care for ongoing changes to the bitmap, 1012*19f843aaSLars Ellenberg * as we submit copies of pages anyways. 1013*19f843aaSLars Ellenberg */ 1014*19f843aaSLars Ellenberg if (!ctx.flags) 1015b411b363SPhilipp Reisner WARN_ON(!bm_is_locked(b)); 1016b411b363SPhilipp Reisner 1017*19f843aaSLars Ellenberg /* because of the "extra long to catch oob access" we allocate in 1018*19f843aaSLars Ellenberg * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page 1019*19f843aaSLars Ellenberg * containing the last _relevant_ bitmap word */ 1020*19f843aaSLars Ellenberg last_page = bm_word_to_page_idx(b, b->bm_words - 1); 1021b411b363SPhilipp Reisner 1022b411b363SPhilipp Reisner now = jiffies; 1023*19f843aaSLars Ellenberg ctx.mdev = mdev; 1024*19f843aaSLars Ellenberg atomic_set(&ctx.in_flight, 1); /* one extra ref */ 1025*19f843aaSLars Ellenberg init_waitqueue_head(&ctx.io_wait); 1026*19f843aaSLars Ellenberg ctx.error = 0; 1027b411b363SPhilipp Reisner 1028b411b363SPhilipp Reisner /* let the layers below us try to merge these bios... */ 1029*19f843aaSLars Ellenberg for (i = 0; i <= last_page; i++) { 1030*19f843aaSLars Ellenberg /* ignore completely unchanged pages */ 1031*19f843aaSLars Ellenberg if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) 1032*19f843aaSLars Ellenberg break; 1033*19f843aaSLars Ellenberg if (rw & WRITE) { 1034*19f843aaSLars Ellenberg if (bm_test_page_unchanged(b->bm_pages[i])) { 1035*19f843aaSLars Ellenberg dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); 1036*19f843aaSLars Ellenberg continue; 1037*19f843aaSLars Ellenberg } 1038*19f843aaSLars Ellenberg /* during lazy writeout, 1039*19f843aaSLars Ellenberg * ignore those pages not marked for lazy writeout. */ 1040*19f843aaSLars Ellenberg if (lazy_writeout_upper_idx && 1041*19f843aaSLars Ellenberg !bm_test_page_lazy_writeout(b->bm_pages[i])) { 1042*19f843aaSLars Ellenberg dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); 1043*19f843aaSLars Ellenberg continue; 1044*19f843aaSLars Ellenberg } 1045*19f843aaSLars Ellenberg } 1046*19f843aaSLars Ellenberg atomic_inc(&ctx.in_flight); 1047*19f843aaSLars Ellenberg bm_page_io_async(&ctx, i, rw); 1048*19f843aaSLars Ellenberg ++count; 1049*19f843aaSLars Ellenberg cond_resched(); 1050*19f843aaSLars Ellenberg } 1051b411b363SPhilipp Reisner 1052*19f843aaSLars Ellenberg atomic_dec(&ctx.in_flight); /* drop the extra ref */ 1053*19f843aaSLars Ellenberg wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0); 1054*19f843aaSLars Ellenberg dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", 1055*19f843aaSLars Ellenberg rw == WRITE ? "WRITE" : "READ", 1056*19f843aaSLars Ellenberg count, jiffies - now); 1057b411b363SPhilipp Reisner 1058*19f843aaSLars Ellenberg if (ctx.error) { 1059b411b363SPhilipp Reisner dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); 106081e84650SAndreas Gruenbacher drbd_chk_io_error(mdev, 1, true); 1061*19f843aaSLars Ellenberg err = -EIO; /* ctx.error ? */ 1062b411b363SPhilipp Reisner } 1063b411b363SPhilipp Reisner 1064b411b363SPhilipp Reisner now = jiffies; 1065b411b363SPhilipp Reisner if (rw == WRITE) { 1066b411b363SPhilipp Reisner drbd_md_flush(mdev); 1067b411b363SPhilipp Reisner } else /* rw == READ */ { 106895a0f10cSLars Ellenberg b->bm_set = bm_count_bits(b); 1069b411b363SPhilipp Reisner dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", 1070b411b363SPhilipp Reisner jiffies - now); 1071b411b363SPhilipp Reisner } 1072b411b363SPhilipp Reisner now = b->bm_set; 1073b411b363SPhilipp Reisner 1074b411b363SPhilipp Reisner dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", 1075b411b363SPhilipp Reisner ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); 1076b411b363SPhilipp Reisner 1077b411b363SPhilipp Reisner return err; 1078b411b363SPhilipp Reisner } 1079b411b363SPhilipp Reisner 1080b411b363SPhilipp Reisner /** 1081b411b363SPhilipp Reisner * drbd_bm_read() - Read the whole bitmap from its on disk location. 1082b411b363SPhilipp Reisner * @mdev: DRBD device. 1083b411b363SPhilipp Reisner */ 1084b411b363SPhilipp Reisner int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) 1085b411b363SPhilipp Reisner { 1086*19f843aaSLars Ellenberg return bm_rw(mdev, READ, 0); 1087b411b363SPhilipp Reisner } 1088b411b363SPhilipp Reisner 1089b411b363SPhilipp Reisner /** 1090b411b363SPhilipp Reisner * drbd_bm_write() - Write the whole bitmap to its on disk location. 1091b411b363SPhilipp Reisner * @mdev: DRBD device. 1092*19f843aaSLars Ellenberg * 1093*19f843aaSLars Ellenberg * Will only write pages that have changed since last IO. 1094b411b363SPhilipp Reisner */ 1095b411b363SPhilipp Reisner int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) 1096b411b363SPhilipp Reisner { 1097*19f843aaSLars Ellenberg return bm_rw(mdev, WRITE, 0); 1098b411b363SPhilipp Reisner } 1099b411b363SPhilipp Reisner 1100b411b363SPhilipp Reisner /** 1101*19f843aaSLars Ellenberg * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. 1102b411b363SPhilipp Reisner * @mdev: DRBD device. 1103*19f843aaSLars Ellenberg * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages 1104b411b363SPhilipp Reisner */ 1105*19f843aaSLars Ellenberg int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) 1106b411b363SPhilipp Reisner { 1107*19f843aaSLars Ellenberg return bm_rw(mdev, WRITE, upper_idx); 1108b411b363SPhilipp Reisner } 1109*19f843aaSLars Ellenberg 1110*19f843aaSLars Ellenberg 1111*19f843aaSLars Ellenberg /** 1112*19f843aaSLars Ellenberg * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap 1113*19f843aaSLars Ellenberg * @mdev: DRBD device. 1114*19f843aaSLars Ellenberg * @idx: bitmap page index 1115*19f843aaSLars Ellenberg * 1116*19f843aaSLars Ellenberg * We don't want to special case on logical_block_size of the underlaying 1117*19f843aaSLars Ellenberg * device, so we submit PAGE_SIZE aligned pieces containing the requested enr. 1118*19f843aaSLars Ellenberg * Note that on "most" systems, PAGE_SIZE is 4k. 1119*19f843aaSLars Ellenberg */ 1120*19f843aaSLars Ellenberg int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) 1121*19f843aaSLars Ellenberg { 1122*19f843aaSLars Ellenberg struct bm_aio_ctx ctx = { .flags = BM_AIO_COPY_PAGES, }; 1123*19f843aaSLars Ellenberg 1124*19f843aaSLars Ellenberg if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { 1125*19f843aaSLars Ellenberg dev_info(DEV, "skipped bm page write for idx %u\n", idx); 1126*19f843aaSLars Ellenberg return 0; 1127*19f843aaSLars Ellenberg } 1128*19f843aaSLars Ellenberg 1129*19f843aaSLars Ellenberg ctx.mdev = mdev; 1130*19f843aaSLars Ellenberg atomic_set(&ctx.in_flight, 1); 1131*19f843aaSLars Ellenberg init_waitqueue_head(&ctx.io_wait); 1132*19f843aaSLars Ellenberg 1133*19f843aaSLars Ellenberg bm_page_io_async(&ctx, idx, WRITE_SYNC); 1134*19f843aaSLars Ellenberg wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0); 1135*19f843aaSLars Ellenberg 1136*19f843aaSLars Ellenberg if (ctx.error) 1137*19f843aaSLars Ellenberg drbd_chk_io_error(mdev, 1, true); 1138*19f843aaSLars Ellenberg /* that should force detach, so the in memory bitmap will be 1139*19f843aaSLars Ellenberg * gone in a moment as well. */ 1140*19f843aaSLars Ellenberg 1141b411b363SPhilipp Reisner mdev->bm_writ_cnt++; 1142*19f843aaSLars Ellenberg return ctx.error; 1143b411b363SPhilipp Reisner } 1144b411b363SPhilipp Reisner 1145b411b363SPhilipp Reisner /* NOTE 1146b411b363SPhilipp Reisner * find_first_bit returns int, we return unsigned long. 1147b411b363SPhilipp Reisner * should not make much difference anyways, but ... 1148b411b363SPhilipp Reisner * 1149b411b363SPhilipp Reisner * this returns a bit number, NOT a sector! 1150b411b363SPhilipp Reisner */ 1151b411b363SPhilipp Reisner #define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) 1152b411b363SPhilipp Reisner static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, 1153b411b363SPhilipp Reisner const int find_zero_bit, const enum km_type km) 1154b411b363SPhilipp Reisner { 1155b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1156b411b363SPhilipp Reisner unsigned long i = -1UL; 1157b411b363SPhilipp Reisner unsigned long *p_addr; 1158b411b363SPhilipp Reisner unsigned long bit_offset; /* bit offset of the mapped page. */ 1159b411b363SPhilipp Reisner 1160b411b363SPhilipp Reisner if (bm_fo > b->bm_bits) { 1161b411b363SPhilipp Reisner dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 1162b411b363SPhilipp Reisner } else { 1163b411b363SPhilipp Reisner while (bm_fo < b->bm_bits) { 1164*19f843aaSLars Ellenberg /* bit offset of the first bit in the page */ 1165*19f843aaSLars Ellenberg bit_offset = bm_fo & ~BPP_MASK; 1166*19f843aaSLars Ellenberg p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); 1167b411b363SPhilipp Reisner 1168b411b363SPhilipp Reisner if (find_zero_bit) 116995a0f10cSLars Ellenberg i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1170b411b363SPhilipp Reisner else 117195a0f10cSLars Ellenberg i = generic_find_next_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1172b411b363SPhilipp Reisner 1173b411b363SPhilipp Reisner __bm_unmap(p_addr, km); 1174b411b363SPhilipp Reisner if (i < PAGE_SIZE*8) { 1175b411b363SPhilipp Reisner i = bit_offset + i; 1176b411b363SPhilipp Reisner if (i >= b->bm_bits) 1177b411b363SPhilipp Reisner break; 1178b411b363SPhilipp Reisner goto found; 1179b411b363SPhilipp Reisner } 1180b411b363SPhilipp Reisner bm_fo = bit_offset + PAGE_SIZE*8; 1181b411b363SPhilipp Reisner } 1182b411b363SPhilipp Reisner i = -1UL; 1183b411b363SPhilipp Reisner } 1184b411b363SPhilipp Reisner found: 1185b411b363SPhilipp Reisner return i; 1186b411b363SPhilipp Reisner } 1187b411b363SPhilipp Reisner 1188b411b363SPhilipp Reisner static unsigned long bm_find_next(struct drbd_conf *mdev, 1189b411b363SPhilipp Reisner unsigned long bm_fo, const int find_zero_bit) 1190b411b363SPhilipp Reisner { 1191b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1192b411b363SPhilipp Reisner unsigned long i = -1UL; 1193b411b363SPhilipp Reisner 1194b411b363SPhilipp Reisner ERR_IF(!b) return i; 1195b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return i; 1196b411b363SPhilipp Reisner 1197b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 1198b411b363SPhilipp Reisner if (bm_is_locked(b)) 1199b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1200b411b363SPhilipp Reisner 1201b411b363SPhilipp Reisner i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); 1202b411b363SPhilipp Reisner 1203b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 1204b411b363SPhilipp Reisner return i; 1205b411b363SPhilipp Reisner } 1206b411b363SPhilipp Reisner 1207b411b363SPhilipp Reisner unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 1208b411b363SPhilipp Reisner { 1209b411b363SPhilipp Reisner return bm_find_next(mdev, bm_fo, 0); 1210b411b363SPhilipp Reisner } 1211b411b363SPhilipp Reisner 1212b411b363SPhilipp Reisner #if 0 1213b411b363SPhilipp Reisner /* not yet needed for anything. */ 1214b411b363SPhilipp Reisner unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 1215b411b363SPhilipp Reisner { 1216b411b363SPhilipp Reisner return bm_find_next(mdev, bm_fo, 1); 1217b411b363SPhilipp Reisner } 1218b411b363SPhilipp Reisner #endif 1219b411b363SPhilipp Reisner 1220b411b363SPhilipp Reisner /* does not spin_lock_irqsave. 1221b411b363SPhilipp Reisner * you must take drbd_bm_lock() first */ 1222b411b363SPhilipp Reisner unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 1223b411b363SPhilipp Reisner { 1224b411b363SPhilipp Reisner /* WARN_ON(!bm_is_locked(mdev)); */ 1225b411b363SPhilipp Reisner return __bm_find_next(mdev, bm_fo, 0, KM_USER1); 1226b411b363SPhilipp Reisner } 1227b411b363SPhilipp Reisner 1228b411b363SPhilipp Reisner unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 1229b411b363SPhilipp Reisner { 1230b411b363SPhilipp Reisner /* WARN_ON(!bm_is_locked(mdev)); */ 1231b411b363SPhilipp Reisner return __bm_find_next(mdev, bm_fo, 1, KM_USER1); 1232b411b363SPhilipp Reisner } 1233b411b363SPhilipp Reisner 1234b411b363SPhilipp Reisner /* returns number of bits actually changed. 1235b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1236b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1237b411b363SPhilipp Reisner * wants bitnr, not sector. 1238b411b363SPhilipp Reisner * expected to be called for only a few bits (e - s about BITS_PER_LONG). 1239b411b363SPhilipp Reisner * Must hold bitmap lock already. */ 1240b4ee79daSPhilipp Reisner static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 1241b411b363SPhilipp Reisner unsigned long e, int val, const enum km_type km) 1242b411b363SPhilipp Reisner { 1243b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1244b411b363SPhilipp Reisner unsigned long *p_addr = NULL; 1245b411b363SPhilipp Reisner unsigned long bitnr; 1246*19f843aaSLars Ellenberg unsigned int last_page_nr = -1U; 1247b411b363SPhilipp Reisner int c = 0; 1248*19f843aaSLars Ellenberg int changed_total = 0; 1249b411b363SPhilipp Reisner 1250b411b363SPhilipp Reisner if (e >= b->bm_bits) { 1251b411b363SPhilipp Reisner dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", 1252b411b363SPhilipp Reisner s, e, b->bm_bits); 1253b411b363SPhilipp Reisner e = b->bm_bits ? b->bm_bits -1 : 0; 1254b411b363SPhilipp Reisner } 1255b411b363SPhilipp Reisner for (bitnr = s; bitnr <= e; bitnr++) { 1256*19f843aaSLars Ellenberg unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); 1257b411b363SPhilipp Reisner if (page_nr != last_page_nr) { 1258b411b363SPhilipp Reisner if (p_addr) 1259b411b363SPhilipp Reisner __bm_unmap(p_addr, km); 1260*19f843aaSLars Ellenberg if (c < 0) 1261*19f843aaSLars Ellenberg bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); 1262*19f843aaSLars Ellenberg else if (c > 0) 1263*19f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[last_page_nr]); 1264*19f843aaSLars Ellenberg changed_total += c; 1265*19f843aaSLars Ellenberg c = 0; 1266*19f843aaSLars Ellenberg p_addr = __bm_map_pidx(b, page_nr, km); 1267b411b363SPhilipp Reisner last_page_nr = page_nr; 1268b411b363SPhilipp Reisner } 1269b411b363SPhilipp Reisner if (val) 127095a0f10cSLars Ellenberg c += (0 == generic___test_and_set_le_bit(bitnr & BPP_MASK, p_addr)); 1271b411b363SPhilipp Reisner else 127295a0f10cSLars Ellenberg c -= (0 != generic___test_and_clear_le_bit(bitnr & BPP_MASK, p_addr)); 1273b411b363SPhilipp Reisner } 1274b411b363SPhilipp Reisner if (p_addr) 1275b411b363SPhilipp Reisner __bm_unmap(p_addr, km); 1276*19f843aaSLars Ellenberg if (c < 0) 1277*19f843aaSLars Ellenberg bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); 1278*19f843aaSLars Ellenberg else if (c > 0) 1279*19f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[last_page_nr]); 1280*19f843aaSLars Ellenberg changed_total += c; 1281*19f843aaSLars Ellenberg b->bm_set += changed_total; 1282*19f843aaSLars Ellenberg return changed_total; 1283b411b363SPhilipp Reisner } 1284b411b363SPhilipp Reisner 1285b411b363SPhilipp Reisner /* returns number of bits actually changed. 1286b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1287b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1288b411b363SPhilipp Reisner * wants bitnr, not sector */ 1289b4ee79daSPhilipp Reisner static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 1290b411b363SPhilipp Reisner const unsigned long e, int val) 1291b411b363SPhilipp Reisner { 1292b411b363SPhilipp Reisner unsigned long flags; 1293b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1294b411b363SPhilipp Reisner int c = 0; 1295b411b363SPhilipp Reisner 1296b411b363SPhilipp Reisner ERR_IF(!b) return 1; 1297b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1298b411b363SPhilipp Reisner 1299b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1300b411b363SPhilipp Reisner if (bm_is_locked(b)) 1301b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1302b411b363SPhilipp Reisner 1303b411b363SPhilipp Reisner c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); 1304b411b363SPhilipp Reisner 1305b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1306b411b363SPhilipp Reisner return c; 1307b411b363SPhilipp Reisner } 1308b411b363SPhilipp Reisner 1309b411b363SPhilipp Reisner /* returns number of bits changed 0 -> 1 */ 1310b411b363SPhilipp Reisner int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1311b411b363SPhilipp Reisner { 1312b411b363SPhilipp Reisner return bm_change_bits_to(mdev, s, e, 1); 1313b411b363SPhilipp Reisner } 1314b411b363SPhilipp Reisner 1315b411b363SPhilipp Reisner /* returns number of bits changed 1 -> 0 */ 1316b411b363SPhilipp Reisner int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1317b411b363SPhilipp Reisner { 1318b411b363SPhilipp Reisner return -bm_change_bits_to(mdev, s, e, 0); 1319b411b363SPhilipp Reisner } 1320b411b363SPhilipp Reisner 1321b411b363SPhilipp Reisner /* sets all bits in full words, 1322b411b363SPhilipp Reisner * from first_word up to, but not including, last_word */ 1323b411b363SPhilipp Reisner static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, 1324b411b363SPhilipp Reisner int page_nr, int first_word, int last_word) 1325b411b363SPhilipp Reisner { 1326b411b363SPhilipp Reisner int i; 1327b411b363SPhilipp Reisner int bits; 1328b411b363SPhilipp Reisner unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); 1329b411b363SPhilipp Reisner for (i = first_word; i < last_word; i++) { 1330b411b363SPhilipp Reisner bits = hweight_long(paddr[i]); 1331b411b363SPhilipp Reisner paddr[i] = ~0UL; 1332b411b363SPhilipp Reisner b->bm_set += BITS_PER_LONG - bits; 1333b411b363SPhilipp Reisner } 1334b411b363SPhilipp Reisner kunmap_atomic(paddr, KM_USER0); 1335b411b363SPhilipp Reisner } 1336b411b363SPhilipp Reisner 1337b411b363SPhilipp Reisner /* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. 1338b411b363SPhilipp Reisner * You must first drbd_bm_lock(). 1339b411b363SPhilipp Reisner * Can be called to set the whole bitmap in one go. 1340b411b363SPhilipp Reisner * Sets bits from s to e _inclusive_. */ 1341b411b363SPhilipp Reisner void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1342b411b363SPhilipp Reisner { 1343b411b363SPhilipp Reisner /* First set_bit from the first bit (s) 1344b411b363SPhilipp Reisner * up to the next long boundary (sl), 1345b411b363SPhilipp Reisner * then assign full words up to the last long boundary (el), 1346b411b363SPhilipp Reisner * then set_bit up to and including the last bit (e). 1347b411b363SPhilipp Reisner * 1348b411b363SPhilipp Reisner * Do not use memset, because we must account for changes, 1349b411b363SPhilipp Reisner * so we need to loop over the words with hweight() anyways. 1350b411b363SPhilipp Reisner */ 1351b411b363SPhilipp Reisner unsigned long sl = ALIGN(s,BITS_PER_LONG); 1352b411b363SPhilipp Reisner unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); 1353b411b363SPhilipp Reisner int first_page; 1354b411b363SPhilipp Reisner int last_page; 1355b411b363SPhilipp Reisner int page_nr; 1356b411b363SPhilipp Reisner int first_word; 1357b411b363SPhilipp Reisner int last_word; 1358b411b363SPhilipp Reisner 1359b411b363SPhilipp Reisner if (e - s <= 3*BITS_PER_LONG) { 1360b411b363SPhilipp Reisner /* don't bother; el and sl may even be wrong. */ 1361b411b363SPhilipp Reisner __bm_change_bits_to(mdev, s, e, 1, KM_USER0); 1362b411b363SPhilipp Reisner return; 1363b411b363SPhilipp Reisner } 1364b411b363SPhilipp Reisner 1365b411b363SPhilipp Reisner /* difference is large enough that we can trust sl and el */ 1366b411b363SPhilipp Reisner 1367b411b363SPhilipp Reisner /* bits filling the current long */ 1368b411b363SPhilipp Reisner if (sl) 1369b411b363SPhilipp Reisner __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); 1370b411b363SPhilipp Reisner 1371b411b363SPhilipp Reisner first_page = sl >> (3 + PAGE_SHIFT); 1372b411b363SPhilipp Reisner last_page = el >> (3 + PAGE_SHIFT); 1373b411b363SPhilipp Reisner 1374b411b363SPhilipp Reisner /* MLPP: modulo longs per page */ 1375b411b363SPhilipp Reisner /* LWPP: long words per page */ 1376b411b363SPhilipp Reisner first_word = MLPP(sl >> LN2_BPL); 1377b411b363SPhilipp Reisner last_word = LWPP; 1378b411b363SPhilipp Reisner 1379b411b363SPhilipp Reisner /* first and full pages, unless first page == last page */ 1380b411b363SPhilipp Reisner for (page_nr = first_page; page_nr < last_page; page_nr++) { 1381b411b363SPhilipp Reisner bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); 1382b411b363SPhilipp Reisner cond_resched(); 1383b411b363SPhilipp Reisner first_word = 0; 1384b411b363SPhilipp Reisner } 1385b411b363SPhilipp Reisner 1386b411b363SPhilipp Reisner /* last page (respectively only page, for first page == last page) */ 1387b411b363SPhilipp Reisner last_word = MLPP(el >> LN2_BPL); 1388b411b363SPhilipp Reisner bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); 1389b411b363SPhilipp Reisner 1390b411b363SPhilipp Reisner /* possibly trailing bits. 1391b411b363SPhilipp Reisner * example: (e & 63) == 63, el will be e+1. 1392b411b363SPhilipp Reisner * if that even was the very last bit, 1393b411b363SPhilipp Reisner * it would trigger an assert in __bm_change_bits_to() 1394b411b363SPhilipp Reisner */ 1395b411b363SPhilipp Reisner if (el <= e) 1396b411b363SPhilipp Reisner __bm_change_bits_to(mdev, el, e, 1, KM_USER0); 1397b411b363SPhilipp Reisner } 1398b411b363SPhilipp Reisner 1399b411b363SPhilipp Reisner /* returns bit state 1400b411b363SPhilipp Reisner * wants bitnr, NOT sector. 1401b411b363SPhilipp Reisner * inherently racy... area needs to be locked by means of {al,rs}_lru 1402b411b363SPhilipp Reisner * 1 ... bit set 1403b411b363SPhilipp Reisner * 0 ... bit not set 1404b411b363SPhilipp Reisner * -1 ... first out of bounds access, stop testing for bits! 1405b411b363SPhilipp Reisner */ 1406b411b363SPhilipp Reisner int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) 1407b411b363SPhilipp Reisner { 1408b411b363SPhilipp Reisner unsigned long flags; 1409b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1410b411b363SPhilipp Reisner unsigned long *p_addr; 1411b411b363SPhilipp Reisner int i; 1412b411b363SPhilipp Reisner 1413b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1414b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1415b411b363SPhilipp Reisner 1416b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1417b411b363SPhilipp Reisner if (bm_is_locked(b)) 1418b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1419b411b363SPhilipp Reisner if (bitnr < b->bm_bits) { 1420*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); 142195a0f10cSLars Ellenberg i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; 1422b411b363SPhilipp Reisner bm_unmap(p_addr); 1423b411b363SPhilipp Reisner } else if (bitnr == b->bm_bits) { 1424b411b363SPhilipp Reisner i = -1; 1425b411b363SPhilipp Reisner } else { /* (bitnr > b->bm_bits) */ 1426b411b363SPhilipp Reisner dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); 1427b411b363SPhilipp Reisner i = 0; 1428b411b363SPhilipp Reisner } 1429b411b363SPhilipp Reisner 1430b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1431b411b363SPhilipp Reisner return i; 1432b411b363SPhilipp Reisner } 1433b411b363SPhilipp Reisner 1434b411b363SPhilipp Reisner /* returns number of bits set in the range [s, e] */ 1435b411b363SPhilipp Reisner int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) 1436b411b363SPhilipp Reisner { 1437b411b363SPhilipp Reisner unsigned long flags; 1438b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1439*19f843aaSLars Ellenberg unsigned long *p_addr = NULL; 1440b411b363SPhilipp Reisner unsigned long bitnr; 1441*19f843aaSLars Ellenberg unsigned int page_nr = -1U; 1442b411b363SPhilipp Reisner int c = 0; 1443b411b363SPhilipp Reisner 1444b411b363SPhilipp Reisner /* If this is called without a bitmap, that is a bug. But just to be 1445b411b363SPhilipp Reisner * robust in case we screwed up elsewhere, in that case pretend there 1446b411b363SPhilipp Reisner * was one dirty bit in the requested area, so we won't try to do a 1447b411b363SPhilipp Reisner * local read there (no bitmap probably implies no disk) */ 1448b411b363SPhilipp Reisner ERR_IF(!b) return 1; 1449b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 1; 1450b411b363SPhilipp Reisner 1451b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1452b411b363SPhilipp Reisner if (bm_is_locked(b)) 1453b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1454b411b363SPhilipp Reisner for (bitnr = s; bitnr <= e; bitnr++) { 1455*19f843aaSLars Ellenberg unsigned int idx = bm_bit_to_page_idx(b, bitnr); 1456*19f843aaSLars Ellenberg if (page_nr != idx) { 1457*19f843aaSLars Ellenberg page_nr = idx; 1458b411b363SPhilipp Reisner if (p_addr) 1459b411b363SPhilipp Reisner bm_unmap(p_addr); 1460*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, idx); 1461b411b363SPhilipp Reisner } 1462b411b363SPhilipp Reisner ERR_IF (bitnr >= b->bm_bits) { 1463b411b363SPhilipp Reisner dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); 1464b411b363SPhilipp Reisner } else { 146595a0f10cSLars Ellenberg c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); 1466b411b363SPhilipp Reisner } 1467b411b363SPhilipp Reisner } 1468b411b363SPhilipp Reisner if (p_addr) 1469b411b363SPhilipp Reisner bm_unmap(p_addr); 1470b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1471b411b363SPhilipp Reisner return c; 1472b411b363SPhilipp Reisner } 1473b411b363SPhilipp Reisner 1474b411b363SPhilipp Reisner 1475b411b363SPhilipp Reisner /* inherently racy... 1476b411b363SPhilipp Reisner * return value may be already out-of-date when this function returns. 1477b411b363SPhilipp Reisner * but the general usage is that this is only use during a cstate when bits are 1478b411b363SPhilipp Reisner * only cleared, not set, and typically only care for the case when the return 1479b411b363SPhilipp Reisner * value is zero, or we already "locked" this "bitmap extent" by other means. 1480b411b363SPhilipp Reisner * 1481b411b363SPhilipp Reisner * enr is bm-extent number, since we chose to name one sector (512 bytes) 1482b411b363SPhilipp Reisner * worth of the bitmap a "bitmap extent". 1483b411b363SPhilipp Reisner * 1484b411b363SPhilipp Reisner * TODO 1485b411b363SPhilipp Reisner * I think since we use it like a reference count, we should use the real 1486b411b363SPhilipp Reisner * reference count of some bitmap extent element from some lru instead... 1487b411b363SPhilipp Reisner * 1488b411b363SPhilipp Reisner */ 1489b411b363SPhilipp Reisner int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) 1490b411b363SPhilipp Reisner { 1491b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1492b411b363SPhilipp Reisner int count, s, e; 1493b411b363SPhilipp Reisner unsigned long flags; 1494b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 1495b411b363SPhilipp Reisner 1496b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1497b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1498b411b363SPhilipp Reisner 1499b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 1500b411b363SPhilipp Reisner if (bm_is_locked(b)) 1501b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1502b411b363SPhilipp Reisner 1503b411b363SPhilipp Reisner s = S2W(enr); 1504b411b363SPhilipp Reisner e = min((size_t)S2W(enr+1), b->bm_words); 1505b411b363SPhilipp Reisner count = 0; 1506b411b363SPhilipp Reisner if (s < b->bm_words) { 1507b411b363SPhilipp Reisner int n = e-s; 1508*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); 1509b411b363SPhilipp Reisner bm = p_addr + MLPP(s); 1510b411b363SPhilipp Reisner while (n--) 1511b411b363SPhilipp Reisner count += hweight_long(*bm++); 1512b411b363SPhilipp Reisner bm_unmap(p_addr); 1513b411b363SPhilipp Reisner } else { 1514b411b363SPhilipp Reisner dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); 1515b411b363SPhilipp Reisner } 1516b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1517b411b363SPhilipp Reisner return count; 1518b411b363SPhilipp Reisner } 1519b411b363SPhilipp Reisner 1520b411b363SPhilipp Reisner /* set all bits covered by the AL-extent al_enr */ 1521b411b363SPhilipp Reisner unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) 1522b411b363SPhilipp Reisner { 1523b411b363SPhilipp Reisner struct drbd_bitmap *b = mdev->bitmap; 1524b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 1525b411b363SPhilipp Reisner unsigned long weight; 1526b411b363SPhilipp Reisner int count, s, e, i, do_now; 1527b411b363SPhilipp Reisner ERR_IF(!b) return 0; 1528b411b363SPhilipp Reisner ERR_IF(!b->bm_pages) return 0; 1529b411b363SPhilipp Reisner 1530b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 1531b411b363SPhilipp Reisner if (bm_is_locked(b)) 1532b411b363SPhilipp Reisner bm_print_lock_info(mdev); 1533b411b363SPhilipp Reisner weight = b->bm_set; 1534b411b363SPhilipp Reisner 1535b411b363SPhilipp Reisner s = al_enr * BM_WORDS_PER_AL_EXT; 1536b411b363SPhilipp Reisner e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); 1537b411b363SPhilipp Reisner /* assert that s and e are on the same page */ 1538b411b363SPhilipp Reisner D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) 1539b411b363SPhilipp Reisner == s >> (PAGE_SHIFT - LN2_BPL + 3)); 1540b411b363SPhilipp Reisner count = 0; 1541b411b363SPhilipp Reisner if (s < b->bm_words) { 1542b411b363SPhilipp Reisner i = do_now = e-s; 1543*19f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); 1544b411b363SPhilipp Reisner bm = p_addr + MLPP(s); 1545b411b363SPhilipp Reisner while (i--) { 1546b411b363SPhilipp Reisner count += hweight_long(*bm); 1547b411b363SPhilipp Reisner *bm = -1UL; 1548b411b363SPhilipp Reisner bm++; 1549b411b363SPhilipp Reisner } 1550b411b363SPhilipp Reisner bm_unmap(p_addr); 1551b411b363SPhilipp Reisner b->bm_set += do_now*BITS_PER_LONG - count; 1552b411b363SPhilipp Reisner if (e == b->bm_words) 1553b411b363SPhilipp Reisner b->bm_set -= bm_clear_surplus(b); 1554b411b363SPhilipp Reisner } else { 1555b411b363SPhilipp Reisner dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); 1556b411b363SPhilipp Reisner } 1557b411b363SPhilipp Reisner weight = b->bm_set - weight; 1558b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 1559b411b363SPhilipp Reisner return weight; 1560b411b363SPhilipp Reisner } 1561