// SPDX-License-Identifier: GPL-2.0-only
/*
   drbd_bitmap.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include "drbd_int.h"


/* OPAQUE outside this file!
 * interface defined in drbd_int.h

 * convention:
 * function name drbd_bm_... => used elsewhere, "public".
 * function name      bm_... => internal to implementation, "private".
 */


/*
 * LIMITATIONS:
 * We want to support >= peta byte of backend storage, while for now still using
 * a granularity of one bit per 4KiB of storage.
 * 1 << 50		bytes backend storage (1 PiB)
 * 1 << (50 - 12)	bits needed
 *	38 --> we need u64 to index and count bits
 * 1 << (38 - 3)	bitmap bytes needed
 *	35 --> we still need u64 to index and count bytes
 *		(that's 32 GiB of bitmap for 1 PiB storage)
 * 1 << (35 - 2)	32bit longs needed
 *	33 --> we'd even need u64 to index and count 32bit long words.
 * 1 << (35 - 3)	64bit longs needed
 *	32 --> we could get away with a 32bit unsigned int to index and count
 *	64bit long words, but I rather stay with unsigned long for now.
 * We probably should neither count nor point to bytes or long words
 * directly, but either by bitnumber, or by page index and offset.
 * 1 << (35 - 12)
 *	22 --> we need that many 4KiB pages of bitmap.
 *	1 << (22 + 3) --> on a 64bit arch,
 *	we need 32 MiB to store the array of page pointers.
 *
 * Because I'm lazy, and because the resulting patch was too large, too ugly
 * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
 * (1 << 32) bits * 4k storage.
 *

 * bitmap storage and IO:
 *	Bitmap is stored little endian on disk, and is kept little endian in
 *	core memory. Currently we still hold the full bitmap in core as long
 *	as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
 *	seems excessive.
 *
 *	We plan to reduce the amount of in-core bitmap pages by paging them in
 *	and out against their on-disk location as necessary, but need to make
 *	sure we don't cause too much meta data IO, and must not deadlock in
 *	tight memory situations. This needs some more work.
 */

/*
 * NOTE
 *  Access to the *bm_pages is protected by bm_lock.
 *  It is safe to read the other members within the lock.
 *
 *  drbd_bm_set_bits is called from bio_endio callbacks,
 *  so we may be called with irq already disabled,
 *  and need spin_lock_irqsave().
 *  And we need the kmap_atomic.
 */
struct drbd_bitmap {
	struct page **bm_pages;
	spinlock_t bm_lock;

	/* exclusively to be used by __al_write_transaction(),
	 * drbd_bm_mark_for_writeout() and
	 * drbd_bm_write_hinted() -> bm_rw() called from there.
	 */
	unsigned int n_bitmap_hints;
	unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION];

	/* see LIMITATIONS: above */

	unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
	unsigned long bm_bits;
	size_t   bm_words;
	size_t   bm_number_of_pages;
	sector_t bm_dev_capacity;
	struct mutex bm_change; /* serializes resize operations */

	wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */

	enum bm_flag bm_flags;

	/* debugging aid, in case we are still racy somewhere */
	char          *bm_why;
	struct task_struct *bm_task;
};

#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!drbd_ratelimit())
		return;
	drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
		 current->comm, task_pid_nr(current),
		 func, b->bm_why ?: "?",
		 b->bm_task->comm, task_pid_nr(b->bm_task));
}

void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{
	struct drbd_bitmap *b = device->bitmap;
	int trylock_failed;

	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n");
		return;
	}

	trylock_failed = !mutex_trylock(&b->bm_change);

	if (trylock_failed) {
		drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n",
			  current->comm, task_pid_nr(current),
			  why, b->bm_why ?: "?",
			  b->bm_task->comm, task_pid_nr(b->bm_task));
		mutex_lock(&b->bm_change);
	}
	if (BM_LOCKED_MASK & b->bm_flags)
		drbd_err(device, "FIXME bitmap already locked in bm_lock\n");
	b->bm_flags |= flags & BM_LOCKED_MASK;

	b->bm_why  = why;
	b->bm_task = current;
}

void drbd_bm_unlock(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!b) {
		drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n");
		return;
	}

	if (!(BM_LOCKED_MASK & device->bitmap->bm_flags))
		drbd_err(device, "FIXME bitmap not locked in bm_unlock\n");

	b->bm_flags &= ~BM_LOCKED_MASK;
	b->bm_why  = NULL;
	b->bm_task = NULL;
	mutex_unlock(&b->bm_change);
}

/* we store some "meta" info about our pages in page->private */
/* at a granularity of 4k storage per bitmap bit:
 * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
 *  1<<38 bits,
 *  1<<23 4k bitmap pages.
 * Use 24 bits as page index, covers 2 peta byte storage
 * at a granularity of 4k per bit.
 * Used to report the failed page idx on io error from the endio handlers.
 */
#define BM_PAGE_IDX_MASK	((1UL<<24)-1)
/* this page is currently read in, or written back */
#define BM_PAGE_IO_LOCK		31
/* if there has been an IO error for this page */
#define BM_PAGE_IO_ERROR	30
/* this is to be able to intelligently skip disk IO,
 * set if bits have been set since last IO. */
#define BM_PAGE_NEED_WRITEOUT	29
/* to mark for lazy writeout once syncer cleared all clearable bits,
 * set if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT	28
/* pages marked with this "HINT" will be considered for writeout
 * on activity log transactions */
#define BM_PAGE_HINT_WRITEOUT	27

/* store_page_idx uses non-atomic assignment. It is only used directly after
 * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
 * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
 * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
 * requires it all to be atomic as well. */
static void bm_store_page_idx(struct page *page, unsigned long idx)
{
	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
	set_page_private(page, idx);
}

static unsigned long bm_page_to_idx(struct page *page)
{
	return page_private(page) & BM_PAGE_IDX_MASK;
}
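
/*
 * Illustration of the packing above (not driver logic, values made up):
 * after bm_store_page_idx(page, 5), page_private(page) == 5.  A later
 * set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)) turns that into
 * (1UL << 29) | 5, and bm_page_to_idx(page) still yields 5, because
 * BM_PAGE_IDX_MASK strips the flag bits 24..31 again.
 */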
/* As it is very unlikely that the same page is under IO from more than one
 * context, we can get away with a bit per page and one wait queue per bitmap.
 */
static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}

static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	void *addr = &page_private(b->bm_pages[page_nr]);
	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
	wake_up(&device->bitmap->bm_io_wait);
}

/* set _before_ submit_io, so it may be reset due to being changed
 * while this page is in flight... will get submitted later again */
static void bm_set_page_unchanged(struct page *page)
{
	/* use cmpxchg? */
	clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
	clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static void bm_set_page_need_writeout(struct page *page)
{
	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}

void drbd_bm_reset_al_hints(struct drbd_device *device)
{
	device->bitmap->n_bitmap_hints = 0;
}
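
/*
 * Hedged usage sketch (the real caller is the activity log code, see the
 * comment in struct drbd_bitmap above): per activity log transaction,
 *
 *	drbd_bm_reset_al_hints(device);
 *	drbd_bm_mark_for_writeout(device, page_nr);	// for a few pages
 *	...
 *	drbd_bm_write_hinted(device);	// writes only changed, hinted pages
 */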
/**
 * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
 * @device:	DRBD device.
 * @page_nr:	the bitmap page to mark with the "hint" flag
 *
 * From within an activity log transaction, we mark a few pages with these
 * hints, then call drbd_bm_write_hinted(), which will only write out changed
 * pages which are flagged with this mark.
 */
void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
{
	struct drbd_bitmap *b = device->bitmap;
	struct page *page;
	if (page_nr >= device->bitmap->bm_number_of_pages) {
		drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
			  page_nr, (int)device->bitmap->bm_number_of_pages);
		return;
	}
	page = device->bitmap->bm_pages[page_nr];
	BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints));
	if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
		b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;
}

static int bm_test_page_unchanged(struct page *page)
{
	volatile const unsigned long *addr = &page_private(page);
	return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
}

static void bm_set_page_io_err(struct page *page)
{
	set_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_clear_page_io_err(struct page *page)
{
	clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
}

static void bm_set_page_lazy_writeout(struct page *page)
{
	set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}

static int bm_test_page_lazy_writeout(struct page *page)
{
	return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
}
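
/*
 * Summary of the per-page dirty tracking above (as consumed by bm_rw()
 * and drbd_bm_endio() below): setting bitmap bits marks a page
 * NEED_WRITEOUT, clearing bits marks it LAZY_WRITEOUT, and
 * bm_set_page_unchanged() runs right before submit, so a page redirtied
 * while in flight simply gets written out again later.
 */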
/* on a 32bit box, this would allow for exactly (2<<38) bits. */
static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
{
	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
	unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
{
	/* page_nr = (bitnr/8) >> PAGE_SHIFT; */
	unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
	BUG_ON(page_nr >= b->bm_number_of_pages);
	return page_nr;
}

static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	struct page *page = b->bm_pages[idx];
	return (unsigned long *) kmap_atomic(page);
}

static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
	return __bm_map_pidx(b, idx);
}

static void __bm_unmap(unsigned long *p_addr)
{
	kunmap_atomic(p_addr);
};

static void bm_unmap(unsigned long *p_addr)
{
	return __bm_unmap(p_addr);
}

/* long word offset of _bitmap_ sector */
#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* word offset from start of bitmap to word number _in_page_
 * modulo longs per page
#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
 hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
 so do it explicitly:
 */
#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))

/* Long words per page */
#define LWPP (PAGE_SIZE/sizeof(long))

/*
 * actually most functions herein should take a struct drbd_bitmap*, not a
 * struct drbd_device*, but for the debug macros I like to have the device around
 * to be able to report device specific messages.
 */
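
/*
 * Worked example for the two index helpers above, assuming 4 KiB pages
 * and 64bit longs (PAGE_SHIFT == 12, LN2_BPL == 6): bit number 100000
 * lives in page 100000 >> 15 == 3, while long word number 100000 lives
 * in page 100000 >> 9 == 195.
 */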


static void bm_free_pages(struct page **pages, unsigned long number)
{
	unsigned long i;
	if (!pages)
		return;

	for (i = 0; i < number; i++) {
		if (!pages[i]) {
			pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
				 i, number);
			continue;
		}
		__free_page(pages[i]);
		pages[i] = NULL;
	}
}

static inline void bm_vk_free(void *ptr)
{
	kvfree(ptr);
}

/*
 * "have" and "want" are NUMBER OF PAGES.
 */
static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
	struct page **old_pages = b->bm_pages;
	struct page **new_pages, *page;
	unsigned int i, bytes;
	unsigned long have = b->bm_number_of_pages;

	BUG_ON(have == 0 && old_pages != NULL);
	BUG_ON(have != 0 && old_pages == NULL);

	if (have == want)
		return old_pages;

	/* Trying kmalloc first, falling back to vmalloc.
	 * GFP_NOIO, as this is called while drbd IO is "suspended",
	 * and during resize or attach on diskless Primary,
	 * we must not block on IO to ourselves.
	 * Context is receiver thread or drbdsetup. */
	bytes = sizeof(struct page *)*want;
	new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
	if (!new_pages) {
		new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO);
		if (!new_pages)
			return NULL;
	}

	if (want >= have) {
		for (i = 0; i < have; i++)
			new_pages[i] = old_pages[i];
		for (; i < want; i++) {
			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
			if (!page) {
				bm_free_pages(new_pages + have, i - have);
				bm_vk_free(new_pages);
				return NULL;
			}
			/* we want to know which page it is
			 * from the endio handlers */
			bm_store_page_idx(page, i);
			new_pages[i] = page;
		}
	} else {
		for (i = 0; i < want; i++)
			new_pages[i] = old_pages[i];
		/* NOT HERE, we are outside the spinlock!
		bm_free_pages(old_pages + want, have - want);
		*/
	}

	return new_pages;
}

/*
 * allocates the drbd_bitmap and stores it in device->bitmap.
 */
int drbd_bm_init(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	WARN_ON(b != NULL);
	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
	if (!b)
		return -ENOMEM;
	spin_lock_init(&b->bm_lock);
	mutex_init(&b->bm_change);
	init_waitqueue_head(&b->bm_io_wait);

	device->bitmap = b;

	return 0;
}

sector_t drbd_bm_capacity(struct drbd_device *device)
{
	if (!expect(device, device->bitmap))
		return 0;
	return device->bitmap->bm_dev_capacity;
}
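
/*
 * Rough lifecycle sketch (simplified, not verbatim from the callers):
 *
 *	drbd_bm_init(device);			// allocate the empty struct
 *	drbd_bm_resize(device, capacity, 1);	// size it to the attached disk
 *	...
 *	drbd_bm_cleanup(device);		// free pages and struct again
 */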
/* called on driver unload. TODO: call when a device is destroyed.
 */
void drbd_bm_cleanup(struct drbd_device *device)
{
	if (!expect(device, device->bitmap))
		return;
	bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
	bm_vk_free(device->bitmap->bm_pages);
	kfree(device->bitmap);
	device->bitmap = NULL;
}

/*
 * since (b->bm_bits % BITS_PER_LONG) != 0,
 * this masks out the remaining bits.
 * Returns the number of bits cleared.
 */
#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
#else
# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
#  error "ambiguous BITS_PER_PAGE"
# endif
#endif
#define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;
	int cleared = 0;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		cleared = hweight_long(*bm & ~mask);
		*bm &= mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to zero out
		 * a padding long to align with a 64bit remote */
		cleared += hweight_long(*bm);
		*bm = 0;
	}
	bm_unmap(p_addr);
	return cleared;
}

static void bm_set_surplus(struct drbd_bitmap *b)
{
	unsigned long mask;
	unsigned long *p_addr, *bm;
	int tmp;

	/* number of bits modulo bits per page */
	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
	/* mask the used bits of the word containing the last bit */
	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) - 1;
	/* bitmap is always stored little endian,
	 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		*bm |= ~mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to fill ("set")
		 * a padding long to align with a 64bit remote */
		*bm = ~0UL;
	}
	bm_unmap(p_addr);
}

/* you better not modify the bitmap while this is running,
 * or its results will be stale */
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
	unsigned long *p_addr;
	unsigned long bits = 0;
	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) - 1;
	int idx, last_word;

	/* all but last page */
	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
		p_addr = __bm_map_pidx(b, idx);
		bits += bitmap_weight(p_addr, BITS_PER_PAGE);
		__bm_unmap(p_addr);
		cond_resched();
	}
	/* last (or only) page */
	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
	p_addr = __bm_map_pidx(b, idx);
	bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
	p_addr[last_word] &= cpu_to_lel(mask);
	bits += hweight_long(p_addr[last_word]);
	/* 32bit arch, may have an unused padding long */
	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
		p_addr[last_word+1] = 0;
	__bm_unmap(p_addr);
	return bits;
}

/* offset and len in long words. */
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	unsigned int idx;
	size_t do_now, end;

	end = offset + len;

	if (end > b->bm_words) {
		pr_alert("bm_memset end > bm_words\n");
		return;
	}

	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		if (bm+do_now > p_addr + LWPP) {
			pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
				 p_addr, bm, (int)do_now);
		} else
			memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
		offset += do_now;
	}
}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
{
	u64 bitmap_sectors;
	if (ldev->md.al_offset == 8)
		bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
	else
		bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
	return bitmap_sectors << (9 + 3);
}

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long bits, words, owords, obits;
	unsigned long want, have, onpages; /* number of pages */
	struct page **npages, **opages = NULL;
	int err = 0;
	bool growing;

	if (!expect(device, b))
		return -ENOMEM;

	drbd_bm_lock(device, "resize", BM_LOCKED_MASK);

	drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
			(unsigned long long)capacity);

	if (capacity == b->bm_dev_capacity)
		goto out;

	if (capacity == 0) {
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set =
		b->bm_bits =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages);
		goto out;
	}
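
	/* Worked example, using the one-bit-per-4KiB granularity stated at
	 * the top of this file: a 1 TiB device has 1 << 31 sectors, hence
	 * 1 << 28 bitmap bits == 1 << 22 64bit words == 32 MiB of bitmap,
	 * held in 8192 pages of 4 KiB. */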
	bits  = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we would use
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	if (get_ldev(device)) {
		u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
		put_ldev(device);
		if (bits > bits_on_disk) {
			drbd_info(device, "bits = %lu\n", bits);
			drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
			err = -ENOSPC;
			goto out;
		}
	}

	want = PFN_UP(words*sizeof(long));
	have = b->bm_number_of_pages;
	if (want == have) {
		D_ASSERT(device, b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
			npages = NULL;
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits  = b->bm_bits;

	growing = bits > obits;
	if (opages && growing && set_new_bits)
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits  = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		if (set_new_bits) {
			bm_memset(b, owords, 0xff, words-owords);
			b->bm_set += bits - obits;
		} else
			bm_memset(b, owords, 0x00, words-owords);

	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages);
	if (!growing)
		b->bm_set = bm_count_bits(b);
	drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);

out:
	drbd_bm_unlock(device);
	return err;
}

/* inherently racy:
 * if not protected by other means, return value may be out of date when
 * leaving this function...
 * we still need to lock it, since it is important that this returns
 * bm_set == 0 precisely.
 *
 * maybe bm_set should be atomic_t ?
 */
unsigned long _drbd_bm_total_weight(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long s;
	unsigned long flags;

	if (!expect(device, b))
		return 0;
	if (!expect(device, b->bm_pages))
		return 0;

	spin_lock_irqsave(&b->bm_lock, flags);
	s = b->bm_set;
	spin_unlock_irqrestore(&b->bm_lock, flags);

	return s;
}

unsigned long drbd_bm_total_weight(struct drbd_device *device)
{
	unsigned long s;
	/* if I don't have a disk, I don't know about out-of-sync status */
	if (!get_ldev_if_state(device, D_NEGOTIATING))
		return 0;
	s = _drbd_bm_total_weight(device);
	put_ldev(device);
	return s;
}

size_t drbd_bm_words(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(device, b))
		return 0;
	if (!expect(device, b->bm_pages))
		return 0;

	return b->bm_words;
}

unsigned long drbd_bm_bits(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(device, b))
		return 0;

	return b->bm_bits;
}
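
/*
 * Example of the merge arithmetic in drbd_bm_merge_lel() below: OR-ing
 * buffer word 0xc (0b1100) into in-core word 0xa (0b1010) yields 0xe
 * (0b1110), and bm_set grows by hweight(0xe) - hweight(0xa) == 3 - 2 == 1.
 */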
/* merge number words from buffer into the bitmap starting at offset.
 * buffer[i] is expected to be little endian unsigned long.
 * bitmap must be locked by drbd_bm_lock.
 * currently only used from receive_bitmap.
 */
void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
			unsigned long *buffer)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long word, bits;
	unsigned int idx;
	size_t end, do_now;

	end = offset + number;

	if (!expect(device, b))
		return;
	if (!expect(device, b->bm_pages))
		return;
	if (number == 0)
		return;
	WARN_ON(offset >= b->bm_words);
	WARN_ON(end    >  b->bm_words);

	spin_lock_irq(&b->bm_lock);
	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		offset += do_now;
		while (do_now--) {
			bits = hweight_long(*bm);
			word = *bm | *buffer++;
			*bm++ = word;
			b->bm_set += hweight_long(word) - bits;
		}
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
	}
	/* with 32bit <-> 64bit cross-platform connect
	 * this is only correct for current usage,
	 * where we _know_ that we are 64 bit aligned,
	 * and know that this function is used in this way, too...
	 */
	if (end == b->bm_words)
		b->bm_set -= bm_clear_surplus(b);
	spin_unlock_irq(&b->bm_lock);
}
/* copy number words from the bitmap starting at offset into the buffer.
 * buffer[i] will be little endian unsigned long.
 */
void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
		     unsigned long *buffer)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long *p_addr, *bm;
	size_t end, do_now;

	end = offset + number;

	if (!expect(device, b))
		return;
	if (!expect(device, b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	if ((offset >= b->bm_words) ||
	    (end    >  b->bm_words) ||
	    (number <= 0))
		drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
			(unsigned long)	offset,
			(unsigned long)	number,
			(unsigned long) b->bm_words);
	else {
		while (offset < end) {
			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
			p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
			bm = p_addr + MLPP(offset);
			offset += do_now;
			while (do_now--)
				*buffer++ = *bm++;
			bm_unmap(p_addr);
		}
	}
	spin_unlock_irq(&b->bm_lock);
}

/* set all bits in the bitmap */
void drbd_bm_set_all(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(device, b))
		return;
	if (!expect(device, b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0xff, b->bm_words);
	(void)bm_clear_surplus(b);
	b->bm_set = b->bm_bits;
	spin_unlock_irq(&b->bm_lock);
}

/* clear all bits in the bitmap */
void drbd_bm_clear_all(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(device, b))
		return;
	if (!expect(device, b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0, b->bm_words);
	b->bm_set = 0;
	spin_unlock_irq(&b->bm_lock);
}
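
/*
 * What follows is the asynchronous bitmap IO machinery: one
 * struct drbd_bm_aio_ctx describes a whole-bitmap read or a (possibly
 * partial) writeout, is linked into device->pending_bitmap_io, and is
 * reference counted so that it outlives both the submitter and the last
 * completing bio.
 */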
static void drbd_bm_aio_ctx_destroy(struct kref *kref)
{
	struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
	unsigned long flags;

	spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
	list_del(&ctx->list);
	spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
	put_ldev(ctx->device);
	kfree(ctx);
}

/* bv_page may be a copy, or may be the original */
static void drbd_bm_endio(struct bio *bio)
{
	struct drbd_bm_aio_ctx *ctx = bio->bi_private;
	struct drbd_device *device = ctx->device;
	struct drbd_bitmap *b = device->bitmap;
	unsigned int idx = bm_page_to_idx(bio_first_page_all(bio));

	if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
	    !bm_test_page_unchanged(b->bm_pages[idx]))
		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);

	if (bio->bi_status) {
		/* ctx error will hold the completed-last non-zero error code,
		 * in case error codes differ. */
		ctx->error = blk_status_to_errno(bio->bi_status);
		bm_set_page_io_err(b->bm_pages[idx]);
		/* Not identical to on disk version of it.
		 * Is BM_PAGE_IO_ERROR enough? */
		if (drbd_ratelimit())
			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
					bio->bi_status, idx);
	} else {
		bm_clear_page_io_err(b->bm_pages[idx]);
		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
	}

	bm_page_unlock_io(device, idx);

	if (ctx->flags & BM_AIO_COPY_PAGES)
		mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);

	bio_put(bio);

	if (atomic_dec_and_test(&ctx->in_flight)) {
		ctx->done = 1;
		wake_up(&device->misc_wait);
		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
	}
}

/* For the layout, see comment above drbd_md_set_sector_offsets(). */
static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
{
	switch (bdev->md.meta_dev_idx) {
	case DRBD_MD_INDEX_INTERNAL:
	case DRBD_MD_INDEX_FLEX_INT:
		return bdev->md.md_offset + bdev->md.al_offset - 1;
	case DRBD_MD_INDEX_FLEX_EXT:
	default:
		return bdev->md.md_offset + bdev->md.md_size_sect - 1;
	}
}

static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
	struct drbd_device *device = ctx->device;
	enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
	struct drbd_bitmap *b = device->bitmap;
	struct bio *bio;
	struct page *page;
	sector_t last_bm_sect;
	sector_t first_bm_sect;
	sector_t on_disk_sector;
	unsigned int len;

	first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
	on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));

	/* this might happen with very small
	 * flexible external meta data device,
	 * or with PAGE_SIZE > 4k */
	last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
	if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
		sector_t len_sect = last_bm_sect - on_disk_sector + 1;
		if (len_sect < PAGE_SIZE/SECTOR_SIZE)
			len = (unsigned int)len_sect*SECTOR_SIZE;
		else
			len = PAGE_SIZE;
	} else {
		if (drbd_ratelimit()) {
			drbd_err(device, "Invalid offset during on-disk bitmap access: "
				 "page idx %u, sector %llu\n", page_nr, on_disk_sector);
		}
		ctx->error = -EIO;
		bm_set_page_io_err(b->bm_pages[page_nr]);
		if (atomic_dec_and_test(&ctx->in_flight)) {
			ctx->done = 1;
			wake_up(&device->misc_wait);
			kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
		}
		return;
	}

	/* serialize IO on this page */
	bm_page_lock_io(device, page_nr);
	/* before memcpy and submit,
	 * so it can be redirtied any time */
	bm_set_page_unchanged(b->bm_pages[page_nr]);

	if (ctx->flags & BM_AIO_COPY_PAGES) {
		page = mempool_alloc(&drbd_md_io_page_pool,
				GFP_NOIO | __GFP_HIGHMEM);
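		/* Writing a private copy is what makes lazy writeout safe
		 * against concurrent bit manipulation, see the comment in
		 * bm_rw() below; the copy carries the page index in its
		 * page->private so drbd_bm_endio() can attribute errors. */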
99004ce49266SLars Ellenberg static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
991b411b363SPhilipp Reisner {
992b30ab791SAndreas Gruenbacher 	struct drbd_device *device = ctx->device;
9939945172aSBart Van Assche 	enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
994b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
99566757001SLars Ellenberg 	struct bio *bio;
99619f843aaSLars Ellenberg 	struct page *page;
99766757001SLars Ellenberg 	sector_t last_bm_sect;
99866757001SLars Ellenberg 	sector_t first_bm_sect;
99966757001SLars Ellenberg 	sector_t on_disk_sector;
1000b411b363SPhilipp Reisner 	unsigned int len;
100119f843aaSLars Ellenberg 
100266757001SLars Ellenberg 	first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
100366757001SLars Ellenberg 	on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));
1004b411b363SPhilipp Reisner 
1005b411b363SPhilipp Reisner 	/* the last bitmap page may be only partially covered by the on-disk
100619f843aaSLars Ellenberg 	 * bitmap area; this might happen with a very small flexible external
100719f843aaSLars Ellenberg 	 * meta data device, or with PAGE_SIZE > 4k */
100866757001SLars Ellenberg 	last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
100966757001SLars Ellenberg 	if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
101066757001SLars Ellenberg 		sector_t len_sect = last_bm_sect - on_disk_sector + 1;
101166757001SLars Ellenberg 		if (len_sect < PAGE_SIZE/SECTOR_SIZE)
101266757001SLars Ellenberg 			len = (unsigned int)len_sect*SECTOR_SIZE;
101366757001SLars Ellenberg 		else
101466757001SLars Ellenberg 			len = PAGE_SIZE;
101566757001SLars Ellenberg 	} else {
1016e3fa02d7SChristoph Böhmwalder 		if (drbd_ratelimit()) {
101766757001SLars Ellenberg 			drbd_err(device, "Invalid offset during on-disk bitmap access: "
101866757001SLars Ellenberg 				 "page idx %u, sector %llu\n", page_nr, on_disk_sector);
101966757001SLars Ellenberg 		}
102066757001SLars Ellenberg 		ctx->error = -EIO;
102166757001SLars Ellenberg 		bm_set_page_io_err(b->bm_pages[page_nr]);
102266757001SLars Ellenberg 		if (atomic_dec_and_test(&ctx->in_flight)) {
102366757001SLars Ellenberg 			ctx->done = 1;
102466757001SLars Ellenberg 			wake_up(&device->misc_wait);
102566757001SLars Ellenberg 			kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
102666757001SLars Ellenberg 		}
102766757001SLars Ellenberg 		return;
102866757001SLars Ellenberg 	}
1029b411b363SPhilipp Reisner 
103019f843aaSLars Ellenberg 	/* serialize IO on this page */
1031b30ab791SAndreas Gruenbacher 	bm_page_lock_io(device, page_nr);
103219f843aaSLars Ellenberg 	/* before memcpy and submit,
103319f843aaSLars Ellenberg 	 * so it can be redirtied any time */
103419f843aaSLars Ellenberg 	bm_set_page_unchanged(b->bm_pages[page_nr]);
103519f843aaSLars Ellenberg 
103619f843aaSLars Ellenberg 	if (ctx->flags & BM_AIO_COPY_PAGES) {
10370892fac8SKent Overstreet 		page = mempool_alloc(&drbd_md_io_page_pool,
10380eb0b63cSChristoph Hellwig 				GFP_NOIO | __GFP_HIGHMEM);
1039f1d6a328SAkinobu Mita 		copy_highpage(page, b->bm_pages[page_nr]);
104019f843aaSLars Ellenberg 		bm_store_page_idx(page, page_nr);
104119f843aaSLars Ellenberg 	} else
104219f843aaSLars Ellenberg 		page = b->bm_pages[page_nr];
104366757001SLars Ellenberg 	bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
104466757001SLars Ellenberg 			&drbd_md_io_bio_set);
10454f024f37SKent Overstreet 	bio->bi_iter.bi_sector = on_disk_sector;
10464d95a10fSLars Ellenberg 	/* bio_add_page of a single page to an empty bio will always succeed,
10474d95a10fSLars Ellenberg 	 * according to the API. Do we want to assert that? */
104819f843aaSLars Ellenberg 	bio_add_page(bio, page, len, 0);
104919f843aaSLars Ellenberg 	bio->bi_private = ctx;
1050ed15b795SAndreas Gruenbacher 	bio->bi_end_io = drbd_bm_endio;
1051b411b363SPhilipp Reisner 
1052bb3cc85eSMike Christie 	if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
10534246a0b6SChristoph Hellwig 		bio_io_error(bio);
1054b411b363SPhilipp Reisner 	} else {
10554e49ea4aSMike Christie 		submit_bio(bio);
10565a8b4242SLars Ellenberg 		/* this should not count as user activity and cause the
10575a8b4242SLars Ellenberg 		 * resync to throttle -- see drbd_rs_should_slow_down(). */
1058b30ab791SAndreas Gruenbacher 		atomic_add(len >> 9, &device->rs_sect_ev);
1059b411b363SPhilipp Reisner 	}
1060b411b363SPhilipp Reisner }
1061b411b363SPhilipp Reisner 
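#if 0
/* Sketch of the index arithmetic in bm_page_io_async() (example values
 * assumed): with 4 KiB pages and 512 byte sectors, PAGE_SHIFT - SECTOR_SHIFT
 * is 3, so bitmap page n starts 8*n sectors after first_bm_sect; page 3 of
 * a bitmap area starting at sector 100 is submitted at sector 124. */
static sector_t example_bm_page_to_sector(sector_t first_bm_sect, unsigned int page_nr)
{
	return first_bm_sect + ((sector_t)page_nr << (PAGE_SHIFT - SECTOR_SHIFT));
}
#endif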
1062b411b363SPhilipp Reisner /*
1063b411b363SPhilipp Reisner  * bm_rw: read/write the whole bitmap from/to its on disk location.
1064b411b363SPhilipp Reisner  */
10654ce49266SLars Ellenberg static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
1066b411b363SPhilipp Reisner {
10674ce49266SLars Ellenberg 	struct drbd_bm_aio_ctx *ctx;
1068b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
106927ea1d87SLars Ellenberg 	unsigned int num_pages, i, count = 0;
1070b411b363SPhilipp Reisner 	unsigned long now;
1071b411b363SPhilipp Reisner 	char ppb[10];
1072b411b363SPhilipp Reisner 	int err = 0;
1073b411b363SPhilipp Reisner 
107419f843aaSLars Ellenberg 	/*
107519f843aaSLars Ellenberg 	 * We are protected against bitmap disappearing/resizing by holding an
107619f843aaSLars Ellenberg 	 * ldev reference (caller must have called get_ldev()).
107719f843aaSLars Ellenberg 	 * For read/write, we are protected against changes to the bitmap by
107819f843aaSLars Ellenberg 	 * the bitmap lock (see drbd_bitmap_io).
107919f843aaSLars Ellenberg 	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
108019f843aaSLars Ellenberg 	 * as we submit copies of pages anyway.
108119f843aaSLars Ellenberg 	 */
1082d1f3779bSPhilipp Reisner 
10834ce49266SLars Ellenberg 	ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
1084d1f3779bSPhilipp Reisner 	if (!ctx)
1085d1f3779bSPhilipp Reisner 		return -ENOMEM;
1086d1f3779bSPhilipp Reisner 
10874ce49266SLars Ellenberg 	*ctx = (struct drbd_bm_aio_ctx) {
1088b30ab791SAndreas Gruenbacher 		.device = device,
10894ce49266SLars Ellenberg 		.start_jif = jiffies,
1090d1f3779bSPhilipp Reisner 		.in_flight = ATOMIC_INIT(1),
10919e58c4daSPhilipp Reisner 		.done = 0,
10920e8488adSLars Ellenberg 		.flags = flags,
1093d1f3779bSPhilipp Reisner 		.error = 0,
10941e24edcaSPeter Zijlstra 		.kref = KREF_INIT(2),
1095d1f3779bSPhilipp Reisner 	};
1096d1f3779bSPhilipp Reisner 
10974ce49266SLars Ellenberg 	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in drbd_bm_aio_ctx_destroy() */
1098d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
10999e58c4daSPhilipp Reisner 		kfree(ctx);
11009e58c4daSPhilipp Reisner 		return -ENODEV;
11019e58c4daSPhilipp Reisner 	}
11028fe39aacSPhilipp Reisner 	/* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
11038fe39aacSPhilipp Reisner 	   drbd_adm_attach(), after device->ldev was assigned. */
11049e58c4daSPhilipp Reisner 
11054ce49266SLars Ellenberg 	if (0 == (ctx->flags & ~BM_AIO_READ))
110620ceb2b2SLars Ellenberg 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
1107b411b363SPhilipp Reisner 
11084ce49266SLars Ellenberg 	spin_lock_irq(&device->resource->req_lock);
11094ce49266SLars Ellenberg 	list_add_tail(&ctx->list, &device->pending_bitmap_io);
11104ce49266SLars Ellenberg 	spin_unlock_irq(&device->resource->req_lock);
11114ce49266SLars Ellenberg 
11126850c442SLars Ellenberg 	num_pages = b->bm_number_of_pages;
1113b411b363SPhilipp Reisner 
1114b411b363SPhilipp Reisner 	now = jiffies;
1115b411b363SPhilipp Reisner 
1116b411b363SPhilipp Reisner 	/* let the layers below us try to merge these bios... */
111727ea1d87SLars Ellenberg 
111827ea1d87SLars Ellenberg 	if (flags & BM_AIO_READ) {
111927ea1d87SLars Ellenberg 		for (i = 0; i < num_pages; i++) {
112027ea1d87SLars Ellenberg 			atomic_inc(&ctx->in_flight);
112127ea1d87SLars Ellenberg 			bm_page_io_async(ctx, i);
112227ea1d87SLars Ellenberg 			++count;
112327ea1d87SLars Ellenberg 			cond_resched();
112427ea1d87SLars Ellenberg 		}
112527ea1d87SLars Ellenberg 	} else if (flags & BM_AIO_WRITE_HINTED) {
112627ea1d87SLars Ellenberg 		/* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
112727ea1d87SLars Ellenberg 		unsigned int hint;
112827ea1d87SLars Ellenberg 		for (hint = 0; hint < b->n_bitmap_hints; hint++) {
112927ea1d87SLars Ellenberg 			i = b->al_bitmap_hints[hint];
113027ea1d87SLars Ellenberg 			if (i >= num_pages) /* == -1U: no hint here. */
113127ea1d87SLars Ellenberg 				continue;
113227ea1d87SLars Ellenberg 			/* Several AL-extents may point to the same page. */
113327ea1d87SLars Ellenberg 			if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
113427ea1d87SLars Ellenberg 					&page_private(b->bm_pages[i])))
113527ea1d87SLars Ellenberg 				continue;
113627ea1d87SLars Ellenberg 			/* Has it even changed? */
113727ea1d87SLars Ellenberg 			if (bm_test_page_unchanged(b->bm_pages[i]))
113827ea1d87SLars Ellenberg 				continue;
113927ea1d87SLars Ellenberg 			atomic_inc(&ctx->in_flight);
114027ea1d87SLars Ellenberg 			bm_page_io_async(ctx, i);
114127ea1d87SLars Ellenberg 			++count;
114227ea1d87SLars Ellenberg 		}
114327ea1d87SLars Ellenberg 	} else {
11446850c442SLars Ellenberg 		for (i = 0; i < num_pages; i++) {
114519f843aaSLars Ellenberg 			/* ignore completely unchanged pages */
114619f843aaSLars Ellenberg 			if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
114719f843aaSLars Ellenberg 				break;
11484ce49266SLars Ellenberg 			if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
1149d1aa4d04SPhilipp Reisner 			    bm_test_page_unchanged(b->bm_pages[i])) {
1150d0180171SAndreas Gruenbacher 				dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
115119f843aaSLars Ellenberg 				continue;
115219f843aaSLars Ellenberg 			}
115319f843aaSLars Ellenberg 			/* during lazy writeout,
115419f843aaSLars Ellenberg 			 * ignore those pages not marked for lazy writeout. */
115519f843aaSLars Ellenberg 			if (lazy_writeout_upper_idx &&
115619f843aaSLars Ellenberg 			    !bm_test_page_lazy_writeout(b->bm_pages[i])) {
1157d0180171SAndreas Gruenbacher 				dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
115819f843aaSLars Ellenberg 				continue;
115919f843aaSLars Ellenberg 			}
1160d1f3779bSPhilipp Reisner 			atomic_inc(&ctx->in_flight);
11614ce49266SLars Ellenberg 			bm_page_io_async(ctx, i);
116219f843aaSLars Ellenberg 			++count;
116319f843aaSLars Ellenberg 			cond_resched();
116419f843aaSLars Ellenberg 		}
116527ea1d87SLars Ellenberg 	}
1166b411b363SPhilipp Reisner 
1167725a97e4SLars Ellenberg 	/*
1168ed15b795SAndreas Gruenbacher 	 * We initialize ctx->in_flight to one to make sure drbd_bm_endio
11699e58c4daSPhilipp Reisner 	 * will not set ctx->done early, and decrement / test it here. If there
1170725a97e4SLars Ellenberg 	 * are still some bios in flight, we need to wait for them here.
11719e58c4daSPhilipp Reisner 	 * If all IO is done already (or nothing had been submitted), there is
11729e58c4daSPhilipp Reisner 	 * no need to wait. Still, we need to put the kref associated with the
11739e58c4daSPhilipp Reisner 	 * "in_flight reached zero, all done" event.
1174725a97e4SLars Ellenberg 	 */
1175d1f3779bSPhilipp Reisner 	if (!atomic_dec_and_test(&ctx->in_flight))
1176b30ab791SAndreas Gruenbacher 		wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
11779e58c4daSPhilipp Reisner 	else
11784ce49266SLars Ellenberg 		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
1179d1f3779bSPhilipp Reisner 
1180c9d963a4SLars Ellenberg 	/* summary for global bitmap IO */
118113c2088dSLars Ellenberg 	if (flags == 0) {
118213c2088dSLars Ellenberg 		unsigned int ms = jiffies_to_msecs(jiffies - now);
118313c2088dSLars Ellenberg 		if (ms > 5) {
118413c2088dSLars Ellenberg 			drbd_info(device, "bitmap %s of %u pages took %u ms\n",
11854ce49266SLars Ellenberg 				  (flags & BM_AIO_READ) ? "READ" : "WRITE",
118613c2088dSLars Ellenberg 				  count, ms);
118713c2088dSLars Ellenberg 		}
118813c2088dSLars Ellenberg 	}
1189b411b363SPhilipp Reisner 
1190d1f3779bSPhilipp Reisner 	if (ctx->error) {
1191d0180171SAndreas Gruenbacher 		drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
1192b30ab791SAndreas Gruenbacher 		drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
1193d1f3779bSPhilipp Reisner 		err = -EIO; /* ctx->error ? */
1194b411b363SPhilipp Reisner 	}
1195b411b363SPhilipp Reisner 
11969e58c4daSPhilipp Reisner 	if (atomic_read(&ctx->in_flight))
119744edfb0dSLars Ellenberg 		err = -EIO; /* Disk timeout/force-detach during IO... */
11989e58c4daSPhilipp Reisner 
1199b411b363SPhilipp Reisner 	now = jiffies;
12004ce49266SLars Ellenberg 	if (flags & BM_AIO_READ) {
120195a0f10cSLars Ellenberg 		b->bm_set = bm_count_bits(b);
1202d0180171SAndreas Gruenbacher 		drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
1203b411b363SPhilipp Reisner 			  jiffies - now);
1204b411b363SPhilipp Reisner 	}
1205b411b363SPhilipp Reisner 	now = b->bm_set;
1206b411b363SPhilipp Reisner 
12074ce49266SLars Ellenberg 	if ((flags & ~BM_AIO_READ) == 0)
1208d0180171SAndreas Gruenbacher 		drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
1209b411b363SPhilipp Reisner 			  ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
1210b411b363SPhilipp Reisner 
12114ce49266SLars Ellenberg 	kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
1212b411b363SPhilipp Reisner 	return err;
1213b411b363SPhilipp Reisner }
1214b411b363SPhilipp Reisner 
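#if 0
/* Sketch of the completion protocol used by bm_rw() above (hypothetical
 * helper; the counts are taken from the code): ctx->kref starts at 2, one
 * reference for bm_rw() itself and one for the "in_flight reached zero"
 * event; ctx->in_flight starts with a bias of 1 so drbd_bm_endio() cannot
 * declare completion while submission is still in progress. */
static void example_drop_in_flight(struct drbd_bm_aio_ctx *ctx)
{
	/* done once per bio by drbd_bm_endio(), and once by bm_rw() itself
	 * to drop the submission bias; whoever brings the count to zero
	 * owns the "all done" kref: */
	if (atomic_dec_and_test(&ctx->in_flight)) {
		ctx->done = 1;
		wake_up(&ctx->device->misc_wait);
		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
	}
}
#endif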
1215b411b363SPhilipp Reisner /**
1216b411b363SPhilipp Reisner  * drbd_bm_read() - Read the whole bitmap from its on disk location.
1217b30ab791SAndreas Gruenbacher  * @device: DRBD device.
 * @peer_device: Peer device (not yet used by bm_rw() here).
1218b411b363SPhilipp Reisner  */
1219*8164dd6cSAndreas Gruenbacher int drbd_bm_read(struct drbd_device *device,
1220*8164dd6cSAndreas Gruenbacher 		 struct drbd_peer_device *peer_device) __must_hold(local)
1221*8164dd6cSAndreas Gruenbacher 
1222b411b363SPhilipp Reisner {
12234ce49266SLars Ellenberg 	return bm_rw(device, BM_AIO_READ, 0);
1224b411b363SPhilipp Reisner }
1225b411b363SPhilipp Reisner 
1226b411b363SPhilipp Reisner /**
1227b411b363SPhilipp Reisner  * drbd_bm_write() - Write the whole bitmap to its on disk location.
1228b30ab791SAndreas Gruenbacher  * @device: DRBD device.
 * @peer_device: Peer device (not yet used by bm_rw() here).
122919f843aaSLars Ellenberg  *
123019f843aaSLars Ellenberg  * Will only write pages that have changed since last IO.
1231b411b363SPhilipp Reisner  */
1232*8164dd6cSAndreas Gruenbacher int drbd_bm_write(struct drbd_device *device,
1233*8164dd6cSAndreas Gruenbacher 		  struct drbd_peer_device *peer_device) __must_hold(local)
1234b411b363SPhilipp Reisner {
12354ce49266SLars Ellenberg 	return bm_rw(device, 0, 0);
1236b411b363SPhilipp Reisner }
1237b411b363SPhilipp Reisner 
1238b411b363SPhilipp Reisner /**
1239d1aa4d04SPhilipp Reisner  * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
1240b30ab791SAndreas Gruenbacher  * @device: DRBD device.
 * @peer_device: Peer device (not yet used by bm_rw() here).
1241d1aa4d04SPhilipp Reisner  *
1242d1aa4d04SPhilipp Reisner  * Will write all pages.
1243d1aa4d04SPhilipp Reisner  */
1244*8164dd6cSAndreas Gruenbacher int drbd_bm_write_all(struct drbd_device *device,
1245*8164dd6cSAndreas Gruenbacher 		      struct drbd_peer_device *peer_device) __must_hold(local)
1246d1aa4d04SPhilipp Reisner {
12474ce49266SLars Ellenberg 	return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
1248d1aa4d04SPhilipp Reisner }
1249d1aa4d04SPhilipp Reisner 
1250d1aa4d04SPhilipp Reisner /**
1251c7a58db4SLars Ellenberg  * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
1252c7a58db4SLars Ellenberg  * @device: DRBD device.
1253c7a58db4SLars Ellenberg  * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
1254c7a58db4SLars Ellenberg  */
1255c7a58db4SLars Ellenberg int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
1256c7a58db4SLars Ellenberg {
1257c7a58db4SLars Ellenberg 	return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
1258c7a58db4SLars Ellenberg }
1259c7a58db4SLars Ellenberg 
1260c7a58db4SLars Ellenberg /**
12610e8488adSLars Ellenberg  * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
1262b30ab791SAndreas Gruenbacher  * @device: DRBD device.
 * @peer_device: Peer device (not yet used by bm_rw() here).
12630e8488adSLars Ellenberg  *
12640e8488adSLars Ellenberg  * Will only write pages that have changed since last IO.
12650e8488adSLars Ellenberg  * In contrast to drbd_bm_write(), this will copy the bitmap pages
12660e8488adSLars Ellenberg  * to temporary writeout pages. It is intended to trigger a full write-out
12670e8488adSLars Ellenberg  * while still allowing the bitmap to change, for example if a resync or online
12680e8488adSLars Ellenberg  * verify is aborted due to a failed peer disk, while local IO continues, or
12690e8488adSLars Ellenberg  * pending resync acks are still being processed.
12700e8488adSLars Ellenberg  */
1271*8164dd6cSAndreas Gruenbacher int drbd_bm_write_copy_pages(struct drbd_device *device,
1272*8164dd6cSAndreas Gruenbacher 			     struct drbd_peer_device *peer_device) __must_hold(local)
12730e8488adSLars Ellenberg {
12744ce49266SLars Ellenberg 	return bm_rw(device, BM_AIO_COPY_PAGES, 0);
1275b411b363SPhilipp Reisner }
127619f843aaSLars Ellenberg 
1277a220d291SLars Ellenberg /**
127845dfffebSLars Ellenberg  * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
1279b30ab791SAndreas Gruenbacher  * @device: DRBD device.
128045dfffebSLars Ellenberg  */
1281b30ab791SAndreas Gruenbacher int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
128245dfffebSLars Ellenberg {
12834ce49266SLars Ellenberg 	return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
128445dfffebSLars Ellenberg }
128519f843aaSLars Ellenberg 
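#if 0
/* Sketch: callers normally reach these wrappers through drbd_bitmap_io()
 * (in drbd_main.c), which takes the bitmap lock with the given flags and
 * holds an ldev reference around the I/O.  The exact signature used below
 * is an assumption, following the peer_device-passing convention above. */
static int example_write_bitmap_locked(struct drbd_device *device,
				       struct drbd_peer_device *peer_device)
{
	return drbd_bitmap_io(device, &drbd_bm_write,
			      "write bitmap", BM_LOCKED_MASK, peer_device);
}
#endif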
1293b411b363SPhilipp Reisner */ 1294b30ab791SAndreas Gruenbacher static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo, 1295cfd8005cSCong Wang const int find_zero_bit) 1296b411b363SPhilipp Reisner { 1297b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1298b411b363SPhilipp Reisner unsigned long *p_addr; 12994b0715f0SLars Ellenberg unsigned long bit_offset; 13004b0715f0SLars Ellenberg unsigned i; 13014b0715f0SLars Ellenberg 1302b411b363SPhilipp Reisner 1303b411b363SPhilipp Reisner if (bm_fo > b->bm_bits) { 1304d0180171SAndreas Gruenbacher drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 13054b0715f0SLars Ellenberg bm_fo = DRBD_END_OF_BITMAP; 1306b411b363SPhilipp Reisner } else { 1307b411b363SPhilipp Reisner while (bm_fo < b->bm_bits) { 130819f843aaSLars Ellenberg /* bit offset of the first bit in the page */ 13094b0715f0SLars Ellenberg bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; 1310cfd8005cSCong Wang p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo)); 1311b411b363SPhilipp Reisner 1312b411b363SPhilipp Reisner if (find_zero_bit) 13137e599e6eSLinus Torvalds i = find_next_zero_bit_le(p_addr, 13144b0715f0SLars Ellenberg PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); 1315b411b363SPhilipp Reisner else 13167e599e6eSLinus Torvalds i = find_next_bit_le(p_addr, 13174b0715f0SLars Ellenberg PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); 1318b411b363SPhilipp Reisner 1319cfd8005cSCong Wang __bm_unmap(p_addr); 1320b411b363SPhilipp Reisner if (i < PAGE_SIZE*8) { 13214b0715f0SLars Ellenberg bm_fo = bit_offset + i; 13224b0715f0SLars Ellenberg if (bm_fo >= b->bm_bits) 1323b411b363SPhilipp Reisner break; 1324b411b363SPhilipp Reisner goto found; 1325b411b363SPhilipp Reisner } 1326b411b363SPhilipp Reisner bm_fo = bit_offset + PAGE_SIZE*8; 1327b411b363SPhilipp Reisner } 13284b0715f0SLars Ellenberg bm_fo = DRBD_END_OF_BITMAP; 1329b411b363SPhilipp Reisner } 1330b411b363SPhilipp Reisner found: 13314b0715f0SLars Ellenberg return bm_fo; 1332b411b363SPhilipp Reisner } 1333b411b363SPhilipp Reisner 1334b30ab791SAndreas Gruenbacher static unsigned long bm_find_next(struct drbd_device *device, 1335b411b363SPhilipp Reisner unsigned long bm_fo, const int find_zero_bit) 1336b411b363SPhilipp Reisner { 1337b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 13384b0715f0SLars Ellenberg unsigned long i = DRBD_END_OF_BITMAP; 1339b411b363SPhilipp Reisner 1340677b3672SChristoph Böhmwalder if (!expect(device, b)) 1341841ce241SAndreas Gruenbacher return i; 1342677b3672SChristoph Böhmwalder if (!expect(device, b->bm_pages)) 1343841ce241SAndreas Gruenbacher return i; 1344b411b363SPhilipp Reisner 1345b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 134620ceb2b2SLars Ellenberg if (BM_DONT_TEST & b->bm_flags) 1347b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1348b411b363SPhilipp Reisner 1349b30ab791SAndreas Gruenbacher i = __bm_find_next(device, bm_fo, find_zero_bit); 1350b411b363SPhilipp Reisner 1351b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 1352b411b363SPhilipp Reisner return i; 1353b411b363SPhilipp Reisner } 1354b411b363SPhilipp Reisner 1355b30ab791SAndreas Gruenbacher unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo) 1356b411b363SPhilipp Reisner { 1357b30ab791SAndreas Gruenbacher return bm_find_next(device, bm_fo, 0); 1358b411b363SPhilipp Reisner } 1359b411b363SPhilipp Reisner 1360b411b363SPhilipp Reisner #if 0 1361b411b363SPhilipp Reisner /* not yet needed for anything. 
1360b411b363SPhilipp Reisner #if 0
1361b411b363SPhilipp Reisner /* not yet needed for anything. */
1362b30ab791SAndreas Gruenbacher unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
1363b411b363SPhilipp Reisner {
1364b30ab791SAndreas Gruenbacher 	return bm_find_next(device, bm_fo, 1);
1365b411b363SPhilipp Reisner }
1366b411b363SPhilipp Reisner #endif
1367b411b363SPhilipp Reisner 
1368b411b363SPhilipp Reisner /* does not spin_lock_irqsave.
1369b411b363SPhilipp Reisner  * you must take drbd_bm_lock() first */
1370b30ab791SAndreas Gruenbacher unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
1371b411b363SPhilipp Reisner {
1372b30ab791SAndreas Gruenbacher 	/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
1373b30ab791SAndreas Gruenbacher 	return __bm_find_next(device, bm_fo, 0);
1374b411b363SPhilipp Reisner }
1375b411b363SPhilipp Reisner 
1376b30ab791SAndreas Gruenbacher unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
1377b411b363SPhilipp Reisner {
1378b30ab791SAndreas Gruenbacher 	/* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
1379b30ab791SAndreas Gruenbacher 	return __bm_find_next(device, bm_fo, 1);
1380b411b363SPhilipp Reisner }
1381b411b363SPhilipp Reisner 
1382b411b363SPhilipp Reisner /* returns number of bits actually changed.
1383b411b363SPhilipp Reisner  * for val != 0, we change 0 -> 1, return code positive
1384b411b363SPhilipp Reisner  * for val == 0, we change 1 -> 0, return code negative
1385b411b363SPhilipp Reisner  * wants bitnr, not sector.
1386b411b363SPhilipp Reisner  * expected to be called for only a few bits (e - s about BITS_PER_LONG).
1387b411b363SPhilipp Reisner  * Must hold bitmap lock already. */
1388b30ab791SAndreas Gruenbacher static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
1389829c6087SLars Ellenberg 	unsigned long e, int val)
1390b411b363SPhilipp Reisner {
1391b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
1392b411b363SPhilipp Reisner 	unsigned long *p_addr = NULL;
1393b411b363SPhilipp Reisner 	unsigned long bitnr;
139419f843aaSLars Ellenberg 	unsigned int last_page_nr = -1U;
1395b411b363SPhilipp Reisner 	int c = 0;
139619f843aaSLars Ellenberg 	int changed_total = 0;
1397b411b363SPhilipp Reisner 
1398b411b363SPhilipp Reisner 	if (e >= b->bm_bits) {
1399d0180171SAndreas Gruenbacher 		drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
1400b411b363SPhilipp Reisner 			 s, e, b->bm_bits);
1401b411b363SPhilipp Reisner 		e = b->bm_bits ? b->bm_bits -1 : 0;
1402b411b363SPhilipp Reisner 	}
1403b411b363SPhilipp Reisner 	for (bitnr = s; bitnr <= e; bitnr++) {
140419f843aaSLars Ellenberg 		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
1405b411b363SPhilipp Reisner 		if (page_nr != last_page_nr) {
1406b411b363SPhilipp Reisner 			if (p_addr)
1407cfd8005cSCong Wang 				__bm_unmap(p_addr);
140819f843aaSLars Ellenberg 			if (c < 0)
140919f843aaSLars Ellenberg 				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
141019f843aaSLars Ellenberg 			else if (c > 0)
141119f843aaSLars Ellenberg 				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
141219f843aaSLars Ellenberg 			changed_total += c;
141319f843aaSLars Ellenberg 			c = 0;
1414cfd8005cSCong Wang 			p_addr = __bm_map_pidx(b, page_nr);
1415b411b363SPhilipp Reisner 			last_page_nr = page_nr;
1416b411b363SPhilipp Reisner 		}
1417b411b363SPhilipp Reisner 		if (val)
14187e599e6eSLinus Torvalds 			c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
1419b411b363SPhilipp Reisner 		else
14207e599e6eSLinus Torvalds 			c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
1421b411b363SPhilipp Reisner 	}
1422b411b363SPhilipp Reisner 	if (p_addr)
1423cfd8005cSCong Wang 		__bm_unmap(p_addr);
142419f843aaSLars Ellenberg 	if (c < 0)
142519f843aaSLars Ellenberg 		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
142619f843aaSLars Ellenberg 	else if (c > 0)
142719f843aaSLars Ellenberg 		bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
142819f843aaSLars Ellenberg 	changed_total += c;
142919f843aaSLars Ellenberg 	b->bm_set += changed_total;
143019f843aaSLars Ellenberg 	return changed_total;
1431b411b363SPhilipp Reisner }
1432b411b363SPhilipp Reisner 
1433b411b363SPhilipp Reisner /* returns number of bits actually changed.
1434b411b363SPhilipp Reisner  * for val != 0, we change 0 -> 1, return code positive
1435b411b363SPhilipp Reisner  * for val == 0, we change 1 -> 0, return code negative
1436b411b363SPhilipp Reisner  * wants bitnr, not sector */
1437b30ab791SAndreas Gruenbacher static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
1438b411b363SPhilipp Reisner 	const unsigned long e, int val)
1439b411b363SPhilipp Reisner {
1440b411b363SPhilipp Reisner 	unsigned long flags;
1441b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
1442b411b363SPhilipp Reisner 	int c = 0;
1443b411b363SPhilipp Reisner 
1444677b3672SChristoph Böhmwalder 	if (!expect(device, b))
1445841ce241SAndreas Gruenbacher 		return 1;
1446677b3672SChristoph Böhmwalder 	if (!expect(device, b->bm_pages))
1447841ce241SAndreas Gruenbacher 		return 0;
1448b411b363SPhilipp Reisner 
1449b411b363SPhilipp Reisner 	spin_lock_irqsave(&b->bm_lock, flags);
145020ceb2b2SLars Ellenberg 	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
1451b30ab791SAndreas Gruenbacher 		bm_print_lock_info(device);
1452b411b363SPhilipp Reisner 
1453b30ab791SAndreas Gruenbacher 	c = __bm_change_bits_to(device, s, e, val);
1454b411b363SPhilipp Reisner 
1455b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&b->bm_lock, flags);
1456b411b363SPhilipp Reisner 	return c;
1457b411b363SPhilipp Reisner }
1458b411b363SPhilipp Reisner 
1459b411b363SPhilipp Reisner /* returns number of bits changed 0 -> 1 */
1460b30ab791SAndreas Gruenbacher int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
1461b411b363SPhilipp Reisner {
1462b30ab791SAndreas Gruenbacher 	return bm_change_bits_to(device, s, e, 1);
1463b411b363SPhilipp Reisner }
1464b411b363SPhilipp Reisner 
1465b411b363SPhilipp Reisner /* returns number of bits changed 1 -> 0 */
1466b30ab791SAndreas Gruenbacher int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
1467b411b363SPhilipp Reisner {
1468b30ab791SAndreas Gruenbacher 	return -bm_change_bits_to(device, s, e, 0);
1469b411b363SPhilipp Reisner }
1470b411b363SPhilipp Reisner 
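#if 0
/* Sketch of the sign convention (bit values assumed): if bits 5..7
 * currently contain 0,1,0, then setting 5..7 flips two bits and
 * __bm_change_bits_to() returns +2; clearing 5..7 afterwards returns -3,
 * which drbd_bm_clear_bits() negates into a positive count. */
static void example_sign_convention(struct drbd_device *device)
{
	int set = drbd_bm_set_bits(device, 5, 7);	/* 0 -> 1 transitions: 2 */
	int cleared = drbd_bm_clear_bits(device, 5, 7);	/* 1 -> 0 transitions: 3 */
	(void)set;
	(void)cleared;
}
#endif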
1471b411b363SPhilipp Reisner /* sets all bits in full words,
1472b411b363SPhilipp Reisner  * from first_word up to, but not including, last_word */
1473b411b363SPhilipp Reisner static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
1474b411b363SPhilipp Reisner 		int page_nr, int first_word, int last_word)
1475b411b363SPhilipp Reisner {
1476b411b363SPhilipp Reisner 	int i;
1477b411b363SPhilipp Reisner 	int bits;
147822d81140SLars Ellenberg 	int changed = 0;
1479cfd8005cSCong Wang 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
14805fb3bc4dSLars Ellenberg 
14815fb3bc4dSLars Ellenberg 	/* It seems more cache-line friendly to hweight_long() each word and then
14825fb3bc4dSLars Ellenberg 	 * set it to ~0UL, than to first bitmap_weight() all words, then bitmap_fill() all words */
1483b411b363SPhilipp Reisner 	for (i = first_word; i < last_word; i++) {
1484b411b363SPhilipp Reisner 		bits = hweight_long(paddr[i]);
1485b411b363SPhilipp Reisner 		paddr[i] = ~0UL;
148622d81140SLars Ellenberg 		changed += BITS_PER_LONG - bits;
1487b411b363SPhilipp Reisner 	}
1488cfd8005cSCong Wang 	kunmap_atomic(paddr);
148922d81140SLars Ellenberg 	if (changed) {
149022d81140SLars Ellenberg 		/* We only need lazy writeout, the information is still in the
149122d81140SLars Ellenberg 		 * remote bitmap as well, and is reconstructed during the next
149222d81140SLars Ellenberg 		 * bitmap exchange, if lost locally due to a crash. */
149322d81140SLars Ellenberg 		bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
149422d81140SLars Ellenberg 		b->bm_set += changed;
149522d81140SLars Ellenberg 	}
1496b411b363SPhilipp Reisner }
1497b411b363SPhilipp Reisner 
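#if 0
/* Sketch of the per-word accounting above (word contents assumed): a word
 * that already has 10 bits set contributes BITS_PER_LONG - 10 newly set
 * bits when overwritten with ~0UL. */
static int example_fill_one_word(unsigned long *word)
{
	int bits = hweight_long(*word);

	*word = ~0UL;
	return BITS_PER_LONG - bits;	/* number of 0 -> 1 transitions */
}
#endif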
1498829c6087SLars Ellenberg /* Same thing as drbd_bm_set_bits,
1499829c6087SLars Ellenberg  * but more efficient for a large bit range.
1500b411b363SPhilipp Reisner  * You must first drbd_bm_lock().
1501b411b363SPhilipp Reisner  * Can be called to set the whole bitmap in one go.
1502b411b363SPhilipp Reisner  * Sets bits from s to e _inclusive_. */
1503b30ab791SAndreas Gruenbacher void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
1504b411b363SPhilipp Reisner {
1505b411b363SPhilipp Reisner 	/* First set_bit from the first bit (s)
1506b411b363SPhilipp Reisner 	 * up to the next long boundary (sl),
1507b411b363SPhilipp Reisner 	 * then assign full words up to the last long boundary (el),
1508b411b363SPhilipp Reisner 	 * then set_bit up to and including the last bit (e).
1509b411b363SPhilipp Reisner 	 *
1510b411b363SPhilipp Reisner 	 * Do not use memset, because we must account for changes,
1511b411b363SPhilipp Reisner 	 * so we need to loop over the words with hweight() anyway.
1512b411b363SPhilipp Reisner 	 */
1513b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
1514b411b363SPhilipp Reisner 	unsigned long sl = ALIGN(s,BITS_PER_LONG);
1515b411b363SPhilipp Reisner 	unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
1516b411b363SPhilipp Reisner 	int first_page;
1517b411b363SPhilipp Reisner 	int last_page;
1518b411b363SPhilipp Reisner 	int page_nr;
1519b411b363SPhilipp Reisner 	int first_word;
1520b411b363SPhilipp Reisner 	int last_word;
1521b411b363SPhilipp Reisner 
1522b411b363SPhilipp Reisner 	if (e - s <= 3*BITS_PER_LONG) {
1523b411b363SPhilipp Reisner 		/* don't bother; el and sl may even be wrong. */
1524829c6087SLars Ellenberg 		spin_lock_irq(&b->bm_lock);
1525b30ab791SAndreas Gruenbacher 		__bm_change_bits_to(device, s, e, 1);
1526829c6087SLars Ellenberg 		spin_unlock_irq(&b->bm_lock);
1527b411b363SPhilipp Reisner 		return;
1528b411b363SPhilipp Reisner 	}
1529b411b363SPhilipp Reisner 
1530b411b363SPhilipp Reisner 	/* difference is large enough that we can trust sl and el */
1531b411b363SPhilipp Reisner 
1532829c6087SLars Ellenberg 	spin_lock_irq(&b->bm_lock);
1533829c6087SLars Ellenberg 
1534b411b363SPhilipp Reisner 	/* bits filling the current long */
1535b411b363SPhilipp Reisner 	if (sl)
1536b30ab791SAndreas Gruenbacher 		__bm_change_bits_to(device, s, sl-1, 1);
1537b411b363SPhilipp Reisner 
1538b411b363SPhilipp Reisner 	first_page = sl >> (3 + PAGE_SHIFT);
1539b411b363SPhilipp Reisner 	last_page = el >> (3 + PAGE_SHIFT);
1540b411b363SPhilipp Reisner 
1541b411b363SPhilipp Reisner 	/* MLPP: modulo longs per page */
1542b411b363SPhilipp Reisner 	/* LWPP: long words per page */
1543b411b363SPhilipp Reisner 	first_word = MLPP(sl >> LN2_BPL);
1544b411b363SPhilipp Reisner 	last_word = LWPP;
1545b411b363SPhilipp Reisner 
1546b411b363SPhilipp Reisner 	/* first and full pages, unless first page == last page */
1547b411b363SPhilipp Reisner 	for (page_nr = first_page; page_nr < last_page; page_nr++) {
1548b30ab791SAndreas Gruenbacher 		bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word);
15498ccee20eSLars Ellenberg 		spin_unlock_irq(&b->bm_lock);
15508ccee20eSLars Ellenberg 		cond_resched();
1551b411b363SPhilipp Reisner 		first_word = 0;
15528ccee20eSLars Ellenberg 		spin_lock_irq(&b->bm_lock);
1553b411b363SPhilipp Reisner 	}
1554b411b363SPhilipp Reisner 	/* last page (respectively only page, for first page == last page) */
1555b411b363SPhilipp Reisner 	last_word = MLPP(el >> LN2_BPL);
15564eccc579SLars Ellenberg 
15574eccc579SLars Ellenberg 	/* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples).
15584eccc579SLars Ellenberg 	 * ==> e = 32767, el = 32768, last_page = 2,
15594eccc579SLars Ellenberg 	 * and now last_word = 0.
15604eccc579SLars Ellenberg 	 * We do not want to touch last_page in this case,
15614eccc579SLars Ellenberg 	 * as we did not allocate it, it is not present in bitmap->bm_pages.
15624eccc579SLars Ellenberg 	 */
15634eccc579SLars Ellenberg 	if (last_word)
1564b30ab791SAndreas Gruenbacher 		bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word);
1565b411b363SPhilipp Reisner 
1566b411b363SPhilipp Reisner 	/* possibly trailing bits.
1567b411b363SPhilipp Reisner 	 * example: (e & 63) == 63, el will be e+1.
1568b411b363SPhilipp Reisner 	 * if that even was the very last bit,
1569b411b363SPhilipp Reisner 	 * it would trigger an assert in __bm_change_bits_to()
1570b411b363SPhilipp Reisner 	 */
1571b411b363SPhilipp Reisner 	if (el <= e)
1572b30ab791SAndreas Gruenbacher 		__bm_change_bits_to(device, el, e, 1);
1573829c6087SLars Ellenberg 	spin_unlock_irq(&b->bm_lock);
1574b411b363SPhilipp Reisner }
1575b411b363SPhilipp Reisner 
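#if 0
/* Worked example of the boundary math above (values assumed,
 * BITS_PER_LONG == 64): for s = 70, e = 300. */
static void example_boundaries(void)
{
	unsigned long s = 70, e = 300;
	unsigned long sl = ALIGN(s, BITS_PER_LONG);				/* 128 */
	unsigned long el = (e + 1) & ~((unsigned long)BITS_PER_LONG - 1);	/* 256 */

	/* bits 70..127 and 256..300 go through __bm_change_bits_to(),
	 * bits 128..255 are assigned as two full long words. */
	(void)sl;
	(void)el;
}
#endif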
1576b411b363SPhilipp Reisner /* returns bit state
1577b411b363SPhilipp Reisner  * wants bitnr, NOT sector.
1578b411b363SPhilipp Reisner  * inherently racy... area needs to be locked by means of {al,rs}_lru
1579b411b363SPhilipp Reisner  *  1 ... bit set
1580b411b363SPhilipp Reisner  *  0 ... bit not set
1581b411b363SPhilipp Reisner  * -1 ... first out of bounds access, stop testing for bits!
1582b411b363SPhilipp Reisner  */
1583b30ab791SAndreas Gruenbacher int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
1584b411b363SPhilipp Reisner {
1585b411b363SPhilipp Reisner 	unsigned long flags;
1586b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
1587b411b363SPhilipp Reisner 	unsigned long *p_addr;
1588b411b363SPhilipp Reisner 	int i;
1589b411b363SPhilipp Reisner 
1590677b3672SChristoph Böhmwalder 	if (!expect(device, b))
1591841ce241SAndreas Gruenbacher 		return 0;
1592677b3672SChristoph Böhmwalder 	if (!expect(device, b->bm_pages))
1593841ce241SAndreas Gruenbacher 		return 0;
1594b411b363SPhilipp Reisner 
1595b411b363SPhilipp Reisner 	spin_lock_irqsave(&b->bm_lock, flags);
159620ceb2b2SLars Ellenberg 	if (BM_DONT_TEST & b->bm_flags)
1597b30ab791SAndreas Gruenbacher 		bm_print_lock_info(device);
1598b411b363SPhilipp Reisner 	if (bitnr < b->bm_bits) {
159919f843aaSLars Ellenberg 		p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
16007e599e6eSLinus Torvalds 		i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
1601b411b363SPhilipp Reisner 		bm_unmap(p_addr);
1602b411b363SPhilipp Reisner 	} else if (bitnr == b->bm_bits) {
1603b411b363SPhilipp Reisner 		i = -1;
1604b411b363SPhilipp Reisner 	} else { /* (bitnr > b->bm_bits) */
1605d0180171SAndreas Gruenbacher 		drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
1606b411b363SPhilipp Reisner 		i = 0;
1607b411b363SPhilipp Reisner 	}
1608b411b363SPhilipp Reisner 
1609b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&b->bm_lock, flags);
1610b411b363SPhilipp Reisner 	return i;
1611b411b363SPhilipp Reisner }
1612b411b363SPhilipp Reisner 
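#if 0
/* Sketch: how the tri-state return is meant to be consumed (assumed
 * caller; -1 is the "stop testing" sentinel). */
static void example_scan_bits(struct drbd_device *device, unsigned long bitnr)
{
	for (;;) {
		int bit = drbd_bm_test_bit(device, bitnr++);

		if (bit < 0)
			break;		/* first access past the end */
		if (bit)
			continue;	/* block is out of sync */
		/* bit == 0: block is in sync */
	}
}
#endif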
1613b411b363SPhilipp Reisner /* returns number of bits set in the range [s, e] */
1614b30ab791SAndreas Gruenbacher int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
1615b411b363SPhilipp Reisner {
1616b411b363SPhilipp Reisner 	unsigned long flags;
1617b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
161819f843aaSLars Ellenberg 	unsigned long *p_addr = NULL;
1619b411b363SPhilipp Reisner 	unsigned long bitnr;
162019f843aaSLars Ellenberg 	unsigned int page_nr = -1U;
1621b411b363SPhilipp Reisner 	int c = 0;
1622b411b363SPhilipp Reisner 
1623b411b363SPhilipp Reisner 	/* If this is called without a bitmap, that is a bug.  But just to be
1624b411b363SPhilipp Reisner 	 * robust in case we screwed up elsewhere, in that case pretend there
1625b411b363SPhilipp Reisner 	 * was one dirty bit in the requested area, so we won't try to do a
1626b411b363SPhilipp Reisner 	 * local read there (no bitmap probably implies no disk) */
1627677b3672SChristoph Böhmwalder 	if (!expect(device, b))
1628841ce241SAndreas Gruenbacher 		return 1;
1629677b3672SChristoph Böhmwalder 	if (!expect(device, b->bm_pages))
1630841ce241SAndreas Gruenbacher 		return 1;
1631b411b363SPhilipp Reisner 
1632b411b363SPhilipp Reisner 	spin_lock_irqsave(&b->bm_lock, flags);
163320ceb2b2SLars Ellenberg 	if (BM_DONT_TEST & b->bm_flags)
1634b30ab791SAndreas Gruenbacher 		bm_print_lock_info(device);
1635b411b363SPhilipp Reisner 	for (bitnr = s; bitnr <= e; bitnr++) {
163619f843aaSLars Ellenberg 		unsigned int idx = bm_bit_to_page_idx(b, bitnr);
163719f843aaSLars Ellenberg 		if (page_nr != idx) {
163819f843aaSLars Ellenberg 			page_nr = idx;
1639b411b363SPhilipp Reisner 			if (p_addr)
1640b411b363SPhilipp Reisner 				bm_unmap(p_addr);
164119f843aaSLars Ellenberg 			p_addr = bm_map_pidx(b, idx);
1642b411b363SPhilipp Reisner 		}
1643677b3672SChristoph Böhmwalder 		if (expect(device, bitnr < b->bm_bits))
16447e599e6eSLinus Torvalds 			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
1645841ce241SAndreas Gruenbacher 		else
1646d0180171SAndreas Gruenbacher 			drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
1647b411b363SPhilipp Reisner 	}
1648b411b363SPhilipp Reisner 	if (p_addr)
1649b411b363SPhilipp Reisner 		bm_unmap(p_addr);
1650b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&b->bm_lock, flags);
1651b411b363SPhilipp Reisner 	return c;
1652b411b363SPhilipp Reisner }
1653b411b363SPhilipp Reisner 
1654b411b363SPhilipp Reisner 
1655b411b363SPhilipp Reisner /* inherently racy...
1656b411b363SPhilipp Reisner  * return value may be already out-of-date when this function returns.
1657b411b363SPhilipp Reisner  * but the general usage is that this is only used during a cstate when bits are
1658b411b363SPhilipp Reisner  * only cleared, not set, and callers typically only care for the case when the return
1659b411b363SPhilipp Reisner  * value is zero, or we already "locked" this "bitmap extent" by other means.
1660b411b363SPhilipp Reisner  *
1661b411b363SPhilipp Reisner  * enr is bm-extent number, since we chose to name one sector (512 bytes)
1662b411b363SPhilipp Reisner  * worth of the bitmap a "bitmap extent".
1663b411b363SPhilipp Reisner  *
1664b411b363SPhilipp Reisner  * TODO
1665b411b363SPhilipp Reisner  * I think since we use it like a reference count, we should use the real
1666b411b363SPhilipp Reisner  * reference count of some bitmap extent element from some lru instead...
1667b411b363SPhilipp Reisner  *
1668b411b363SPhilipp Reisner  */
1669b30ab791SAndreas Gruenbacher int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
1670b411b363SPhilipp Reisner {
1671b30ab791SAndreas Gruenbacher 	struct drbd_bitmap *b = device->bitmap;
1672b411b363SPhilipp Reisner 	int count, s, e;
1673b411b363SPhilipp Reisner 	unsigned long flags;
1674b411b363SPhilipp Reisner 	unsigned long *p_addr, *bm;
1675b411b363SPhilipp Reisner 
1676677b3672SChristoph Böhmwalder 	if (!expect(device, b))
1677841ce241SAndreas Gruenbacher 		return 0;
1678677b3672SChristoph Böhmwalder 	if (!expect(device, b->bm_pages))
1679841ce241SAndreas Gruenbacher 		return 0;
1680b411b363SPhilipp Reisner 
1681b411b363SPhilipp Reisner 	spin_lock_irqsave(&b->bm_lock, flags);
168220ceb2b2SLars Ellenberg 	if (BM_DONT_TEST & b->bm_flags)
1683b30ab791SAndreas Gruenbacher 		bm_print_lock_info(device);
1684b411b363SPhilipp Reisner 
1685b411b363SPhilipp Reisner 	s = S2W(enr);
1686b411b363SPhilipp Reisner 	e = min((size_t)S2W(enr+1), b->bm_words);
1687b411b363SPhilipp Reisner 	count = 0;
1688b411b363SPhilipp Reisner 	if (s < b->bm_words) {
1689b411b363SPhilipp Reisner 		int n = e-s;
169019f843aaSLars Ellenberg 		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
1691b411b363SPhilipp Reisner 		bm = p_addr + MLPP(s);
16925fb3bc4dSLars Ellenberg 		count += bitmap_weight(bm, n * BITS_PER_LONG);
1693b411b363SPhilipp Reisner 		bm_unmap(p_addr);
1694b411b363SPhilipp Reisner 	} else {
1695d0180171SAndreas Gruenbacher 		drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
1696b411b363SPhilipp Reisner 	}
1697b411b363SPhilipp Reisner 	spin_unlock_irqrestore(&b->bm_lock, flags);
1698b411b363SPhilipp Reisner 	return count;
1699b411b363SPhilipp Reisner }
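#if 0
/* Sketch of the "bitmap extent" arithmetic (assumes BM_BLOCK_SHIFT == 12,
 * i.e. one bit per 4 KiB): one extent is one 512 byte sector of bitmap,
 * i.e. 4096 bits, covering 4096 * 4 KiB = 16 MiB of backend storage. */
static unsigned long example_bytes_per_bm_extent(void)
{
	return (512UL * 8) << BM_BLOCK_SHIFT;	/* 16 MiB */
}
#endif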