1c6ae4c04SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2b411b363SPhilipp Reisner /* 3b411b363SPhilipp Reisner drbd_bitmap.c 4b411b363SPhilipp Reisner 5b411b363SPhilipp Reisner This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6b411b363SPhilipp Reisner 7b411b363SPhilipp Reisner Copyright (C) 2004-2008, LINBIT Information Technologies GmbH. 8b411b363SPhilipp Reisner Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>. 9b411b363SPhilipp Reisner Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 10b411b363SPhilipp Reisner 11b411b363SPhilipp Reisner */ 12b411b363SPhilipp Reisner 13f88c5d90SLars Ellenberg #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14f88c5d90SLars Ellenberg 155fb3bc4dSLars Ellenberg #include <linux/bitmap.h> 16b411b363SPhilipp Reisner #include <linux/vmalloc.h> 17b411b363SPhilipp Reisner #include <linux/string.h> 18b411b363SPhilipp Reisner #include <linux/drbd.h> 195a0e3ad6STejun Heo #include <linux/slab.h> 20dbcbdc43SChristoph Hellwig #include <linux/highmem.h> 21f0ff1357SStephen Rothwell 22b411b363SPhilipp Reisner #include "drbd_int.h" 23b411b363SPhilipp Reisner 2495a0f10cSLars Ellenberg 25b411b363SPhilipp Reisner /* OPAQUE outside this file! 26b411b363SPhilipp Reisner * interface defined in drbd_int.h 27b411b363SPhilipp Reisner 28b411b363SPhilipp Reisner * convention: 29b411b363SPhilipp Reisner * function name drbd_bm_... => used elsewhere, "public". 30b411b363SPhilipp Reisner * function name bm_... => internal to implementation, "private". 314b0715f0SLars Ellenberg */ 32b411b363SPhilipp Reisner 334b0715f0SLars Ellenberg 344b0715f0SLars Ellenberg /* 354b0715f0SLars Ellenberg * LIMITATIONS: 364b0715f0SLars Ellenberg * We want to support >= peta byte of backend storage, while for now still using 374b0715f0SLars Ellenberg * a granularity of one bit per 4KiB of storage. 
384b0715f0SLars Ellenberg * 1 << 50 bytes backend storage (1 PiB) 394b0715f0SLars Ellenberg * 1 << (50 - 12) bits needed 404b0715f0SLars Ellenberg * 38 --> we need u64 to index and count bits 414b0715f0SLars Ellenberg * 1 << (38 - 3) bitmap bytes needed 424b0715f0SLars Ellenberg * 35 --> we still need u64 to index and count bytes 434b0715f0SLars Ellenberg * (that's 32 GiB of bitmap for 1 PiB storage) 444b0715f0SLars Ellenberg * 1 << (35 - 2) 32bit longs needed 454b0715f0SLars Ellenberg * 33 --> we'd even need u64 to index and count 32bit long words. 464b0715f0SLars Ellenberg * 1 << (35 - 3) 64bit longs needed 474b0715f0SLars Ellenberg * 32 --> we could get away with a 32bit unsigned int to index and count 484b0715f0SLars Ellenberg * 64bit long words, but I rather stay with unsigned long for now. 494b0715f0SLars Ellenberg * We probably should neither count nor point to bytes or long words 504b0715f0SLars Ellenberg * directly, but either by bitnumber, or by page index and offset. 514b0715f0SLars Ellenberg * 1 << (35 - 12) 524b0715f0SLars Ellenberg * 22 --> we need that much 4KiB pages of bitmap. 534b0715f0SLars Ellenberg * 1 << (22 + 3) --> on a 64bit arch, 544b0715f0SLars Ellenberg * we need 32 MiB to store the array of page pointers. 554b0715f0SLars Ellenberg * 564b0715f0SLars Ellenberg * Because I'm lazy, and because the resulting patch was too large, too ugly 574b0715f0SLars Ellenberg * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), 584b0715f0SLars Ellenberg * (1 << 32) bits * 4k storage. 594b0715f0SLars Ellenberg * 604b0715f0SLars Ellenberg 614b0715f0SLars Ellenberg * bitmap storage and IO: 624b0715f0SLars Ellenberg * Bitmap is stored little endian on disk, and is kept little endian in 634b0715f0SLars Ellenberg * core memory. Currently we still hold the full bitmap in core as long 644b0715f0SLars Ellenberg * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage 654b0715f0SLars Ellenberg * seems excessive. 
664b0715f0SLars Ellenberg * 6724c4830cSBart Van Assche * We plan to reduce the amount of in-core bitmap pages by paging them in 684b0715f0SLars Ellenberg * and out against their on-disk location as necessary, but need to make 694b0715f0SLars Ellenberg * sure we don't cause too much meta data IO, and must not deadlock in 704b0715f0SLars Ellenberg * tight memory situations. This needs some more work. 71b411b363SPhilipp Reisner */ 72b411b363SPhilipp Reisner 73b411b363SPhilipp Reisner /* 74b411b363SPhilipp Reisner * NOTE 75b411b363SPhilipp Reisner * Access to the *bm_pages is protected by bm_lock. 76b411b363SPhilipp Reisner * It is safe to read the other members within the lock. 77b411b363SPhilipp Reisner * 78b411b363SPhilipp Reisner * drbd_bm_set_bits is called from bio_endio callbacks, 79b411b363SPhilipp Reisner * We may be called with irq already disabled, 80b411b363SPhilipp Reisner * so we need spin_lock_irqsave(). 81b411b363SPhilipp Reisner * And we need the kmap_atomic. 82b411b363SPhilipp Reisner */ 83b411b363SPhilipp Reisner struct drbd_bitmap { 84b411b363SPhilipp Reisner struct page **bm_pages; 85b411b363SPhilipp Reisner spinlock_t bm_lock; 864b0715f0SLars Ellenberg 8727ea1d87SLars Ellenberg /* exclusively to be used by __al_write_transaction(), 8827ea1d87SLars Ellenberg * drbd_bm_mark_for_writeout() and 8927ea1d87SLars Ellenberg * and drbd_bm_write_hinted() -> bm_rw() called from there. 9027ea1d87SLars Ellenberg */ 9127ea1d87SLars Ellenberg unsigned int n_bitmap_hints; 9227ea1d87SLars Ellenberg unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION]; 9327ea1d87SLars Ellenberg 944b0715f0SLars Ellenberg /* see LIMITATIONS: above */ 954b0715f0SLars Ellenberg 96b411b363SPhilipp Reisner unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? 
*/ 97b411b363SPhilipp Reisner unsigned long bm_bits; 98b411b363SPhilipp Reisner size_t bm_words; 99b411b363SPhilipp Reisner size_t bm_number_of_pages; 100b411b363SPhilipp Reisner sector_t bm_dev_capacity; 1018a03ae2aSThomas Gleixner struct mutex bm_change; /* serializes resize operations */ 102b411b363SPhilipp Reisner 10319f843aaSLars Ellenberg wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ 104b411b363SPhilipp Reisner 10520ceb2b2SLars Ellenberg enum bm_flag bm_flags; 106b411b363SPhilipp Reisner 107b411b363SPhilipp Reisner /* debugging aid, in case we are still racy somewhere */ 108b411b363SPhilipp Reisner char *bm_why; 109b411b363SPhilipp Reisner struct task_struct *bm_task; 110b411b363SPhilipp Reisner }; 111b411b363SPhilipp Reisner 112b411b363SPhilipp Reisner #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) 113b30ab791SAndreas Gruenbacher static void __bm_print_lock_info(struct drbd_device *device, const char *func) 114b411b363SPhilipp Reisner { 115b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 116b411b363SPhilipp Reisner if (!__ratelimit(&drbd_ratelimit_state)) 117b411b363SPhilipp Reisner return; 118c60b0251SAndreas Gruenbacher drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n", 119c60b0251SAndreas Gruenbacher current->comm, task_pid_nr(current), 120b411b363SPhilipp Reisner func, b->bm_why ?: "?", 121c60b0251SAndreas Gruenbacher b->bm_task->comm, task_pid_nr(b->bm_task)); 122b411b363SPhilipp Reisner } 123b411b363SPhilipp Reisner 124b30ab791SAndreas Gruenbacher void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags) 125b411b363SPhilipp Reisner { 126b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 127b411b363SPhilipp Reisner int trylock_failed; 128b411b363SPhilipp Reisner 129b411b363SPhilipp Reisner if (!b) { 130d0180171SAndreas Gruenbacher drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n"); 131b411b363SPhilipp Reisner return; 
132b411b363SPhilipp Reisner } 133b411b363SPhilipp Reisner 1348a03ae2aSThomas Gleixner trylock_failed = !mutex_trylock(&b->bm_change); 135b411b363SPhilipp Reisner 136b411b363SPhilipp Reisner if (trylock_failed) { 137c60b0251SAndreas Gruenbacher drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n", 138c60b0251SAndreas Gruenbacher current->comm, task_pid_nr(current), 139b411b363SPhilipp Reisner why, b->bm_why ?: "?", 140c60b0251SAndreas Gruenbacher b->bm_task->comm, task_pid_nr(b->bm_task)); 1418a03ae2aSThomas Gleixner mutex_lock(&b->bm_change); 142b411b363SPhilipp Reisner } 14320ceb2b2SLars Ellenberg if (BM_LOCKED_MASK & b->bm_flags) 144d0180171SAndreas Gruenbacher drbd_err(device, "FIXME bitmap already locked in bm_lock\n"); 14520ceb2b2SLars Ellenberg b->bm_flags |= flags & BM_LOCKED_MASK; 146b411b363SPhilipp Reisner 147b411b363SPhilipp Reisner b->bm_why = why; 148b411b363SPhilipp Reisner b->bm_task = current; 149b411b363SPhilipp Reisner } 150b411b363SPhilipp Reisner 151b30ab791SAndreas Gruenbacher void drbd_bm_unlock(struct drbd_device *device) 152b411b363SPhilipp Reisner { 153b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 154b411b363SPhilipp Reisner if (!b) { 155d0180171SAndreas Gruenbacher drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n"); 156b411b363SPhilipp Reisner return; 157b411b363SPhilipp Reisner } 158b411b363SPhilipp Reisner 159b30ab791SAndreas Gruenbacher if (!(BM_LOCKED_MASK & device->bitmap->bm_flags)) 160d0180171SAndreas Gruenbacher drbd_err(device, "FIXME bitmap not locked in bm_unlock\n"); 161b411b363SPhilipp Reisner 16220ceb2b2SLars Ellenberg b->bm_flags &= ~BM_LOCKED_MASK; 163b411b363SPhilipp Reisner b->bm_why = NULL; 164b411b363SPhilipp Reisner b->bm_task = NULL; 1658a03ae2aSThomas Gleixner mutex_unlock(&b->bm_change); 166b411b363SPhilipp Reisner } 167b411b363SPhilipp Reisner 16819f843aaSLars Ellenberg /* we store some "meta" info about our pages in page->private */ 16919f843aaSLars 
Ellenberg /* at a granularity of 4k storage per bitmap bit: 17019f843aaSLars Ellenberg * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks 17119f843aaSLars Ellenberg * 1<<38 bits, 17219f843aaSLars Ellenberg * 1<<23 4k bitmap pages. 17319f843aaSLars Ellenberg * Use 24 bits as page index, covers 2 peta byte storage 17419f843aaSLars Ellenberg * at a granularity of 4k per bit. 17519f843aaSLars Ellenberg * Used to report the failed page idx on io error from the endio handlers. 17619f843aaSLars Ellenberg */ 17719f843aaSLars Ellenberg #define BM_PAGE_IDX_MASK ((1UL<<24)-1) 17819f843aaSLars Ellenberg /* this page is currently read in, or written back */ 17919f843aaSLars Ellenberg #define BM_PAGE_IO_LOCK 31 18019f843aaSLars Ellenberg /* if there has been an IO error for this page */ 18119f843aaSLars Ellenberg #define BM_PAGE_IO_ERROR 30 18219f843aaSLars Ellenberg /* this is to be able to intelligently skip disk IO, 18319f843aaSLars Ellenberg * set if bits have been set since last IO. */ 18419f843aaSLars Ellenberg #define BM_PAGE_NEED_WRITEOUT 29 18519f843aaSLars Ellenberg /* to mark for lazy writeout once syncer cleared all clearable bits, 18619f843aaSLars Ellenberg * we if bits have been cleared since last IO. */ 18719f843aaSLars Ellenberg #define BM_PAGE_LAZY_WRITEOUT 28 18845dfffebSLars Ellenberg /* pages marked with this "HINT" will be considered for writeout 18945dfffebSLars Ellenberg * on activity log transactions */ 19045dfffebSLars Ellenberg #define BM_PAGE_HINT_WRITEOUT 27 19119f843aaSLars Ellenberg 19224c4830cSBart Van Assche /* store_page_idx uses non-atomic assignment. It is only used directly after 19319f843aaSLars Ellenberg * allocating the page. 
All other bm_set_page_* and bm_clear_page_* need to 19419f843aaSLars Ellenberg * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap 19519f843aaSLars Ellenberg * changes) may happen from various contexts, and wait_on_bit/wake_up_bit 19619f843aaSLars Ellenberg * requires it all to be atomic as well. */ 19719f843aaSLars Ellenberg static void bm_store_page_idx(struct page *page, unsigned long idx) 19819f843aaSLars Ellenberg { 19919f843aaSLars Ellenberg BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); 2000c7db279SArne Redlich set_page_private(page, idx); 20119f843aaSLars Ellenberg } 20219f843aaSLars Ellenberg 20319f843aaSLars Ellenberg static unsigned long bm_page_to_idx(struct page *page) 20419f843aaSLars Ellenberg { 20519f843aaSLars Ellenberg return page_private(page) & BM_PAGE_IDX_MASK; 20619f843aaSLars Ellenberg } 20719f843aaSLars Ellenberg 20819f843aaSLars Ellenberg /* As is very unlikely that the same page is under IO from more than one 20919f843aaSLars Ellenberg * context, we can get away with a bit per page and one wait queue per bitmap. 
21019f843aaSLars Ellenberg */ 211b30ab791SAndreas Gruenbacher static void bm_page_lock_io(struct drbd_device *device, int page_nr) 21219f843aaSLars Ellenberg { 213b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 21419f843aaSLars Ellenberg void *addr = &page_private(b->bm_pages[page_nr]); 21519f843aaSLars Ellenberg wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); 21619f843aaSLars Ellenberg } 21719f843aaSLars Ellenberg 218b30ab791SAndreas Gruenbacher static void bm_page_unlock_io(struct drbd_device *device, int page_nr) 21919f843aaSLars Ellenberg { 220b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 22119f843aaSLars Ellenberg void *addr = &page_private(b->bm_pages[page_nr]); 2224738fa16SLars Ellenberg clear_bit_unlock(BM_PAGE_IO_LOCK, addr); 223b30ab791SAndreas Gruenbacher wake_up(&device->bitmap->bm_io_wait); 22419f843aaSLars Ellenberg } 22519f843aaSLars Ellenberg 22619f843aaSLars Ellenberg /* set _before_ submit_io, so it may be reset due to being changed 22719f843aaSLars Ellenberg * while this page is in flight... will get submitted later again */ 22819f843aaSLars Ellenberg static void bm_set_page_unchanged(struct page *page) 22919f843aaSLars Ellenberg { 23019f843aaSLars Ellenberg /* use cmpxchg? 
*/ 23119f843aaSLars Ellenberg clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); 23219f843aaSLars Ellenberg clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); 23319f843aaSLars Ellenberg } 23419f843aaSLars Ellenberg 23519f843aaSLars Ellenberg static void bm_set_page_need_writeout(struct page *page) 23619f843aaSLars Ellenberg { 23719f843aaSLars Ellenberg set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); 23819f843aaSLars Ellenberg } 23919f843aaSLars Ellenberg 24027ea1d87SLars Ellenberg void drbd_bm_reset_al_hints(struct drbd_device *device) 24127ea1d87SLars Ellenberg { 24227ea1d87SLars Ellenberg device->bitmap->n_bitmap_hints = 0; 24327ea1d87SLars Ellenberg } 24427ea1d87SLars Ellenberg 24545dfffebSLars Ellenberg /** 24645dfffebSLars Ellenberg * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout 247b30ab791SAndreas Gruenbacher * @device: DRBD device. 24845dfffebSLars Ellenberg * @page_nr: the bitmap page to mark with the "hint" flag 24945dfffebSLars Ellenberg * 25045dfffebSLars Ellenberg * From within an activity log transaction, we mark a few pages with these 25145dfffebSLars Ellenberg * hints, then call drbd_bm_write_hinted(), which will only write out changed 25245dfffebSLars Ellenberg * pages which are flagged with this mark. 
25345dfffebSLars Ellenberg */ 254b30ab791SAndreas Gruenbacher void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr) 25545dfffebSLars Ellenberg { 25627ea1d87SLars Ellenberg struct drbd_bitmap *b = device->bitmap; 25745dfffebSLars Ellenberg struct page *page; 258b30ab791SAndreas Gruenbacher if (page_nr >= device->bitmap->bm_number_of_pages) { 259d0180171SAndreas Gruenbacher drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n", 260b30ab791SAndreas Gruenbacher page_nr, (int)device->bitmap->bm_number_of_pages); 26145dfffebSLars Ellenberg return; 26245dfffebSLars Ellenberg } 263b30ab791SAndreas Gruenbacher page = device->bitmap->bm_pages[page_nr]; 26427ea1d87SLars Ellenberg BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints)); 26527ea1d87SLars Ellenberg if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page))) 26627ea1d87SLars Ellenberg b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr; 26745dfffebSLars Ellenberg } 26845dfffebSLars Ellenberg 26919f843aaSLars Ellenberg static int bm_test_page_unchanged(struct page *page) 27019f843aaSLars Ellenberg { 27119f843aaSLars Ellenberg volatile const unsigned long *addr = &page_private(page); 27219f843aaSLars Ellenberg return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0; 27319f843aaSLars Ellenberg } 27419f843aaSLars Ellenberg 27519f843aaSLars Ellenberg static void bm_set_page_io_err(struct page *page) 27619f843aaSLars Ellenberg { 27719f843aaSLars Ellenberg set_bit(BM_PAGE_IO_ERROR, &page_private(page)); 27819f843aaSLars Ellenberg } 27919f843aaSLars Ellenberg 28019f843aaSLars Ellenberg static void bm_clear_page_io_err(struct page *page) 28119f843aaSLars Ellenberg { 28219f843aaSLars Ellenberg clear_bit(BM_PAGE_IO_ERROR, &page_private(page)); 28319f843aaSLars Ellenberg } 28419f843aaSLars Ellenberg 28519f843aaSLars Ellenberg static void bm_set_page_lazy_writeout(struct page *page) 28619f843aaSLars Ellenberg { 28719f843aaSLars Ellenberg set_bit(BM_PAGE_LAZY_WRITEOUT, 
&page_private(page)); 28819f843aaSLars Ellenberg } 28919f843aaSLars Ellenberg 29019f843aaSLars Ellenberg static int bm_test_page_lazy_writeout(struct page *page) 29119f843aaSLars Ellenberg { 29219f843aaSLars Ellenberg return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); 29319f843aaSLars Ellenberg } 29419f843aaSLars Ellenberg 29519f843aaSLars Ellenberg /* on a 32bit box, this would allow for exactly (2<<38) bits. */ 29619f843aaSLars Ellenberg static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr) 29719f843aaSLars Ellenberg { 29819f843aaSLars Ellenberg /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */ 29919f843aaSLars Ellenberg unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); 30019f843aaSLars Ellenberg BUG_ON(page_nr >= b->bm_number_of_pages); 30119f843aaSLars Ellenberg return page_nr; 30219f843aaSLars Ellenberg } 30319f843aaSLars Ellenberg 30495a0f10cSLars Ellenberg static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) 30595a0f10cSLars Ellenberg { 30695a0f10cSLars Ellenberg /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ 30795a0f10cSLars Ellenberg unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); 30895a0f10cSLars Ellenberg BUG_ON(page_nr >= b->bm_number_of_pages); 30995a0f10cSLars Ellenberg return page_nr; 31095a0f10cSLars Ellenberg } 31195a0f10cSLars Ellenberg 312589973a7SCong Wang static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) 31395a0f10cSLars Ellenberg { 31495a0f10cSLars Ellenberg struct page *page = b->bm_pages[idx]; 315cfd8005cSCong Wang return (unsigned long *) kmap_atomic(page); 31695a0f10cSLars Ellenberg } 31795a0f10cSLars Ellenberg 31895a0f10cSLars Ellenberg static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) 31995a0f10cSLars Ellenberg { 320cfd8005cSCong Wang return __bm_map_pidx(b, idx); 32195a0f10cSLars Ellenberg } 32295a0f10cSLars Ellenberg 323cfd8005cSCong Wang static void __bm_unmap(unsigned long *p_addr) 324b411b363SPhilipp Reisner { 
325cfd8005cSCong Wang kunmap_atomic(p_addr); 326b411b363SPhilipp Reisner }; 327b411b363SPhilipp Reisner 328b411b363SPhilipp Reisner static void bm_unmap(unsigned long *p_addr) 329b411b363SPhilipp Reisner { 330cfd8005cSCong Wang return __bm_unmap(p_addr); 331b411b363SPhilipp Reisner } 332b411b363SPhilipp Reisner 333b411b363SPhilipp Reisner /* long word offset of _bitmap_ sector */ 334b411b363SPhilipp Reisner #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) 335b411b363SPhilipp Reisner /* word offset from start of bitmap to word number _in_page_ 336b411b363SPhilipp Reisner * modulo longs per page 337b411b363SPhilipp Reisner #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) 33824c4830cSBart Van Assche hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) 339b411b363SPhilipp Reisner so do it explicitly: 340b411b363SPhilipp Reisner */ 341b411b363SPhilipp Reisner #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) 342b411b363SPhilipp Reisner 343b411b363SPhilipp Reisner /* Long words per page */ 344b411b363SPhilipp Reisner #define LWPP (PAGE_SIZE/sizeof(long)) 345b411b363SPhilipp Reisner 346b411b363SPhilipp Reisner /* 347b411b363SPhilipp Reisner * actually most functions herein should take a struct drbd_bitmap*, not a 348b30ab791SAndreas Gruenbacher * struct drbd_device*, but for the debug macros I like to have the device around 349b411b363SPhilipp Reisner * to be able to report device specific. 
350b411b363SPhilipp Reisner */ 351b411b363SPhilipp Reisner 35219f843aaSLars Ellenberg 353b411b363SPhilipp Reisner static void bm_free_pages(struct page **pages, unsigned long number) 354b411b363SPhilipp Reisner { 355b411b363SPhilipp Reisner unsigned long i; 356b411b363SPhilipp Reisner if (!pages) 357b411b363SPhilipp Reisner return; 358b411b363SPhilipp Reisner 359b411b363SPhilipp Reisner for (i = 0; i < number; i++) { 360b411b363SPhilipp Reisner if (!pages[i]) { 361f88c5d90SLars Ellenberg pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n", 362b411b363SPhilipp Reisner i, number); 363b411b363SPhilipp Reisner continue; 364b411b363SPhilipp Reisner } 365b411b363SPhilipp Reisner __free_page(pages[i]); 366b411b363SPhilipp Reisner pages[i] = NULL; 367b411b363SPhilipp Reisner } 368b411b363SPhilipp Reisner } 369b411b363SPhilipp Reisner 3701d5cfdb0STetsuo Handa static inline void bm_vk_free(void *ptr) 371b411b363SPhilipp Reisner { 3721d5cfdb0STetsuo Handa kvfree(ptr); 373b411b363SPhilipp Reisner } 374b411b363SPhilipp Reisner 375b411b363SPhilipp Reisner /* 376b411b363SPhilipp Reisner * "have" and "want" are NUMBER OF PAGES. 377b411b363SPhilipp Reisner */ 378b411b363SPhilipp Reisner static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) 379b411b363SPhilipp Reisner { 380b411b363SPhilipp Reisner struct page **old_pages = b->bm_pages; 381b411b363SPhilipp Reisner struct page **new_pages, *page; 3821d5cfdb0STetsuo Handa unsigned int i, bytes; 383b411b363SPhilipp Reisner unsigned long have = b->bm_number_of_pages; 384b411b363SPhilipp Reisner 385b411b363SPhilipp Reisner BUG_ON(have == 0 && old_pages != NULL); 386b411b363SPhilipp Reisner BUG_ON(have != 0 && old_pages == NULL); 387b411b363SPhilipp Reisner 388b411b363SPhilipp Reisner if (have == want) 389b411b363SPhilipp Reisner return old_pages; 390b411b363SPhilipp Reisner 391b411b363SPhilipp Reisner /* Trying kmalloc first, falling back to vmalloc. 
3920b143d43SLars Ellenberg * GFP_NOIO, as this is called while drbd IO is "suspended", 3930b143d43SLars Ellenberg * and during resize or attach on diskless Primary, 3940b143d43SLars Ellenberg * we must not block on IO to ourselves. 395bc891c9aSLars Ellenberg * Context is receiver thread or dmsetup. */ 396b411b363SPhilipp Reisner bytes = sizeof(struct page *)*want; 3978be04b93SJoe Perches new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN); 398b411b363SPhilipp Reisner if (!new_pages) { 39988dca4caSChristoph Hellwig new_pages = __vmalloc(bytes, GFP_NOIO | __GFP_ZERO); 400b411b363SPhilipp Reisner if (!new_pages) 401b411b363SPhilipp Reisner return NULL; 402b411b363SPhilipp Reisner } 403b411b363SPhilipp Reisner 404b411b363SPhilipp Reisner if (want >= have) { 405b411b363SPhilipp Reisner for (i = 0; i < have; i++) 406b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 407b411b363SPhilipp Reisner for (; i < want; i++) { 4080b143d43SLars Ellenberg page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 409b411b363SPhilipp Reisner if (!page) { 410b411b363SPhilipp Reisner bm_free_pages(new_pages + have, i - have); 4111d5cfdb0STetsuo Handa bm_vk_free(new_pages); 412b411b363SPhilipp Reisner return NULL; 413b411b363SPhilipp Reisner } 41419f843aaSLars Ellenberg /* we want to know which page it is 41519f843aaSLars Ellenberg * from the endio handlers */ 41619f843aaSLars Ellenberg bm_store_page_idx(page, i); 417b411b363SPhilipp Reisner new_pages[i] = page; 418b411b363SPhilipp Reisner } 419b411b363SPhilipp Reisner } else { 420b411b363SPhilipp Reisner for (i = 0; i < want; i++) 421b411b363SPhilipp Reisner new_pages[i] = old_pages[i]; 422b411b363SPhilipp Reisner /* NOT HERE, we are outside the spinlock! 
423b411b363SPhilipp Reisner bm_free_pages(old_pages + want, have - want); 424b411b363SPhilipp Reisner */ 425b411b363SPhilipp Reisner } 426b411b363SPhilipp Reisner 427b411b363SPhilipp Reisner return new_pages; 428b411b363SPhilipp Reisner } 429b411b363SPhilipp Reisner 430b411b363SPhilipp Reisner /* 4317e5fec31SFabian Frederick * allocates the drbd_bitmap and stores it in device->bitmap. 432b411b363SPhilipp Reisner */ 433b30ab791SAndreas Gruenbacher int drbd_bm_init(struct drbd_device *device) 434b411b363SPhilipp Reisner { 435b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 436b411b363SPhilipp Reisner WARN_ON(b != NULL); 437b411b363SPhilipp Reisner b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL); 438b411b363SPhilipp Reisner if (!b) 439b411b363SPhilipp Reisner return -ENOMEM; 440b411b363SPhilipp Reisner spin_lock_init(&b->bm_lock); 4418a03ae2aSThomas Gleixner mutex_init(&b->bm_change); 442b411b363SPhilipp Reisner init_waitqueue_head(&b->bm_io_wait); 443b411b363SPhilipp Reisner 444b30ab791SAndreas Gruenbacher device->bitmap = b; 445b411b363SPhilipp Reisner 446b411b363SPhilipp Reisner return 0; 447b411b363SPhilipp Reisner } 448b411b363SPhilipp Reisner 449b30ab791SAndreas Gruenbacher sector_t drbd_bm_capacity(struct drbd_device *device) 450b411b363SPhilipp Reisner { 451b30ab791SAndreas Gruenbacher if (!expect(device->bitmap)) 452841ce241SAndreas Gruenbacher return 0; 453b30ab791SAndreas Gruenbacher return device->bitmap->bm_dev_capacity; 454b411b363SPhilipp Reisner } 455b411b363SPhilipp Reisner 456b411b363SPhilipp Reisner /* called on driver unload. TODO: call when a device is destroyed. 
457b411b363SPhilipp Reisner */ 458b30ab791SAndreas Gruenbacher void drbd_bm_cleanup(struct drbd_device *device) 459b411b363SPhilipp Reisner { 460b30ab791SAndreas Gruenbacher if (!expect(device->bitmap)) 461841ce241SAndreas Gruenbacher return; 462b30ab791SAndreas Gruenbacher bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages); 4631d5cfdb0STetsuo Handa bm_vk_free(device->bitmap->bm_pages); 464b30ab791SAndreas Gruenbacher kfree(device->bitmap); 465b30ab791SAndreas Gruenbacher device->bitmap = NULL; 466b411b363SPhilipp Reisner } 467b411b363SPhilipp Reisner 468b411b363SPhilipp Reisner /* 469b411b363SPhilipp Reisner * since (b->bm_bits % BITS_PER_LONG) != 0, 470b411b363SPhilipp Reisner * this masks out the remaining bits. 471b411b363SPhilipp Reisner * Returns the number of bits cleared. 472b411b363SPhilipp Reisner */ 4732630628bSLars Ellenberg #ifndef BITS_PER_PAGE 47495a0f10cSLars Ellenberg #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) 47595a0f10cSLars Ellenberg #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) 4762630628bSLars Ellenberg #else 4772630628bSLars Ellenberg # if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3)) 4782630628bSLars Ellenberg # error "ambiguous BITS_PER_PAGE" 4792630628bSLars Ellenberg # endif 4802630628bSLars Ellenberg #endif 48195a0f10cSLars Ellenberg #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) 482b411b363SPhilipp Reisner static int bm_clear_surplus(struct drbd_bitmap *b) 483b411b363SPhilipp Reisner { 48495a0f10cSLars Ellenberg unsigned long mask; 485b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 48695a0f10cSLars Ellenberg int tmp; 48795a0f10cSLars Ellenberg int cleared = 0; 488b411b363SPhilipp Reisner 48995a0f10cSLars Ellenberg /* number of bits modulo bits per page */ 49095a0f10cSLars Ellenberg tmp = (b->bm_bits & BITS_PER_PAGE_MASK); 49195a0f10cSLars Ellenberg /* mask the used bits of the word containing the last bit */ 49295a0f10cSLars Ellenberg mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; 49395a0f10cSLars 
Ellenberg /* bitmap is always stored little endian, 49495a0f10cSLars Ellenberg * on disk and in core memory alike */ 49595a0f10cSLars Ellenberg mask = cpu_to_lel(mask); 49695a0f10cSLars Ellenberg 4976850c442SLars Ellenberg p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); 49895a0f10cSLars Ellenberg bm = p_addr + (tmp/BITS_PER_LONG); 49995a0f10cSLars Ellenberg if (mask) { 50095a0f10cSLars Ellenberg /* If mask != 0, we are not exactly aligned, so bm now points 50195a0f10cSLars Ellenberg * to the long containing the last bit. 50295a0f10cSLars Ellenberg * If mask == 0, bm already points to the word immediately 50395a0f10cSLars Ellenberg * after the last (long word aligned) bit. */ 504b411b363SPhilipp Reisner cleared = hweight_long(*bm & ~mask); 505b411b363SPhilipp Reisner *bm &= mask; 50695a0f10cSLars Ellenberg bm++; 507b411b363SPhilipp Reisner } 508b411b363SPhilipp Reisner 50995a0f10cSLars Ellenberg if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { 51095a0f10cSLars Ellenberg /* on a 32bit arch, we may need to zero out 51195a0f10cSLars Ellenberg * a padding long to align with a 64bit remote */ 512b411b363SPhilipp Reisner cleared += hweight_long(*bm); 513b411b363SPhilipp Reisner *bm = 0; 514b411b363SPhilipp Reisner } 515b411b363SPhilipp Reisner bm_unmap(p_addr); 516b411b363SPhilipp Reisner return cleared; 517b411b363SPhilipp Reisner } 518b411b363SPhilipp Reisner 519b411b363SPhilipp Reisner static void bm_set_surplus(struct drbd_bitmap *b) 520b411b363SPhilipp Reisner { 52195a0f10cSLars Ellenberg unsigned long mask; 522b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 52395a0f10cSLars Ellenberg int tmp; 524b411b363SPhilipp Reisner 52595a0f10cSLars Ellenberg /* number of bits modulo bits per page */ 52695a0f10cSLars Ellenberg tmp = (b->bm_bits & BITS_PER_PAGE_MASK); 52795a0f10cSLars Ellenberg /* mask the used bits of the word containing the last bit */ 52895a0f10cSLars Ellenberg mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; 52995a0f10cSLars Ellenberg /* bitmap 
is always stored little endian,
 * on disk and in core memory alike */
	mask = cpu_to_lel(mask);

	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
	bm = p_addr + (tmp/BITS_PER_LONG);
	if (mask) {
		/* If mask != 0, we are not exactly aligned, so bm now points
		 * to the long containing the last bit.
		 * If mask == 0, bm already points to the word immediately
		 * after the last (long word aligned) bit. */
		*bm |= ~mask;
		bm++;
	}

	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
		/* on a 32bit arch, we may need to zero out
		 * a padding long to align with a 64bit remote */
		*bm = ~0UL;
	}
	bm_unmap(p_addr);
}

/* Count the set bits in the whole in-core bitmap.
 *
 * you better not modify the bitmap while this is running,
 * or its results will be stale */
static unsigned long bm_count_bits(struct drbd_bitmap *b)
{
	unsigned long *p_addr;
	unsigned long bits = 0;
	/* mask of the bits actually used in the last long word of the bitmap */
	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
	int idx, last_word;

	/* all but last page */
	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
		p_addr = __bm_map_pidx(b, idx);
		bits += bitmap_weight(p_addr, BITS_PER_PAGE);
		__bm_unmap(p_addr);
		cond_resched();
	}
	/* last (or only) page: count only up to the last used word, then
	 * mask surplus bits out of that word before counting it, so bits
	 * beyond bm_bits never contribute to the result. */
	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
	p_addr = __bm_map_pidx(b, idx);
	bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
	p_addr[last_word] &= cpu_to_lel(mask);
	bits += hweight_long(p_addr[last_word]);
	/* 32bit arch, may have an unused padding long */
	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
		p_addr[last_word+1] = 0;
	__bm_unmap(p_addr);
	return bits;
}

/* Fill a range of the bitmap with byte value @c.
 * offset and len in long words.
 * Callers visible in this file (drbd_bm_set_all/_clear_all, drbd_bm_resize)
 * hold b->bm_lock around this. */
static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
{
	unsigned long *p_addr, *bm;
	unsigned int idx;
	size_t do_now, end;

	end = offset + len;

	if (end > b->bm_words) {
		pr_alert("bm_memset end > bm_words\n");
		return;
	}

	/* work page by page: each iteration covers at most up to the next
	 * page boundary (LWPP = long words per page) */
	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		if (bm+do_now > p_addr + LWPP) {
			pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
			       p_addr, bm, (int)do_now);
		} else
			memset(bm, c, do_now * sizeof(long));
		bm_unmap(p_addr);
		/* mark the page dirty so lazy writeout picks it up */
		bm_set_page_need_writeout(b->bm_pages[idx]);
		offset += do_now;
	}
}

/* Number of bits the on-disk bitmap area can hold.
 * sectors << 9 = bytes, << 3 more = bits.
 * For the layout, see comment above drbd_md_set_sector_offsets(). */
static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
{
	u64 bitmap_sectors;
	/* NOTE(review): al_offset == 8 apparently distinguishes the meta-data
	 * layout variant -- confirm against drbd_md_set_sector_offsets(). */
	if (ldev->md.al_offset == 8)
		bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
	else
		bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
	return bitmap_sectors << (9 + 3);
}

/*
 * make sure the bitmap has enough room for the attached storage,
 * if necessary, resize.
 * called whenever we may have changed the device size.
 * returns -ENOMEM if we could not allocate enough memory, 0 on success.
 * In case this is actually a resize, we copy the old bitmap into the new one.
 * Otherwise, the bitmap is initialized to all bits set.
 */
int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long bits, words, owords, obits;
	unsigned long want, have, onpages; /* number of pages */
	struct page **npages, **opages = NULL;
	int err = 0;
	bool growing;

	if (!expect(b))
		return -ENOMEM;

	drbd_bm_lock(device, "resize", BM_LOCKED_MASK);

	drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
			(unsigned long long)capacity);

	/* nothing to do if the capacity did not change */
	if (capacity == b->bm_dev_capacity)
		goto out;

	/* capacity == 0: tear down the bitmap completely */
	if (capacity == 0) {
		spin_lock_irq(&b->bm_lock);
		opages = b->bm_pages;
		onpages = b->bm_number_of_pages;
		owords = b->bm_words;
		b->bm_pages = NULL;
		b->bm_number_of_pages =
		b->bm_set =
		b->bm_bits =
		b->bm_words =
		b->bm_dev_capacity = 0;
		spin_unlock_irq(&b->bm_lock);
		bm_free_pages(opages, onpages);
		bm_vk_free(opages);
		goto out;
	}
	bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));

	/* if we would use
	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
	   a 32bit host could present the wrong number of words
	   to a 64bit host.
	*/
	words = ALIGN(bits, 64) >> LN2_BPL;

	/* refuse to grow beyond what the on-disk meta-data area can hold */
	if (get_ldev(device)) {
		u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
		put_ldev(device);
		if (bits > bits_on_disk) {
			drbd_info(device, "bits = %lu\n", bits);
			drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
			err = -ENOSPC;
			goto out;
		}
	}

	want = PFN_UP(words*sizeof(long));
	have = b->bm_number_of_pages;
	if (want == have) {
		/* same page count: reuse the existing page array */
		D_ASSERT(device, b->bm_pages != NULL);
		npages = b->bm_pages;
	} else {
		if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
			npages = NULL;
		else
			npages = bm_realloc_pages(b, want);
	}

	if (!npages) {
		err = -ENOMEM;
		goto out;
	}

	spin_lock_irq(&b->bm_lock);
	opages = b->bm_pages;
	owords = b->bm_words;
	obits = b->bm_bits;

	growing = bits > obits;
	if (opages && growing && set_new_bits)
		bm_set_surplus(b);

	b->bm_pages = npages;
	b->bm_number_of_pages = want;
	b->bm_bits = bits;
	b->bm_words = words;
	b->bm_dev_capacity = capacity;

	if (growing) {
		if (set_new_bits) {
			/* new area starts out-of-sync: all bits set */
			bm_memset(b, owords, 0xff, words-owords);
			b->bm_set += bits - obits;
		} else
			bm_memset(b, owords, 0x00, words-owords);

	}

	if (want < have) {
		/* implicit: (opages != NULL) && (opages != npages) */
		bm_free_pages(opages + want, have - want);
	}

	(void)bm_clear_surplus(b);

	spin_unlock_irq(&b->bm_lock);
	if (opages != npages)
		bm_vk_free(opages);
	if (!growing)
		/* shrinking: recount, some set bits may have been cut off */
		b->bm_set = bm_count_bits(b);
	drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);

 out:
	drbd_bm_unlock(device);
	return err;
}

/* inherently racy:
 * if not protected by other means, return value may be out of date when
 *
leaving this function...
 * we still need to lock it, since it is important that this returns
 * bm_set == 0 precisely.
 *
 * maybe bm_set should be atomic_t ?
 */
unsigned long _drbd_bm_total_weight(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long s;
	unsigned long flags;

	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	/* snapshot the cached set-bit count under the bitmap lock */
	spin_lock_irqsave(&b->bm_lock, flags);
	s = b->bm_set;
	spin_unlock_irqrestore(&b->bm_lock, flags);

	return s;
}

/* Like _drbd_bm_total_weight(), but only valid while we hold a local disk
 * reference; returns 0 when no disk is attached. */
unsigned long drbd_bm_total_weight(struct drbd_device *device)
{
	unsigned long s;
	/* if I don't have a disk, I don't know about out-of-sync status */
	if (!get_ldev_if_state(device, D_NEGOTIATING))
		return 0;
	s = _drbd_bm_total_weight(device);
	put_ldev(device);
	return s;
}

/* Number of long words in the bitmap; 0 if not (fully) allocated. */
size_t drbd_bm_words(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(b))
		return 0;
	if (!expect(b->bm_pages))
		return 0;

	return b->bm_words;
}

/* Number of bits in the bitmap; 0 if the bitmap is not allocated. */
unsigned long drbd_bm_bits(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(b))
		return 0;

	return b->bm_bits;
}

/* merge number words from buffer into the bitmap starting at offset.
 * buffer[i] is expected to be little endian unsigned long.
 * bitmap must be locked by drbd_bm_lock.
 * currently only used from receive_bitmap.
 */
void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
			unsigned long *buffer)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long *p_addr, *bm;
	unsigned long word, bits;
	unsigned int idx;
	size_t end, do_now;

	end = offset + number;

	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;
	if (number == 0)
		return;
	WARN_ON(offset >= b->bm_words);
	WARN_ON(end    >  b->bm_words);

	spin_lock_irq(&b->bm_lock);
	/* page by page: OR the buffer words in, and keep bm_set in sync by
	 * comparing each word's population count before and after */
	while (offset < end) {
		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
		idx = bm_word_to_page_idx(b, offset);
		p_addr = bm_map_pidx(b, idx);
		bm = p_addr + MLPP(offset);
		offset += do_now;
		while (do_now--) {
			bits = hweight_long(*bm);
			word = *bm | *buffer++;
			*bm++ = word;
			b->bm_set += hweight_long(word) - bits;
		}
		bm_unmap(p_addr);
		bm_set_page_need_writeout(b->bm_pages[idx]);
	}
	/* with 32bit <-> 64bit cross-platform connect
	 * this is only correct for current usage,
	 * where we _know_ that we are 64 bit aligned,
	 * and know that this function is used in this way, too...
	 */
	if (end == b->bm_words)
		b->bm_set -= bm_clear_surplus(b);
	spin_unlock_irq(&b->bm_lock);
}

/* copy number words from the bitmap starting at offset into the buffer.
 * buffer[i] will be little endian unsigned long.
 */
void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
		     unsigned long *buffer)
{
	struct drbd_bitmap *b = device->bitmap;
	unsigned long *p_addr, *bm;
	size_t end, do_now;

	end = offset + number;

	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	if ((offset >= b->bm_words) ||
	    (end    >  b->bm_words) ||
	    (number <= 0))
		/* out-of-range request: complain, copy nothing */
		drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
			(unsigned long)	offset,
			(unsigned long)	number,
			(unsigned long) b->bm_words);
	else {
		while (offset < end) {
			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
			p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
			bm = p_addr + MLPP(offset);
			offset += do_now;
			while (do_now--)
				*buffer++ = *bm++;
			bm_unmap(p_addr);
		}
	}
	spin_unlock_irq(&b->bm_lock);
}

/* set all bits in the bitmap */
void drbd_bm_set_all(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0xff, b->bm_words);
	/* 0xff also set the surplus bits past bm_bits; clear them again */
	(void)bm_clear_surplus(b);
	b->bm_set = b->bm_bits;
	spin_unlock_irq(&b->bm_lock);
}

/* clear all bits in the bitmap */
void drbd_bm_clear_all(struct drbd_device *device)
{
	struct drbd_bitmap *b = device->bitmap;
	if (!expect(b))
		return;
	if (!expect(b->bm_pages))
		return;

	spin_lock_irq(&b->bm_lock);
	bm_memset(b, 0, 0, b->bm_words);
	b->bm_set = 0;
	spin_unlock_irq(&b->bm_lock);
}

/* kref release: unlink the aio context from the device's pending list,
 * drop the ldev reference taken in bm_rw(), and free it. */
static void drbd_bm_aio_ctx_destroy(struct kref *kref)
{
	struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
	unsigned long flags;

	spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
	list_del(&ctx->list);
	spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
	put_ldev(ctx->device);
	kfree(ctx);
}

/* bio completion handler for a single bitmap page.
 * bv_page may be a copy, or may be the original */
static void drbd_bm_endio(struct bio *bio)
{
	struct drbd_bm_aio_ctx *ctx = bio->bi_private;
	struct drbd_device *device = ctx->device;
	struct drbd_bitmap *b = device->bitmap;
	unsigned int idx = bm_page_to_idx(bio_first_page_all(bio));

	if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
	    !bm_test_page_unchanged(b->bm_pages[idx]))
		drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);

	if (bio->bi_status) {
		/* ctx error will hold the completed-last non-zero error code,
		 * in case error codes differ. */
		ctx->error = blk_status_to_errno(bio->bi_status);
		bm_set_page_io_err(b->bm_pages[idx]);
		/* Not identical to on disk version of it.
		 * Is BM_PAGE_IO_ERROR enough? */
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
					bio->bi_status, idx);
	} else {
		bm_clear_page_io_err(b->bm_pages[idx]);
		dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
	}

	bm_page_unlock_io(device, idx);

	/* for COPY_PAGES, the page was a mempool copy made in
	 * bm_page_io_async(); return it */
	if (ctx->flags & BM_AIO_COPY_PAGES)
		mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);

	bio_put(bio);

	/* last completion wakes the submitter and drops its ctx reference */
	if (atomic_dec_and_test(&ctx->in_flight)) {
		ctx->done = 1;
		wake_up(&device->misc_wait);
		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
	}
}

/* Build and submit one bio for bitmap page @page_nr (read or write,
 * depending on ctx->flags). */
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
	struct drbd_device *device = ctx->device;
	unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
	struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
					   GFP_NOIO, &drbd_md_io_bio_set);
	struct drbd_bitmap *b = device->bitmap;
	struct page *page;
	unsigned int len;

	sector_t on_disk_sector =
		device->ldev->md.md_offset + device->ldev->md.bm_offset;
	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);

	/* this might happen with very small
	 * flexible external meta data device,
	 * or with PAGE_SIZE > 4k */
	len = min_t(unsigned int, PAGE_SIZE,
		(drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);

	/* serialize IO on this page */
	bm_page_lock_io(device, page_nr);
	/* before memcpy and submit,
	 * so it can be redirtied any time */
	bm_set_page_unchanged(b->bm_pages[page_nr]);

	if (ctx->flags & BM_AIO_COPY_PAGES) {
		/* write a private copy so the live page may keep changing */
		page = mempool_alloc(&drbd_md_io_page_pool,
				GFP_NOIO | __GFP_HIGHMEM);
		copy_highpage(page, b->bm_pages[page_nr]);
		bm_store_page_idx(page, page_nr);
	} else
		page = b->bm_pages[page_nr];
	bio->bi_iter.bi_sector = on_disk_sector;
	/* bio_add_page of a single page to an empty bio will always succeed,
	 * according to api.  Do we want to assert that? */
	bio_add_page(bio, page, len, 0);
	bio->bi_private = ctx;
	bio->bi_end_io = drbd_bm_endio;

	if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
		bio_io_error(bio);
	} else {
		submit_bio(bio);
		/* this should not count as user activity and cause the
		 * resync to throttle -- see drbd_rs_should_slow_down(). */
		atomic_add(len >> 9, &device->rs_sect_ev);
	}
}

/*
 * bm_rw: read/write the whole bitmap from/to its on disk location.
 */
static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
	struct drbd_bm_aio_ctx *ctx;
	struct drbd_bitmap *b = device->bitmap;
	unsigned int num_pages, i, count = 0;
	unsigned long now;
	char ppb[10];
	int err = 0;

	/*
	 * We are protected against bitmap disappearing/resizing by holding an
	 * ldev reference (caller must have called get_ldev()).
	 * For read/write, we are protected against changes to the bitmap by
	 * the bitmap lock (see drbd_bitmap_io).
	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
	 * as we submit copies of pages anyways.
	 */

	ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
	if (!ctx)
		return -ENOMEM;

	/* kref starts at 2: one for the submission path below,
	 * one for the "in_flight reached zero" event in drbd_bm_endio() */
	*ctx = (struct drbd_bm_aio_ctx) {
		.device = device,
		.start_jif = jiffies,
		.in_flight = ATOMIC_INIT(1),
		.done = 0,
		.flags = flags,
		.error = 0,
		.kref = KREF_INIT(2),
	};

	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in drbd_bm_aio_ctx_destroy() */
		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
		kfree(ctx);
		return -ENODEV;
	}
	/* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
	   drbd_adm_attach(), after device->ldev was assigned. */

	if (0 == (ctx->flags & ~BM_AIO_READ))
		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&ctx->list, &device->pending_bitmap_io);
	spin_unlock_irq(&device->resource->req_lock);

	num_pages = b->bm_number_of_pages;

	now = jiffies;

	/* let the layers below us try to merge these bios... */

	if (flags & BM_AIO_READ) {
		/* read: always all pages */
		for (i = 0; i < num_pages; i++) {
			atomic_inc(&ctx->in_flight);
			bm_page_io_async(ctx, i);
			++count;
			cond_resched();
		}
	} else if (flags & BM_AIO_WRITE_HINTED) {
		/* hinted write: only pages named by the activity-log hints */
		/* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
		unsigned int hint;
		for (hint = 0; hint < b->n_bitmap_hints; hint++) {
			i = b->al_bitmap_hints[hint];
			if (i >= num_pages) /* == -1U: no hint here. */
				continue;
			/* Several AL-extents may point to the same page. */
			if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
			    &page_private(b->bm_pages[i])))
				continue;
			/* Has it even changed? */
			if (bm_test_page_unchanged(b->bm_pages[i]))
				continue;
			atomic_inc(&ctx->in_flight);
			bm_page_io_async(ctx, i);
			++count;
		}
	} else {
		/* (lazy) write: changed pages, optionally bounded */
		for (i = 0; i < num_pages; i++) {
			/* ignore completely unchanged pages */
			if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
				break;
			if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
			    bm_test_page_unchanged(b->bm_pages[i])) {
				dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
				continue;
			}
			/* during lazy writeout,
			 * ignore those pages not marked for lazy writeout. */
			if (lazy_writeout_upper_idx &&
			    !bm_test_page_lazy_writeout(b->bm_pages[i])) {
				dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
				continue;
			}
			atomic_inc(&ctx->in_flight);
			bm_page_io_async(ctx, i);
			++count;
			cond_resched();
		}
	}

	/*
	 * We initialize ctx->in_flight to one to make sure drbd_bm_endio
	 * will not set ctx->done early, and decrement / test it here.  If there
	 * are still some bios in flight, we need to wait for them here.
	 * If all IO is done already (or nothing had been submitted), there is
	 * no need to wait.  Still, we need to put the kref associated with the
	 * "in_flight reached zero, all done" event.
	 */
	if (!atomic_dec_and_test(&ctx->in_flight))
		wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
	else
		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);

	/* summary for global bitmap IO */
	if (flags == 0) {
		unsigned int ms = jiffies_to_msecs(jiffies - now);
		if (ms > 5) {
			drbd_info(device, "bitmap %s of %u pages took %u ms\n",
				 (flags & BM_AIO_READ) ? "READ" : "WRITE",
				 count, ms);
		}
	}

	if (ctx->error) {
		drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
		drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
		err = -EIO; /* ctx->error ? */
	}

	if (atomic_read(&ctx->in_flight))
		err = -EIO; /* Disk timeout/force-detach during IO... */

	now = jiffies;
	if (flags & BM_AIO_READ) {
		/* freshly read from disk: recount the cached set-bit total */
		b->bm_set = bm_count_bits(b);
		drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
		     jiffies - now);
	}
	now = b->bm_set;

	if ((flags & ~BM_AIO_READ) == 0)
		drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

	kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
	return err;
}

/**
 * drbd_bm_read() - Read the whole bitmap from its on disk location.
 * @device:	DRBD device.
 */
int drbd_bm_read(struct drbd_device *device) __must_hold(local)
{
	return bm_rw(device, BM_AIO_READ, 0);
}

/**
 * drbd_bm_write() - Write the whole bitmap to its on disk location.
 * @device:	DRBD device.
 *
 * Will only write pages that have changed since last IO.
1194b411b363SPhilipp Reisner */ 1195b30ab791SAndreas Gruenbacher int drbd_bm_write(struct drbd_device *device) __must_hold(local) 1196b411b363SPhilipp Reisner { 11974ce49266SLars Ellenberg return bm_rw(device, 0, 0); 1198b411b363SPhilipp Reisner } 1199b411b363SPhilipp Reisner 1200b411b363SPhilipp Reisner /** 1201d1aa4d04SPhilipp Reisner * drbd_bm_write_all() - Write the whole bitmap to its on disk location. 1202b30ab791SAndreas Gruenbacher * @device: DRBD device. 1203d1aa4d04SPhilipp Reisner * 1204d1aa4d04SPhilipp Reisner * Will write all pages. 1205d1aa4d04SPhilipp Reisner */ 1206b30ab791SAndreas Gruenbacher int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) 1207d1aa4d04SPhilipp Reisner { 12084ce49266SLars Ellenberg return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); 1209d1aa4d04SPhilipp Reisner } 1210d1aa4d04SPhilipp Reisner 1211d1aa4d04SPhilipp Reisner /** 1212c7a58db4SLars Ellenberg * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed. 1213c7a58db4SLars Ellenberg * @device: DRBD device. 1214c7a58db4SLars Ellenberg * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages 1215c7a58db4SLars Ellenberg */ 1216c7a58db4SLars Ellenberg int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local) 1217c7a58db4SLars Ellenberg { 1218c7a58db4SLars Ellenberg return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx); 1219c7a58db4SLars Ellenberg } 1220c7a58db4SLars Ellenberg 1221c7a58db4SLars Ellenberg /** 12220e8488adSLars Ellenberg * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. 1223b30ab791SAndreas Gruenbacher * @device: DRBD device. 12240e8488adSLars Ellenberg * 12250e8488adSLars Ellenberg * Will only write pages that have changed since last IO. 12260e8488adSLars Ellenberg * In contrast to drbd_bm_write(), this will copy the bitmap pages 12270e8488adSLars Ellenberg * to temporary writeout pages. 
It is intended to trigger a full write-out 12280e8488adSLars Ellenberg * while still allowing the bitmap to change, for example if a resync or online 12290e8488adSLars Ellenberg * verify is aborted due to a failed peer disk, while local IO continues, or 12300e8488adSLars Ellenberg * pending resync acks are still being processed. 12310e8488adSLars Ellenberg */ 1232b30ab791SAndreas Gruenbacher int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) 12330e8488adSLars Ellenberg { 12344ce49266SLars Ellenberg return bm_rw(device, BM_AIO_COPY_PAGES, 0); 1235b411b363SPhilipp Reisner } 123619f843aaSLars Ellenberg 1237a220d291SLars Ellenberg /** 123845dfffebSLars Ellenberg * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. 1239b30ab791SAndreas Gruenbacher * @device: DRBD device. 124045dfffebSLars Ellenberg */ 1241b30ab791SAndreas Gruenbacher int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) 124245dfffebSLars Ellenberg { 12434ce49266SLars Ellenberg return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); 124445dfffebSLars Ellenberg } 124519f843aaSLars Ellenberg 1246b411b363SPhilipp Reisner /* NOTE 1247b411b363SPhilipp Reisner * find_first_bit returns int, we return unsigned long. 12484b0715f0SLars Ellenberg * For this to work on 32bit arch with bitnumbers > (1<<32), 12494b0715f0SLars Ellenberg * we'd need to return u64, and get a whole lot of other places 12504b0715f0SLars Ellenberg * fixed where we still use unsigned long. 1251b411b363SPhilipp Reisner * 1252b411b363SPhilipp Reisner * this returns a bit number, NOT a sector! 
1253b411b363SPhilipp Reisner */ 1254b30ab791SAndreas Gruenbacher static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo, 1255cfd8005cSCong Wang const int find_zero_bit) 1256b411b363SPhilipp Reisner { 1257b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1258b411b363SPhilipp Reisner unsigned long *p_addr; 12594b0715f0SLars Ellenberg unsigned long bit_offset; 12604b0715f0SLars Ellenberg unsigned i; 12614b0715f0SLars Ellenberg 1262b411b363SPhilipp Reisner 1263b411b363SPhilipp Reisner if (bm_fo > b->bm_bits) { 1264d0180171SAndreas Gruenbacher drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 12654b0715f0SLars Ellenberg bm_fo = DRBD_END_OF_BITMAP; 1266b411b363SPhilipp Reisner } else { 1267b411b363SPhilipp Reisner while (bm_fo < b->bm_bits) { 126819f843aaSLars Ellenberg /* bit offset of the first bit in the page */ 12694b0715f0SLars Ellenberg bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; 1270cfd8005cSCong Wang p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo)); 1271b411b363SPhilipp Reisner 1272b411b363SPhilipp Reisner if (find_zero_bit) 12737e599e6eSLinus Torvalds i = find_next_zero_bit_le(p_addr, 12744b0715f0SLars Ellenberg PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); 1275b411b363SPhilipp Reisner else 12767e599e6eSLinus Torvalds i = find_next_bit_le(p_addr, 12774b0715f0SLars Ellenberg PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); 1278b411b363SPhilipp Reisner 1279cfd8005cSCong Wang __bm_unmap(p_addr); 1280b411b363SPhilipp Reisner if (i < PAGE_SIZE*8) { 12814b0715f0SLars Ellenberg bm_fo = bit_offset + i; 12824b0715f0SLars Ellenberg if (bm_fo >= b->bm_bits) 1283b411b363SPhilipp Reisner break; 1284b411b363SPhilipp Reisner goto found; 1285b411b363SPhilipp Reisner } 1286b411b363SPhilipp Reisner bm_fo = bit_offset + PAGE_SIZE*8; 1287b411b363SPhilipp Reisner } 12884b0715f0SLars Ellenberg bm_fo = DRBD_END_OF_BITMAP; 1289b411b363SPhilipp Reisner } 1290b411b363SPhilipp Reisner found: 12914b0715f0SLars Ellenberg return bm_fo; 
1292b411b363SPhilipp Reisner } 1293b411b363SPhilipp Reisner 1294b30ab791SAndreas Gruenbacher static unsigned long bm_find_next(struct drbd_device *device, 1295b411b363SPhilipp Reisner unsigned long bm_fo, const int find_zero_bit) 1296b411b363SPhilipp Reisner { 1297b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 12984b0715f0SLars Ellenberg unsigned long i = DRBD_END_OF_BITMAP; 1299b411b363SPhilipp Reisner 1300841ce241SAndreas Gruenbacher if (!expect(b)) 1301841ce241SAndreas Gruenbacher return i; 1302841ce241SAndreas Gruenbacher if (!expect(b->bm_pages)) 1303841ce241SAndreas Gruenbacher return i; 1304b411b363SPhilipp Reisner 1305b411b363SPhilipp Reisner spin_lock_irq(&b->bm_lock); 130620ceb2b2SLars Ellenberg if (BM_DONT_TEST & b->bm_flags) 1307b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1308b411b363SPhilipp Reisner 1309b30ab791SAndreas Gruenbacher i = __bm_find_next(device, bm_fo, find_zero_bit); 1310b411b363SPhilipp Reisner 1311b411b363SPhilipp Reisner spin_unlock_irq(&b->bm_lock); 1312b411b363SPhilipp Reisner return i; 1313b411b363SPhilipp Reisner } 1314b411b363SPhilipp Reisner 1315b30ab791SAndreas Gruenbacher unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo) 1316b411b363SPhilipp Reisner { 1317b30ab791SAndreas Gruenbacher return bm_find_next(device, bm_fo, 0); 1318b411b363SPhilipp Reisner } 1319b411b363SPhilipp Reisner 1320b411b363SPhilipp Reisner #if 0 1321b411b363SPhilipp Reisner /* not yet needed for anything. */ 1322b30ab791SAndreas Gruenbacher unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo) 1323b411b363SPhilipp Reisner { 1324b30ab791SAndreas Gruenbacher return bm_find_next(device, bm_fo, 1); 1325b411b363SPhilipp Reisner } 1326b411b363SPhilipp Reisner #endif 1327b411b363SPhilipp Reisner 1328b411b363SPhilipp Reisner /* does not spin_lock_irqsave. 
1329b411b363SPhilipp Reisner * you must take drbd_bm_lock() first */ 1330b30ab791SAndreas Gruenbacher unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo) 1331b411b363SPhilipp Reisner { 1332b30ab791SAndreas Gruenbacher /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */ 1333b30ab791SAndreas Gruenbacher return __bm_find_next(device, bm_fo, 0); 1334b411b363SPhilipp Reisner } 1335b411b363SPhilipp Reisner 1336b30ab791SAndreas Gruenbacher unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo) 1337b411b363SPhilipp Reisner { 1338b30ab791SAndreas Gruenbacher /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */ 1339b30ab791SAndreas Gruenbacher return __bm_find_next(device, bm_fo, 1); 1340b411b363SPhilipp Reisner } 1341b411b363SPhilipp Reisner 1342b411b363SPhilipp Reisner /* returns number of bits actually changed. 1343b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1344b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1345b411b363SPhilipp Reisner * wants bitnr, not sector. 1346b411b363SPhilipp Reisner * expected to be called for only a few bits (e - s about BITS_PER_LONG). 1347b411b363SPhilipp Reisner * Must hold bitmap lock already. 
*/ 1348b30ab791SAndreas Gruenbacher static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s, 1349829c6087SLars Ellenberg unsigned long e, int val) 1350b411b363SPhilipp Reisner { 1351b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1352b411b363SPhilipp Reisner unsigned long *p_addr = NULL; 1353b411b363SPhilipp Reisner unsigned long bitnr; 135419f843aaSLars Ellenberg unsigned int last_page_nr = -1U; 1355b411b363SPhilipp Reisner int c = 0; 135619f843aaSLars Ellenberg int changed_total = 0; 1357b411b363SPhilipp Reisner 1358b411b363SPhilipp Reisner if (e >= b->bm_bits) { 1359d0180171SAndreas Gruenbacher drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", 1360b411b363SPhilipp Reisner s, e, b->bm_bits); 1361b411b363SPhilipp Reisner e = b->bm_bits ? b->bm_bits -1 : 0; 1362b411b363SPhilipp Reisner } 1363b411b363SPhilipp Reisner for (bitnr = s; bitnr <= e; bitnr++) { 136419f843aaSLars Ellenberg unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); 1365b411b363SPhilipp Reisner if (page_nr != last_page_nr) { 1366b411b363SPhilipp Reisner if (p_addr) 1367cfd8005cSCong Wang __bm_unmap(p_addr); 136819f843aaSLars Ellenberg if (c < 0) 136919f843aaSLars Ellenberg bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); 137019f843aaSLars Ellenberg else if (c > 0) 137119f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[last_page_nr]); 137219f843aaSLars Ellenberg changed_total += c; 137319f843aaSLars Ellenberg c = 0; 1374cfd8005cSCong Wang p_addr = __bm_map_pidx(b, page_nr); 1375b411b363SPhilipp Reisner last_page_nr = page_nr; 1376b411b363SPhilipp Reisner } 1377b411b363SPhilipp Reisner if (val) 13787e599e6eSLinus Torvalds c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); 1379b411b363SPhilipp Reisner else 13807e599e6eSLinus Torvalds c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); 1381b411b363SPhilipp Reisner } 1382b411b363SPhilipp Reisner if (p_addr) 1383cfd8005cSCong Wang 
__bm_unmap(p_addr); 138419f843aaSLars Ellenberg if (c < 0) 138519f843aaSLars Ellenberg bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); 138619f843aaSLars Ellenberg else if (c > 0) 138719f843aaSLars Ellenberg bm_set_page_need_writeout(b->bm_pages[last_page_nr]); 138819f843aaSLars Ellenberg changed_total += c; 138919f843aaSLars Ellenberg b->bm_set += changed_total; 139019f843aaSLars Ellenberg return changed_total; 1391b411b363SPhilipp Reisner } 1392b411b363SPhilipp Reisner 1393b411b363SPhilipp Reisner /* returns number of bits actually changed. 1394b411b363SPhilipp Reisner * for val != 0, we change 0 -> 1, return code positive 1395b411b363SPhilipp Reisner * for val == 0, we change 1 -> 0, return code negative 1396b411b363SPhilipp Reisner * wants bitnr, not sector */ 1397b30ab791SAndreas Gruenbacher static int bm_change_bits_to(struct drbd_device *device, const unsigned long s, 1398b411b363SPhilipp Reisner const unsigned long e, int val) 1399b411b363SPhilipp Reisner { 1400b411b363SPhilipp Reisner unsigned long flags; 1401b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1402b411b363SPhilipp Reisner int c = 0; 1403b411b363SPhilipp Reisner 1404841ce241SAndreas Gruenbacher if (!expect(b)) 1405841ce241SAndreas Gruenbacher return 1; 1406841ce241SAndreas Gruenbacher if (!expect(b->bm_pages)) 1407841ce241SAndreas Gruenbacher return 0; 1408b411b363SPhilipp Reisner 1409b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 141020ceb2b2SLars Ellenberg if ((val ? 
BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) 1411b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1412b411b363SPhilipp Reisner 1413b30ab791SAndreas Gruenbacher c = __bm_change_bits_to(device, s, e, val); 1414b411b363SPhilipp Reisner 1415b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1416b411b363SPhilipp Reisner return c; 1417b411b363SPhilipp Reisner } 1418b411b363SPhilipp Reisner 1419b411b363SPhilipp Reisner /* returns number of bits changed 0 -> 1 */ 1420b30ab791SAndreas Gruenbacher int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) 1421b411b363SPhilipp Reisner { 1422b30ab791SAndreas Gruenbacher return bm_change_bits_to(device, s, e, 1); 1423b411b363SPhilipp Reisner } 1424b411b363SPhilipp Reisner 1425b411b363SPhilipp Reisner /* returns number of bits changed 1 -> 0 */ 1426b30ab791SAndreas Gruenbacher int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) 1427b411b363SPhilipp Reisner { 1428b30ab791SAndreas Gruenbacher return -bm_change_bits_to(device, s, e, 0); 1429b411b363SPhilipp Reisner } 1430b411b363SPhilipp Reisner 1431b411b363SPhilipp Reisner /* sets all bits in full words, 1432b411b363SPhilipp Reisner * from first_word up to, but not including, last_word */ 1433b411b363SPhilipp Reisner static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, 1434b411b363SPhilipp Reisner int page_nr, int first_word, int last_word) 1435b411b363SPhilipp Reisner { 1436b411b363SPhilipp Reisner int i; 1437b411b363SPhilipp Reisner int bits; 143822d81140SLars Ellenberg int changed = 0; 1439cfd8005cSCong Wang unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); 14405fb3bc4dSLars Ellenberg 14415fb3bc4dSLars Ellenberg /* I think it is more cache line friendly to hweight_long then set to ~0UL, 14425fb3bc4dSLars Ellenberg * than to first bitmap_weight() all words, then bitmap_fill() all words */ 1443b411b363SPhilipp Reisner for (i = first_word; i < 
last_word; i++) { 1444b411b363SPhilipp Reisner bits = hweight_long(paddr[i]); 1445b411b363SPhilipp Reisner paddr[i] = ~0UL; 144622d81140SLars Ellenberg changed += BITS_PER_LONG - bits; 1447b411b363SPhilipp Reisner } 1448cfd8005cSCong Wang kunmap_atomic(paddr); 144922d81140SLars Ellenberg if (changed) { 145022d81140SLars Ellenberg /* We only need lazy writeout, the information is still in the 145122d81140SLars Ellenberg * remote bitmap as well, and is reconstructed during the next 145222d81140SLars Ellenberg * bitmap exchange, if lost locally due to a crash. */ 145322d81140SLars Ellenberg bm_set_page_lazy_writeout(b->bm_pages[page_nr]); 145422d81140SLars Ellenberg b->bm_set += changed; 145522d81140SLars Ellenberg } 1456b411b363SPhilipp Reisner } 1457b411b363SPhilipp Reisner 1458829c6087SLars Ellenberg /* Same thing as drbd_bm_set_bits, 1459829c6087SLars Ellenberg * but more efficient for a large bit range. 1460b411b363SPhilipp Reisner * You must first drbd_bm_lock(). 1461b411b363SPhilipp Reisner * Can be called to set the whole bitmap in one go. 1462b411b363SPhilipp Reisner * Sets bits from s to e _inclusive_. */ 1463b30ab791SAndreas Gruenbacher void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) 1464b411b363SPhilipp Reisner { 1465b411b363SPhilipp Reisner /* First set_bit from the first bit (s) 1466b411b363SPhilipp Reisner * up to the next long boundary (sl), 1467b411b363SPhilipp Reisner * then assign full words up to the last long boundary (el), 1468b411b363SPhilipp Reisner * then set_bit up to and including the last bit (e). 1469b411b363SPhilipp Reisner * 1470b411b363SPhilipp Reisner * Do not use memset, because we must account for changes, 1471b411b363SPhilipp Reisner * so we need to loop over the words with hweight() anyways. 
1472b411b363SPhilipp Reisner */ 1473b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1474b411b363SPhilipp Reisner unsigned long sl = ALIGN(s,BITS_PER_LONG); 1475b411b363SPhilipp Reisner unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); 1476b411b363SPhilipp Reisner int first_page; 1477b411b363SPhilipp Reisner int last_page; 1478b411b363SPhilipp Reisner int page_nr; 1479b411b363SPhilipp Reisner int first_word; 1480b411b363SPhilipp Reisner int last_word; 1481b411b363SPhilipp Reisner 1482b411b363SPhilipp Reisner if (e - s <= 3*BITS_PER_LONG) { 1483b411b363SPhilipp Reisner /* don't bother; el and sl may even be wrong. */ 1484829c6087SLars Ellenberg spin_lock_irq(&b->bm_lock); 1485b30ab791SAndreas Gruenbacher __bm_change_bits_to(device, s, e, 1); 1486829c6087SLars Ellenberg spin_unlock_irq(&b->bm_lock); 1487b411b363SPhilipp Reisner return; 1488b411b363SPhilipp Reisner } 1489b411b363SPhilipp Reisner 1490b411b363SPhilipp Reisner /* difference is large enough that we can trust sl and el */ 1491b411b363SPhilipp Reisner 1492829c6087SLars Ellenberg spin_lock_irq(&b->bm_lock); 1493829c6087SLars Ellenberg 1494b411b363SPhilipp Reisner /* bits filling the current long */ 1495b411b363SPhilipp Reisner if (sl) 1496b30ab791SAndreas Gruenbacher __bm_change_bits_to(device, s, sl-1, 1); 1497b411b363SPhilipp Reisner 1498b411b363SPhilipp Reisner first_page = sl >> (3 + PAGE_SHIFT); 1499b411b363SPhilipp Reisner last_page = el >> (3 + PAGE_SHIFT); 1500b411b363SPhilipp Reisner 1501b411b363SPhilipp Reisner /* MLPP: modulo longs per page */ 1502b411b363SPhilipp Reisner /* LWPP: long words per page */ 1503b411b363SPhilipp Reisner first_word = MLPP(sl >> LN2_BPL); 1504b411b363SPhilipp Reisner last_word = LWPP; 1505b411b363SPhilipp Reisner 1506b411b363SPhilipp Reisner /* first and full pages, unless first page == last page */ 1507b411b363SPhilipp Reisner for (page_nr = first_page; page_nr < last_page; page_nr++) { 1508b30ab791SAndreas Gruenbacher 
bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word); 15098ccee20eSLars Ellenberg spin_unlock_irq(&b->bm_lock); 15108ccee20eSLars Ellenberg cond_resched(); 1511b411b363SPhilipp Reisner first_word = 0; 15128ccee20eSLars Ellenberg spin_lock_irq(&b->bm_lock); 1513b411b363SPhilipp Reisner } 1514b411b363SPhilipp Reisner /* last page (respectively only page, for first page == last page) */ 1515b411b363SPhilipp Reisner last_word = MLPP(el >> LN2_BPL); 15164eccc579SLars Ellenberg 15174eccc579SLars Ellenberg /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples). 15184eccc579SLars Ellenberg * ==> e = 32767, el = 32768, last_page = 2, 15194eccc579SLars Ellenberg * and now last_word = 0. 15204eccc579SLars Ellenberg * We do not want to touch last_page in this case, 15214eccc579SLars Ellenberg * as we did not allocate it, it is not present in bitmap->bm_pages. 15224eccc579SLars Ellenberg */ 15234eccc579SLars Ellenberg if (last_word) 1524b30ab791SAndreas Gruenbacher bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word); 1525b411b363SPhilipp Reisner 1526b411b363SPhilipp Reisner /* possibly trailing bits. 1527b411b363SPhilipp Reisner * example: (e & 63) == 63, el will be e+1. 1528b411b363SPhilipp Reisner * if that even was the very last bit, 1529b411b363SPhilipp Reisner * it would trigger an assert in __bm_change_bits_to() 1530b411b363SPhilipp Reisner */ 1531b411b363SPhilipp Reisner if (el <= e) 1532b30ab791SAndreas Gruenbacher __bm_change_bits_to(device, el, e, 1); 1533829c6087SLars Ellenberg spin_unlock_irq(&b->bm_lock); 1534b411b363SPhilipp Reisner } 1535b411b363SPhilipp Reisner 1536b411b363SPhilipp Reisner /* returns bit state 1537b411b363SPhilipp Reisner * wants bitnr, NOT sector. 1538b411b363SPhilipp Reisner * inherently racy... area needs to be locked by means of {al,rs}_lru 1539b411b363SPhilipp Reisner * 1 ... bit set 1540b411b363SPhilipp Reisner * 0 ... 
bit not set 1541b411b363SPhilipp Reisner * -1 ... first out of bounds access, stop testing for bits! 1542b411b363SPhilipp Reisner */ 1543b30ab791SAndreas Gruenbacher int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr) 1544b411b363SPhilipp Reisner { 1545b411b363SPhilipp Reisner unsigned long flags; 1546b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1547b411b363SPhilipp Reisner unsigned long *p_addr; 1548b411b363SPhilipp Reisner int i; 1549b411b363SPhilipp Reisner 1550841ce241SAndreas Gruenbacher if (!expect(b)) 1551841ce241SAndreas Gruenbacher return 0; 1552841ce241SAndreas Gruenbacher if (!expect(b->bm_pages)) 1553841ce241SAndreas Gruenbacher return 0; 1554b411b363SPhilipp Reisner 1555b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 155620ceb2b2SLars Ellenberg if (BM_DONT_TEST & b->bm_flags) 1557b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1558b411b363SPhilipp Reisner if (bitnr < b->bm_bits) { 155919f843aaSLars Ellenberg p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); 15607e599e6eSLinus Torvalds i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 
1 : 0; 1561b411b363SPhilipp Reisner bm_unmap(p_addr); 1562b411b363SPhilipp Reisner } else if (bitnr == b->bm_bits) { 1563b411b363SPhilipp Reisner i = -1; 1564b411b363SPhilipp Reisner } else { /* (bitnr > b->bm_bits) */ 1565d0180171SAndreas Gruenbacher drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); 1566b411b363SPhilipp Reisner i = 0; 1567b411b363SPhilipp Reisner } 1568b411b363SPhilipp Reisner 1569b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1570b411b363SPhilipp Reisner return i; 1571b411b363SPhilipp Reisner } 1572b411b363SPhilipp Reisner 1573b411b363SPhilipp Reisner /* returns number of bits set in the range [s, e] */ 1574b30ab791SAndreas Gruenbacher int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e) 1575b411b363SPhilipp Reisner { 1576b411b363SPhilipp Reisner unsigned long flags; 1577b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 157819f843aaSLars Ellenberg unsigned long *p_addr = NULL; 1579b411b363SPhilipp Reisner unsigned long bitnr; 158019f843aaSLars Ellenberg unsigned int page_nr = -1U; 1581b411b363SPhilipp Reisner int c = 0; 1582b411b363SPhilipp Reisner 1583b411b363SPhilipp Reisner /* If this is called without a bitmap, that is a bug. 
But just to be 1584b411b363SPhilipp Reisner * robust in case we screwed up elsewhere, in that case pretend there 1585b411b363SPhilipp Reisner * was one dirty bit in the requested area, so we won't try to do a 1586b411b363SPhilipp Reisner * local read there (no bitmap probably implies no disk) */ 1587841ce241SAndreas Gruenbacher if (!expect(b)) 1588841ce241SAndreas Gruenbacher return 1; 1589841ce241SAndreas Gruenbacher if (!expect(b->bm_pages)) 1590841ce241SAndreas Gruenbacher return 1; 1591b411b363SPhilipp Reisner 1592b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 159320ceb2b2SLars Ellenberg if (BM_DONT_TEST & b->bm_flags) 1594b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1595b411b363SPhilipp Reisner for (bitnr = s; bitnr <= e; bitnr++) { 159619f843aaSLars Ellenberg unsigned int idx = bm_bit_to_page_idx(b, bitnr); 159719f843aaSLars Ellenberg if (page_nr != idx) { 159819f843aaSLars Ellenberg page_nr = idx; 1599b411b363SPhilipp Reisner if (p_addr) 1600b411b363SPhilipp Reisner bm_unmap(p_addr); 160119f843aaSLars Ellenberg p_addr = bm_map_pidx(b, idx); 1602b411b363SPhilipp Reisner } 1603841ce241SAndreas Gruenbacher if (expect(bitnr < b->bm_bits)) 16047e599e6eSLinus Torvalds c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); 1605841ce241SAndreas Gruenbacher else 1606d0180171SAndreas Gruenbacher drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); 1607b411b363SPhilipp Reisner } 1608b411b363SPhilipp Reisner if (p_addr) 1609b411b363SPhilipp Reisner bm_unmap(p_addr); 1610b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1611b411b363SPhilipp Reisner return c; 1612b411b363SPhilipp Reisner } 1613b411b363SPhilipp Reisner 1614b411b363SPhilipp Reisner 1615b411b363SPhilipp Reisner /* inherently racy... 1616b411b363SPhilipp Reisner * return value may be already out-of-date when this function returns. 
1617b411b363SPhilipp Reisner * but the general usage is that this is only use during a cstate when bits are 1618b411b363SPhilipp Reisner * only cleared, not set, and typically only care for the case when the return 1619b411b363SPhilipp Reisner * value is zero, or we already "locked" this "bitmap extent" by other means. 1620b411b363SPhilipp Reisner * 1621b411b363SPhilipp Reisner * enr is bm-extent number, since we chose to name one sector (512 bytes) 1622b411b363SPhilipp Reisner * worth of the bitmap a "bitmap extent". 1623b411b363SPhilipp Reisner * 1624b411b363SPhilipp Reisner * TODO 1625b411b363SPhilipp Reisner * I think since we use it like a reference count, we should use the real 1626b411b363SPhilipp Reisner * reference count of some bitmap extent element from some lru instead... 1627b411b363SPhilipp Reisner * 1628b411b363SPhilipp Reisner */ 1629b30ab791SAndreas Gruenbacher int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr) 1630b411b363SPhilipp Reisner { 1631b30ab791SAndreas Gruenbacher struct drbd_bitmap *b = device->bitmap; 1632b411b363SPhilipp Reisner int count, s, e; 1633b411b363SPhilipp Reisner unsigned long flags; 1634b411b363SPhilipp Reisner unsigned long *p_addr, *bm; 1635b411b363SPhilipp Reisner 1636841ce241SAndreas Gruenbacher if (!expect(b)) 1637841ce241SAndreas Gruenbacher return 0; 1638841ce241SAndreas Gruenbacher if (!expect(b->bm_pages)) 1639841ce241SAndreas Gruenbacher return 0; 1640b411b363SPhilipp Reisner 1641b411b363SPhilipp Reisner spin_lock_irqsave(&b->bm_lock, flags); 164220ceb2b2SLars Ellenberg if (BM_DONT_TEST & b->bm_flags) 1643b30ab791SAndreas Gruenbacher bm_print_lock_info(device); 1644b411b363SPhilipp Reisner 1645b411b363SPhilipp Reisner s = S2W(enr); 1646b411b363SPhilipp Reisner e = min((size_t)S2W(enr+1), b->bm_words); 1647b411b363SPhilipp Reisner count = 0; 1648b411b363SPhilipp Reisner if (s < b->bm_words) { 1649b411b363SPhilipp Reisner int n = e-s; 165019f843aaSLars Ellenberg p_addr = bm_map_pidx(b, 
bm_word_to_page_idx(b, s)); 1651b411b363SPhilipp Reisner bm = p_addr + MLPP(s); 16525fb3bc4dSLars Ellenberg count += bitmap_weight(bm, n * BITS_PER_LONG); 1653b411b363SPhilipp Reisner bm_unmap(p_addr); 1654b411b363SPhilipp Reisner } else { 1655d0180171SAndreas Gruenbacher drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s); 1656b411b363SPhilipp Reisner } 1657b411b363SPhilipp Reisner spin_unlock_irqrestore(&b->bm_lock, flags); 1658b411b363SPhilipp Reisner return count; 1659b411b363SPhilipp Reisner } 1660