/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <linux/sched.h>
#include <linux/random.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>

int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
		      gfp_t flags, int node)
{
	unsigned int bits_per_word;
	unsigned int i;

	if (shift < 0) {
		shift = ilog2(BITS_PER_LONG);
		/*
		 * If the bitmap is small, shrink the number of bits per word so
		 * we spread over a few cachelines, at least. If less than 4
		 * bits, just forget about it, it's not going to work optimally
		 * anyway.
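		 *
		 * For example, with BITS_PER_LONG == 64 and depth == 32, the
		 * loop below shrinks shift from 6 to 3, so the 32 bits are
		 * spread over four 8-bit words instead of packed into one.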
		 */
		if (depth >= 4) {
			while ((4U << shift) > depth)
				shift--;
		}
	}
	bits_per_word = 1U << shift;
	if (bits_per_word > BITS_PER_LONG)
		return -EINVAL;

	sb->shift = shift;
	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

	if (depth == 0) {
		sb->map = NULL;
		return 0;
	}

	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
	if (!sb->map)
		return -ENOMEM;

	for (i = 0; i < sb->map_nr; i++) {
		sb->map[i].depth = min(depth, bits_per_word);
		depth -= sb->map[i].depth;
		spin_lock_init(&sb->map[i].swap_lock);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);

void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
{
	unsigned int bits_per_word = 1U << sb->shift;
	unsigned int i;

	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

	for (i = 0; i < sb->map_nr; i++) {
		sb->map[i].depth = min(depth, bits_per_word);
		depth -= sb->map[i].depth;
	}
}
EXPORT_SYMBOL_GPL(sbitmap_resize);

static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
			      unsigned int hint, bool wrap)
{
	unsigned int orig_hint = hint;
	int nr;

	while (1) {
		nr = find_next_zero_bit(word, depth, hint);
		if (unlikely(nr >= depth)) {
			/*
			 * We started with an offset, and we didn't reset the
			 * offset to 0 in a failure case, so start from 0 to
			 * exhaust the map.
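			 *
			 * Clearing orig_hint as well guarantees the wrap
			 * happens at most once, so this loop cannot spin
			 * forever on a full word.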
			 */
			if (orig_hint && hint && wrap) {
				hint = orig_hint = 0;
				continue;
			}
			return -1;
		}

		if (!test_and_set_bit_lock(nr, word))
			break;

		hint = nr + 1;
		if (hint >= depth - 1)
			hint = 0;
	}

	return nr;
}

/*
 * See if we have deferred clears that we can batch move
 */
static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
{
	unsigned long mask, val;
	bool ret = false;

	spin_lock(&sb->map[index].swap_lock);

	if (!sb->map[index].cleared)
		goto out_unlock;

	/*
	 * First get a stable cleared mask, setting the old mask to 0.
	 */
	do {
		mask = sb->map[index].cleared;
	} while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask);

	/*
	 * Now clear the masked bits in our free word
	 */
	do {
		val = sb->map[index].word;
	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);

	ret = true;
out_unlock:
	spin_unlock(&sb->map[index].swap_lock);
	return ret;
}

static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
				     unsigned int alloc_hint, bool round_robin)
{
	int nr;

	do {
		nr = __sbitmap_get_word(&sb->map[index].word,
					sb->map[index].depth, alloc_hint,
					!round_robin);
		if (nr != -1)
			break;
		if (!sbitmap_deferred_clear(sb, index))
			break;
	} while (1);

	return nr;
}

int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
	unsigned int i, index;
	int nr = -1;

	index = SB_NR_TO_INDEX(sb, alloc_hint);

	/*
	 * Unless we're doing round robin tag allocation, just use the
	 * alloc_hint to find the right word index. No point in looping
	 * twice in find_next_zero_bit() for that case.
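	 *
	 * (When we're not doing round robin, __sbitmap_get_word() is called
	 * with wrap == true, so starting the in-word search anywhere but bit 0
	 * would only make the wrap-around rescan the tail of the word.)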
	 */
	if (round_robin)
		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
	else
		alloc_hint = 0;

	for (i = 0; i < sb->map_nr; i++) {
		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
					       round_robin);
		if (nr != -1) {
			nr += index << sb->shift;
			break;
		}

		/* Jump to next index. */
		alloc_hint = 0;
		if (++index >= sb->map_nr)
			index = 0;
	}

	return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get);

int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
			unsigned long shallow_depth)
{
	unsigned int i, index;
	int nr = -1;

	index = SB_NR_TO_INDEX(sb, alloc_hint);

	for (i = 0; i < sb->map_nr; i++) {
		nr = __sbitmap_get_word(&sb->map[index].word,
					min(sb->map[index].depth, shallow_depth),
					SB_NR_TO_BIT(sb, alloc_hint), true);
		if (nr != -1) {
			nr += index << sb->shift;
			break;
		}

		/*
		 * Jump to next index.
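		 * Recomputing alloc_hint as index << sb->shift means
		 * SB_NR_TO_BIT() starts the search in the next word at bit 0.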
		 */
		index++;
		alloc_hint = index << sb->shift;

		if (index >= sb->map_nr) {
			index = 0;
			alloc_hint = 0;
		}
	}

	return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get_shallow);

bool sbitmap_any_bit_set(const struct sbitmap *sb)
{
	unsigned int i;

	for (i = 0; i < sb->map_nr; i++) {
		if (sb->map[i].word)
			return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

bool sbitmap_any_bit_clear(const struct sbitmap *sb)
{
	unsigned int i;

	for (i = 0; i < sb->map_nr; i++) {
		const struct sbitmap_word *word = &sb->map[i];
		unsigned long ret;

		ret = find_first_zero_bit(&word->word, word->depth);
		if (ret < word->depth)
			return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);

static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
{
	unsigned int i, weight = 0;

	for (i = 0; i < sb->map_nr; i++) {
		const struct sbitmap_word *word = &sb->map[i];

		if (set)
			weight += bitmap_weight(&word->word, word->depth);
		else
			weight += bitmap_weight(&word->cleared, word->depth);
	}
	return weight;
}

static unsigned int sbitmap_weight(const struct sbitmap *sb)
{
	return __sbitmap_weight(sb, true);
}

static unsigned int sbitmap_cleared(const struct sbitmap *sb)
{
	return __sbitmap_weight(sb, false);
}

void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
{
	seq_printf(m, "depth=%u\n", sb->depth);
	seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
	seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
	seq_printf(m, "map_nr=%u\n", sb->map_nr);
}
EXPORT_SYMBOL_GPL(sbitmap_show);

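/*
 * emit_byte() and sbitmap_bitmap_show() below print the bitmap as a hex dump:
 * an eight-digit byte offset, then 16 bytes per line grouped in pairs, with
 * bits packed LSB-first into bytes. The values shown here are illustrative
 * only, e.g.:
 *
 * 00000000: ffff 3f00 0000 0000 0000 0000 0000 0000
 */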
static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
{
	if ((offset & 0xf) == 0) {
		if (offset != 0)
			seq_putc(m, '\n');
		seq_printf(m, "%08x:", offset);
	}
	if ((offset & 0x1) == 0)
		seq_putc(m, ' ');
	seq_printf(m, "%02x", byte);
}

void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
{
	u8 byte = 0;
	unsigned int byte_bits = 0;
	unsigned int offset = 0;
	int i;

	for (i = 0; i < sb->map_nr; i++) {
		unsigned long word = READ_ONCE(sb->map[i].word);
		unsigned int word_bits = READ_ONCE(sb->map[i].depth);

		while (word_bits > 0) {
			unsigned int bits = min(8 - byte_bits, word_bits);

			byte |= (word & (BIT(bits) - 1)) << byte_bits;
			byte_bits += bits;
			if (byte_bits == 8) {
				emit_byte(m, offset, byte);
				byte = 0;
				byte_bits = 0;
				offset++;
			}
			word >>= bits;
			word_bits -= bits;
		}
	}
	if (byte_bits) {
		emit_byte(m, offset, byte);
		offset++;
	}
	if (offset)
		seq_putc(m, '\n');
}
EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);

static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
					unsigned int depth)
{
	unsigned int wake_batch;
	unsigned int shallow_depth;

	/*
	 * For each batch, we wake up one queue. We need to make sure that our
	 * batch size is small enough that the full depth of the bitmap,
	 * potentially limited by a shallow depth, is enough to wake up all of
	 * the queues.
	 *
	 * Each full word of the bitmap has bits_per_word bits, and there might
	 * be a partial word. There are depth / bits_per_word full words and
	 * depth % bits_per_word bits left over. In bitwise arithmetic:
	 *
	 * bits_per_word = 1 << shift
	 * depth / bits_per_word = depth >> shift
	 * depth % bits_per_word = depth & ((1 << shift) - 1)
	 *
	 * Each word can be limited to sbq->min_shallow_depth bits.
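	 *
	 * For example, assuming SBQ_WAIT_QUEUES == 8 and SBQ_WAKE_BATCH == 8:
	 * with shift == 6 and depth == 128, an unlimited min_shallow_depth
	 * leaves the usable depth at 128 and gives wake_batch == 8 (capped by
	 * SBQ_WAKE_BATCH), while min_shallow_depth == 8 reduces the usable
	 * depth to 2 * 8 == 16 and gives wake_batch == 2.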
	 */
	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
	depth = ((depth >> sbq->sb.shift) * shallow_depth +
		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
			     SBQ_WAKE_BATCH);

	return wake_batch;
}

int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node)
{
	int ret;
	int i;

	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
	if (ret)
		return ret;

	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
	if (!sbq->alloc_hint) {
		sbitmap_free(&sbq->sb);
		return -ENOMEM;
	}

	if (depth && !round_robin) {
		for_each_possible_cpu(i)
			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
	}

	sbq->min_shallow_depth = UINT_MAX;
	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
	atomic_set(&sbq->wake_index, 0);
	atomic_set(&sbq->ws_active, 0);

	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
	if (!sbq->ws) {
		free_percpu(sbq->alloc_hint);
		sbitmap_free(&sbq->sb);
		return -ENOMEM;
	}

	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		init_waitqueue_head(&sbq->ws[i].wait);
		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
	}

	sbq->round_robin = round_robin;
	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
					    unsigned int depth)
{
	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
	int i;

	if (sbq->wake_batch != wake_batch) {
		WRITE_ONCE(sbq->wake_batch, wake_batch);
		/*
		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
		 * to ensure that the batch size is updated before the wait
		 * counts.
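		 *
		 * Resetting every wait_cnt to 1 means the next freed bit wakes
		 * that queue, so no waiter is left sleeping on a count that was
		 * derived from the old (possibly larger) batch size.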
		 */
		smp_mb__before_atomic();
		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
			atomic_set(&sbq->ws[i].wait_cnt, 1);
	}
}

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
	sbitmap_queue_update_wake_batch(sbq, depth);
	sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);

int __sbitmap_queue_get(struct sbitmap_queue *sbq)
{
	unsigned int hint, depth;
	int nr;

	hint = this_cpu_read(*sbq->alloc_hint);
	depth = READ_ONCE(sbq->sb.depth);
	if (unlikely(hint >= depth)) {
		hint = depth ? prandom_u32() % depth : 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}
	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);

	if (nr == -1) {
		/* If the map is full, a hint won't do us much good. */
		this_cpu_write(*sbq->alloc_hint, 0);
	} else if (nr == hint || unlikely(sbq->round_robin)) {
		/* Only update the hint if we used it. */
		hint = nr + 1;
		if (hint >= depth - 1)
			hint = 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}

	return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);

int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
				unsigned int shallow_depth)
{
	unsigned int hint, depth;
	int nr;

	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);

	hint = this_cpu_read(*sbq->alloc_hint);
	depth = READ_ONCE(sbq->sb.depth);
	if (unlikely(hint >= depth)) {
		hint = depth ? prandom_u32() % depth : 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}
	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);

	if (nr == -1) {
		/* If the map is full, a hint won't do us much good. */
		this_cpu_write(*sbq->alloc_hint, 0);
	} else if (nr == hint || unlikely(sbq->round_robin)) {
		/*
		 * Only update the hint if we used it.
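		 * For round-robin allocation the hint is advanced
		 * unconditionally, so tags are handed out cyclically.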
		 */
		hint = nr + 1;
		if (hint >= depth - 1)
			hint = 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}

	return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);

void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
				     unsigned int min_shallow_depth)
{
	sbq->min_shallow_depth = min_shallow_depth;
	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);

static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	if (!atomic_read(&sbq->ws_active))
		return NULL;

	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait)) {
			int o = atomic_read(&sbq->wake_index);

			if (wake_index != o)
				atomic_cmpxchg(&sbq->wake_index, o, wake_index);
			return ws;
		}

		wake_index = sbq_index_inc(wake_index);
	}

	return NULL;
}

static bool __sbq_wake_up(struct sbitmap_queue *sbq)
{
	struct sbq_wait_state *ws;
	unsigned int wake_batch;
	int wait_cnt;

	ws = sbq_wake_ptr(sbq);
	if (!ws)
		return false;

	wait_cnt = atomic_dec_return(&ws->wait_cnt);
	if (wait_cnt <= 0) {
		int ret;

		wake_batch = READ_ONCE(sbq->wake_batch);

		/*
		 * Pairs with the memory barrier in sbitmap_queue_resize() to
		 * ensure that we see the batch size update before the wait
		 * count is reset.
		 */
		smp_mb__before_atomic();

		/*
		 * For concurrent callers of this, the one that failed the
		 * atomic_cmpxchg() race should call this function again
		 * to wake up a new batch on a different 'ws'.
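		 *
		 * Returning true below makes sbitmap_queue_wake_up() loop and
		 * retry exactly that.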
		 */
		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
		if (ret == wait_cnt) {
			sbq_index_atomic_inc(&sbq->wake_index);
			wake_up_nr(&ws->wait, wake_batch);
			return false;
		}

		return true;
	}

	return false;
}

void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
{
	while (__sbq_wake_up(sbq))
		;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);

void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
			 unsigned int cpu)
{
	sbitmap_deferred_clear_bit(&sbq->sb, nr);

	/*
	 * Pairs with the memory barrier in set_current_state() to ensure the
	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
	 * waiter. See the comment on waitqueue_active().
	 */
	smp_mb__after_atomic();
	sbitmap_queue_wake_up(sbq);

	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	/*
	 * Pairs with the memory barrier in set_current_state() like in
	 * sbitmap_queue_wake_up().
	 */
	smp_mb();
	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait))
			wake_up(&ws->wait);

		wake_index = sbq_index_inc(wake_index);
	}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);

void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
{
	bool first;
	int i;

	sbitmap_show(&sbq->sb, m);

	seq_puts(m, "alloc_hint={");
	first = true;
	for_each_possible_cpu(i) {
		if (!first)
			seq_puts(m, ", ");
		first = false;
		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
	}
	seq_puts(m, "}\n");

	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
	seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active));

	seq_puts(m, "ws={\n");
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[i];

		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
			   atomic_read(&ws->wait_cnt),
			   waitqueue_active(&ws->wait) ?
"active" : "inactive"); 65024af1ccfSOmar Sandoval } 65124af1ccfSOmar Sandoval seq_puts(m, "}\n"); 65224af1ccfSOmar Sandoval 65324af1ccfSOmar Sandoval seq_printf(m, "round_robin=%d\n", sbq->round_robin); 654a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 65524af1ccfSOmar Sandoval } 65624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 657*5d2ee712SJens Axboe 658*5d2ee712SJens Axboe void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, 659*5d2ee712SJens Axboe struct sbq_wait_state *ws, 660*5d2ee712SJens Axboe struct sbq_wait *sbq_wait, int state) 661*5d2ee712SJens Axboe { 662*5d2ee712SJens Axboe if (!sbq_wait->accounted) { 663*5d2ee712SJens Axboe atomic_inc(&sbq->ws_active); 664*5d2ee712SJens Axboe sbq_wait->accounted = 1; 665*5d2ee712SJens Axboe } 666*5d2ee712SJens Axboe prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); 667*5d2ee712SJens Axboe } 668*5d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); 669*5d2ee712SJens Axboe 670*5d2ee712SJens Axboe void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, 671*5d2ee712SJens Axboe struct sbq_wait *sbq_wait) 672*5d2ee712SJens Axboe { 673*5d2ee712SJens Axboe finish_wait(&ws->wait, &sbq_wait->wait); 674*5d2ee712SJens Axboe if (sbq_wait->accounted) { 675*5d2ee712SJens Axboe atomic_dec(&sbq->ws_active); 676*5d2ee712SJens Axboe sbq_wait->accounted = 0; 677*5d2ee712SJens Axboe } 678*5d2ee712SJens Axboe } 679*5d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_finish_wait); 680