10fc479b1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 288459642SOmar Sandoval /* 388459642SOmar Sandoval * Copyright (C) 2016 Facebook 488459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 588459642SOmar Sandoval */ 688459642SOmar Sandoval 7af8601adSIngo Molnar #include <linux/sched.h> 898d95416SOmar Sandoval #include <linux/random.h> 988459642SOmar Sandoval #include <linux/sbitmap.h> 1024af1ccfSOmar Sandoval #include <linux/seq_file.h> 1188459642SOmar Sandoval 12c548e62bSMing Lei static int init_alloc_hint(struct sbitmap *sb, gfp_t flags) 13bf2c4282SMing Lei { 14c548e62bSMing Lei unsigned depth = sb->depth; 15bf2c4282SMing Lei 16c548e62bSMing Lei sb->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 17c548e62bSMing Lei if (!sb->alloc_hint) 18bf2c4282SMing Lei return -ENOMEM; 19bf2c4282SMing Lei 20c548e62bSMing Lei if (depth && !sb->round_robin) { 21bf2c4282SMing Lei int i; 22bf2c4282SMing Lei 23bf2c4282SMing Lei for_each_possible_cpu(i) 24c548e62bSMing Lei *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth; 25bf2c4282SMing Lei } 26bf2c4282SMing Lei return 0; 27bf2c4282SMing Lei } 28bf2c4282SMing Lei 29c548e62bSMing Lei static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb, 30bf2c4282SMing Lei unsigned int depth) 31bf2c4282SMing Lei { 32bf2c4282SMing Lei unsigned hint; 33bf2c4282SMing Lei 34c548e62bSMing Lei hint = this_cpu_read(*sb->alloc_hint); 35bf2c4282SMing Lei if (unlikely(hint >= depth)) { 36bf2c4282SMing Lei hint = depth ? prandom_u32() % depth : 0; 37c548e62bSMing Lei this_cpu_write(*sb->alloc_hint, hint); 38bf2c4282SMing Lei } 39bf2c4282SMing Lei 40bf2c4282SMing Lei return hint; 41bf2c4282SMing Lei } 42bf2c4282SMing Lei 43c548e62bSMing Lei static inline void update_alloc_hint_after_get(struct sbitmap *sb, 44bf2c4282SMing Lei unsigned int depth, 45bf2c4282SMing Lei unsigned int hint, 46bf2c4282SMing Lei unsigned int nr) 47bf2c4282SMing Lei { 48bf2c4282SMing Lei if (nr == -1) { 49bf2c4282SMing Lei /* If the map is full, a hint won't do us much good. */ 50c548e62bSMing Lei this_cpu_write(*sb->alloc_hint, 0); 51c548e62bSMing Lei } else if (nr == hint || unlikely(sb->round_robin)) { 52bf2c4282SMing Lei /* Only update the hint if we used it. */ 53bf2c4282SMing Lei hint = nr + 1; 54bf2c4282SMing Lei if (hint >= depth - 1) 55bf2c4282SMing Lei hint = 0; 56c548e62bSMing Lei this_cpu_write(*sb->alloc_hint, hint); 57bf2c4282SMing Lei } 58bf2c4282SMing Lei } 59bf2c4282SMing Lei 60b2dbff1bSJens Axboe /* 61b2dbff1bSJens Axboe * See if we have deferred clears that we can batch move 62b2dbff1bSJens Axboe */ 63b78beea0SPavel Begunkov static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) 64b2dbff1bSJens Axboe { 65c3250c8dSPavel Begunkov unsigned long mask; 66b2dbff1bSJens Axboe 67661d4f55SPavel Begunkov if (!READ_ONCE(map->cleared)) 68661d4f55SPavel Begunkov return false; 69b2dbff1bSJens Axboe 70b2dbff1bSJens Axboe /* 71b2dbff1bSJens Axboe * First get a stable cleared mask, setting the old mask to 0. 72b2dbff1bSJens Axboe */ 73b78beea0SPavel Begunkov mask = xchg(&map->cleared, 0); 74b2dbff1bSJens Axboe 75b2dbff1bSJens Axboe /* 76b2dbff1bSJens Axboe * Now clear the masked bits in our free word 77b2dbff1bSJens Axboe */ 78c3250c8dSPavel Begunkov atomic_long_andnot(mask, (atomic_long_t *)&map->word); 79c3250c8dSPavel Begunkov BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); 80661d4f55SPavel Begunkov return true; 81b2dbff1bSJens Axboe } 82b2dbff1bSJens Axboe 8388459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 84c548e62bSMing Lei gfp_t flags, int node, bool round_robin, 85c548e62bSMing Lei bool alloc_hint) 8688459642SOmar Sandoval { 8788459642SOmar Sandoval unsigned int bits_per_word; 8888459642SOmar Sandoval unsigned int i; 8988459642SOmar Sandoval 902d13b1eaSMing Lei if (shift < 0) 912d13b1eaSMing Lei shift = sbitmap_calculate_shift(depth); 922d13b1eaSMing Lei 9388459642SOmar Sandoval bits_per_word = 1U << shift; 9488459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 9588459642SOmar Sandoval return -EINVAL; 9688459642SOmar Sandoval 9788459642SOmar Sandoval sb->shift = shift; 9888459642SOmar Sandoval sb->depth = depth; 9988459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 100efe1f3a1SMing Lei sb->round_robin = round_robin; 10188459642SOmar Sandoval 10288459642SOmar Sandoval if (depth == 0) { 10388459642SOmar Sandoval sb->map = NULL; 10488459642SOmar Sandoval return 0; 10588459642SOmar Sandoval } 10688459642SOmar Sandoval 107c548e62bSMing Lei if (alloc_hint) { 108c548e62bSMing Lei if (init_alloc_hint(sb, flags)) 10988459642SOmar Sandoval return -ENOMEM; 110c548e62bSMing Lei } else { 111c548e62bSMing Lei sb->alloc_hint = NULL; 112c548e62bSMing Lei } 113c548e62bSMing Lei 114c548e62bSMing Lei sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 115c548e62bSMing Lei if (!sb->map) { 116c548e62bSMing Lei free_percpu(sb->alloc_hint); 117c548e62bSMing Lei return -ENOMEM; 118c548e62bSMing Lei } 11988459642SOmar Sandoval 12088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 12188459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 12288459642SOmar Sandoval depth -= sb->map[i].depth; 12388459642SOmar Sandoval } 12488459642SOmar Sandoval return 0; 12588459642SOmar Sandoval } 12688459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 12788459642SOmar Sandoval 12888459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 12988459642SOmar Sandoval { 13088459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 13188459642SOmar Sandoval unsigned int i; 13288459642SOmar Sandoval 133b2dbff1bSJens Axboe for (i = 0; i < sb->map_nr; i++) 134b78beea0SPavel Begunkov sbitmap_deferred_clear(&sb->map[i]); 135b2dbff1bSJens Axboe 13688459642SOmar Sandoval sb->depth = depth; 13788459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 13888459642SOmar Sandoval 13988459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 14088459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 14188459642SOmar Sandoval depth -= sb->map[i].depth; 14288459642SOmar Sandoval } 14388459642SOmar Sandoval } 14488459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 14588459642SOmar Sandoval 146c05e6673SOmar Sandoval static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 147c05e6673SOmar Sandoval unsigned int hint, bool wrap) 14888459642SOmar Sandoval { 14988459642SOmar Sandoval int nr; 15088459642SOmar Sandoval 1510eff1f1aSPavel Begunkov /* don't wrap if starting from 0 */ 1520eff1f1aSPavel Begunkov wrap = wrap && hint; 1530eff1f1aSPavel Begunkov 15488459642SOmar Sandoval while (1) { 155c05e6673SOmar Sandoval nr = find_next_zero_bit(word, depth, hint); 156c05e6673SOmar Sandoval if (unlikely(nr >= depth)) { 15788459642SOmar Sandoval /* 15888459642SOmar Sandoval * We started with an offset, and we didn't reset the 15988459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 16088459642SOmar Sandoval * exhaust the map. 16188459642SOmar Sandoval */ 1620eff1f1aSPavel Begunkov if (hint && wrap) { 1630eff1f1aSPavel Begunkov hint = 0; 16488459642SOmar Sandoval continue; 16588459642SOmar Sandoval } 16688459642SOmar Sandoval return -1; 16788459642SOmar Sandoval } 16888459642SOmar Sandoval 1694ace53f1SOmar Sandoval if (!test_and_set_bit_lock(nr, word)) 17088459642SOmar Sandoval break; 17188459642SOmar Sandoval 17288459642SOmar Sandoval hint = nr + 1; 173c05e6673SOmar Sandoval if (hint >= depth - 1) 17488459642SOmar Sandoval hint = 0; 17588459642SOmar Sandoval } 17688459642SOmar Sandoval 17788459642SOmar Sandoval return nr; 17888459642SOmar Sandoval } 17988459642SOmar Sandoval 180ea86ea2cSJens Axboe static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 181efe1f3a1SMing Lei unsigned int alloc_hint) 182ea86ea2cSJens Axboe { 183b78beea0SPavel Begunkov struct sbitmap_word *map = &sb->map[index]; 184ea86ea2cSJens Axboe int nr; 185ea86ea2cSJens Axboe 186ea86ea2cSJens Axboe do { 187b78beea0SPavel Begunkov nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, 188efe1f3a1SMing Lei !sb->round_robin); 189ea86ea2cSJens Axboe if (nr != -1) 190ea86ea2cSJens Axboe break; 191b78beea0SPavel Begunkov if (!sbitmap_deferred_clear(map)) 192ea86ea2cSJens Axboe break; 193ea86ea2cSJens Axboe } while (1); 194ea86ea2cSJens Axboe 195ea86ea2cSJens Axboe return nr; 196ea86ea2cSJens Axboe } 197ea86ea2cSJens Axboe 198c548e62bSMing Lei static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) 19988459642SOmar Sandoval { 20088459642SOmar Sandoval unsigned int i, index; 20188459642SOmar Sandoval int nr = -1; 20288459642SOmar Sandoval 20388459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 20488459642SOmar Sandoval 20527fae429SJens Axboe /* 20627fae429SJens Axboe * Unless we're doing round robin tag allocation, just use the 20727fae429SJens Axboe * alloc_hint to find the right word index. No point in looping 20827fae429SJens Axboe * twice in find_next_zero_bit() for that case. 20927fae429SJens Axboe */ 210efe1f3a1SMing Lei if (sb->round_robin) 21127fae429SJens Axboe alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); 21227fae429SJens Axboe else 21327fae429SJens Axboe alloc_hint = 0; 21427fae429SJens Axboe 21588459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 216efe1f3a1SMing Lei nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); 21788459642SOmar Sandoval if (nr != -1) { 21888459642SOmar Sandoval nr += index << sb->shift; 21988459642SOmar Sandoval break; 22088459642SOmar Sandoval } 22188459642SOmar Sandoval 22288459642SOmar Sandoval /* Jump to next index. */ 22388459642SOmar Sandoval alloc_hint = 0; 22427fae429SJens Axboe if (++index >= sb->map_nr) 22527fae429SJens Axboe index = 0; 22688459642SOmar Sandoval } 22788459642SOmar Sandoval 22888459642SOmar Sandoval return nr; 22988459642SOmar Sandoval } 230c548e62bSMing Lei 231c548e62bSMing Lei int sbitmap_get(struct sbitmap *sb) 232c548e62bSMing Lei { 233c548e62bSMing Lei int nr; 234c548e62bSMing Lei unsigned int hint, depth; 235c548e62bSMing Lei 236c548e62bSMing Lei if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) 237c548e62bSMing Lei return -1; 238c548e62bSMing Lei 239c548e62bSMing Lei depth = READ_ONCE(sb->depth); 240c548e62bSMing Lei hint = update_alloc_hint_before_get(sb, depth); 241c548e62bSMing Lei nr = __sbitmap_get(sb, hint); 242c548e62bSMing Lei update_alloc_hint_after_get(sb, depth, hint, nr); 243c548e62bSMing Lei 244c548e62bSMing Lei return nr; 245c548e62bSMing Lei } 24688459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 24788459642SOmar Sandoval 248c548e62bSMing Lei static int __sbitmap_get_shallow(struct sbitmap *sb, 249c548e62bSMing Lei unsigned int alloc_hint, 250c05e6673SOmar Sandoval unsigned long shallow_depth) 251c05e6673SOmar Sandoval { 252c05e6673SOmar Sandoval unsigned int i, index; 253c05e6673SOmar Sandoval int nr = -1; 254c05e6673SOmar Sandoval 255c05e6673SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 256c05e6673SOmar Sandoval 257c05e6673SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 258b2dbff1bSJens Axboe again: 259c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 260c05e6673SOmar Sandoval min(sb->map[index].depth, shallow_depth), 261c05e6673SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), true); 262c05e6673SOmar Sandoval if (nr != -1) { 263c05e6673SOmar Sandoval nr += index << sb->shift; 264c05e6673SOmar Sandoval break; 265c05e6673SOmar Sandoval } 266c05e6673SOmar Sandoval 267b78beea0SPavel Begunkov if (sbitmap_deferred_clear(&sb->map[index])) 268b2dbff1bSJens Axboe goto again; 269b2dbff1bSJens Axboe 270c05e6673SOmar Sandoval /* Jump to next index. */ 271c05e6673SOmar Sandoval index++; 272c05e6673SOmar Sandoval alloc_hint = index << sb->shift; 273c05e6673SOmar Sandoval 274c05e6673SOmar Sandoval if (index >= sb->map_nr) { 275c05e6673SOmar Sandoval index = 0; 276c05e6673SOmar Sandoval alloc_hint = 0; 277c05e6673SOmar Sandoval } 278c05e6673SOmar Sandoval } 279c05e6673SOmar Sandoval 280c05e6673SOmar Sandoval return nr; 281c05e6673SOmar Sandoval } 282c548e62bSMing Lei 283c548e62bSMing Lei int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) 284c548e62bSMing Lei { 285c548e62bSMing Lei int nr; 286c548e62bSMing Lei unsigned int hint, depth; 287c548e62bSMing Lei 288c548e62bSMing Lei if (WARN_ON_ONCE(unlikely(!sb->alloc_hint))) 289c548e62bSMing Lei return -1; 290c548e62bSMing Lei 291c548e62bSMing Lei depth = READ_ONCE(sb->depth); 292c548e62bSMing Lei hint = update_alloc_hint_before_get(sb, depth); 293c548e62bSMing Lei nr = __sbitmap_get_shallow(sb, hint, shallow_depth); 294c548e62bSMing Lei update_alloc_hint_after_get(sb, depth, hint, nr); 295c548e62bSMing Lei 296c548e62bSMing Lei return nr; 297c548e62bSMing Lei } 298c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 299c05e6673SOmar Sandoval 30088459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 30188459642SOmar Sandoval { 30288459642SOmar Sandoval unsigned int i; 30388459642SOmar Sandoval 30488459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 305b2dbff1bSJens Axboe if (sb->map[i].word & ~sb->map[i].cleared) 30688459642SOmar Sandoval return true; 30788459642SOmar Sandoval } 30888459642SOmar Sandoval return false; 30988459642SOmar Sandoval } 31088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 31188459642SOmar Sandoval 312ea86ea2cSJens Axboe static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) 31388459642SOmar Sandoval { 31460658e0dSColin Ian King unsigned int i, weight = 0; 31588459642SOmar Sandoval 31688459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 31788459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 31888459642SOmar Sandoval 319ea86ea2cSJens Axboe if (set) 32088459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 321ea86ea2cSJens Axboe else 322ea86ea2cSJens Axboe weight += bitmap_weight(&word->cleared, word->depth); 32388459642SOmar Sandoval } 32488459642SOmar Sandoval return weight; 32588459642SOmar Sandoval } 326ea86ea2cSJens Axboe 327ea86ea2cSJens Axboe static unsigned int sbitmap_cleared(const struct sbitmap *sb) 328ea86ea2cSJens Axboe { 329ea86ea2cSJens Axboe return __sbitmap_weight(sb, false); 330ea86ea2cSJens Axboe } 33188459642SOmar Sandoval 332cbb9950bSMing Lei unsigned int sbitmap_weight(const struct sbitmap *sb) 333cbb9950bSMing Lei { 334cbb9950bSMing Lei return __sbitmap_weight(sb, true) - sbitmap_cleared(sb); 335cbb9950bSMing Lei } 336cbb9950bSMing Lei EXPORT_SYMBOL_GPL(sbitmap_weight); 337cbb9950bSMing Lei 33824af1ccfSOmar Sandoval void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 33924af1ccfSOmar Sandoval { 34024af1ccfSOmar Sandoval seq_printf(m, "depth=%u\n", sb->depth); 341cbb9950bSMing Lei seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 342ea86ea2cSJens Axboe seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); 34324af1ccfSOmar Sandoval seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 34424af1ccfSOmar Sandoval seq_printf(m, "map_nr=%u\n", sb->map_nr); 34524af1ccfSOmar Sandoval } 34624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_show); 34724af1ccfSOmar Sandoval 34824af1ccfSOmar Sandoval static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 34924af1ccfSOmar Sandoval { 35024af1ccfSOmar Sandoval if ((offset & 0xf) == 0) { 35124af1ccfSOmar Sandoval if (offset != 0) 35224af1ccfSOmar Sandoval seq_putc(m, '\n'); 35324af1ccfSOmar Sandoval seq_printf(m, "%08x:", offset); 35424af1ccfSOmar Sandoval } 35524af1ccfSOmar Sandoval if ((offset & 0x1) == 0) 35624af1ccfSOmar Sandoval seq_putc(m, ' '); 35724af1ccfSOmar Sandoval seq_printf(m, "%02x", byte); 35824af1ccfSOmar Sandoval } 35924af1ccfSOmar Sandoval 36024af1ccfSOmar Sandoval void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 36124af1ccfSOmar Sandoval { 36224af1ccfSOmar Sandoval u8 byte = 0; 36324af1ccfSOmar Sandoval unsigned int byte_bits = 0; 36424af1ccfSOmar Sandoval unsigned int offset = 0; 36524af1ccfSOmar Sandoval int i; 36624af1ccfSOmar Sandoval 36724af1ccfSOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 36824af1ccfSOmar Sandoval unsigned long word = READ_ONCE(sb->map[i].word); 3696bf0eb55SJohn Garry unsigned long cleared = READ_ONCE(sb->map[i].cleared); 37024af1ccfSOmar Sandoval unsigned int word_bits = READ_ONCE(sb->map[i].depth); 37124af1ccfSOmar Sandoval 3726bf0eb55SJohn Garry word &= ~cleared; 3736bf0eb55SJohn Garry 37424af1ccfSOmar Sandoval while (word_bits > 0) { 37524af1ccfSOmar Sandoval unsigned int bits = min(8 - byte_bits, word_bits); 37624af1ccfSOmar Sandoval 37724af1ccfSOmar Sandoval byte |= (word & (BIT(bits) - 1)) << byte_bits; 37824af1ccfSOmar Sandoval byte_bits += bits; 37924af1ccfSOmar Sandoval if (byte_bits == 8) { 38024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 38124af1ccfSOmar Sandoval byte = 0; 38224af1ccfSOmar Sandoval byte_bits = 0; 38324af1ccfSOmar Sandoval offset++; 38424af1ccfSOmar Sandoval } 38524af1ccfSOmar Sandoval word >>= bits; 38624af1ccfSOmar Sandoval word_bits -= bits; 38724af1ccfSOmar Sandoval } 38824af1ccfSOmar Sandoval } 38924af1ccfSOmar Sandoval if (byte_bits) { 39024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 39124af1ccfSOmar Sandoval offset++; 39224af1ccfSOmar Sandoval } 39324af1ccfSOmar Sandoval if (offset) 39424af1ccfSOmar Sandoval seq_putc(m, '\n'); 39524af1ccfSOmar Sandoval } 39624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 39724af1ccfSOmar Sandoval 398a3275539SOmar Sandoval static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 399a3275539SOmar Sandoval unsigned int depth) 40088459642SOmar Sandoval { 40188459642SOmar Sandoval unsigned int wake_batch; 402a3275539SOmar Sandoval unsigned int shallow_depth; 40388459642SOmar Sandoval 40488459642SOmar Sandoval /* 40588459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 406a3275539SOmar Sandoval * batch size is small enough that the full depth of the bitmap, 407a3275539SOmar Sandoval * potentially limited by a shallow depth, is enough to wake up all of 408a3275539SOmar Sandoval * the queues. 409a3275539SOmar Sandoval * 410a3275539SOmar Sandoval * Each full word of the bitmap has bits_per_word bits, and there might 411a3275539SOmar Sandoval * be a partial word. There are depth / bits_per_word full words and 412a3275539SOmar Sandoval * depth % bits_per_word bits left over. In bitwise arithmetic: 413a3275539SOmar Sandoval * 414a3275539SOmar Sandoval * bits_per_word = 1 << shift 415a3275539SOmar Sandoval * depth / bits_per_word = depth >> shift 416a3275539SOmar Sandoval * depth % bits_per_word = depth & ((1 << shift) - 1) 417a3275539SOmar Sandoval * 418a3275539SOmar Sandoval * Each word can be limited to sbq->min_shallow_depth bits. 41988459642SOmar Sandoval */ 420a3275539SOmar Sandoval shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 421a3275539SOmar Sandoval depth = ((depth >> sbq->sb.shift) * shallow_depth + 422a3275539SOmar Sandoval min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 423a3275539SOmar Sandoval wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 424a3275539SOmar Sandoval SBQ_WAKE_BATCH); 42588459642SOmar Sandoval 42688459642SOmar Sandoval return wake_batch; 42788459642SOmar Sandoval } 42888459642SOmar Sandoval 42988459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 430f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 43188459642SOmar Sandoval { 43288459642SOmar Sandoval int ret; 43388459642SOmar Sandoval int i; 43488459642SOmar Sandoval 435efe1f3a1SMing Lei ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node, 436c548e62bSMing Lei round_robin, true); 43788459642SOmar Sandoval if (ret) 43888459642SOmar Sandoval return ret; 43988459642SOmar Sandoval 440a3275539SOmar Sandoval sbq->min_shallow_depth = UINT_MAX; 441a3275539SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 44288459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 4435d2ee712SJens Axboe atomic_set(&sbq->ws_active, 0); 44488459642SOmar Sandoval 44548e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 44688459642SOmar Sandoval if (!sbq->ws) { 44788459642SOmar Sandoval sbitmap_free(&sbq->sb); 44888459642SOmar Sandoval return -ENOMEM; 44988459642SOmar Sandoval } 45088459642SOmar Sandoval 45188459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 45288459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 45388459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 45488459642SOmar Sandoval } 455f4a644dbSOmar Sandoval 45688459642SOmar Sandoval return 0; 45788459642SOmar Sandoval } 45888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 45988459642SOmar Sandoval 460a3275539SOmar Sandoval static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 461a3275539SOmar Sandoval unsigned int depth) 46288459642SOmar Sandoval { 463a3275539SOmar Sandoval unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 4646c0ca7aeSOmar Sandoval int i; 4656c0ca7aeSOmar Sandoval 4666c0ca7aeSOmar Sandoval if (sbq->wake_batch != wake_batch) { 4676c0ca7aeSOmar Sandoval WRITE_ONCE(sbq->wake_batch, wake_batch); 4686c0ca7aeSOmar Sandoval /* 469e6fc4649SMing Lei * Pairs with the memory barrier in sbitmap_queue_wake_up() 470e6fc4649SMing Lei * to ensure that the batch size is updated before the wait 471e6fc4649SMing Lei * counts. 4726c0ca7aeSOmar Sandoval */ 473a0934fd2SAndrea Parri smp_mb(); 4746c0ca7aeSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) 4756c0ca7aeSOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, 1); 4766c0ca7aeSOmar Sandoval } 477a3275539SOmar Sandoval } 478a3275539SOmar Sandoval 479a3275539SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 480a3275539SOmar Sandoval { 481a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, depth); 48288459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 48388459642SOmar Sandoval } 48488459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 48588459642SOmar Sandoval 486f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 48740aabb67SOmar Sandoval { 488c548e62bSMing Lei return sbitmap_get(&sbq->sb); 48940aabb67SOmar Sandoval } 49040aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 49140aabb67SOmar Sandoval 492*9672b0d4SJens Axboe unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, 493*9672b0d4SJens Axboe unsigned int *offset) 494*9672b0d4SJens Axboe { 495*9672b0d4SJens Axboe struct sbitmap *sb = &sbq->sb; 496*9672b0d4SJens Axboe unsigned int hint, depth; 497*9672b0d4SJens Axboe unsigned long index, nr; 498*9672b0d4SJens Axboe int i; 499*9672b0d4SJens Axboe 500*9672b0d4SJens Axboe if (unlikely(sb->round_robin)) 501*9672b0d4SJens Axboe return 0; 502*9672b0d4SJens Axboe 503*9672b0d4SJens Axboe depth = READ_ONCE(sb->depth); 504*9672b0d4SJens Axboe hint = update_alloc_hint_before_get(sb, depth); 505*9672b0d4SJens Axboe 506*9672b0d4SJens Axboe index = SB_NR_TO_INDEX(sb, hint); 507*9672b0d4SJens Axboe 508*9672b0d4SJens Axboe for (i = 0; i < sb->map_nr; i++) { 509*9672b0d4SJens Axboe struct sbitmap_word *map = &sb->map[index]; 510*9672b0d4SJens Axboe unsigned long get_mask; 511*9672b0d4SJens Axboe 512*9672b0d4SJens Axboe sbitmap_deferred_clear(map); 513*9672b0d4SJens Axboe if (map->word == (1UL << (map->depth - 1)) - 1) 514*9672b0d4SJens Axboe continue; 515*9672b0d4SJens Axboe 516*9672b0d4SJens Axboe nr = find_first_zero_bit(&map->word, map->depth); 517*9672b0d4SJens Axboe if (nr + nr_tags <= map->depth) { 518*9672b0d4SJens Axboe atomic_long_t *ptr = (atomic_long_t *) &map->word; 519*9672b0d4SJens Axboe int map_tags = min_t(int, nr_tags, map->depth); 520*9672b0d4SJens Axboe unsigned long val, ret; 521*9672b0d4SJens Axboe 522*9672b0d4SJens Axboe get_mask = ((1UL << map_tags) - 1) << nr; 523*9672b0d4SJens Axboe do { 524*9672b0d4SJens Axboe val = READ_ONCE(map->word); 525*9672b0d4SJens Axboe ret = atomic_long_cmpxchg(ptr, val, get_mask | val); 526*9672b0d4SJens Axboe } while (ret != val); 527*9672b0d4SJens Axboe get_mask = (get_mask & ~ret) >> nr; 528*9672b0d4SJens Axboe if (get_mask) { 529*9672b0d4SJens Axboe *offset = nr + (index << sb->shift); 530*9672b0d4SJens Axboe update_alloc_hint_after_get(sb, depth, hint, 531*9672b0d4SJens Axboe *offset + map_tags - 1); 532*9672b0d4SJens Axboe return get_mask; 533*9672b0d4SJens Axboe } 534*9672b0d4SJens Axboe } 535*9672b0d4SJens Axboe /* Jump to next index. */ 536*9672b0d4SJens Axboe if (++index >= sb->map_nr) 537*9672b0d4SJens Axboe index = 0; 538*9672b0d4SJens Axboe } 539*9672b0d4SJens Axboe 540*9672b0d4SJens Axboe return 0; 541*9672b0d4SJens Axboe } 542*9672b0d4SJens Axboe 543c05e6673SOmar Sandoval int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 544c05e6673SOmar Sandoval unsigned int shallow_depth) 545c05e6673SOmar Sandoval { 54661445b56SOmar Sandoval WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 54761445b56SOmar Sandoval 548c548e62bSMing Lei return sbitmap_get_shallow(&sbq->sb, shallow_depth); 549c05e6673SOmar Sandoval } 550c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 551c05e6673SOmar Sandoval 552a3275539SOmar Sandoval void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 553a3275539SOmar Sandoval unsigned int min_shallow_depth) 554a3275539SOmar Sandoval { 555a3275539SOmar Sandoval sbq->min_shallow_depth = min_shallow_depth; 556a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 557a3275539SOmar Sandoval } 558a3275539SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 559a3275539SOmar Sandoval 56088459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 56188459642SOmar Sandoval { 56288459642SOmar Sandoval int i, wake_index; 56388459642SOmar Sandoval 5645d2ee712SJens Axboe if (!atomic_read(&sbq->ws_active)) 5655d2ee712SJens Axboe return NULL; 5665d2ee712SJens Axboe 56788459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 56888459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 56988459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 57088459642SOmar Sandoval 57188459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 57241723288SPavel Begunkov if (wake_index != atomic_read(&sbq->wake_index)) 57341723288SPavel Begunkov atomic_set(&sbq->wake_index, wake_index); 57488459642SOmar Sandoval return ws; 57588459642SOmar Sandoval } 57688459642SOmar Sandoval 57788459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 57888459642SOmar Sandoval } 57988459642SOmar Sandoval 58088459642SOmar Sandoval return NULL; 58188459642SOmar Sandoval } 58288459642SOmar Sandoval 583c854ab57SJens Axboe static bool __sbq_wake_up(struct sbitmap_queue *sbq) 58488459642SOmar Sandoval { 58588459642SOmar Sandoval struct sbq_wait_state *ws; 5866c0ca7aeSOmar Sandoval unsigned int wake_batch; 58788459642SOmar Sandoval int wait_cnt; 58888459642SOmar Sandoval 58988459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 59088459642SOmar Sandoval if (!ws) 591c854ab57SJens Axboe return false; 59288459642SOmar Sandoval 59388459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 5946c0ca7aeSOmar Sandoval if (wait_cnt <= 0) { 595c854ab57SJens Axboe int ret; 596c854ab57SJens Axboe 5976c0ca7aeSOmar Sandoval wake_batch = READ_ONCE(sbq->wake_batch); 598c854ab57SJens Axboe 5996c0ca7aeSOmar Sandoval /* 6006c0ca7aeSOmar Sandoval * Pairs with the memory barrier in sbitmap_queue_resize() to 6016c0ca7aeSOmar Sandoval * ensure that we see the batch size update before the wait 6026c0ca7aeSOmar Sandoval * count is reset. 6036c0ca7aeSOmar Sandoval */ 6046c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 605c854ab57SJens Axboe 6066c0ca7aeSOmar Sandoval /* 607c854ab57SJens Axboe * For concurrent callers of this, the one that failed the 608c854ab57SJens Axboe * atomic_cmpxhcg() race should call this function again 609c854ab57SJens Axboe * to wakeup a new batch on a different 'ws'. 6106c0ca7aeSOmar Sandoval */ 611c854ab57SJens Axboe ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 612c854ab57SJens Axboe if (ret == wait_cnt) { 61388459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 6144e5dff41SJens Axboe wake_up_nr(&ws->wait, wake_batch); 615c854ab57SJens Axboe return false; 61688459642SOmar Sandoval } 617c854ab57SJens Axboe 618c854ab57SJens Axboe return true; 619c854ab57SJens Axboe } 620c854ab57SJens Axboe 621c854ab57SJens Axboe return false; 622c854ab57SJens Axboe } 623c854ab57SJens Axboe 624e6fc4649SMing Lei void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 625c854ab57SJens Axboe { 626c854ab57SJens Axboe while (__sbq_wake_up(sbq)) 627c854ab57SJens Axboe ; 62888459642SOmar Sandoval } 629e6fc4649SMing Lei EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 63088459642SOmar Sandoval 63140aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 632f4a644dbSOmar Sandoval unsigned int cpu) 63388459642SOmar Sandoval { 634e6d1fa58SMing Lei /* 635e6d1fa58SMing Lei * Once the clear bit is set, the bit may be allocated out. 636e6d1fa58SMing Lei * 6379dbbc3b9SZhen Lei * Orders READ/WRITE on the associated instance(such as request 638e6d1fa58SMing Lei * of blk_mq) by this bit for avoiding race with re-allocation, 639e6d1fa58SMing Lei * and its pair is the memory barrier implied in __sbitmap_get_word. 640e6d1fa58SMing Lei * 641e6d1fa58SMing Lei * One invariant is that the clear bit has to be zero when the bit 642e6d1fa58SMing Lei * is in use. 643e6d1fa58SMing Lei */ 644e6d1fa58SMing Lei smp_mb__before_atomic(); 645ea86ea2cSJens Axboe sbitmap_deferred_clear_bit(&sbq->sb, nr); 646ea86ea2cSJens Axboe 647e6fc4649SMing Lei /* 648e6fc4649SMing Lei * Pairs with the memory barrier in set_current_state() to ensure the 649e6fc4649SMing Lei * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 650e6fc4649SMing Lei * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 651e6fc4649SMing Lei * waiter. See the comment on waitqueue_active(). 652e6fc4649SMing Lei */ 653e6fc4649SMing Lei smp_mb__after_atomic(); 654e6fc4649SMing Lei sbitmap_queue_wake_up(sbq); 655e6fc4649SMing Lei 656efe1f3a1SMing Lei if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth)) 657c548e62bSMing Lei *per_cpu_ptr(sbq->sb.alloc_hint, cpu) = nr; 65888459642SOmar Sandoval } 65988459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 66088459642SOmar Sandoval 66188459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 66288459642SOmar Sandoval { 66388459642SOmar Sandoval int i, wake_index; 66488459642SOmar Sandoval 66588459642SOmar Sandoval /* 666f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 667e6fc4649SMing Lei * sbitmap_queue_wake_up(). 66888459642SOmar Sandoval */ 66988459642SOmar Sandoval smp_mb(); 67088459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 67188459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 67288459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 67388459642SOmar Sandoval 67488459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 67588459642SOmar Sandoval wake_up(&ws->wait); 67688459642SOmar Sandoval 67788459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 67888459642SOmar Sandoval } 67988459642SOmar Sandoval } 68088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 68124af1ccfSOmar Sandoval 68224af1ccfSOmar Sandoval void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 68324af1ccfSOmar Sandoval { 68424af1ccfSOmar Sandoval bool first; 68524af1ccfSOmar Sandoval int i; 68624af1ccfSOmar Sandoval 68724af1ccfSOmar Sandoval sbitmap_show(&sbq->sb, m); 68824af1ccfSOmar Sandoval 68924af1ccfSOmar Sandoval seq_puts(m, "alloc_hint={"); 69024af1ccfSOmar Sandoval first = true; 69124af1ccfSOmar Sandoval for_each_possible_cpu(i) { 69224af1ccfSOmar Sandoval if (!first) 69324af1ccfSOmar Sandoval seq_puts(m, ", "); 69424af1ccfSOmar Sandoval first = false; 695c548e62bSMing Lei seq_printf(m, "%u", *per_cpu_ptr(sbq->sb.alloc_hint, i)); 69624af1ccfSOmar Sandoval } 69724af1ccfSOmar Sandoval seq_puts(m, "}\n"); 69824af1ccfSOmar Sandoval 69924af1ccfSOmar Sandoval seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 70024af1ccfSOmar Sandoval seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 7015d2ee712SJens Axboe seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); 70224af1ccfSOmar Sandoval 70324af1ccfSOmar Sandoval seq_puts(m, "ws={\n"); 70424af1ccfSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 70524af1ccfSOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[i]; 70624af1ccfSOmar Sandoval 70724af1ccfSOmar Sandoval seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 70824af1ccfSOmar Sandoval atomic_read(&ws->wait_cnt), 70924af1ccfSOmar Sandoval waitqueue_active(&ws->wait) ? "active" : "inactive"); 71024af1ccfSOmar Sandoval } 71124af1ccfSOmar Sandoval seq_puts(m, "}\n"); 71224af1ccfSOmar Sandoval 713efe1f3a1SMing Lei seq_printf(m, "round_robin=%d\n", sbq->sb.round_robin); 714a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 71524af1ccfSOmar Sandoval } 71624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 7175d2ee712SJens Axboe 7189f6b7ef6SJens Axboe void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, 7199f6b7ef6SJens Axboe struct sbq_wait_state *ws, 7209f6b7ef6SJens Axboe struct sbq_wait *sbq_wait) 7219f6b7ef6SJens Axboe { 7229f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 7239f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 7249f6b7ef6SJens Axboe atomic_inc(&sbq->ws_active); 7259f6b7ef6SJens Axboe add_wait_queue(&ws->wait, &sbq_wait->wait); 7269f6b7ef6SJens Axboe } 727df034c93SDavid Jeffery } 7289f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); 7299f6b7ef6SJens Axboe 7309f6b7ef6SJens Axboe void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) 7319f6b7ef6SJens Axboe { 7329f6b7ef6SJens Axboe list_del_init(&sbq_wait->wait.entry); 7339f6b7ef6SJens Axboe if (sbq_wait->sbq) { 7349f6b7ef6SJens Axboe atomic_dec(&sbq_wait->sbq->ws_active); 7359f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 7369f6b7ef6SJens Axboe } 7379f6b7ef6SJens Axboe } 7389f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); 7399f6b7ef6SJens Axboe 7405d2ee712SJens Axboe void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, 7415d2ee712SJens Axboe struct sbq_wait_state *ws, 7425d2ee712SJens Axboe struct sbq_wait *sbq_wait, int state) 7435d2ee712SJens Axboe { 7449f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 7455d2ee712SJens Axboe atomic_inc(&sbq->ws_active); 7469f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 7475d2ee712SJens Axboe } 7485d2ee712SJens Axboe prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); 7495d2ee712SJens Axboe } 7505d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); 7515d2ee712SJens Axboe 7525d2ee712SJens Axboe void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, 7535d2ee712SJens Axboe struct sbq_wait *sbq_wait) 7545d2ee712SJens Axboe { 7555d2ee712SJens Axboe finish_wait(&ws->wait, &sbq_wait->wait); 7569f6b7ef6SJens Axboe if (sbq_wait->sbq) { 7575d2ee712SJens Axboe atomic_dec(&sbq->ws_active); 7589f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 7595d2ee712SJens Axboe } 7605d2ee712SJens Axboe } 7615d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_finish_wait); 762