188459642SOmar Sandoval /* 288459642SOmar Sandoval * Copyright (C) 2016 Facebook 388459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 488459642SOmar Sandoval * 588459642SOmar Sandoval * This program is free software; you can redistribute it and/or 688459642SOmar Sandoval * modify it under the terms of the GNU General Public 788459642SOmar Sandoval * License v2 as published by the Free Software Foundation. 888459642SOmar Sandoval * 988459642SOmar Sandoval * This program is distributed in the hope that it will be useful, 1088459642SOmar Sandoval * but WITHOUT ANY WARRANTY; without even the implied warranty of 1188459642SOmar Sandoval * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1288459642SOmar Sandoval * General Public License for more details. 1388459642SOmar Sandoval * 1488459642SOmar Sandoval * You should have received a copy of the GNU General Public License 1588459642SOmar Sandoval * along with this program. If not, see <https://www.gnu.org/licenses/>. 1688459642SOmar Sandoval */ 1788459642SOmar Sandoval 18af8601adSIngo Molnar #include <linux/sched.h> 1998d95416SOmar Sandoval #include <linux/random.h> 2088459642SOmar Sandoval #include <linux/sbitmap.h> 2124af1ccfSOmar Sandoval #include <linux/seq_file.h> 2288459642SOmar Sandoval 23b2dbff1bSJens Axboe /* 24b2dbff1bSJens Axboe * See if we have deferred clears that we can batch move 25b2dbff1bSJens Axboe */ 26b2dbff1bSJens Axboe static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) 27b2dbff1bSJens Axboe { 28b2dbff1bSJens Axboe unsigned long mask, val; 29b2dbff1bSJens Axboe unsigned long __maybe_unused flags; 30b2dbff1bSJens Axboe bool ret = false; 31b2dbff1bSJens Axboe 32b2dbff1bSJens Axboe /* Silence bogus lockdep warning */ 33b2dbff1bSJens Axboe #if defined(CONFIG_LOCKDEP) 34b2dbff1bSJens Axboe local_irq_save(flags); 35b2dbff1bSJens Axboe #endif 36b2dbff1bSJens Axboe spin_lock(&sb->map[index].swap_lock); 37b2dbff1bSJens Axboe 38b2dbff1bSJens Axboe if (!sb->map[index].cleared) 39b2dbff1bSJens Axboe goto out_unlock; 40b2dbff1bSJens Axboe 41b2dbff1bSJens Axboe /* 42b2dbff1bSJens Axboe * First get a stable cleared mask, setting the old mask to 0. 43b2dbff1bSJens Axboe */ 44b2dbff1bSJens Axboe do { 45b2dbff1bSJens Axboe mask = sb->map[index].cleared; 46b2dbff1bSJens Axboe } while (cmpxchg(&sb->map[index].cleared, mask, 0) != mask); 47b2dbff1bSJens Axboe 48b2dbff1bSJens Axboe /* 49b2dbff1bSJens Axboe * Now clear the masked bits in our free word 50b2dbff1bSJens Axboe */ 51b2dbff1bSJens Axboe do { 52b2dbff1bSJens Axboe val = sb->map[index].word; 53b2dbff1bSJens Axboe } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); 54b2dbff1bSJens Axboe 55b2dbff1bSJens Axboe ret = true; 56b2dbff1bSJens Axboe out_unlock: 57b2dbff1bSJens Axboe spin_unlock(&sb->map[index].swap_lock); 58b2dbff1bSJens Axboe #if defined(CONFIG_LOCKDEP) 59b2dbff1bSJens Axboe local_irq_restore(flags); 60b2dbff1bSJens Axboe #endif 61b2dbff1bSJens Axboe return ret; 62b2dbff1bSJens Axboe } 63b2dbff1bSJens Axboe 6488459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 6588459642SOmar Sandoval gfp_t flags, int node) 6688459642SOmar Sandoval { 6788459642SOmar Sandoval unsigned int bits_per_word; 6888459642SOmar Sandoval unsigned int i; 6988459642SOmar Sandoval 7088459642SOmar Sandoval if (shift < 0) { 7188459642SOmar Sandoval shift = ilog2(BITS_PER_LONG); 7288459642SOmar Sandoval /* 7388459642SOmar Sandoval * If the bitmap is small, shrink the number of bits per word so 7488459642SOmar Sandoval * we spread over a few cachelines, at least. If less than 4 7588459642SOmar Sandoval * bits, just forget about it, it's not going to work optimally 7688459642SOmar Sandoval * anyway. 7788459642SOmar Sandoval */ 7888459642SOmar Sandoval if (depth >= 4) { 7988459642SOmar Sandoval while ((4U << shift) > depth) 8088459642SOmar Sandoval shift--; 8188459642SOmar Sandoval } 8288459642SOmar Sandoval } 8388459642SOmar Sandoval bits_per_word = 1U << shift; 8488459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 8588459642SOmar Sandoval return -EINVAL; 8688459642SOmar Sandoval 8788459642SOmar Sandoval sb->shift = shift; 8888459642SOmar Sandoval sb->depth = depth; 8988459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 9088459642SOmar Sandoval 9188459642SOmar Sandoval if (depth == 0) { 9288459642SOmar Sandoval sb->map = NULL; 9388459642SOmar Sandoval return 0; 9488459642SOmar Sandoval } 9588459642SOmar Sandoval 96590b5b7dSKees Cook sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 9788459642SOmar Sandoval if (!sb->map) 9888459642SOmar Sandoval return -ENOMEM; 9988459642SOmar Sandoval 10088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 10188459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 10288459642SOmar Sandoval depth -= sb->map[i].depth; 103ea86ea2cSJens Axboe spin_lock_init(&sb->map[i].swap_lock); 10488459642SOmar Sandoval } 10588459642SOmar Sandoval return 0; 10688459642SOmar Sandoval } 10788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 10888459642SOmar Sandoval 10988459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 11088459642SOmar Sandoval { 11188459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 11288459642SOmar Sandoval unsigned int i; 11388459642SOmar Sandoval 114b2dbff1bSJens Axboe for (i = 0; i < sb->map_nr; i++) 115b2dbff1bSJens Axboe sbitmap_deferred_clear(sb, i); 116b2dbff1bSJens Axboe 11788459642SOmar Sandoval sb->depth = depth; 11888459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 11988459642SOmar Sandoval 12088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 12188459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 12288459642SOmar Sandoval depth -= sb->map[i].depth; 12388459642SOmar Sandoval } 12488459642SOmar Sandoval } 12588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 12688459642SOmar Sandoval 127c05e6673SOmar Sandoval static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 128c05e6673SOmar Sandoval unsigned int hint, bool wrap) 12988459642SOmar Sandoval { 13088459642SOmar Sandoval unsigned int orig_hint = hint; 13188459642SOmar Sandoval int nr; 13288459642SOmar Sandoval 13388459642SOmar Sandoval while (1) { 134c05e6673SOmar Sandoval nr = find_next_zero_bit(word, depth, hint); 135c05e6673SOmar Sandoval if (unlikely(nr >= depth)) { 13688459642SOmar Sandoval /* 13788459642SOmar Sandoval * We started with an offset, and we didn't reset the 13888459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 13988459642SOmar Sandoval * exhaust the map. 14088459642SOmar Sandoval */ 14188459642SOmar Sandoval if (orig_hint && hint && wrap) { 14288459642SOmar Sandoval hint = orig_hint = 0; 14388459642SOmar Sandoval continue; 14488459642SOmar Sandoval } 14588459642SOmar Sandoval return -1; 14688459642SOmar Sandoval } 14788459642SOmar Sandoval 1484ace53f1SOmar Sandoval if (!test_and_set_bit_lock(nr, word)) 14988459642SOmar Sandoval break; 15088459642SOmar Sandoval 15188459642SOmar Sandoval hint = nr + 1; 152c05e6673SOmar Sandoval if (hint >= depth - 1) 15388459642SOmar Sandoval hint = 0; 15488459642SOmar Sandoval } 15588459642SOmar Sandoval 15688459642SOmar Sandoval return nr; 15788459642SOmar Sandoval } 15888459642SOmar Sandoval 159ea86ea2cSJens Axboe static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 160ea86ea2cSJens Axboe unsigned int alloc_hint, bool round_robin) 161ea86ea2cSJens Axboe { 162ea86ea2cSJens Axboe int nr; 163ea86ea2cSJens Axboe 164ea86ea2cSJens Axboe do { 165ea86ea2cSJens Axboe nr = __sbitmap_get_word(&sb->map[index].word, 166ea86ea2cSJens Axboe sb->map[index].depth, alloc_hint, 167ea86ea2cSJens Axboe !round_robin); 168ea86ea2cSJens Axboe if (nr != -1) 169ea86ea2cSJens Axboe break; 170ea86ea2cSJens Axboe if (!sbitmap_deferred_clear(sb, index)) 171ea86ea2cSJens Axboe break; 172ea86ea2cSJens Axboe } while (1); 173ea86ea2cSJens Axboe 174ea86ea2cSJens Axboe return nr; 175ea86ea2cSJens Axboe } 176ea86ea2cSJens Axboe 17788459642SOmar Sandoval int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 17888459642SOmar Sandoval { 17988459642SOmar Sandoval unsigned int i, index; 18088459642SOmar Sandoval int nr = -1; 18188459642SOmar Sandoval 18288459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 18388459642SOmar Sandoval 18427fae429SJens Axboe /* 18527fae429SJens Axboe * Unless we're doing round robin tag allocation, just use the 18627fae429SJens Axboe * alloc_hint to find the right word index. No point in looping 18727fae429SJens Axboe * twice in find_next_zero_bit() for that case. 18827fae429SJens Axboe */ 18927fae429SJens Axboe if (round_robin) 19027fae429SJens Axboe alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); 19127fae429SJens Axboe else 19227fae429SJens Axboe alloc_hint = 0; 19327fae429SJens Axboe 19488459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 195ea86ea2cSJens Axboe nr = sbitmap_find_bit_in_index(sb, index, alloc_hint, 196ea86ea2cSJens Axboe round_robin); 19788459642SOmar Sandoval if (nr != -1) { 19888459642SOmar Sandoval nr += index << sb->shift; 19988459642SOmar Sandoval break; 20088459642SOmar Sandoval } 20188459642SOmar Sandoval 20288459642SOmar Sandoval /* Jump to next index. */ 20388459642SOmar Sandoval alloc_hint = 0; 20427fae429SJens Axboe if (++index >= sb->map_nr) 20527fae429SJens Axboe index = 0; 20688459642SOmar Sandoval } 20788459642SOmar Sandoval 20888459642SOmar Sandoval return nr; 20988459642SOmar Sandoval } 21088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 21188459642SOmar Sandoval 212c05e6673SOmar Sandoval int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 213c05e6673SOmar Sandoval unsigned long shallow_depth) 214c05e6673SOmar Sandoval { 215c05e6673SOmar Sandoval unsigned int i, index; 216c05e6673SOmar Sandoval int nr = -1; 217c05e6673SOmar Sandoval 218c05e6673SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 219c05e6673SOmar Sandoval 220c05e6673SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 221b2dbff1bSJens Axboe again: 222c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 223c05e6673SOmar Sandoval min(sb->map[index].depth, shallow_depth), 224c05e6673SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), true); 225c05e6673SOmar Sandoval if (nr != -1) { 226c05e6673SOmar Sandoval nr += index << sb->shift; 227c05e6673SOmar Sandoval break; 228c05e6673SOmar Sandoval } 229c05e6673SOmar Sandoval 230b2dbff1bSJens Axboe if (sbitmap_deferred_clear(sb, index)) 231b2dbff1bSJens Axboe goto again; 232b2dbff1bSJens Axboe 233c05e6673SOmar Sandoval /* Jump to next index. */ 234c05e6673SOmar Sandoval index++; 235c05e6673SOmar Sandoval alloc_hint = index << sb->shift; 236c05e6673SOmar Sandoval 237c05e6673SOmar Sandoval if (index >= sb->map_nr) { 238c05e6673SOmar Sandoval index = 0; 239c05e6673SOmar Sandoval alloc_hint = 0; 240c05e6673SOmar Sandoval } 241c05e6673SOmar Sandoval } 242c05e6673SOmar Sandoval 243c05e6673SOmar Sandoval return nr; 244c05e6673SOmar Sandoval } 245c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 246c05e6673SOmar Sandoval 24788459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 24888459642SOmar Sandoval { 24988459642SOmar Sandoval unsigned int i; 25088459642SOmar Sandoval 25188459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 252b2dbff1bSJens Axboe if (sb->map[i].word & ~sb->map[i].cleared) 25388459642SOmar Sandoval return true; 25488459642SOmar Sandoval } 25588459642SOmar Sandoval return false; 25688459642SOmar Sandoval } 25788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 25888459642SOmar Sandoval 25988459642SOmar Sandoval bool sbitmap_any_bit_clear(const struct sbitmap *sb) 26088459642SOmar Sandoval { 26188459642SOmar Sandoval unsigned int i; 26288459642SOmar Sandoval 26388459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 26488459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 265b2dbff1bSJens Axboe unsigned long mask = word->word & ~word->cleared; 26688459642SOmar Sandoval unsigned long ret; 26788459642SOmar Sandoval 268b2dbff1bSJens Axboe ret = find_first_zero_bit(&mask, word->depth); 26988459642SOmar Sandoval if (ret < word->depth) 27088459642SOmar Sandoval return true; 27188459642SOmar Sandoval } 27288459642SOmar Sandoval return false; 27388459642SOmar Sandoval } 27488459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 27588459642SOmar Sandoval 276ea86ea2cSJens Axboe static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) 27788459642SOmar Sandoval { 27860658e0dSColin Ian King unsigned int i, weight = 0; 27988459642SOmar Sandoval 28088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 28188459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 28288459642SOmar Sandoval 283ea86ea2cSJens Axboe if (set) 28488459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 285ea86ea2cSJens Axboe else 286ea86ea2cSJens Axboe weight += bitmap_weight(&word->cleared, word->depth); 28788459642SOmar Sandoval } 28888459642SOmar Sandoval return weight; 28988459642SOmar Sandoval } 290ea86ea2cSJens Axboe 291ea86ea2cSJens Axboe static unsigned int sbitmap_weight(const struct sbitmap *sb) 292ea86ea2cSJens Axboe { 293ea86ea2cSJens Axboe return __sbitmap_weight(sb, true); 294ea86ea2cSJens Axboe } 295ea86ea2cSJens Axboe 296ea86ea2cSJens Axboe static unsigned int sbitmap_cleared(const struct sbitmap *sb) 297ea86ea2cSJens Axboe { 298ea86ea2cSJens Axboe return __sbitmap_weight(sb, false); 299ea86ea2cSJens Axboe } 30088459642SOmar Sandoval 30124af1ccfSOmar Sandoval void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 30224af1ccfSOmar Sandoval { 30324af1ccfSOmar Sandoval seq_printf(m, "depth=%u\n", sb->depth); 304ea86ea2cSJens Axboe seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); 305ea86ea2cSJens Axboe seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); 30624af1ccfSOmar Sandoval seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 30724af1ccfSOmar Sandoval seq_printf(m, "map_nr=%u\n", sb->map_nr); 30824af1ccfSOmar Sandoval } 30924af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_show); 31024af1ccfSOmar Sandoval 31124af1ccfSOmar Sandoval static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 31224af1ccfSOmar Sandoval { 31324af1ccfSOmar Sandoval if ((offset & 0xf) == 0) { 31424af1ccfSOmar Sandoval if (offset != 0) 31524af1ccfSOmar Sandoval seq_putc(m, '\n'); 31624af1ccfSOmar Sandoval seq_printf(m, "%08x:", offset); 31724af1ccfSOmar Sandoval } 31824af1ccfSOmar Sandoval if ((offset & 0x1) == 0) 31924af1ccfSOmar Sandoval seq_putc(m, ' '); 32024af1ccfSOmar Sandoval seq_printf(m, "%02x", byte); 32124af1ccfSOmar Sandoval } 32224af1ccfSOmar Sandoval 32324af1ccfSOmar Sandoval void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 32424af1ccfSOmar Sandoval { 32524af1ccfSOmar Sandoval u8 byte = 0; 32624af1ccfSOmar Sandoval unsigned int byte_bits = 0; 32724af1ccfSOmar Sandoval unsigned int offset = 0; 32824af1ccfSOmar Sandoval int i; 32924af1ccfSOmar Sandoval 33024af1ccfSOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 33124af1ccfSOmar Sandoval unsigned long word = READ_ONCE(sb->map[i].word); 33224af1ccfSOmar Sandoval unsigned int word_bits = READ_ONCE(sb->map[i].depth); 33324af1ccfSOmar Sandoval 33424af1ccfSOmar Sandoval while (word_bits > 0) { 33524af1ccfSOmar Sandoval unsigned int bits = min(8 - byte_bits, word_bits); 33624af1ccfSOmar Sandoval 33724af1ccfSOmar Sandoval byte |= (word & (BIT(bits) - 1)) << byte_bits; 33824af1ccfSOmar Sandoval byte_bits += bits; 33924af1ccfSOmar Sandoval if (byte_bits == 8) { 34024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 34124af1ccfSOmar Sandoval byte = 0; 34224af1ccfSOmar Sandoval byte_bits = 0; 34324af1ccfSOmar Sandoval offset++; 34424af1ccfSOmar Sandoval } 34524af1ccfSOmar Sandoval word >>= bits; 34624af1ccfSOmar Sandoval word_bits -= bits; 34724af1ccfSOmar Sandoval } 34824af1ccfSOmar Sandoval } 34924af1ccfSOmar Sandoval if (byte_bits) { 35024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 35124af1ccfSOmar Sandoval offset++; 35224af1ccfSOmar Sandoval } 35324af1ccfSOmar Sandoval if (offset) 35424af1ccfSOmar Sandoval seq_putc(m, '\n'); 35524af1ccfSOmar Sandoval } 35624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 35724af1ccfSOmar Sandoval 358a3275539SOmar Sandoval static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 359a3275539SOmar Sandoval unsigned int depth) 36088459642SOmar Sandoval { 36188459642SOmar Sandoval unsigned int wake_batch; 362a3275539SOmar Sandoval unsigned int shallow_depth; 36388459642SOmar Sandoval 36488459642SOmar Sandoval /* 36588459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 366a3275539SOmar Sandoval * batch size is small enough that the full depth of the bitmap, 367a3275539SOmar Sandoval * potentially limited by a shallow depth, is enough to wake up all of 368a3275539SOmar Sandoval * the queues. 369a3275539SOmar Sandoval * 370a3275539SOmar Sandoval * Each full word of the bitmap has bits_per_word bits, and there might 371a3275539SOmar Sandoval * be a partial word. There are depth / bits_per_word full words and 372a3275539SOmar Sandoval * depth % bits_per_word bits left over. In bitwise arithmetic: 373a3275539SOmar Sandoval * 374a3275539SOmar Sandoval * bits_per_word = 1 << shift 375a3275539SOmar Sandoval * depth / bits_per_word = depth >> shift 376a3275539SOmar Sandoval * depth % bits_per_word = depth & ((1 << shift) - 1) 377a3275539SOmar Sandoval * 378a3275539SOmar Sandoval * Each word can be limited to sbq->min_shallow_depth bits. 37988459642SOmar Sandoval */ 380a3275539SOmar Sandoval shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 381a3275539SOmar Sandoval depth = ((depth >> sbq->sb.shift) * shallow_depth + 382a3275539SOmar Sandoval min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 383a3275539SOmar Sandoval wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 384a3275539SOmar Sandoval SBQ_WAKE_BATCH); 38588459642SOmar Sandoval 38688459642SOmar Sandoval return wake_batch; 38788459642SOmar Sandoval } 38888459642SOmar Sandoval 38988459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 390f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 39188459642SOmar Sandoval { 39288459642SOmar Sandoval int ret; 39388459642SOmar Sandoval int i; 39488459642SOmar Sandoval 39588459642SOmar Sandoval ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 39688459642SOmar Sandoval if (ret) 39788459642SOmar Sandoval return ret; 39888459642SOmar Sandoval 39940aabb67SOmar Sandoval sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 40040aabb67SOmar Sandoval if (!sbq->alloc_hint) { 40140aabb67SOmar Sandoval sbitmap_free(&sbq->sb); 40240aabb67SOmar Sandoval return -ENOMEM; 40340aabb67SOmar Sandoval } 40440aabb67SOmar Sandoval 40598d95416SOmar Sandoval if (depth && !round_robin) { 40698d95416SOmar Sandoval for_each_possible_cpu(i) 40798d95416SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 40898d95416SOmar Sandoval } 40998d95416SOmar Sandoval 410a3275539SOmar Sandoval sbq->min_shallow_depth = UINT_MAX; 411a3275539SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 41288459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 4135d2ee712SJens Axboe atomic_set(&sbq->ws_active, 0); 41488459642SOmar Sandoval 41548e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 41688459642SOmar Sandoval if (!sbq->ws) { 41740aabb67SOmar Sandoval free_percpu(sbq->alloc_hint); 41888459642SOmar Sandoval sbitmap_free(&sbq->sb); 41988459642SOmar Sandoval return -ENOMEM; 42088459642SOmar Sandoval } 42188459642SOmar Sandoval 42288459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 42388459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 42488459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 42588459642SOmar Sandoval } 426f4a644dbSOmar Sandoval 427f4a644dbSOmar Sandoval sbq->round_robin = round_robin; 42888459642SOmar Sandoval return 0; 42988459642SOmar Sandoval } 43088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 43188459642SOmar Sandoval 432a3275539SOmar Sandoval static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 433a3275539SOmar Sandoval unsigned int depth) 43488459642SOmar Sandoval { 435a3275539SOmar Sandoval unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 4366c0ca7aeSOmar Sandoval int i; 4376c0ca7aeSOmar Sandoval 4386c0ca7aeSOmar Sandoval if (sbq->wake_batch != wake_batch) { 4396c0ca7aeSOmar Sandoval WRITE_ONCE(sbq->wake_batch, wake_batch); 4406c0ca7aeSOmar Sandoval /* 441e6fc4649SMing Lei * Pairs with the memory barrier in sbitmap_queue_wake_up() 442e6fc4649SMing Lei * to ensure that the batch size is updated before the wait 443e6fc4649SMing Lei * counts. 4446c0ca7aeSOmar Sandoval */ 4456c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 4466c0ca7aeSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) 4476c0ca7aeSOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, 1); 4486c0ca7aeSOmar Sandoval } 449a3275539SOmar Sandoval } 450a3275539SOmar Sandoval 451a3275539SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 452a3275539SOmar Sandoval { 453a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, depth); 45488459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 45588459642SOmar Sandoval } 45688459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 45788459642SOmar Sandoval 458f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 45940aabb67SOmar Sandoval { 46005fd095dSOmar Sandoval unsigned int hint, depth; 46140aabb67SOmar Sandoval int nr; 46240aabb67SOmar Sandoval 46340aabb67SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 46405fd095dSOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 46505fd095dSOmar Sandoval if (unlikely(hint >= depth)) { 46605fd095dSOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 46705fd095dSOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 46805fd095dSOmar Sandoval } 469f4a644dbSOmar Sandoval nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 47040aabb67SOmar Sandoval 47140aabb67SOmar Sandoval if (nr == -1) { 47240aabb67SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 47340aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 474f4a644dbSOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 47540aabb67SOmar Sandoval /* Only update the hint if we used it. */ 47640aabb67SOmar Sandoval hint = nr + 1; 47705fd095dSOmar Sandoval if (hint >= depth - 1) 47840aabb67SOmar Sandoval hint = 0; 47940aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 48040aabb67SOmar Sandoval } 48140aabb67SOmar Sandoval 48240aabb67SOmar Sandoval return nr; 48340aabb67SOmar Sandoval } 48440aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 48540aabb67SOmar Sandoval 486c05e6673SOmar Sandoval int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 487c05e6673SOmar Sandoval unsigned int shallow_depth) 488c05e6673SOmar Sandoval { 489c05e6673SOmar Sandoval unsigned int hint, depth; 490c05e6673SOmar Sandoval int nr; 491c05e6673SOmar Sandoval 49261445b56SOmar Sandoval WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 49361445b56SOmar Sandoval 494c05e6673SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 495c05e6673SOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 496c05e6673SOmar Sandoval if (unlikely(hint >= depth)) { 497c05e6673SOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 498c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 499c05e6673SOmar Sandoval } 500c05e6673SOmar Sandoval nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 501c05e6673SOmar Sandoval 502c05e6673SOmar Sandoval if (nr == -1) { 503c05e6673SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 504c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 505c05e6673SOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 506c05e6673SOmar Sandoval /* Only update the hint if we used it. */ 507c05e6673SOmar Sandoval hint = nr + 1; 508c05e6673SOmar Sandoval if (hint >= depth - 1) 509c05e6673SOmar Sandoval hint = 0; 510c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 511c05e6673SOmar Sandoval } 512c05e6673SOmar Sandoval 513c05e6673SOmar Sandoval return nr; 514c05e6673SOmar Sandoval } 515c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 516c05e6673SOmar Sandoval 517a3275539SOmar Sandoval void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 518a3275539SOmar Sandoval unsigned int min_shallow_depth) 519a3275539SOmar Sandoval { 520a3275539SOmar Sandoval sbq->min_shallow_depth = min_shallow_depth; 521a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 522a3275539SOmar Sandoval } 523a3275539SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 524a3275539SOmar Sandoval 52588459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 52688459642SOmar Sandoval { 52788459642SOmar Sandoval int i, wake_index; 52888459642SOmar Sandoval 5295d2ee712SJens Axboe if (!atomic_read(&sbq->ws_active)) 5305d2ee712SJens Axboe return NULL; 5315d2ee712SJens Axboe 53288459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 53388459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 53488459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 53588459642SOmar Sandoval 53688459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 53788459642SOmar Sandoval int o = atomic_read(&sbq->wake_index); 53888459642SOmar Sandoval 53988459642SOmar Sandoval if (wake_index != o) 54088459642SOmar Sandoval atomic_cmpxchg(&sbq->wake_index, o, wake_index); 54188459642SOmar Sandoval return ws; 54288459642SOmar Sandoval } 54388459642SOmar Sandoval 54488459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 54588459642SOmar Sandoval } 54688459642SOmar Sandoval 54788459642SOmar Sandoval return NULL; 54888459642SOmar Sandoval } 54988459642SOmar Sandoval 550c854ab57SJens Axboe static bool __sbq_wake_up(struct sbitmap_queue *sbq) 55188459642SOmar Sandoval { 55288459642SOmar Sandoval struct sbq_wait_state *ws; 5536c0ca7aeSOmar Sandoval unsigned int wake_batch; 55488459642SOmar Sandoval int wait_cnt; 55588459642SOmar Sandoval 55688459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 55788459642SOmar Sandoval if (!ws) 558c854ab57SJens Axboe return false; 55988459642SOmar Sandoval 56088459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 5616c0ca7aeSOmar Sandoval if (wait_cnt <= 0) { 562c854ab57SJens Axboe int ret; 563c854ab57SJens Axboe 5646c0ca7aeSOmar Sandoval wake_batch = READ_ONCE(sbq->wake_batch); 565c854ab57SJens Axboe 5666c0ca7aeSOmar Sandoval /* 5676c0ca7aeSOmar Sandoval * Pairs with the memory barrier in sbitmap_queue_resize() to 5686c0ca7aeSOmar Sandoval * ensure that we see the batch size update before the wait 5696c0ca7aeSOmar Sandoval * count is reset. 5706c0ca7aeSOmar Sandoval */ 5716c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 572c854ab57SJens Axboe 5736c0ca7aeSOmar Sandoval /* 574c854ab57SJens Axboe * For concurrent callers of this, the one that failed the 575c854ab57SJens Axboe * atomic_cmpxhcg() race should call this function again 576c854ab57SJens Axboe * to wakeup a new batch on a different 'ws'. 5776c0ca7aeSOmar Sandoval */ 578c854ab57SJens Axboe ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 579c854ab57SJens Axboe if (ret == wait_cnt) { 58088459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 5814e5dff41SJens Axboe wake_up_nr(&ws->wait, wake_batch); 582c854ab57SJens Axboe return false; 58388459642SOmar Sandoval } 584c854ab57SJens Axboe 585c854ab57SJens Axboe return true; 586c854ab57SJens Axboe } 587c854ab57SJens Axboe 588c854ab57SJens Axboe return false; 589c854ab57SJens Axboe } 590c854ab57SJens Axboe 591e6fc4649SMing Lei void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 592c854ab57SJens Axboe { 593c854ab57SJens Axboe while (__sbq_wake_up(sbq)) 594c854ab57SJens Axboe ; 59588459642SOmar Sandoval } 596e6fc4649SMing Lei EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 59788459642SOmar Sandoval 59840aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 599f4a644dbSOmar Sandoval unsigned int cpu) 60088459642SOmar Sandoval { 601ea86ea2cSJens Axboe sbitmap_deferred_clear_bit(&sbq->sb, nr); 602ea86ea2cSJens Axboe 603e6fc4649SMing Lei /* 604e6fc4649SMing Lei * Pairs with the memory barrier in set_current_state() to ensure the 605e6fc4649SMing Lei * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 606e6fc4649SMing Lei * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 607e6fc4649SMing Lei * waiter. See the comment on waitqueue_active(). 608e6fc4649SMing Lei */ 609e6fc4649SMing Lei smp_mb__after_atomic(); 610e6fc4649SMing Lei sbitmap_queue_wake_up(sbq); 611e6fc4649SMing Lei 6125c64a8dfSOmar Sandoval if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 61340aabb67SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 61488459642SOmar Sandoval } 61588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 61688459642SOmar Sandoval 61788459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 61888459642SOmar Sandoval { 61988459642SOmar Sandoval int i, wake_index; 62088459642SOmar Sandoval 62188459642SOmar Sandoval /* 622f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 623e6fc4649SMing Lei * sbitmap_queue_wake_up(). 62488459642SOmar Sandoval */ 62588459642SOmar Sandoval smp_mb(); 62688459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 62788459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 62888459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 62988459642SOmar Sandoval 63088459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 63188459642SOmar Sandoval wake_up(&ws->wait); 63288459642SOmar Sandoval 63388459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 63488459642SOmar Sandoval } 63588459642SOmar Sandoval } 63688459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 63724af1ccfSOmar Sandoval 63824af1ccfSOmar Sandoval void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 63924af1ccfSOmar Sandoval { 64024af1ccfSOmar Sandoval bool first; 64124af1ccfSOmar Sandoval int i; 64224af1ccfSOmar Sandoval 64324af1ccfSOmar Sandoval sbitmap_show(&sbq->sb, m); 64424af1ccfSOmar Sandoval 64524af1ccfSOmar Sandoval seq_puts(m, "alloc_hint={"); 64624af1ccfSOmar Sandoval first = true; 64724af1ccfSOmar Sandoval for_each_possible_cpu(i) { 64824af1ccfSOmar Sandoval if (!first) 64924af1ccfSOmar Sandoval seq_puts(m, ", "); 65024af1ccfSOmar Sandoval first = false; 65124af1ccfSOmar Sandoval seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 65224af1ccfSOmar Sandoval } 65324af1ccfSOmar Sandoval seq_puts(m, "}\n"); 65424af1ccfSOmar Sandoval 65524af1ccfSOmar Sandoval seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 65624af1ccfSOmar Sandoval seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 6575d2ee712SJens Axboe seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); 65824af1ccfSOmar Sandoval 65924af1ccfSOmar Sandoval seq_puts(m, "ws={\n"); 66024af1ccfSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 66124af1ccfSOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[i]; 66224af1ccfSOmar Sandoval 66324af1ccfSOmar Sandoval seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 66424af1ccfSOmar Sandoval atomic_read(&ws->wait_cnt), 66524af1ccfSOmar Sandoval waitqueue_active(&ws->wait) ? "active" : "inactive"); 66624af1ccfSOmar Sandoval } 66724af1ccfSOmar Sandoval seq_puts(m, "}\n"); 66824af1ccfSOmar Sandoval 66924af1ccfSOmar Sandoval seq_printf(m, "round_robin=%d\n", sbq->round_robin); 670a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 67124af1ccfSOmar Sandoval } 67224af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 6735d2ee712SJens Axboe 674*9f6b7ef6SJens Axboe void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, 675*9f6b7ef6SJens Axboe struct sbq_wait_state *ws, 676*9f6b7ef6SJens Axboe struct sbq_wait *sbq_wait) 677*9f6b7ef6SJens Axboe { 678*9f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 679*9f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 680*9f6b7ef6SJens Axboe atomic_inc(&sbq->ws_active); 681*9f6b7ef6SJens Axboe } 682*9f6b7ef6SJens Axboe add_wait_queue(&ws->wait, &sbq_wait->wait); 683*9f6b7ef6SJens Axboe } 684*9f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); 685*9f6b7ef6SJens Axboe 686*9f6b7ef6SJens Axboe void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) 687*9f6b7ef6SJens Axboe { 688*9f6b7ef6SJens Axboe list_del_init(&sbq_wait->wait.entry); 689*9f6b7ef6SJens Axboe if (sbq_wait->sbq) { 690*9f6b7ef6SJens Axboe atomic_dec(&sbq_wait->sbq->ws_active); 691*9f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 692*9f6b7ef6SJens Axboe } 693*9f6b7ef6SJens Axboe } 694*9f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); 695*9f6b7ef6SJens Axboe 6965d2ee712SJens Axboe void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, 6975d2ee712SJens Axboe struct sbq_wait_state *ws, 6985d2ee712SJens Axboe struct sbq_wait *sbq_wait, int state) 6995d2ee712SJens Axboe { 700*9f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 7015d2ee712SJens Axboe atomic_inc(&sbq->ws_active); 702*9f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 7035d2ee712SJens Axboe } 7045d2ee712SJens Axboe prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); 7055d2ee712SJens Axboe } 7065d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); 7075d2ee712SJens Axboe 7085d2ee712SJens Axboe void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, 7095d2ee712SJens Axboe struct sbq_wait *sbq_wait) 7105d2ee712SJens Axboe { 7115d2ee712SJens Axboe finish_wait(&ws->wait, &sbq_wait->wait); 712*9f6b7ef6SJens Axboe if (sbq_wait->sbq) { 7135d2ee712SJens Axboe atomic_dec(&sbq->ws_active); 714*9f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 7155d2ee712SJens Axboe } 7165d2ee712SJens Axboe } 7175d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_finish_wait); 718