188459642SOmar Sandoval /* 288459642SOmar Sandoval * Copyright (C) 2016 Facebook 388459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 488459642SOmar Sandoval * 588459642SOmar Sandoval * This program is free software; you can redistribute it and/or 688459642SOmar Sandoval * modify it under the terms of the GNU General Public 788459642SOmar Sandoval * License v2 as published by the Free Software Foundation. 888459642SOmar Sandoval * 988459642SOmar Sandoval * This program is distributed in the hope that it will be useful, 1088459642SOmar Sandoval * but WITHOUT ANY WARRANTY; without even the implied warranty of 1188459642SOmar Sandoval * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1288459642SOmar Sandoval * General Public License for more details. 1388459642SOmar Sandoval * 1488459642SOmar Sandoval * You should have received a copy of the GNU General Public License 1588459642SOmar Sandoval * along with this program. If not, see <https://www.gnu.org/licenses/>. 1688459642SOmar Sandoval */ 1788459642SOmar Sandoval 18af8601adSIngo Molnar #include <linux/sched.h> 1998d95416SOmar Sandoval #include <linux/random.h> 2088459642SOmar Sandoval #include <linux/sbitmap.h> 2124af1ccfSOmar Sandoval #include <linux/seq_file.h> 2288459642SOmar Sandoval 2388459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 2488459642SOmar Sandoval gfp_t flags, int node) 2588459642SOmar Sandoval { 2688459642SOmar Sandoval unsigned int bits_per_word; 2788459642SOmar Sandoval unsigned int i; 2888459642SOmar Sandoval 2988459642SOmar Sandoval if (shift < 0) { 3088459642SOmar Sandoval shift = ilog2(BITS_PER_LONG); 3188459642SOmar Sandoval /* 3288459642SOmar Sandoval * If the bitmap is small, shrink the number of bits per word so 3388459642SOmar Sandoval * we spread over a few cachelines, at least. If less than 4 3488459642SOmar Sandoval * bits, just forget about it, it's not going to work optimally 3588459642SOmar Sandoval * anyway. 3688459642SOmar Sandoval */ 3788459642SOmar Sandoval if (depth >= 4) { 3888459642SOmar Sandoval while ((4U << shift) > depth) 3988459642SOmar Sandoval shift--; 4088459642SOmar Sandoval } 4188459642SOmar Sandoval } 4288459642SOmar Sandoval bits_per_word = 1U << shift; 4388459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 4488459642SOmar Sandoval return -EINVAL; 4588459642SOmar Sandoval 4688459642SOmar Sandoval sb->shift = shift; 4788459642SOmar Sandoval sb->depth = depth; 4888459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 4988459642SOmar Sandoval 5088459642SOmar Sandoval if (depth == 0) { 5188459642SOmar Sandoval sb->map = NULL; 5288459642SOmar Sandoval return 0; 5388459642SOmar Sandoval } 5488459642SOmar Sandoval 55590b5b7dSKees Cook sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 5688459642SOmar Sandoval if (!sb->map) 5788459642SOmar Sandoval return -ENOMEM; 5888459642SOmar Sandoval 5988459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 6088459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 6188459642SOmar Sandoval depth -= sb->map[i].depth; 6288459642SOmar Sandoval } 6388459642SOmar Sandoval return 0; 6488459642SOmar Sandoval } 6588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 6688459642SOmar Sandoval 6788459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 6888459642SOmar Sandoval { 6988459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 7088459642SOmar Sandoval unsigned int i; 7188459642SOmar Sandoval 7288459642SOmar Sandoval sb->depth = depth; 7388459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 7488459642SOmar Sandoval 7588459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 7688459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 7788459642SOmar Sandoval depth -= sb->map[i].depth; 7888459642SOmar Sandoval } 7988459642SOmar Sandoval } 8088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 8188459642SOmar Sandoval 82c05e6673SOmar Sandoval static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 83c05e6673SOmar Sandoval unsigned int hint, bool wrap) 8488459642SOmar Sandoval { 8588459642SOmar Sandoval unsigned int orig_hint = hint; 8688459642SOmar Sandoval int nr; 8788459642SOmar Sandoval 8888459642SOmar Sandoval while (1) { 89c05e6673SOmar Sandoval nr = find_next_zero_bit(word, depth, hint); 90c05e6673SOmar Sandoval if (unlikely(nr >= depth)) { 9188459642SOmar Sandoval /* 9288459642SOmar Sandoval * We started with an offset, and we didn't reset the 9388459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 9488459642SOmar Sandoval * exhaust the map. 9588459642SOmar Sandoval */ 9688459642SOmar Sandoval if (orig_hint && hint && wrap) { 9788459642SOmar Sandoval hint = orig_hint = 0; 9888459642SOmar Sandoval continue; 9988459642SOmar Sandoval } 10088459642SOmar Sandoval return -1; 10188459642SOmar Sandoval } 10288459642SOmar Sandoval 1034ace53f1SOmar Sandoval if (!test_and_set_bit_lock(nr, word)) 10488459642SOmar Sandoval break; 10588459642SOmar Sandoval 10688459642SOmar Sandoval hint = nr + 1; 107c05e6673SOmar Sandoval if (hint >= depth - 1) 10888459642SOmar Sandoval hint = 0; 10988459642SOmar Sandoval } 11088459642SOmar Sandoval 11188459642SOmar Sandoval return nr; 11288459642SOmar Sandoval } 11388459642SOmar Sandoval 11488459642SOmar Sandoval int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 11588459642SOmar Sandoval { 11688459642SOmar Sandoval unsigned int i, index; 11788459642SOmar Sandoval int nr = -1; 11888459642SOmar Sandoval 11988459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 12088459642SOmar Sandoval 121*27fae429SJens Axboe /* 122*27fae429SJens Axboe * Unless we're doing round robin tag allocation, just use the 123*27fae429SJens Axboe * alloc_hint to find the right word index. No point in looping 124*27fae429SJens Axboe * twice in find_next_zero_bit() for that case. 125*27fae429SJens Axboe */ 126*27fae429SJens Axboe if (round_robin) 127*27fae429SJens Axboe alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); 128*27fae429SJens Axboe else 129*27fae429SJens Axboe alloc_hint = 0; 130*27fae429SJens Axboe 13188459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 132c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 133*27fae429SJens Axboe sb->map[index].depth, alloc_hint, 13488459642SOmar Sandoval !round_robin); 13588459642SOmar Sandoval if (nr != -1) { 13688459642SOmar Sandoval nr += index << sb->shift; 13788459642SOmar Sandoval break; 13888459642SOmar Sandoval } 13988459642SOmar Sandoval 14088459642SOmar Sandoval /* Jump to next index. */ 14188459642SOmar Sandoval alloc_hint = 0; 142*27fae429SJens Axboe if (++index >= sb->map_nr) 143*27fae429SJens Axboe index = 0; 14488459642SOmar Sandoval } 14588459642SOmar Sandoval 14688459642SOmar Sandoval return nr; 14788459642SOmar Sandoval } 14888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 14988459642SOmar Sandoval 150c05e6673SOmar Sandoval int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 151c05e6673SOmar Sandoval unsigned long shallow_depth) 152c05e6673SOmar Sandoval { 153c05e6673SOmar Sandoval unsigned int i, index; 154c05e6673SOmar Sandoval int nr = -1; 155c05e6673SOmar Sandoval 156c05e6673SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 157c05e6673SOmar Sandoval 158c05e6673SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 159c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 160c05e6673SOmar Sandoval min(sb->map[index].depth, shallow_depth), 161c05e6673SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), true); 162c05e6673SOmar Sandoval if (nr != -1) { 163c05e6673SOmar Sandoval nr += index << sb->shift; 164c05e6673SOmar Sandoval break; 165c05e6673SOmar Sandoval } 166c05e6673SOmar Sandoval 167c05e6673SOmar Sandoval /* Jump to next index. */ 168c05e6673SOmar Sandoval index++; 169c05e6673SOmar Sandoval alloc_hint = index << sb->shift; 170c05e6673SOmar Sandoval 171c05e6673SOmar Sandoval if (index >= sb->map_nr) { 172c05e6673SOmar Sandoval index = 0; 173c05e6673SOmar Sandoval alloc_hint = 0; 174c05e6673SOmar Sandoval } 175c05e6673SOmar Sandoval } 176c05e6673SOmar Sandoval 177c05e6673SOmar Sandoval return nr; 178c05e6673SOmar Sandoval } 179c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 180c05e6673SOmar Sandoval 18188459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 18288459642SOmar Sandoval { 18388459642SOmar Sandoval unsigned int i; 18488459642SOmar Sandoval 18588459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 18688459642SOmar Sandoval if (sb->map[i].word) 18788459642SOmar Sandoval return true; 18888459642SOmar Sandoval } 18988459642SOmar Sandoval return false; 19088459642SOmar Sandoval } 19188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 19288459642SOmar Sandoval 19388459642SOmar Sandoval bool sbitmap_any_bit_clear(const struct sbitmap *sb) 19488459642SOmar Sandoval { 19588459642SOmar Sandoval unsigned int i; 19688459642SOmar Sandoval 19788459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 19888459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 19988459642SOmar Sandoval unsigned long ret; 20088459642SOmar Sandoval 20188459642SOmar Sandoval ret = find_first_zero_bit(&word->word, word->depth); 20288459642SOmar Sandoval if (ret < word->depth) 20388459642SOmar Sandoval return true; 20488459642SOmar Sandoval } 20588459642SOmar Sandoval return false; 20688459642SOmar Sandoval } 20788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 20888459642SOmar Sandoval 20988459642SOmar Sandoval unsigned int sbitmap_weight(const struct sbitmap *sb) 21088459642SOmar Sandoval { 21160658e0dSColin Ian King unsigned int i, weight = 0; 21288459642SOmar Sandoval 21388459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 21488459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 21588459642SOmar Sandoval 21688459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 21788459642SOmar Sandoval } 21888459642SOmar Sandoval return weight; 21988459642SOmar Sandoval } 22088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_weight); 22188459642SOmar Sandoval 22224af1ccfSOmar Sandoval void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 22324af1ccfSOmar Sandoval { 22424af1ccfSOmar Sandoval seq_printf(m, "depth=%u\n", sb->depth); 22524af1ccfSOmar Sandoval seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 22624af1ccfSOmar Sandoval seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 22724af1ccfSOmar Sandoval seq_printf(m, "map_nr=%u\n", sb->map_nr); 22824af1ccfSOmar Sandoval } 22924af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_show); 23024af1ccfSOmar Sandoval 23124af1ccfSOmar Sandoval static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 23224af1ccfSOmar Sandoval { 23324af1ccfSOmar Sandoval if ((offset & 0xf) == 0) { 23424af1ccfSOmar Sandoval if (offset != 0) 23524af1ccfSOmar Sandoval seq_putc(m, '\n'); 23624af1ccfSOmar Sandoval seq_printf(m, "%08x:", offset); 23724af1ccfSOmar Sandoval } 23824af1ccfSOmar Sandoval if ((offset & 0x1) == 0) 23924af1ccfSOmar Sandoval seq_putc(m, ' '); 24024af1ccfSOmar Sandoval seq_printf(m, "%02x", byte); 24124af1ccfSOmar Sandoval } 24224af1ccfSOmar Sandoval 24324af1ccfSOmar Sandoval void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 24424af1ccfSOmar Sandoval { 24524af1ccfSOmar Sandoval u8 byte = 0; 24624af1ccfSOmar Sandoval unsigned int byte_bits = 0; 24724af1ccfSOmar Sandoval unsigned int offset = 0; 24824af1ccfSOmar Sandoval int i; 24924af1ccfSOmar Sandoval 25024af1ccfSOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 25124af1ccfSOmar Sandoval unsigned long word = READ_ONCE(sb->map[i].word); 25224af1ccfSOmar Sandoval unsigned int word_bits = READ_ONCE(sb->map[i].depth); 25324af1ccfSOmar Sandoval 25424af1ccfSOmar Sandoval while (word_bits > 0) { 25524af1ccfSOmar Sandoval unsigned int bits = min(8 - byte_bits, word_bits); 25624af1ccfSOmar Sandoval 25724af1ccfSOmar Sandoval byte |= (word & (BIT(bits) - 1)) << byte_bits; 25824af1ccfSOmar Sandoval byte_bits += bits; 25924af1ccfSOmar Sandoval if (byte_bits == 8) { 26024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 26124af1ccfSOmar Sandoval byte = 0; 26224af1ccfSOmar Sandoval byte_bits = 0; 26324af1ccfSOmar Sandoval offset++; 26424af1ccfSOmar Sandoval } 26524af1ccfSOmar Sandoval word >>= bits; 26624af1ccfSOmar Sandoval word_bits -= bits; 26724af1ccfSOmar Sandoval } 26824af1ccfSOmar Sandoval } 26924af1ccfSOmar Sandoval if (byte_bits) { 27024af1ccfSOmar Sandoval emit_byte(m, offset, byte); 27124af1ccfSOmar Sandoval offset++; 27224af1ccfSOmar Sandoval } 27324af1ccfSOmar Sandoval if (offset) 27424af1ccfSOmar Sandoval seq_putc(m, '\n'); 27524af1ccfSOmar Sandoval } 27624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 27724af1ccfSOmar Sandoval 278a3275539SOmar Sandoval static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 279a3275539SOmar Sandoval unsigned int depth) 28088459642SOmar Sandoval { 28188459642SOmar Sandoval unsigned int wake_batch; 282a3275539SOmar Sandoval unsigned int shallow_depth; 28388459642SOmar Sandoval 28488459642SOmar Sandoval /* 28588459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 286a3275539SOmar Sandoval * batch size is small enough that the full depth of the bitmap, 287a3275539SOmar Sandoval * potentially limited by a shallow depth, is enough to wake up all of 288a3275539SOmar Sandoval * the queues. 289a3275539SOmar Sandoval * 290a3275539SOmar Sandoval * Each full word of the bitmap has bits_per_word bits, and there might 291a3275539SOmar Sandoval * be a partial word. There are depth / bits_per_word full words and 292a3275539SOmar Sandoval * depth % bits_per_word bits left over. In bitwise arithmetic: 293a3275539SOmar Sandoval * 294a3275539SOmar Sandoval * bits_per_word = 1 << shift 295a3275539SOmar Sandoval * depth / bits_per_word = depth >> shift 296a3275539SOmar Sandoval * depth % bits_per_word = depth & ((1 << shift) - 1) 297a3275539SOmar Sandoval * 298a3275539SOmar Sandoval * Each word can be limited to sbq->min_shallow_depth bits. 29988459642SOmar Sandoval */ 300a3275539SOmar Sandoval shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 301a3275539SOmar Sandoval depth = ((depth >> sbq->sb.shift) * shallow_depth + 302a3275539SOmar Sandoval min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 303a3275539SOmar Sandoval wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 304a3275539SOmar Sandoval SBQ_WAKE_BATCH); 30588459642SOmar Sandoval 30688459642SOmar Sandoval return wake_batch; 30788459642SOmar Sandoval } 30888459642SOmar Sandoval 30988459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 310f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 31188459642SOmar Sandoval { 31288459642SOmar Sandoval int ret; 31388459642SOmar Sandoval int i; 31488459642SOmar Sandoval 31588459642SOmar Sandoval ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 31688459642SOmar Sandoval if (ret) 31788459642SOmar Sandoval return ret; 31888459642SOmar Sandoval 31940aabb67SOmar Sandoval sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 32040aabb67SOmar Sandoval if (!sbq->alloc_hint) { 32140aabb67SOmar Sandoval sbitmap_free(&sbq->sb); 32240aabb67SOmar Sandoval return -ENOMEM; 32340aabb67SOmar Sandoval } 32440aabb67SOmar Sandoval 32598d95416SOmar Sandoval if (depth && !round_robin) { 32698d95416SOmar Sandoval for_each_possible_cpu(i) 32798d95416SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 32898d95416SOmar Sandoval } 32998d95416SOmar Sandoval 330a3275539SOmar Sandoval sbq->min_shallow_depth = UINT_MAX; 331a3275539SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 33288459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 33388459642SOmar Sandoval 33448e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 33588459642SOmar Sandoval if (!sbq->ws) { 33640aabb67SOmar Sandoval free_percpu(sbq->alloc_hint); 33788459642SOmar Sandoval sbitmap_free(&sbq->sb); 33888459642SOmar Sandoval return -ENOMEM; 33988459642SOmar Sandoval } 34088459642SOmar Sandoval 34188459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 34288459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 34388459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 34488459642SOmar Sandoval } 345f4a644dbSOmar Sandoval 346f4a644dbSOmar Sandoval sbq->round_robin = round_robin; 34788459642SOmar Sandoval return 0; 34888459642SOmar Sandoval } 34988459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 35088459642SOmar Sandoval 351a3275539SOmar Sandoval static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 352a3275539SOmar Sandoval unsigned int depth) 35388459642SOmar Sandoval { 354a3275539SOmar Sandoval unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 3556c0ca7aeSOmar Sandoval int i; 3566c0ca7aeSOmar Sandoval 3576c0ca7aeSOmar Sandoval if (sbq->wake_batch != wake_batch) { 3586c0ca7aeSOmar Sandoval WRITE_ONCE(sbq->wake_batch, wake_batch); 3596c0ca7aeSOmar Sandoval /* 360e6fc4649SMing Lei * Pairs with the memory barrier in sbitmap_queue_wake_up() 361e6fc4649SMing Lei * to ensure that the batch size is updated before the wait 362e6fc4649SMing Lei * counts. 3636c0ca7aeSOmar Sandoval */ 3646c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 3656c0ca7aeSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) 3666c0ca7aeSOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, 1); 3676c0ca7aeSOmar Sandoval } 368a3275539SOmar Sandoval } 369a3275539SOmar Sandoval 370a3275539SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 371a3275539SOmar Sandoval { 372a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, depth); 37388459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 37488459642SOmar Sandoval } 37588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 37688459642SOmar Sandoval 377f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 37840aabb67SOmar Sandoval { 37905fd095dSOmar Sandoval unsigned int hint, depth; 38040aabb67SOmar Sandoval int nr; 38140aabb67SOmar Sandoval 38240aabb67SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 38305fd095dSOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 38405fd095dSOmar Sandoval if (unlikely(hint >= depth)) { 38505fd095dSOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 38605fd095dSOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 38705fd095dSOmar Sandoval } 388f4a644dbSOmar Sandoval nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 38940aabb67SOmar Sandoval 39040aabb67SOmar Sandoval if (nr == -1) { 39140aabb67SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 39240aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 393f4a644dbSOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 39440aabb67SOmar Sandoval /* Only update the hint if we used it. */ 39540aabb67SOmar Sandoval hint = nr + 1; 39605fd095dSOmar Sandoval if (hint >= depth - 1) 39740aabb67SOmar Sandoval hint = 0; 39840aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 39940aabb67SOmar Sandoval } 40040aabb67SOmar Sandoval 40140aabb67SOmar Sandoval return nr; 40240aabb67SOmar Sandoval } 40340aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 40440aabb67SOmar Sandoval 405c05e6673SOmar Sandoval int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 406c05e6673SOmar Sandoval unsigned int shallow_depth) 407c05e6673SOmar Sandoval { 408c05e6673SOmar Sandoval unsigned int hint, depth; 409c05e6673SOmar Sandoval int nr; 410c05e6673SOmar Sandoval 41161445b56SOmar Sandoval WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 41261445b56SOmar Sandoval 413c05e6673SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 414c05e6673SOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 415c05e6673SOmar Sandoval if (unlikely(hint >= depth)) { 416c05e6673SOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 417c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 418c05e6673SOmar Sandoval } 419c05e6673SOmar Sandoval nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 420c05e6673SOmar Sandoval 421c05e6673SOmar Sandoval if (nr == -1) { 422c05e6673SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 423c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 424c05e6673SOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 425c05e6673SOmar Sandoval /* Only update the hint if we used it. */ 426c05e6673SOmar Sandoval hint = nr + 1; 427c05e6673SOmar Sandoval if (hint >= depth - 1) 428c05e6673SOmar Sandoval hint = 0; 429c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 430c05e6673SOmar Sandoval } 431c05e6673SOmar Sandoval 432c05e6673SOmar Sandoval return nr; 433c05e6673SOmar Sandoval } 434c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 435c05e6673SOmar Sandoval 436a3275539SOmar Sandoval void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 437a3275539SOmar Sandoval unsigned int min_shallow_depth) 438a3275539SOmar Sandoval { 439a3275539SOmar Sandoval sbq->min_shallow_depth = min_shallow_depth; 440a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 441a3275539SOmar Sandoval } 442a3275539SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 443a3275539SOmar Sandoval 44488459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 44588459642SOmar Sandoval { 44688459642SOmar Sandoval int i, wake_index; 44788459642SOmar Sandoval 44888459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 44988459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 45088459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 45188459642SOmar Sandoval 45288459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 45388459642SOmar Sandoval int o = atomic_read(&sbq->wake_index); 45488459642SOmar Sandoval 45588459642SOmar Sandoval if (wake_index != o) 45688459642SOmar Sandoval atomic_cmpxchg(&sbq->wake_index, o, wake_index); 45788459642SOmar Sandoval return ws; 45888459642SOmar Sandoval } 45988459642SOmar Sandoval 46088459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 46188459642SOmar Sandoval } 46288459642SOmar Sandoval 46388459642SOmar Sandoval return NULL; 46488459642SOmar Sandoval } 46588459642SOmar Sandoval 466c854ab57SJens Axboe static bool __sbq_wake_up(struct sbitmap_queue *sbq) 46788459642SOmar Sandoval { 46888459642SOmar Sandoval struct sbq_wait_state *ws; 4696c0ca7aeSOmar Sandoval unsigned int wake_batch; 47088459642SOmar Sandoval int wait_cnt; 47188459642SOmar Sandoval 47288459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 47388459642SOmar Sandoval if (!ws) 474c854ab57SJens Axboe return false; 47588459642SOmar Sandoval 47688459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 4776c0ca7aeSOmar Sandoval if (wait_cnt <= 0) { 478c854ab57SJens Axboe int ret; 479c854ab57SJens Axboe 4806c0ca7aeSOmar Sandoval wake_batch = READ_ONCE(sbq->wake_batch); 481c854ab57SJens Axboe 4826c0ca7aeSOmar Sandoval /* 4836c0ca7aeSOmar Sandoval * Pairs with the memory barrier in sbitmap_queue_resize() to 4846c0ca7aeSOmar Sandoval * ensure that we see the batch size update before the wait 4856c0ca7aeSOmar Sandoval * count is reset. 4866c0ca7aeSOmar Sandoval */ 4876c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 488c854ab57SJens Axboe 4896c0ca7aeSOmar Sandoval /* 490c854ab57SJens Axboe * For concurrent callers of this, the one that failed the 491c854ab57SJens Axboe * atomic_cmpxhcg() race should call this function again 492c854ab57SJens Axboe * to wakeup a new batch on a different 'ws'. 4936c0ca7aeSOmar Sandoval */ 494c854ab57SJens Axboe ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 495c854ab57SJens Axboe if (ret == wait_cnt) { 49688459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 4974e5dff41SJens Axboe wake_up_nr(&ws->wait, wake_batch); 498c854ab57SJens Axboe return false; 49988459642SOmar Sandoval } 500c854ab57SJens Axboe 501c854ab57SJens Axboe return true; 502c854ab57SJens Axboe } 503c854ab57SJens Axboe 504c854ab57SJens Axboe return false; 505c854ab57SJens Axboe } 506c854ab57SJens Axboe 507e6fc4649SMing Lei void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 508c854ab57SJens Axboe { 509c854ab57SJens Axboe while (__sbq_wake_up(sbq)) 510c854ab57SJens Axboe ; 51188459642SOmar Sandoval } 512e6fc4649SMing Lei EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 51388459642SOmar Sandoval 51440aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 515f4a644dbSOmar Sandoval unsigned int cpu) 51688459642SOmar Sandoval { 5174ace53f1SOmar Sandoval sbitmap_clear_bit_unlock(&sbq->sb, nr); 518e6fc4649SMing Lei /* 519e6fc4649SMing Lei * Pairs with the memory barrier in set_current_state() to ensure the 520e6fc4649SMing Lei * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 521e6fc4649SMing Lei * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 522e6fc4649SMing Lei * waiter. See the comment on waitqueue_active(). 523e6fc4649SMing Lei */ 524e6fc4649SMing Lei smp_mb__after_atomic(); 525e6fc4649SMing Lei sbitmap_queue_wake_up(sbq); 526e6fc4649SMing Lei 5275c64a8dfSOmar Sandoval if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 52840aabb67SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 52988459642SOmar Sandoval } 53088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 53188459642SOmar Sandoval 53288459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 53388459642SOmar Sandoval { 53488459642SOmar Sandoval int i, wake_index; 53588459642SOmar Sandoval 53688459642SOmar Sandoval /* 537f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 538e6fc4649SMing Lei * sbitmap_queue_wake_up(). 53988459642SOmar Sandoval */ 54088459642SOmar Sandoval smp_mb(); 54188459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 54288459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 54388459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 54488459642SOmar Sandoval 54588459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 54688459642SOmar Sandoval wake_up(&ws->wait); 54788459642SOmar Sandoval 54888459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 54988459642SOmar Sandoval } 55088459642SOmar Sandoval } 55188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 55224af1ccfSOmar Sandoval 55324af1ccfSOmar Sandoval void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 55424af1ccfSOmar Sandoval { 55524af1ccfSOmar Sandoval bool first; 55624af1ccfSOmar Sandoval int i; 55724af1ccfSOmar Sandoval 55824af1ccfSOmar Sandoval sbitmap_show(&sbq->sb, m); 55924af1ccfSOmar Sandoval 56024af1ccfSOmar Sandoval seq_puts(m, "alloc_hint={"); 56124af1ccfSOmar Sandoval first = true; 56224af1ccfSOmar Sandoval for_each_possible_cpu(i) { 56324af1ccfSOmar Sandoval if (!first) 56424af1ccfSOmar Sandoval seq_puts(m, ", "); 56524af1ccfSOmar Sandoval first = false; 56624af1ccfSOmar Sandoval seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 56724af1ccfSOmar Sandoval } 56824af1ccfSOmar Sandoval seq_puts(m, "}\n"); 56924af1ccfSOmar Sandoval 57024af1ccfSOmar Sandoval seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 57124af1ccfSOmar Sandoval seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 57224af1ccfSOmar Sandoval 57324af1ccfSOmar Sandoval seq_puts(m, "ws={\n"); 57424af1ccfSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 57524af1ccfSOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[i]; 57624af1ccfSOmar Sandoval 57724af1ccfSOmar Sandoval seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 57824af1ccfSOmar Sandoval atomic_read(&ws->wait_cnt), 57924af1ccfSOmar Sandoval waitqueue_active(&ws->wait) ? "active" : "inactive"); 58024af1ccfSOmar Sandoval } 58124af1ccfSOmar Sandoval seq_puts(m, "}\n"); 58224af1ccfSOmar Sandoval 58324af1ccfSOmar Sandoval seq_printf(m, "round_robin=%d\n", sbq->round_robin); 584a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 58524af1ccfSOmar Sandoval } 58624af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 587