1 /* 2 * Copyright (C) 2016 Facebook 3 * Copyright (C) 2013-2014 Jens Axboe 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public 7 * License v2 as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18 #include <linux/sched.h> 19 #include <linux/random.h> 20 #include <linux/sbitmap.h> 21 #include <linux/seq_file.h> 22 23 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 24 gfp_t flags, int node) 25 { 26 unsigned int bits_per_word; 27 unsigned int i; 28 29 if (shift < 0) { 30 shift = ilog2(BITS_PER_LONG); 31 /* 32 * If the bitmap is small, shrink the number of bits per word so 33 * we spread over a few cachelines, at least. If less than 4 34 * bits, just forget about it, it's not going to work optimally 35 * anyway. 36 */ 37 if (depth >= 4) { 38 while ((4U << shift) > depth) 39 shift--; 40 } 41 } 42 bits_per_word = 1U << shift; 43 if (bits_per_word > BITS_PER_LONG) 44 return -EINVAL; 45 46 sb->shift = shift; 47 sb->depth = depth; 48 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 49 50 if (depth == 0) { 51 sb->map = NULL; 52 return 0; 53 } 54 55 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); 56 if (!sb->map) 57 return -ENOMEM; 58 59 for (i = 0; i < sb->map_nr; i++) { 60 sb->map[i].depth = min(depth, bits_per_word); 61 depth -= sb->map[i].depth; 62 } 63 return 0; 64 } 65 EXPORT_SYMBOL_GPL(sbitmap_init_node); 66 67 void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 68 { 69 unsigned int bits_per_word = 1U << sb->shift; 70 unsigned int i; 71 72 sb->depth = depth; 73 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 74 75 for (i = 0; i < sb->map_nr; i++) { 76 sb->map[i].depth = min(depth, bits_per_word); 77 depth -= sb->map[i].depth; 78 } 79 } 80 EXPORT_SYMBOL_GPL(sbitmap_resize); 81 82 static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, 83 bool wrap) 84 { 85 unsigned int orig_hint = hint; 86 int nr; 87 88 while (1) { 89 nr = find_next_zero_bit(&word->word, word->depth, hint); 90 if (unlikely(nr >= word->depth)) { 91 /* 92 * We started with an offset, and we didn't reset the 93 * offset to 0 in a failure case, so start from 0 to 94 * exhaust the map. 95 */ 96 if (orig_hint && hint && wrap) { 97 hint = orig_hint = 0; 98 continue; 99 } 100 return -1; 101 } 102 103 if (!test_and_set_bit(nr, &word->word)) 104 break; 105 106 hint = nr + 1; 107 if (hint >= word->depth - 1) 108 hint = 0; 109 } 110 111 return nr; 112 } 113 114 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 115 { 116 unsigned int i, index; 117 int nr = -1; 118 119 index = SB_NR_TO_INDEX(sb, alloc_hint); 120 121 for (i = 0; i < sb->map_nr; i++) { 122 nr = __sbitmap_get_word(&sb->map[index], 123 SB_NR_TO_BIT(sb, alloc_hint), 124 !round_robin); 125 if (nr != -1) { 126 nr += index << sb->shift; 127 break; 128 } 129 130 /* Jump to next index. */ 131 index++; 132 alloc_hint = index << sb->shift; 133 134 if (index >= sb->map_nr) { 135 index = 0; 136 alloc_hint = 0; 137 } 138 } 139 140 return nr; 141 } 142 EXPORT_SYMBOL_GPL(sbitmap_get); 143 144 bool sbitmap_any_bit_set(const struct sbitmap *sb) 145 { 146 unsigned int i; 147 148 for (i = 0; i < sb->map_nr; i++) { 149 if (sb->map[i].word) 150 return true; 151 } 152 return false; 153 } 154 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 155 156 bool sbitmap_any_bit_clear(const struct sbitmap *sb) 157 { 158 unsigned int i; 159 160 for (i = 0; i < sb->map_nr; i++) { 161 const struct sbitmap_word *word = &sb->map[i]; 162 unsigned long ret; 163 164 ret = find_first_zero_bit(&word->word, word->depth); 165 if (ret < word->depth) 166 return true; 167 } 168 return false; 169 } 170 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 171 172 unsigned int sbitmap_weight(const struct sbitmap *sb) 173 { 174 unsigned int i, weight = 0; 175 176 for (i = 0; i < sb->map_nr; i++) { 177 const struct sbitmap_word *word = &sb->map[i]; 178 179 weight += bitmap_weight(&word->word, word->depth); 180 } 181 return weight; 182 } 183 EXPORT_SYMBOL_GPL(sbitmap_weight); 184 185 void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 186 { 187 seq_printf(m, "depth=%u\n", sb->depth); 188 seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 189 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 190 seq_printf(m, "map_nr=%u\n", sb->map_nr); 191 } 192 EXPORT_SYMBOL_GPL(sbitmap_show); 193 194 static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 195 { 196 if ((offset & 0xf) == 0) { 197 if (offset != 0) 198 seq_putc(m, '\n'); 199 seq_printf(m, "%08x:", offset); 200 } 201 if ((offset & 0x1) == 0) 202 seq_putc(m, ' '); 203 seq_printf(m, "%02x", byte); 204 } 205 206 void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 207 { 208 u8 byte = 0; 209 unsigned int byte_bits = 0; 210 unsigned int offset = 0; 211 int i; 212 213 for (i = 0; i < sb->map_nr; i++) { 214 unsigned long word = READ_ONCE(sb->map[i].word); 215 unsigned int word_bits = READ_ONCE(sb->map[i].depth); 216 217 while (word_bits > 0) { 218 unsigned int bits = min(8 - byte_bits, word_bits); 219 220 byte |= (word & (BIT(bits) - 1)) << byte_bits; 221 byte_bits += bits; 222 if (byte_bits == 8) { 223 emit_byte(m, offset, byte); 224 byte = 0; 225 byte_bits = 0; 226 offset++; 227 } 228 word >>= bits; 229 word_bits -= bits; 230 } 231 } 232 if (byte_bits) { 233 emit_byte(m, offset, byte); 234 offset++; 235 } 236 if (offset) 237 seq_putc(m, '\n'); 238 } 239 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 240 241 static unsigned int sbq_calc_wake_batch(unsigned int depth) 242 { 243 unsigned int wake_batch; 244 245 /* 246 * For each batch, we wake up one queue. We need to make sure that our 247 * batch size is small enough that the full depth of the bitmap is 248 * enough to wake up all of the queues. 249 */ 250 wake_batch = SBQ_WAKE_BATCH; 251 if (wake_batch > depth / SBQ_WAIT_QUEUES) 252 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); 253 254 return wake_batch; 255 } 256 257 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 258 int shift, bool round_robin, gfp_t flags, int node) 259 { 260 int ret; 261 int i; 262 263 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 264 if (ret) 265 return ret; 266 267 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 268 if (!sbq->alloc_hint) { 269 sbitmap_free(&sbq->sb); 270 return -ENOMEM; 271 } 272 273 if (depth && !round_robin) { 274 for_each_possible_cpu(i) 275 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 276 } 277 278 sbq->wake_batch = sbq_calc_wake_batch(depth); 279 atomic_set(&sbq->wake_index, 0); 280 281 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 282 if (!sbq->ws) { 283 free_percpu(sbq->alloc_hint); 284 sbitmap_free(&sbq->sb); 285 return -ENOMEM; 286 } 287 288 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 289 init_waitqueue_head(&sbq->ws[i].wait); 290 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 291 } 292 293 sbq->round_robin = round_robin; 294 return 0; 295 } 296 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 297 298 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 299 { 300 unsigned int wake_batch = sbq_calc_wake_batch(depth); 301 int i; 302 303 if (sbq->wake_batch != wake_batch) { 304 WRITE_ONCE(sbq->wake_batch, wake_batch); 305 /* 306 * Pairs with the memory barrier in sbq_wake_up() to ensure that 307 * the batch size is updated before the wait counts. 308 */ 309 smp_mb__before_atomic(); 310 for (i = 0; i < SBQ_WAIT_QUEUES; i++) 311 atomic_set(&sbq->ws[i].wait_cnt, 1); 312 } 313 sbitmap_resize(&sbq->sb, depth); 314 } 315 EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 316 317 int __sbitmap_queue_get(struct sbitmap_queue *sbq) 318 { 319 unsigned int hint, depth; 320 int nr; 321 322 hint = this_cpu_read(*sbq->alloc_hint); 323 depth = READ_ONCE(sbq->sb.depth); 324 if (unlikely(hint >= depth)) { 325 hint = depth ? prandom_u32() % depth : 0; 326 this_cpu_write(*sbq->alloc_hint, hint); 327 } 328 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 329 330 if (nr == -1) { 331 /* If the map is full, a hint won't do us much good. */ 332 this_cpu_write(*sbq->alloc_hint, 0); 333 } else if (nr == hint || unlikely(sbq->round_robin)) { 334 /* Only update the hint if we used it. */ 335 hint = nr + 1; 336 if (hint >= depth - 1) 337 hint = 0; 338 this_cpu_write(*sbq->alloc_hint, hint); 339 } 340 341 return nr; 342 } 343 EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 344 345 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 346 { 347 int i, wake_index; 348 349 wake_index = atomic_read(&sbq->wake_index); 350 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 351 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 352 353 if (waitqueue_active(&ws->wait)) { 354 int o = atomic_read(&sbq->wake_index); 355 356 if (wake_index != o) 357 atomic_cmpxchg(&sbq->wake_index, o, wake_index); 358 return ws; 359 } 360 361 wake_index = sbq_index_inc(wake_index); 362 } 363 364 return NULL; 365 } 366 367 static void sbq_wake_up(struct sbitmap_queue *sbq) 368 { 369 struct sbq_wait_state *ws; 370 unsigned int wake_batch; 371 int wait_cnt; 372 373 /* 374 * Pairs with the memory barrier in set_current_state() to ensure the 375 * proper ordering of clear_bit()/waitqueue_active() in the waker and 376 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See 377 * the comment on waitqueue_active(). This is __after_atomic because we 378 * just did clear_bit() in the caller. 379 */ 380 smp_mb__after_atomic(); 381 382 ws = sbq_wake_ptr(sbq); 383 if (!ws) 384 return; 385 386 wait_cnt = atomic_dec_return(&ws->wait_cnt); 387 if (wait_cnt <= 0) { 388 wake_batch = READ_ONCE(sbq->wake_batch); 389 /* 390 * Pairs with the memory barrier in sbitmap_queue_resize() to 391 * ensure that we see the batch size update before the wait 392 * count is reset. 393 */ 394 smp_mb__before_atomic(); 395 /* 396 * If there are concurrent callers to sbq_wake_up(), the last 397 * one to decrement the wait count below zero will bump it back 398 * up. If there is a concurrent resize, the count reset will 399 * either cause the cmpxchg to fail or overwrite after the 400 * cmpxchg. 401 */ 402 atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); 403 sbq_index_atomic_inc(&sbq->wake_index); 404 wake_up(&ws->wait); 405 } 406 } 407 408 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 409 unsigned int cpu) 410 { 411 sbitmap_clear_bit(&sbq->sb, nr); 412 sbq_wake_up(sbq); 413 if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 414 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 415 } 416 EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 417 418 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 419 { 420 int i, wake_index; 421 422 /* 423 * Pairs with the memory barrier in set_current_state() like in 424 * sbq_wake_up(). 425 */ 426 smp_mb(); 427 wake_index = atomic_read(&sbq->wake_index); 428 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 429 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 430 431 if (waitqueue_active(&ws->wait)) 432 wake_up(&ws->wait); 433 434 wake_index = sbq_index_inc(wake_index); 435 } 436 } 437 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 438 439 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 440 { 441 bool first; 442 int i; 443 444 sbitmap_show(&sbq->sb, m); 445 446 seq_puts(m, "alloc_hint={"); 447 first = true; 448 for_each_possible_cpu(i) { 449 if (!first) 450 seq_puts(m, ", "); 451 first = false; 452 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 453 } 454 seq_puts(m, "}\n"); 455 456 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 457 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 458 459 seq_puts(m, "ws={\n"); 460 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 461 struct sbq_wait_state *ws = &sbq->ws[i]; 462 463 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 464 atomic_read(&ws->wait_cnt), 465 waitqueue_active(&ws->wait) ? "active" : "inactive"); 466 } 467 seq_puts(m, "}\n"); 468 469 seq_printf(m, "round_robin=%d\n", sbq->round_robin); 470 } 471 EXPORT_SYMBOL_GPL(sbitmap_queue_show); 472