1 /* 2 * Copyright (C) 2016 Facebook 3 * Copyright (C) 2013-2014 Jens Axboe 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public 7 * License v2 as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18 #include <linux/sched.h> 19 #include <linux/random.h> 20 #include <linux/sbitmap.h> 21 #include <linux/seq_file.h> 22 23 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 24 gfp_t flags, int node) 25 { 26 unsigned int bits_per_word; 27 unsigned int i; 28 29 if (shift < 0) { 30 shift = ilog2(BITS_PER_LONG); 31 /* 32 * If the bitmap is small, shrink the number of bits per word so 33 * we spread over a few cachelines, at least. If less than 4 34 * bits, just forget about it, it's not going to work optimally 35 * anyway. 36 */ 37 if (depth >= 4) { 38 while ((4U << shift) > depth) 39 shift--; 40 } 41 } 42 bits_per_word = 1U << shift; 43 if (bits_per_word > BITS_PER_LONG) 44 return -EINVAL; 45 46 sb->shift = shift; 47 sb->depth = depth; 48 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 49 50 if (depth == 0) { 51 sb->map = NULL; 52 return 0; 53 } 54 55 sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 56 if (!sb->map) 57 return -ENOMEM; 58 59 for (i = 0; i < sb->map_nr; i++) { 60 sb->map[i].depth = min(depth, bits_per_word); 61 depth -= sb->map[i].depth; 62 } 63 return 0; 64 } 65 EXPORT_SYMBOL_GPL(sbitmap_init_node); 66 67 void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 68 { 69 unsigned int bits_per_word = 1U << sb->shift; 70 unsigned int i; 71 72 sb->depth = depth; 73 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 74 75 for (i = 0; i < sb->map_nr; i++) { 76 sb->map[i].depth = min(depth, bits_per_word); 77 depth -= sb->map[i].depth; 78 } 79 } 80 EXPORT_SYMBOL_GPL(sbitmap_resize); 81 82 static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 83 unsigned int hint, bool wrap) 84 { 85 unsigned int orig_hint = hint; 86 int nr; 87 88 while (1) { 89 nr = find_next_zero_bit(word, depth, hint); 90 if (unlikely(nr >= depth)) { 91 /* 92 * We started with an offset, and we didn't reset the 93 * offset to 0 in a failure case, so start from 0 to 94 * exhaust the map. 95 */ 96 if (orig_hint && hint && wrap) { 97 hint = orig_hint = 0; 98 continue; 99 } 100 return -1; 101 } 102 103 if (!test_and_set_bit_lock(nr, word)) 104 break; 105 106 hint = nr + 1; 107 if (hint >= depth - 1) 108 hint = 0; 109 } 110 111 return nr; 112 } 113 114 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 115 { 116 unsigned int i, index; 117 int nr = -1; 118 119 index = SB_NR_TO_INDEX(sb, alloc_hint); 120 121 for (i = 0; i < sb->map_nr; i++) { 122 nr = __sbitmap_get_word(&sb->map[index].word, 123 sb->map[index].depth, 124 SB_NR_TO_BIT(sb, alloc_hint), 125 !round_robin); 126 if (nr != -1) { 127 nr += index << sb->shift; 128 break; 129 } 130 131 /* Jump to next index. */ 132 index++; 133 alloc_hint = index << sb->shift; 134 135 if (index >= sb->map_nr) { 136 index = 0; 137 alloc_hint = 0; 138 } 139 } 140 141 return nr; 142 } 143 EXPORT_SYMBOL_GPL(sbitmap_get); 144 145 int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 146 unsigned long shallow_depth) 147 { 148 unsigned int i, index; 149 int nr = -1; 150 151 index = SB_NR_TO_INDEX(sb, alloc_hint); 152 153 for (i = 0; i < sb->map_nr; i++) { 154 nr = __sbitmap_get_word(&sb->map[index].word, 155 min(sb->map[index].depth, shallow_depth), 156 SB_NR_TO_BIT(sb, alloc_hint), true); 157 if (nr != -1) { 158 nr += index << sb->shift; 159 break; 160 } 161 162 /* Jump to next index. */ 163 index++; 164 alloc_hint = index << sb->shift; 165 166 if (index >= sb->map_nr) { 167 index = 0; 168 alloc_hint = 0; 169 } 170 } 171 172 return nr; 173 } 174 EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 175 176 bool sbitmap_any_bit_set(const struct sbitmap *sb) 177 { 178 unsigned int i; 179 180 for (i = 0; i < sb->map_nr; i++) { 181 if (sb->map[i].word) 182 return true; 183 } 184 return false; 185 } 186 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 187 188 bool sbitmap_any_bit_clear(const struct sbitmap *sb) 189 { 190 unsigned int i; 191 192 for (i = 0; i < sb->map_nr; i++) { 193 const struct sbitmap_word *word = &sb->map[i]; 194 unsigned long ret; 195 196 ret = find_first_zero_bit(&word->word, word->depth); 197 if (ret < word->depth) 198 return true; 199 } 200 return false; 201 } 202 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 203 204 unsigned int sbitmap_weight(const struct sbitmap *sb) 205 { 206 unsigned int i, weight = 0; 207 208 for (i = 0; i < sb->map_nr; i++) { 209 const struct sbitmap_word *word = &sb->map[i]; 210 211 weight += bitmap_weight(&word->word, word->depth); 212 } 213 return weight; 214 } 215 EXPORT_SYMBOL_GPL(sbitmap_weight); 216 217 void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 218 { 219 seq_printf(m, "depth=%u\n", sb->depth); 220 seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 221 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 222 seq_printf(m, "map_nr=%u\n", sb->map_nr); 223 } 224 EXPORT_SYMBOL_GPL(sbitmap_show); 225 226 static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 227 { 228 if ((offset & 0xf) == 0) { 229 if (offset != 0) 230 seq_putc(m, '\n'); 231 seq_printf(m, "%08x:", offset); 232 } 233 if ((offset & 0x1) == 0) 234 seq_putc(m, ' '); 235 seq_printf(m, "%02x", byte); 236 } 237 238 void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 239 { 240 u8 byte = 0; 241 unsigned int byte_bits = 0; 242 unsigned int offset = 0; 243 int i; 244 245 for (i = 0; i < sb->map_nr; i++) { 246 unsigned long word = READ_ONCE(sb->map[i].word); 247 unsigned int word_bits = READ_ONCE(sb->map[i].depth); 248 249 while (word_bits > 0) { 250 unsigned int bits = min(8 - byte_bits, word_bits); 251 252 byte |= (word & (BIT(bits) - 1)) << byte_bits; 253 byte_bits += bits; 254 if (byte_bits == 8) { 255 emit_byte(m, offset, byte); 256 byte = 0; 257 byte_bits = 0; 258 offset++; 259 } 260 word >>= bits; 261 word_bits -= bits; 262 } 263 } 264 if (byte_bits) { 265 emit_byte(m, offset, byte); 266 offset++; 267 } 268 if (offset) 269 seq_putc(m, '\n'); 270 } 271 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 272 273 static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 274 unsigned int depth) 275 { 276 unsigned int wake_batch; 277 unsigned int shallow_depth; 278 279 /* 280 * For each batch, we wake up one queue. We need to make sure that our 281 * batch size is small enough that the full depth of the bitmap, 282 * potentially limited by a shallow depth, is enough to wake up all of 283 * the queues. 284 * 285 * Each full word of the bitmap has bits_per_word bits, and there might 286 * be a partial word. There are depth / bits_per_word full words and 287 * depth % bits_per_word bits left over. In bitwise arithmetic: 288 * 289 * bits_per_word = 1 << shift 290 * depth / bits_per_word = depth >> shift 291 * depth % bits_per_word = depth & ((1 << shift) - 1) 292 * 293 * Each word can be limited to sbq->min_shallow_depth bits. 294 */ 295 shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 296 depth = ((depth >> sbq->sb.shift) * shallow_depth + 297 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 298 wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 299 SBQ_WAKE_BATCH); 300 301 return wake_batch; 302 } 303 304 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 305 int shift, bool round_robin, gfp_t flags, int node) 306 { 307 int ret; 308 int i; 309 310 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 311 if (ret) 312 return ret; 313 314 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 315 if (!sbq->alloc_hint) { 316 sbitmap_free(&sbq->sb); 317 return -ENOMEM; 318 } 319 320 if (depth && !round_robin) { 321 for_each_possible_cpu(i) 322 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 323 } 324 325 sbq->min_shallow_depth = UINT_MAX; 326 sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 327 atomic_set(&sbq->wake_index, 0); 328 329 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 330 if (!sbq->ws) { 331 free_percpu(sbq->alloc_hint); 332 sbitmap_free(&sbq->sb); 333 return -ENOMEM; 334 } 335 336 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 337 init_waitqueue_head(&sbq->ws[i].wait); 338 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 339 } 340 341 sbq->round_robin = round_robin; 342 return 0; 343 } 344 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 345 346 static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 347 unsigned int depth) 348 { 349 unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 350 int i; 351 352 if (sbq->wake_batch != wake_batch) { 353 WRITE_ONCE(sbq->wake_batch, wake_batch); 354 /* 355 * Pairs with the memory barrier in sbitmap_queue_wake_up() 356 * to ensure that the batch size is updated before the wait 357 * counts. 358 */ 359 smp_mb__before_atomic(); 360 for (i = 0; i < SBQ_WAIT_QUEUES; i++) 361 atomic_set(&sbq->ws[i].wait_cnt, 1); 362 } 363 } 364 365 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 366 { 367 sbitmap_queue_update_wake_batch(sbq, depth); 368 sbitmap_resize(&sbq->sb, depth); 369 } 370 EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 371 372 int __sbitmap_queue_get(struct sbitmap_queue *sbq) 373 { 374 unsigned int hint, depth; 375 int nr; 376 377 hint = this_cpu_read(*sbq->alloc_hint); 378 depth = READ_ONCE(sbq->sb.depth); 379 if (unlikely(hint >= depth)) { 380 hint = depth ? prandom_u32() % depth : 0; 381 this_cpu_write(*sbq->alloc_hint, hint); 382 } 383 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 384 385 if (nr == -1) { 386 /* If the map is full, a hint won't do us much good. */ 387 this_cpu_write(*sbq->alloc_hint, 0); 388 } else if (nr == hint || unlikely(sbq->round_robin)) { 389 /* Only update the hint if we used it. */ 390 hint = nr + 1; 391 if (hint >= depth - 1) 392 hint = 0; 393 this_cpu_write(*sbq->alloc_hint, hint); 394 } 395 396 return nr; 397 } 398 EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 399 400 int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 401 unsigned int shallow_depth) 402 { 403 unsigned int hint, depth; 404 int nr; 405 406 WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 407 408 hint = this_cpu_read(*sbq->alloc_hint); 409 depth = READ_ONCE(sbq->sb.depth); 410 if (unlikely(hint >= depth)) { 411 hint = depth ? prandom_u32() % depth : 0; 412 this_cpu_write(*sbq->alloc_hint, hint); 413 } 414 nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 415 416 if (nr == -1) { 417 /* If the map is full, a hint won't do us much good. */ 418 this_cpu_write(*sbq->alloc_hint, 0); 419 } else if (nr == hint || unlikely(sbq->round_robin)) { 420 /* Only update the hint if we used it. */ 421 hint = nr + 1; 422 if (hint >= depth - 1) 423 hint = 0; 424 this_cpu_write(*sbq->alloc_hint, hint); 425 } 426 427 return nr; 428 } 429 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 430 431 void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 432 unsigned int min_shallow_depth) 433 { 434 sbq->min_shallow_depth = min_shallow_depth; 435 sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 436 } 437 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 438 439 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 440 { 441 int i, wake_index; 442 443 wake_index = atomic_read(&sbq->wake_index); 444 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 445 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 446 447 if (waitqueue_active(&ws->wait)) { 448 int o = atomic_read(&sbq->wake_index); 449 450 if (wake_index != o) 451 atomic_cmpxchg(&sbq->wake_index, o, wake_index); 452 return ws; 453 } 454 455 wake_index = sbq_index_inc(wake_index); 456 } 457 458 return NULL; 459 } 460 461 static bool __sbq_wake_up(struct sbitmap_queue *sbq) 462 { 463 struct sbq_wait_state *ws; 464 unsigned int wake_batch; 465 int wait_cnt; 466 467 ws = sbq_wake_ptr(sbq); 468 if (!ws) 469 return false; 470 471 wait_cnt = atomic_dec_return(&ws->wait_cnt); 472 if (wait_cnt <= 0) { 473 int ret; 474 475 wake_batch = READ_ONCE(sbq->wake_batch); 476 477 /* 478 * Pairs with the memory barrier in sbitmap_queue_resize() to 479 * ensure that we see the batch size update before the wait 480 * count is reset. 481 */ 482 smp_mb__before_atomic(); 483 484 /* 485 * For concurrent callers of this, the one that failed the 486 * atomic_cmpxhcg() race should call this function again 487 * to wakeup a new batch on a different 'ws'. 488 */ 489 ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 490 if (ret == wait_cnt) { 491 sbq_index_atomic_inc(&sbq->wake_index); 492 wake_up_nr(&ws->wait, wake_batch); 493 return false; 494 } 495 496 return true; 497 } 498 499 return false; 500 } 501 502 void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 503 { 504 while (__sbq_wake_up(sbq)) 505 ; 506 } 507 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 508 509 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 510 unsigned int cpu) 511 { 512 sbitmap_clear_bit_unlock(&sbq->sb, nr); 513 /* 514 * Pairs with the memory barrier in set_current_state() to ensure the 515 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 516 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 517 * waiter. See the comment on waitqueue_active(). 518 */ 519 smp_mb__after_atomic(); 520 sbitmap_queue_wake_up(sbq); 521 522 if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 523 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 524 } 525 EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 526 527 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 528 { 529 int i, wake_index; 530 531 /* 532 * Pairs with the memory barrier in set_current_state() like in 533 * sbitmap_queue_wake_up(). 534 */ 535 smp_mb(); 536 wake_index = atomic_read(&sbq->wake_index); 537 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 538 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 539 540 if (waitqueue_active(&ws->wait)) 541 wake_up(&ws->wait); 542 543 wake_index = sbq_index_inc(wake_index); 544 } 545 } 546 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 547 548 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 549 { 550 bool first; 551 int i; 552 553 sbitmap_show(&sbq->sb, m); 554 555 seq_puts(m, "alloc_hint={"); 556 first = true; 557 for_each_possible_cpu(i) { 558 if (!first) 559 seq_puts(m, ", "); 560 first = false; 561 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 562 } 563 seq_puts(m, "}\n"); 564 565 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 566 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 567 568 seq_puts(m, "ws={\n"); 569 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 570 struct sbq_wait_state *ws = &sbq->ws[i]; 571 572 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 573 atomic_read(&ws->wait_cnt), 574 waitqueue_active(&ws->wait) ? "active" : "inactive"); 575 } 576 seq_puts(m, "}\n"); 577 578 seq_printf(m, "round_robin=%d\n", sbq->round_robin); 579 seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 580 } 581 EXPORT_SYMBOL_GPL(sbitmap_queue_show); 582