1 /* 2 * Copyright (C) 2016 Facebook 3 * Copyright (C) 2013-2014 Jens Axboe 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public 7 * License v2 as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18 #include <linux/sched.h> 19 #include <linux/random.h> 20 #include <linux/sbitmap.h> 21 #include <linux/seq_file.h> 22 23 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 24 gfp_t flags, int node) 25 { 26 unsigned int bits_per_word; 27 unsigned int i; 28 29 if (shift < 0) { 30 shift = ilog2(BITS_PER_LONG); 31 /* 32 * If the bitmap is small, shrink the number of bits per word so 33 * we spread over a few cachelines, at least. If less than 4 34 * bits, just forget about it, it's not going to work optimally 35 * anyway. 36 */ 37 if (depth >= 4) { 38 while ((4U << shift) > depth) 39 shift--; 40 } 41 } 42 bits_per_word = 1U << shift; 43 if (bits_per_word > BITS_PER_LONG) 44 return -EINVAL; 45 46 sb->shift = shift; 47 sb->depth = depth; 48 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 49 50 if (depth == 0) { 51 sb->map = NULL; 52 return 0; 53 } 54 55 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); 56 if (!sb->map) 57 return -ENOMEM; 58 59 for (i = 0; i < sb->map_nr; i++) { 60 sb->map[i].depth = min(depth, bits_per_word); 61 depth -= sb->map[i].depth; 62 } 63 return 0; 64 } 65 EXPORT_SYMBOL_GPL(sbitmap_init_node); 66 67 void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 68 { 69 unsigned int bits_per_word = 1U << sb->shift; 70 unsigned int i; 71 72 sb->depth = depth; 73 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 74 75 for (i = 0; i < sb->map_nr; i++) { 76 sb->map[i].depth = min(depth, bits_per_word); 77 depth -= sb->map[i].depth; 78 } 79 } 80 EXPORT_SYMBOL_GPL(sbitmap_resize); 81 82 static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 83 unsigned int hint, bool wrap) 84 { 85 unsigned int orig_hint = hint; 86 int nr; 87 88 while (1) { 89 nr = find_next_zero_bit(word, depth, hint); 90 if (unlikely(nr >= depth)) { 91 /* 92 * We started with an offset, and we didn't reset the 93 * offset to 0 in a failure case, so start from 0 to 94 * exhaust the map. 95 */ 96 if (orig_hint && hint && wrap) { 97 hint = orig_hint = 0; 98 continue; 99 } 100 return -1; 101 } 102 103 if (!test_and_set_bit(nr, word)) 104 break; 105 106 hint = nr + 1; 107 if (hint >= depth - 1) 108 hint = 0; 109 } 110 111 return nr; 112 } 113 114 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 115 { 116 unsigned int i, index; 117 int nr = -1; 118 119 index = SB_NR_TO_INDEX(sb, alloc_hint); 120 121 for (i = 0; i < sb->map_nr; i++) { 122 nr = __sbitmap_get_word(&sb->map[index].word, 123 sb->map[index].depth, 124 SB_NR_TO_BIT(sb, alloc_hint), 125 !round_robin); 126 if (nr != -1) { 127 nr += index << sb->shift; 128 break; 129 } 130 131 /* Jump to next index. */ 132 index++; 133 alloc_hint = index << sb->shift; 134 135 if (index >= sb->map_nr) { 136 index = 0; 137 alloc_hint = 0; 138 } 139 } 140 141 return nr; 142 } 143 EXPORT_SYMBOL_GPL(sbitmap_get); 144 145 int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 146 unsigned long shallow_depth) 147 { 148 unsigned int i, index; 149 int nr = -1; 150 151 index = SB_NR_TO_INDEX(sb, alloc_hint); 152 153 for (i = 0; i < sb->map_nr; i++) { 154 nr = __sbitmap_get_word(&sb->map[index].word, 155 min(sb->map[index].depth, shallow_depth), 156 SB_NR_TO_BIT(sb, alloc_hint), true); 157 if (nr != -1) { 158 nr += index << sb->shift; 159 break; 160 } 161 162 /* Jump to next index. */ 163 index++; 164 alloc_hint = index << sb->shift; 165 166 if (index >= sb->map_nr) { 167 index = 0; 168 alloc_hint = 0; 169 } 170 } 171 172 return nr; 173 } 174 EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 175 176 bool sbitmap_any_bit_set(const struct sbitmap *sb) 177 { 178 unsigned int i; 179 180 for (i = 0; i < sb->map_nr; i++) { 181 if (sb->map[i].word) 182 return true; 183 } 184 return false; 185 } 186 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 187 188 bool sbitmap_any_bit_clear(const struct sbitmap *sb) 189 { 190 unsigned int i; 191 192 for (i = 0; i < sb->map_nr; i++) { 193 const struct sbitmap_word *word = &sb->map[i]; 194 unsigned long ret; 195 196 ret = find_first_zero_bit(&word->word, word->depth); 197 if (ret < word->depth) 198 return true; 199 } 200 return false; 201 } 202 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 203 204 unsigned int sbitmap_weight(const struct sbitmap *sb) 205 { 206 unsigned int i, weight = 0; 207 208 for (i = 0; i < sb->map_nr; i++) { 209 const struct sbitmap_word *word = &sb->map[i]; 210 211 weight += bitmap_weight(&word->word, word->depth); 212 } 213 return weight; 214 } 215 EXPORT_SYMBOL_GPL(sbitmap_weight); 216 217 void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 218 { 219 seq_printf(m, "depth=%u\n", sb->depth); 220 seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 221 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 222 seq_printf(m, "map_nr=%u\n", sb->map_nr); 223 } 224 EXPORT_SYMBOL_GPL(sbitmap_show); 225 226 static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 227 { 228 if ((offset & 0xf) == 0) { 229 if (offset != 0) 230 seq_putc(m, '\n'); 231 seq_printf(m, "%08x:", offset); 232 } 233 if ((offset & 0x1) == 0) 234 seq_putc(m, ' '); 235 seq_printf(m, "%02x", byte); 236 } 237 238 void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 239 { 240 u8 byte = 0; 241 unsigned int byte_bits = 0; 242 unsigned int offset = 0; 243 int i; 244 245 for (i = 0; i < sb->map_nr; i++) { 246 unsigned long word = READ_ONCE(sb->map[i].word); 247 unsigned int word_bits = READ_ONCE(sb->map[i].depth); 248 249 while (word_bits > 0) { 250 unsigned int bits = min(8 - byte_bits, word_bits); 251 252 byte |= (word & (BIT(bits) - 1)) << byte_bits; 253 byte_bits += bits; 254 if (byte_bits == 8) { 255 emit_byte(m, offset, byte); 256 byte = 0; 257 byte_bits = 0; 258 offset++; 259 } 260 word >>= bits; 261 word_bits -= bits; 262 } 263 } 264 if (byte_bits) { 265 emit_byte(m, offset, byte); 266 offset++; 267 } 268 if (offset) 269 seq_putc(m, '\n'); 270 } 271 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 272 273 static unsigned int sbq_calc_wake_batch(unsigned int depth) 274 { 275 unsigned int wake_batch; 276 277 /* 278 * For each batch, we wake up one queue. We need to make sure that our 279 * batch size is small enough that the full depth of the bitmap is 280 * enough to wake up all of the queues. 281 */ 282 wake_batch = SBQ_WAKE_BATCH; 283 if (wake_batch > depth / SBQ_WAIT_QUEUES) 284 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); 285 286 return wake_batch; 287 } 288 289 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 290 int shift, bool round_robin, gfp_t flags, int node) 291 { 292 int ret; 293 int i; 294 295 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 296 if (ret) 297 return ret; 298 299 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 300 if (!sbq->alloc_hint) { 301 sbitmap_free(&sbq->sb); 302 return -ENOMEM; 303 } 304 305 if (depth && !round_robin) { 306 for_each_possible_cpu(i) 307 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 308 } 309 310 sbq->wake_batch = sbq_calc_wake_batch(depth); 311 atomic_set(&sbq->wake_index, 0); 312 313 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 314 if (!sbq->ws) { 315 free_percpu(sbq->alloc_hint); 316 sbitmap_free(&sbq->sb); 317 return -ENOMEM; 318 } 319 320 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 321 init_waitqueue_head(&sbq->ws[i].wait); 322 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 323 } 324 325 sbq->round_robin = round_robin; 326 return 0; 327 } 328 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 329 330 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 331 { 332 unsigned int wake_batch = sbq_calc_wake_batch(depth); 333 int i; 334 335 if (sbq->wake_batch != wake_batch) { 336 WRITE_ONCE(sbq->wake_batch, wake_batch); 337 /* 338 * Pairs with the memory barrier in sbq_wake_up() to ensure that 339 * the batch size is updated before the wait counts. 340 */ 341 smp_mb__before_atomic(); 342 for (i = 0; i < SBQ_WAIT_QUEUES; i++) 343 atomic_set(&sbq->ws[i].wait_cnt, 1); 344 } 345 sbitmap_resize(&sbq->sb, depth); 346 } 347 EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 348 349 int __sbitmap_queue_get(struct sbitmap_queue *sbq) 350 { 351 unsigned int hint, depth; 352 int nr; 353 354 hint = this_cpu_read(*sbq->alloc_hint); 355 depth = READ_ONCE(sbq->sb.depth); 356 if (unlikely(hint >= depth)) { 357 hint = depth ? prandom_u32() % depth : 0; 358 this_cpu_write(*sbq->alloc_hint, hint); 359 } 360 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 361 362 if (nr == -1) { 363 /* If the map is full, a hint won't do us much good. */ 364 this_cpu_write(*sbq->alloc_hint, 0); 365 } else if (nr == hint || unlikely(sbq->round_robin)) { 366 /* Only update the hint if we used it. */ 367 hint = nr + 1; 368 if (hint >= depth - 1) 369 hint = 0; 370 this_cpu_write(*sbq->alloc_hint, hint); 371 } 372 373 return nr; 374 } 375 EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 376 377 int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 378 unsigned int shallow_depth) 379 { 380 unsigned int hint, depth; 381 int nr; 382 383 hint = this_cpu_read(*sbq->alloc_hint); 384 depth = READ_ONCE(sbq->sb.depth); 385 if (unlikely(hint >= depth)) { 386 hint = depth ? prandom_u32() % depth : 0; 387 this_cpu_write(*sbq->alloc_hint, hint); 388 } 389 nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 390 391 if (nr == -1) { 392 /* If the map is full, a hint won't do us much good. */ 393 this_cpu_write(*sbq->alloc_hint, 0); 394 } else if (nr == hint || unlikely(sbq->round_robin)) { 395 /* Only update the hint if we used it. */ 396 hint = nr + 1; 397 if (hint >= depth - 1) 398 hint = 0; 399 this_cpu_write(*sbq->alloc_hint, hint); 400 } 401 402 return nr; 403 } 404 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 405 406 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 407 { 408 int i, wake_index; 409 410 wake_index = atomic_read(&sbq->wake_index); 411 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 412 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 413 414 if (waitqueue_active(&ws->wait)) { 415 int o = atomic_read(&sbq->wake_index); 416 417 if (wake_index != o) 418 atomic_cmpxchg(&sbq->wake_index, o, wake_index); 419 return ws; 420 } 421 422 wake_index = sbq_index_inc(wake_index); 423 } 424 425 return NULL; 426 } 427 428 static void sbq_wake_up(struct sbitmap_queue *sbq) 429 { 430 struct sbq_wait_state *ws; 431 unsigned int wake_batch; 432 int wait_cnt; 433 434 /* 435 * Pairs with the memory barrier in set_current_state() to ensure the 436 * proper ordering of clear_bit()/waitqueue_active() in the waker and 437 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See 438 * the comment on waitqueue_active(). This is __after_atomic because we 439 * just did clear_bit() in the caller. 440 */ 441 smp_mb__after_atomic(); 442 443 ws = sbq_wake_ptr(sbq); 444 if (!ws) 445 return; 446 447 wait_cnt = atomic_dec_return(&ws->wait_cnt); 448 if (wait_cnt <= 0) { 449 wake_batch = READ_ONCE(sbq->wake_batch); 450 /* 451 * Pairs with the memory barrier in sbitmap_queue_resize() to 452 * ensure that we see the batch size update before the wait 453 * count is reset. 454 */ 455 smp_mb__before_atomic(); 456 /* 457 * If there are concurrent callers to sbq_wake_up(), the last 458 * one to decrement the wait count below zero will bump it back 459 * up. If there is a concurrent resize, the count reset will 460 * either cause the cmpxchg to fail or overwrite after the 461 * cmpxchg. 462 */ 463 atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); 464 sbq_index_atomic_inc(&sbq->wake_index); 465 wake_up(&ws->wait); 466 } 467 } 468 469 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 470 unsigned int cpu) 471 { 472 sbitmap_clear_bit(&sbq->sb, nr); 473 sbq_wake_up(sbq); 474 if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 475 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 476 } 477 EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 478 479 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 480 { 481 int i, wake_index; 482 483 /* 484 * Pairs with the memory barrier in set_current_state() like in 485 * sbq_wake_up(). 486 */ 487 smp_mb(); 488 wake_index = atomic_read(&sbq->wake_index); 489 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 490 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 491 492 if (waitqueue_active(&ws->wait)) 493 wake_up(&ws->wait); 494 495 wake_index = sbq_index_inc(wake_index); 496 } 497 } 498 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 499 500 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 501 { 502 bool first; 503 int i; 504 505 sbitmap_show(&sbq->sb, m); 506 507 seq_puts(m, "alloc_hint={"); 508 first = true; 509 for_each_possible_cpu(i) { 510 if (!first) 511 seq_puts(m, ", "); 512 first = false; 513 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 514 } 515 seq_puts(m, "}\n"); 516 517 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 518 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 519 520 seq_puts(m, "ws={\n"); 521 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 522 struct sbq_wait_state *ws = &sbq->ws[i]; 523 524 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 525 atomic_read(&ws->wait_cnt), 526 waitqueue_active(&ws->wait) ? "active" : "inactive"); 527 } 528 seq_puts(m, "}\n"); 529 530 seq_printf(m, "round_robin=%d\n", sbq->round_robin); 531 } 532 EXPORT_SYMBOL_GPL(sbitmap_queue_show); 533