1 /* 2 * Slab allocator functions that are independent of the allocator strategy 3 * 4 * (C) 2012 Christoph Lameter <cl@linux.com> 5 */ 6 #include <linux/slab.h> 7 8 #include <linux/mm.h> 9 #include <linux/poison.h> 10 #include <linux/interrupt.h> 11 #include <linux/memory.h> 12 #include <linux/compiler.h> 13 #include <linux/module.h> 14 #include <linux/cpu.h> 15 #include <linux/uaccess.h> 16 #include <linux/seq_file.h> 17 #include <linux/proc_fs.h> 18 #include <asm/cacheflush.h> 19 #include <asm/tlbflush.h> 20 #include <asm/page.h> 21 #include <linux/memcontrol.h> 22 #include <trace/events/kmem.h> 23 24 #include "slab.h" 25 26 enum slab_state slab_state; 27 LIST_HEAD(slab_caches); 28 DEFINE_MUTEX(slab_mutex); 29 struct kmem_cache *kmem_cache; 30 31 #ifdef CONFIG_DEBUG_VM 32 static int kmem_cache_sanity_check(const char *name, size_t size) 33 { 34 struct kmem_cache *s = NULL; 35 36 if (!name || in_interrupt() || size < sizeof(void *) || 37 size > KMALLOC_MAX_SIZE) { 38 pr_err("kmem_cache_create(%s) integrity check failed\n", name); 39 return -EINVAL; 40 } 41 42 list_for_each_entry(s, &slab_caches, list) { 43 char tmp; 44 int res; 45 46 /* 47 * This happens when the module gets unloaded and doesn't 48 * destroy its slab cache and no-one else reuses the vmalloc 49 * area of the module. Print a warning. 50 */ 51 res = probe_kernel_address(s->name, tmp); 52 if (res) { 53 pr_err("Slab cache with size %d has lost its name\n", 54 s->object_size); 55 continue; 56 } 57 58 #if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) 59 if (!strcmp(s->name, name)) { 60 pr_err("%s (%s): Cache name already exists.\n", 61 __func__, name); 62 dump_stack(); 63 s = NULL; 64 return -EINVAL; 65 } 66 #endif 67 } 68 69 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 70 return 0; 71 } 72 #else 73 static inline int kmem_cache_sanity_check(const char *name, size_t size) 74 { 75 return 0; 76 } 77 #endif 78 79 #ifdef CONFIG_MEMCG_KMEM 80 int memcg_update_all_caches(int num_memcgs) 81 { 82 struct kmem_cache *s; 83 int ret = 0; 84 mutex_lock(&slab_mutex); 85 86 list_for_each_entry(s, &slab_caches, list) { 87 if (!is_root_cache(s)) 88 continue; 89 90 ret = memcg_update_cache_size(s, num_memcgs); 91 /* 92 * See comment in memcontrol.c, memcg_update_cache_size: 93 * Instead of freeing the memory, we'll just leave the caches 94 * up to this point in an updated state. 95 */ 96 if (ret) 97 goto out; 98 } 99 100 memcg_update_array_size(num_memcgs); 101 out: 102 mutex_unlock(&slab_mutex); 103 return ret; 104 } 105 #endif 106 107 /* 108 * Figure out what the alignment of the objects will be given a set of 109 * flags, a user specified alignment and the size of the objects. 110 */ 111 unsigned long calculate_alignment(unsigned long flags, 112 unsigned long align, unsigned long size) 113 { 114 /* 115 * If the user wants hardware cache aligned objects then follow that 116 * suggestion if the object is sufficiently large. 117 * 118 * The hardware cache alignment cannot override the specified 119 * alignment though. If that is greater then use it. 120 */ 121 if (flags & SLAB_HWCACHE_ALIGN) { 122 unsigned long ralign = cache_line_size(); 123 while (size <= ralign / 2) 124 ralign /= 2; 125 align = max(align, ralign); 126 } 127 128 if (align < ARCH_SLAB_MINALIGN) 129 align = ARCH_SLAB_MINALIGN; 130 131 return ALIGN(align, sizeof(void *)); 132 } 133 134 static struct kmem_cache * 135 do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align, 136 unsigned long flags, void (*ctor)(void *), 137 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 138 { 139 struct kmem_cache *s; 140 int err; 141 142 err = -ENOMEM; 143 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); 144 if (!s) 145 goto out; 146 147 s->name = name; 148 s->object_size = object_size; 149 s->size = size; 150 s->align = align; 151 s->ctor = ctor; 152 153 err = memcg_alloc_cache_params(memcg, s, root_cache); 154 if (err) 155 goto out_free_cache; 156 157 err = __kmem_cache_create(s, flags); 158 if (err) 159 goto out_free_cache; 160 161 s->refcount = 1; 162 list_add(&s->list, &slab_caches); 163 memcg_register_cache(s); 164 out: 165 if (err) 166 return ERR_PTR(err); 167 return s; 168 169 out_free_cache: 170 memcg_free_cache_params(s); 171 kfree(s); 172 goto out; 173 } 174 175 /* 176 * kmem_cache_create - Create a cache. 177 * @name: A string which is used in /proc/slabinfo to identify this cache. 178 * @size: The size of objects to be created in this cache. 179 * @align: The required alignment for the objects. 180 * @flags: SLAB flags 181 * @ctor: A constructor for the objects. 182 * 183 * Returns a ptr to the cache on success, NULL on failure. 184 * Cannot be called within a interrupt, but can be interrupted. 185 * The @ctor is run when new pages are allocated by the cache. 186 * 187 * The flags are 188 * 189 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 190 * to catch references to uninitialised memory. 191 * 192 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 193 * for buffer overruns. 194 * 195 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 196 * cacheline. This can be beneficial if you're counting cycles as closely 197 * as davem. 198 */ 199 struct kmem_cache * 200 kmem_cache_create(const char *name, size_t size, size_t align, 201 unsigned long flags, void (*ctor)(void *)) 202 { 203 struct kmem_cache *s; 204 char *cache_name; 205 int err; 206 207 get_online_cpus(); 208 mutex_lock(&slab_mutex); 209 210 err = kmem_cache_sanity_check(name, size); 211 if (err) 212 goto out_unlock; 213 214 /* 215 * Some allocators will constraint the set of valid flags to a subset 216 * of all flags. We expect them to define CACHE_CREATE_MASK in this 217 * case, and we'll just provide them with a sanitized version of the 218 * passed flags. 219 */ 220 flags &= CACHE_CREATE_MASK; 221 222 s = __kmem_cache_alias(name, size, align, flags, ctor); 223 if (s) 224 goto out_unlock; 225 226 cache_name = kstrdup(name, GFP_KERNEL); 227 if (!cache_name) { 228 err = -ENOMEM; 229 goto out_unlock; 230 } 231 232 s = do_kmem_cache_create(cache_name, size, size, 233 calculate_alignment(flags, align, size), 234 flags, ctor, NULL, NULL); 235 if (IS_ERR(s)) { 236 err = PTR_ERR(s); 237 kfree(cache_name); 238 } 239 240 out_unlock: 241 mutex_unlock(&slab_mutex); 242 put_online_cpus(); 243 244 if (err) { 245 if (flags & SLAB_PANIC) 246 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", 247 name, err); 248 else { 249 printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d", 250 name, err); 251 dump_stack(); 252 } 253 return NULL; 254 } 255 return s; 256 } 257 EXPORT_SYMBOL(kmem_cache_create); 258 259 #ifdef CONFIG_MEMCG_KMEM 260 /* 261 * kmem_cache_create_memcg - Create a cache for a memory cgroup. 262 * @memcg: The memory cgroup the new cache is for. 263 * @root_cache: The parent of the new cache. 264 * 265 * This function attempts to create a kmem cache that will serve allocation 266 * requests going from @memcg to @root_cache. The new cache inherits properties 267 * from its parent. 268 */ 269 void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache) 270 { 271 struct kmem_cache *s; 272 char *cache_name; 273 274 get_online_cpus(); 275 mutex_lock(&slab_mutex); 276 277 /* 278 * Since per-memcg caches are created asynchronously on first 279 * allocation (see memcg_kmem_get_cache()), several threads can try to 280 * create the same cache, but only one of them may succeed. 281 */ 282 if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg))) 283 goto out_unlock; 284 285 cache_name = memcg_create_cache_name(memcg, root_cache); 286 if (!cache_name) 287 goto out_unlock; 288 289 s = do_kmem_cache_create(cache_name, root_cache->object_size, 290 root_cache->size, root_cache->align, 291 root_cache->flags, root_cache->ctor, 292 memcg, root_cache); 293 if (IS_ERR(s)) { 294 kfree(cache_name); 295 goto out_unlock; 296 } 297 298 s->allocflags |= __GFP_KMEMCG; 299 300 out_unlock: 301 mutex_unlock(&slab_mutex); 302 put_online_cpus(); 303 } 304 305 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) 306 { 307 int rc; 308 309 if (!s->memcg_params || 310 !s->memcg_params->is_root_cache) 311 return 0; 312 313 mutex_unlock(&slab_mutex); 314 rc = __kmem_cache_destroy_memcg_children(s); 315 mutex_lock(&slab_mutex); 316 317 return rc; 318 } 319 #else 320 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) 321 { 322 return 0; 323 } 324 #endif /* CONFIG_MEMCG_KMEM */ 325 326 void kmem_cache_destroy(struct kmem_cache *s) 327 { 328 get_online_cpus(); 329 mutex_lock(&slab_mutex); 330 331 s->refcount--; 332 if (s->refcount) 333 goto out_unlock; 334 335 if (kmem_cache_destroy_memcg_children(s) != 0) 336 goto out_unlock; 337 338 list_del(&s->list); 339 memcg_unregister_cache(s); 340 341 if (__kmem_cache_shutdown(s) != 0) { 342 list_add(&s->list, &slab_caches); 343 memcg_register_cache(s); 344 printk(KERN_ERR "kmem_cache_destroy %s: " 345 "Slab cache still has objects\n", s->name); 346 dump_stack(); 347 goto out_unlock; 348 } 349 350 mutex_unlock(&slab_mutex); 351 if (s->flags & SLAB_DESTROY_BY_RCU) 352 rcu_barrier(); 353 354 memcg_free_cache_params(s); 355 kfree(s->name); 356 kmem_cache_free(kmem_cache, s); 357 goto out_put_cpus; 358 359 out_unlock: 360 mutex_unlock(&slab_mutex); 361 out_put_cpus: 362 put_online_cpus(); 363 } 364 EXPORT_SYMBOL(kmem_cache_destroy); 365 366 int slab_is_available(void) 367 { 368 return slab_state >= UP; 369 } 370 371 #ifndef CONFIG_SLOB 372 /* Create a cache during boot when no slab services are available yet */ 373 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, 374 unsigned long flags) 375 { 376 int err; 377 378 s->name = name; 379 s->size = s->object_size = size; 380 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); 381 err = __kmem_cache_create(s, flags); 382 383 if (err) 384 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", 385 name, size, err); 386 387 s->refcount = -1; /* Exempt from merging for now */ 388 } 389 390 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, 391 unsigned long flags) 392 { 393 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 394 395 if (!s) 396 panic("Out of memory when creating slab %s\n", name); 397 398 create_boot_cache(s, name, size, flags); 399 list_add(&s->list, &slab_caches); 400 s->refcount = 1; 401 return s; 402 } 403 404 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; 405 EXPORT_SYMBOL(kmalloc_caches); 406 407 #ifdef CONFIG_ZONE_DMA 408 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; 409 EXPORT_SYMBOL(kmalloc_dma_caches); 410 #endif 411 412 /* 413 * Conversion table for small slabs sizes / 8 to the index in the 414 * kmalloc array. This is necessary for slabs < 192 since we have non power 415 * of two cache sizes there. The size of larger slabs can be determined using 416 * fls. 417 */ 418 static s8 size_index[24] = { 419 3, /* 8 */ 420 4, /* 16 */ 421 5, /* 24 */ 422 5, /* 32 */ 423 6, /* 40 */ 424 6, /* 48 */ 425 6, /* 56 */ 426 6, /* 64 */ 427 1, /* 72 */ 428 1, /* 80 */ 429 1, /* 88 */ 430 1, /* 96 */ 431 7, /* 104 */ 432 7, /* 112 */ 433 7, /* 120 */ 434 7, /* 128 */ 435 2, /* 136 */ 436 2, /* 144 */ 437 2, /* 152 */ 438 2, /* 160 */ 439 2, /* 168 */ 440 2, /* 176 */ 441 2, /* 184 */ 442 2 /* 192 */ 443 }; 444 445 static inline int size_index_elem(size_t bytes) 446 { 447 return (bytes - 1) / 8; 448 } 449 450 /* 451 * Find the kmem_cache structure that serves a given size of 452 * allocation 453 */ 454 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) 455 { 456 int index; 457 458 if (unlikely(size > KMALLOC_MAX_SIZE)) { 459 WARN_ON_ONCE(!(flags & __GFP_NOWARN)); 460 return NULL; 461 } 462 463 if (size <= 192) { 464 if (!size) 465 return ZERO_SIZE_PTR; 466 467 index = size_index[size_index_elem(size)]; 468 } else 469 index = fls(size - 1); 470 471 #ifdef CONFIG_ZONE_DMA 472 if (unlikely((flags & GFP_DMA))) 473 return kmalloc_dma_caches[index]; 474 475 #endif 476 return kmalloc_caches[index]; 477 } 478 479 /* 480 * Create the kmalloc array. Some of the regular kmalloc arrays 481 * may already have been created because they were needed to 482 * enable allocations for slab creation. 483 */ 484 void __init create_kmalloc_caches(unsigned long flags) 485 { 486 int i; 487 488 /* 489 * Patch up the size_index table if we have strange large alignment 490 * requirements for the kmalloc array. This is only the case for 491 * MIPS it seems. The standard arches will not generate any code here. 492 * 493 * Largest permitted alignment is 256 bytes due to the way we 494 * handle the index determination for the smaller caches. 495 * 496 * Make sure that nothing crazy happens if someone starts tinkering 497 * around with ARCH_KMALLOC_MINALIGN 498 */ 499 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 500 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 501 502 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 503 int elem = size_index_elem(i); 504 505 if (elem >= ARRAY_SIZE(size_index)) 506 break; 507 size_index[elem] = KMALLOC_SHIFT_LOW; 508 } 509 510 if (KMALLOC_MIN_SIZE >= 64) { 511 /* 512 * The 96 byte size cache is not used if the alignment 513 * is 64 byte. 514 */ 515 for (i = 64 + 8; i <= 96; i += 8) 516 size_index[size_index_elem(i)] = 7; 517 518 } 519 520 if (KMALLOC_MIN_SIZE >= 128) { 521 /* 522 * The 192 byte sized cache is not used if the alignment 523 * is 128 byte. Redirect kmalloc to use the 256 byte cache 524 * instead. 525 */ 526 for (i = 128 + 8; i <= 192; i += 8) 527 size_index[size_index_elem(i)] = 8; 528 } 529 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { 530 if (!kmalloc_caches[i]) { 531 kmalloc_caches[i] = create_kmalloc_cache(NULL, 532 1 << i, flags); 533 } 534 535 /* 536 * Caches that are not of the two-to-the-power-of size. 537 * These have to be created immediately after the 538 * earlier power of two caches 539 */ 540 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) 541 kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); 542 543 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) 544 kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags); 545 } 546 547 /* Kmalloc array is now usable */ 548 slab_state = UP; 549 550 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { 551 struct kmem_cache *s = kmalloc_caches[i]; 552 char *n; 553 554 if (s) { 555 n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); 556 557 BUG_ON(!n); 558 s->name = n; 559 } 560 } 561 562 #ifdef CONFIG_ZONE_DMA 563 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { 564 struct kmem_cache *s = kmalloc_caches[i]; 565 566 if (s) { 567 int size = kmalloc_size(i); 568 char *n = kasprintf(GFP_NOWAIT, 569 "dma-kmalloc-%d", size); 570 571 BUG_ON(!n); 572 kmalloc_dma_caches[i] = create_kmalloc_cache(n, 573 size, SLAB_CACHE_DMA | flags); 574 } 575 } 576 #endif 577 } 578 #endif /* !CONFIG_SLOB */ 579 580 #ifdef CONFIG_TRACING 581 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 582 { 583 void *ret = kmalloc_order(size, flags, order); 584 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 585 return ret; 586 } 587 EXPORT_SYMBOL(kmalloc_order_trace); 588 #endif 589 590 #ifdef CONFIG_SLABINFO 591 592 #ifdef CONFIG_SLAB 593 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) 594 #else 595 #define SLABINFO_RIGHTS S_IRUSR 596 #endif 597 598 void print_slabinfo_header(struct seq_file *m) 599 { 600 /* 601 * Output format version, so at least we can change it 602 * without _too_ many complaints. 603 */ 604 #ifdef CONFIG_DEBUG_SLAB 605 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 606 #else 607 seq_puts(m, "slabinfo - version: 2.1\n"); 608 #endif 609 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 610 "<objperslab> <pagesperslab>"); 611 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 612 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 613 #ifdef CONFIG_DEBUG_SLAB 614 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " 615 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); 616 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 617 #endif 618 seq_putc(m, '\n'); 619 } 620 621 static void *s_start(struct seq_file *m, loff_t *pos) 622 { 623 loff_t n = *pos; 624 625 mutex_lock(&slab_mutex); 626 if (!n) 627 print_slabinfo_header(m); 628 629 return seq_list_start(&slab_caches, *pos); 630 } 631 632 void *slab_next(struct seq_file *m, void *p, loff_t *pos) 633 { 634 return seq_list_next(p, &slab_caches, pos); 635 } 636 637 void slab_stop(struct seq_file *m, void *p) 638 { 639 mutex_unlock(&slab_mutex); 640 } 641 642 static void 643 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) 644 { 645 struct kmem_cache *c; 646 struct slabinfo sinfo; 647 int i; 648 649 if (!is_root_cache(s)) 650 return; 651 652 for_each_memcg_cache_index(i) { 653 c = cache_from_memcg_idx(s, i); 654 if (!c) 655 continue; 656 657 memset(&sinfo, 0, sizeof(sinfo)); 658 get_slabinfo(c, &sinfo); 659 660 info->active_slabs += sinfo.active_slabs; 661 info->num_slabs += sinfo.num_slabs; 662 info->shared_avail += sinfo.shared_avail; 663 info->active_objs += sinfo.active_objs; 664 info->num_objs += sinfo.num_objs; 665 } 666 } 667 668 int cache_show(struct kmem_cache *s, struct seq_file *m) 669 { 670 struct slabinfo sinfo; 671 672 memset(&sinfo, 0, sizeof(sinfo)); 673 get_slabinfo(s, &sinfo); 674 675 memcg_accumulate_slabinfo(s, &sinfo); 676 677 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 678 cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, 679 sinfo.objects_per_slab, (1 << sinfo.cache_order)); 680 681 seq_printf(m, " : tunables %4u %4u %4u", 682 sinfo.limit, sinfo.batchcount, sinfo.shared); 683 seq_printf(m, " : slabdata %6lu %6lu %6lu", 684 sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); 685 slabinfo_show_stats(m, s); 686 seq_putc(m, '\n'); 687 return 0; 688 } 689 690 static int s_show(struct seq_file *m, void *p) 691 { 692 struct kmem_cache *s = list_entry(p, struct kmem_cache, list); 693 694 if (!is_root_cache(s)) 695 return 0; 696 return cache_show(s, m); 697 } 698 699 /* 700 * slabinfo_op - iterator that generates /proc/slabinfo 701 * 702 * Output layout: 703 * cache-name 704 * num-active-objs 705 * total-objs 706 * object size 707 * num-active-slabs 708 * total-slabs 709 * num-pages-per-slab 710 * + further values on SMP and with statistics enabled 711 */ 712 static const struct seq_operations slabinfo_op = { 713 .start = s_start, 714 .next = slab_next, 715 .stop = slab_stop, 716 .show = s_show, 717 }; 718 719 static int slabinfo_open(struct inode *inode, struct file *file) 720 { 721 return seq_open(file, &slabinfo_op); 722 } 723 724 static const struct file_operations proc_slabinfo_operations = { 725 .open = slabinfo_open, 726 .read = seq_read, 727 .write = slabinfo_write, 728 .llseek = seq_lseek, 729 .release = seq_release, 730 }; 731 732 static int __init slab_proc_init(void) 733 { 734 proc_create("slabinfo", SLABINFO_RIGHTS, NULL, 735 &proc_slabinfo_operations); 736 return 0; 737 } 738 module_init(slab_proc_init); 739 #endif /* CONFIG_SLABINFO */ 740