/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
				   size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}

#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
		/*
		 * For simplicity, we won't check this in the list of memcg
		 * caches. We have control over memcg naming, and if there
		 * aren't duplicates in the global list, there won't be any
		 * duplicates in the memcg lists as well.
		 */
		if (!memcg && !strcmp(s->name, name)) {
			pr_err("%s (%s): Cache name already exists.\n",
			       __func__, name);
			dump_stack();
			s = NULL;
			return -EINVAL;
		}
#endif
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
					  const char *name, size_t size)
{
	return 0;
}
#endif

#ifdef CONFIG_MEMCG_KMEM
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;
	mutex_lock(&slab_mutex);

	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		ret = memcg_update_cache_size(s, num_memcgs);
		/*
		 * See comment in memcontrol.c, memcg_update_cache_size:
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			goto out;
	}

	memcg_update_array_size(num_memcgs);
out:
	mutex_unlock(&slab_mutex);
	return ret;
}
#endif

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, then use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
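
/*
 * Worked example (illustrative only, assuming cache_line_size() == 64): for
 * a 20-byte object created with SLAB_HWCACHE_ALIGN, ralign is halved from
 * 64 to 32 (20 <= 32) and then stops (20 > 16), so the object ends up
 * 32-byte aligned unless the caller or ARCH_SLAB_MINALIGN requests more.
 */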

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */

struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *),
			struct kmem_cache *parent_cache)
{
	struct kmem_cache *s = NULL;
	int err = 0;

	get_online_cpus();
	mutex_lock(&slab_mutex);

	if (kmem_cache_sanity_check(memcg, name, size))
		goto out_locked;

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
	if (s)
		goto out_locked;

	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (s) {
		s->object_size = s->size = size;
		s->align = calculate_alignment(flags, align, size);
		s->ctor = ctor;

		if (memcg_register_cache(memcg, s, parent_cache)) {
			kmem_cache_free(kmem_cache, s);
			err = -ENOMEM;
			goto out_locked;
		}

		s->name = kstrdup(name, GFP_KERNEL);
		if (!s->name) {
			kmem_cache_free(kmem_cache, s);
			err = -ENOMEM;
			goto out_locked;
		}

		err = __kmem_cache_create(s, flags);
		if (!err) {
			s->refcount = 1;
			list_add(&s->list, &slab_caches);
			memcg_cache_list_add(memcg, s);
		} else {
			kfree(s->name);
			kmem_cache_free(kmem_cache, s);
		}
	} else
		err = -ENOMEM;

out_locked:
	mutex_unlock(&slab_mutex);
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}

		return NULL;
	}

	return s;
}

struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
}
EXPORT_SYMBOL(kmem_cache_create);
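
/*
 * Typical usage, as an illustrative sketch only (struct foo, foo_cachep and
 * obj are hypothetical; the calls are the standard slab API from
 * <linux/slab.h>):
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, obj);
 *	kmem_cache_destroy(foo_cachep);
 */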

void kmem_cache_destroy(struct kmem_cache *s)
{
	/* Destroy all the child caches if we aren't a memcg cache */
	kmem_cache_destroy_memcg_children(s);

	get_online_cpus();
	mutex_lock(&slab_mutex);
	s->refcount--;
	if (!s->refcount) {
		list_del(&s->list);

		if (!__kmem_cache_shutdown(s)) {
			mutex_unlock(&slab_mutex);
			if (s->flags & SLAB_DESTROY_BY_RCU)
				rcu_barrier();

			memcg_release_cache(s);
			kfree(s->name);
			kmem_cache_free(kmem_cache, s);
		} else {
			list_add(&s->list, &slab_caches);
			mutex_unlock(&slab_mutex);
			printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
			       s->name);
			dump_stack();
		}
	} else {
		mutex_unlock(&slab_mutex);
	}
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

int slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
			name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
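
/*
 * Worked example (illustrative, assuming the table above has not been
 * patched by create_kmalloc_caches() below): a 100-byte request passed to
 * kmalloc_slab() has size_index_elem(100) == (100 - 1) / 8 == 12 and
 * size_index[12] == 7, so it is served from kmalloc_caches[7], the 128-byte
 * cache. A 1000-byte request is above 192, so its index is fls(999) == 10,
 * i.e. the 1024-byte cache.
 */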

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * MIPS it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 bytes.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i]) {
			kmalloc_caches[i] = create_kmalloc_cache(NULL,
							1 << i, flags);
		}

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches.
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);

		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];
		char *n;

		if (s) {
			n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));

			BUG_ON(!n);
			s->name = n;
		}
	}

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
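
/*
 * For illustration only (the numbers below are made up), a data line
 * emitted by cache_show() below for the header above looks like:
 *
 * kmalloc-64          1152   1280     64   64    1 : tunables    0    0    0 : slabdata     20     20      0
 */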

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	mutex_lock(&slab_mutex);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;
	int i;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache_index(i) {
		c = cache_from_memcg_idx(s, i);
		if (!c)
			continue;

		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

int cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
	return 0;
}

static int s_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (!is_root_cache(s))
		return 0;
	return cache_show(s, m);
}

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */