/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
				   size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}

		/*
		 * For simplicity, we won't check this in the list of memcg
		 * caches. We have control over memcg naming, and if there
		 * aren't duplicates in the global list, there won't be any
		 * duplicates in the memcg lists as well.
		 */
		if (!memcg && !strcmp(s->name, name)) {
			pr_err("%s (%s): Cache name already exists.\n",
			       __func__, name);
			dump_stack();
			s = NULL;
			return -EINVAL;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
					  const char *name, size_t size)
{
	return 0;
}
#endif

#ifdef CONFIG_MEMCG_KMEM
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);

	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		ret = memcg_update_cache_size(s, num_memcgs);
		/*
		 * See comment in memcontrol.c, memcg_update_cache_size:
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			goto out;
	}

	memcg_update_array_size(num_memcgs);
out:
	mutex_unlock(&slab_mutex);
	return ret;
}
#endif

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If the specified alignment is greater, use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();

		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
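
/*
 * Worked example for calculate_alignment() above (illustrative only, and
 * it assumes a 64 byte cache line): a 24 byte object created with
 * SLAB_HWCACHE_ALIGN has ralign halved from 64 to 32 (since 24 <= 32) but
 * not to 16 (since 24 > 16), so the object ends up 32 byte aligned rather
 * than aligned to a full cache line. Objects of 64 bytes or more keep the
 * full cache line alignment.
 */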

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */

struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *),
			struct kmem_cache *parent_cache)
{
	struct kmem_cache *s = NULL;
	int err = 0;

	get_online_cpus();
	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(memcg, name, size);
	if (err)
		goto out_locked;

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
	if (s)
		goto out_locked;

	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (s) {
		s->object_size = s->size = size;
		s->align = calculate_alignment(flags, align, size);
		s->ctor = ctor;

		if (memcg_register_cache(memcg, s, parent_cache)) {
			kmem_cache_free(kmem_cache, s);
			err = -ENOMEM;
			goto out_locked;
		}

		s->name = kstrdup(name, GFP_KERNEL);
		if (!s->name) {
			kmem_cache_free(kmem_cache, s);
			err = -ENOMEM;
			goto out_locked;
		}

		err = __kmem_cache_create(s, flags);
		if (!err) {
			s->refcount = 1;
			list_add(&s->list, &slab_caches);
			memcg_cache_list_add(memcg, s);
		} else {
			kfree(s->name);
			kmem_cache_free(kmem_cache, s);
		}
	} else
		err = -ENOMEM;

out_locked:
	mutex_unlock(&slab_mutex);
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}

	return s;
}

struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
}
EXPORT_SYMBOL(kmem_cache_create);
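
/*
 * Sketch of typical usage from a caller's point of view, matching the
 * interface documented above. It is purely illustrative; "foo", struct foo
 * and foo_cachep are made-up names, not part of this file:
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cachep)
 *		return -ENOMEM;
 *
 *	obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, obj);
 *	kmem_cache_destroy(foo_cachep);
 */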

void kmem_cache_destroy(struct kmem_cache *s)
{
	/* Destroy all the children caches if we aren't a memcg cache */
	kmem_cache_destroy_memcg_children(s);

	get_online_cpus();
	mutex_lock(&slab_mutex);
	s->refcount--;
	if (!s->refcount) {
		list_del(&s->list);

		if (!__kmem_cache_shutdown(s)) {
			mutex_unlock(&slab_mutex);
			if (s->flags & SLAB_DESTROY_BY_RCU)
				rcu_barrier();

			memcg_release_cache(s);
			kfree(s->name);
			kmem_cache_free(kmem_cache, s);
		} else {
			list_add(&s->list, &slab_caches);
			mutex_unlock(&slab_mutex);
			printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
				s->name);
			dump_stack();
		}
	} else {
		mutex_unlock(&slab_mutex);
	}
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

int slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}
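
/*
 * Worked example for the table above (illustrative only, not used by the
 * code): a 40 byte request gives size_index_elem(40) = (40 - 1) / 8 = 4,
 * and size_index[4] is 6, so it is served from kmalloc_caches[6], the
 * 64 byte cache. The odd indices 1 and 2 refer to the 96 and 192 byte
 * caches, which do not fit the power-of-two scheme.
 */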

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation.
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];
#endif
	return kmalloc_caches[index];
}
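
/*
 * Worked example for the fls() path above (illustrative only): for a
 * 300 byte request, fls(300 - 1) = fls(299) = 9, so kmalloc_caches[9],
 * the 512 byte cache, is used; with GFP_DMA the lookup returns
 * kmalloc_dma_caches[9] instead.
 */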

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * MIPS it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN.
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte sized cache is not used if the alignment
		 * is 64 bytes.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i]) {
			kmalloc_caches[i] = create_kmalloc_cache(NULL,
							1 << i, flags);
		}

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches.
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);

		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];
		char *n;

		if (s) {
			n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));

			BUG_ON(!n);
			s->name = n;
		}
	}

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	mutex_lock(&slab_mutex);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;
	int i;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache_index(i) {
		c = cache_from_memcg(s, i);
		if (!c)
			continue;

		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

int cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
	return 0;
}
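
/*
 * For illustration, a /proc/slabinfo line produced by cache_show() above
 * looks roughly like this (the numbers are made up, and consistent with
 * 64 byte objects in order-0 slabs):
 *
 *	kmalloc-64   1024   1088   64   64   1 : tunables 0 0 0 : slabdata 17 17 0
 */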

static int s_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (!is_root_cache(s))
		return 0;
	return cache_show(s, m);
}

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */