1 /* 2 * Slab allocator functions that are independent of the allocator strategy 3 * 4 * (C) 2012 Christoph Lameter <cl@linux.com> 5 */ 6 #include <linux/slab.h> 7 8 #include <linux/mm.h> 9 #include <linux/poison.h> 10 #include <linux/interrupt.h> 11 #include <linux/memory.h> 12 #include <linux/compiler.h> 13 #include <linux/module.h> 14 #include <linux/cpu.h> 15 #include <linux/uaccess.h> 16 #include <linux/seq_file.h> 17 #include <linux/proc_fs.h> 18 #include <asm/cacheflush.h> 19 #include <asm/tlbflush.h> 20 #include <asm/page.h> 21 #include <linux/memcontrol.h> 22 23 #define CREATE_TRACE_POINTS 24 #include <trace/events/kmem.h> 25 26 #include "slab.h" 27 28 enum slab_state slab_state; 29 LIST_HEAD(slab_caches); 30 DEFINE_MUTEX(slab_mutex); 31 struct kmem_cache *kmem_cache; 32 33 /* 34 * Set of flags that will prevent slab merging 35 */ 36 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 37 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ 38 SLAB_FAILSLAB) 39 40 #define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 41 SLAB_CACHE_DMA | SLAB_NOTRACK) 42 43 /* 44 * Merge control. If this is set then no merging of slab caches will occur. 45 * (Could be removed. This was introduced to pacify the merge skeptics.) 46 */ 47 static int slab_nomerge; 48 49 static int __init setup_slab_nomerge(char *str) 50 { 51 slab_nomerge = 1; 52 return 1; 53 } 54 55 #ifdef CONFIG_SLUB 56 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0); 57 #endif 58 59 __setup("slab_nomerge", setup_slab_nomerge); 60 61 /* 62 * Determine the size of a slab object 63 */ 64 unsigned int kmem_cache_size(struct kmem_cache *s) 65 { 66 return s->object_size; 67 } 68 EXPORT_SYMBOL(kmem_cache_size); 69 70 #ifdef CONFIG_DEBUG_VM 71 static int kmem_cache_sanity_check(const char *name, size_t size) 72 { 73 struct kmem_cache *s = NULL; 74 75 if (!name || in_interrupt() || size < sizeof(void *) || 76 size > KMALLOC_MAX_SIZE) { 77 pr_err("kmem_cache_create(%s) integrity check failed\n", name); 78 return -EINVAL; 79 } 80 81 list_for_each_entry(s, &slab_caches, list) { 82 char tmp; 83 int res; 84 85 /* 86 * This happens when the module gets unloaded and doesn't 87 * destroy its slab cache and no-one else reuses the vmalloc 88 * area of the module. Print a warning. 89 */ 90 res = probe_kernel_address(s->name, tmp); 91 if (res) { 92 pr_err("Slab cache with size %d has lost its name\n", 93 s->object_size); 94 continue; 95 } 96 } 97 98 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 99 return 0; 100 } 101 #else 102 static inline int kmem_cache_sanity_check(const char *name, size_t size) 103 { 104 return 0; 105 } 106 #endif 107 108 #ifdef CONFIG_MEMCG_KMEM 109 void slab_init_memcg_params(struct kmem_cache *s) 110 { 111 s->memcg_params.is_root_cache = true; 112 INIT_LIST_HEAD(&s->memcg_params.list); 113 RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); 114 } 115 116 static int init_memcg_params(struct kmem_cache *s, 117 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 118 { 119 struct memcg_cache_array *arr; 120 121 if (memcg) { 122 s->memcg_params.is_root_cache = false; 123 s->memcg_params.memcg = memcg; 124 s->memcg_params.root_cache = root_cache; 125 return 0; 126 } 127 128 slab_init_memcg_params(s); 129 130 if (!memcg_nr_cache_ids) 131 return 0; 132 133 arr = kzalloc(sizeof(struct memcg_cache_array) + 134 memcg_nr_cache_ids * sizeof(void *), 135 GFP_KERNEL); 136 if (!arr) 137 return -ENOMEM; 138 139 RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); 140 return 0; 141 } 142 143 static void destroy_memcg_params(struct kmem_cache *s) 144 { 145 if (is_root_cache(s)) 146 kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); 147 } 148 149 static int update_memcg_params(struct kmem_cache *s, int new_array_size) 150 { 151 struct memcg_cache_array *old, *new; 152 153 if (!is_root_cache(s)) 154 return 0; 155 156 new = kzalloc(sizeof(struct memcg_cache_array) + 157 new_array_size * sizeof(void *), GFP_KERNEL); 158 if (!new) 159 return -ENOMEM; 160 161 old = rcu_dereference_protected(s->memcg_params.memcg_caches, 162 lockdep_is_held(&slab_mutex)); 163 if (old) 164 memcpy(new->entries, old->entries, 165 memcg_nr_cache_ids * sizeof(void *)); 166 167 rcu_assign_pointer(s->memcg_params.memcg_caches, new); 168 if (old) 169 kfree_rcu(old, rcu); 170 return 0; 171 } 172 173 int memcg_update_all_caches(int num_memcgs) 174 { 175 struct kmem_cache *s; 176 int ret = 0; 177 178 mutex_lock(&slab_mutex); 179 list_for_each_entry(s, &slab_caches, list) { 180 ret = update_memcg_params(s, num_memcgs); 181 /* 182 * Instead of freeing the memory, we'll just leave the caches 183 * up to this point in an updated state. 184 */ 185 if (ret) 186 break; 187 } 188 mutex_unlock(&slab_mutex); 189 return ret; 190 } 191 #else 192 static inline int init_memcg_params(struct kmem_cache *s, 193 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 194 { 195 return 0; 196 } 197 198 static inline void destroy_memcg_params(struct kmem_cache *s) 199 { 200 } 201 #endif /* CONFIG_MEMCG_KMEM */ 202 203 /* 204 * Find a mergeable slab cache 205 */ 206 int slab_unmergeable(struct kmem_cache *s) 207 { 208 if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) 209 return 1; 210 211 if (!is_root_cache(s)) 212 return 1; 213 214 if (s->ctor) 215 return 1; 216 217 /* 218 * We may have set a slab to be unmergeable during bootstrap. 219 */ 220 if (s->refcount < 0) 221 return 1; 222 223 return 0; 224 } 225 226 struct kmem_cache *find_mergeable(size_t size, size_t align, 227 unsigned long flags, const char *name, void (*ctor)(void *)) 228 { 229 struct kmem_cache *s; 230 231 if (slab_nomerge || (flags & SLAB_NEVER_MERGE)) 232 return NULL; 233 234 if (ctor) 235 return NULL; 236 237 size = ALIGN(size, sizeof(void *)); 238 align = calculate_alignment(flags, align, size); 239 size = ALIGN(size, align); 240 flags = kmem_cache_flags(size, flags, name, NULL); 241 242 list_for_each_entry_reverse(s, &slab_caches, list) { 243 if (slab_unmergeable(s)) 244 continue; 245 246 if (size > s->size) 247 continue; 248 249 if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME)) 250 continue; 251 /* 252 * Check if alignment is compatible. 253 * Courtesy of Adrian Drzewiecki 254 */ 255 if ((s->size & ~(align - 1)) != s->size) 256 continue; 257 258 if (s->size - size >= sizeof(void *)) 259 continue; 260 261 if (IS_ENABLED(CONFIG_SLAB) && align && 262 (align > s->align || s->align % align)) 263 continue; 264 265 return s; 266 } 267 return NULL; 268 } 269 270 /* 271 * Figure out what the alignment of the objects will be given a set of 272 * flags, a user specified alignment and the size of the objects. 273 */ 274 unsigned long calculate_alignment(unsigned long flags, 275 unsigned long align, unsigned long size) 276 { 277 /* 278 * If the user wants hardware cache aligned objects then follow that 279 * suggestion if the object is sufficiently large. 280 * 281 * The hardware cache alignment cannot override the specified 282 * alignment though. If that is greater then use it. 283 */ 284 if (flags & SLAB_HWCACHE_ALIGN) { 285 unsigned long ralign = cache_line_size(); 286 while (size <= ralign / 2) 287 ralign /= 2; 288 align = max(align, ralign); 289 } 290 291 if (align < ARCH_SLAB_MINALIGN) 292 align = ARCH_SLAB_MINALIGN; 293 294 return ALIGN(align, sizeof(void *)); 295 } 296 297 static struct kmem_cache * 298 do_kmem_cache_create(const char *name, size_t object_size, size_t size, 299 size_t align, unsigned long flags, void (*ctor)(void *), 300 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 301 { 302 struct kmem_cache *s; 303 int err; 304 305 err = -ENOMEM; 306 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); 307 if (!s) 308 goto out; 309 310 s->name = name; 311 s->object_size = object_size; 312 s->size = size; 313 s->align = align; 314 s->ctor = ctor; 315 316 err = init_memcg_params(s, memcg, root_cache); 317 if (err) 318 goto out_free_cache; 319 320 err = __kmem_cache_create(s, flags); 321 if (err) 322 goto out_free_cache; 323 324 s->refcount = 1; 325 list_add(&s->list, &slab_caches); 326 out: 327 if (err) 328 return ERR_PTR(err); 329 return s; 330 331 out_free_cache: 332 destroy_memcg_params(s); 333 kmem_cache_free(kmem_cache, s); 334 goto out; 335 } 336 337 /* 338 * kmem_cache_create - Create a cache. 339 * @name: A string which is used in /proc/slabinfo to identify this cache. 340 * @size: The size of objects to be created in this cache. 341 * @align: The required alignment for the objects. 342 * @flags: SLAB flags 343 * @ctor: A constructor for the objects. 344 * 345 * Returns a ptr to the cache on success, NULL on failure. 346 * Cannot be called within a interrupt, but can be interrupted. 347 * The @ctor is run when new pages are allocated by the cache. 348 * 349 * The flags are 350 * 351 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 352 * to catch references to uninitialised memory. 353 * 354 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 355 * for buffer overruns. 356 * 357 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 358 * cacheline. This can be beneficial if you're counting cycles as closely 359 * as davem. 360 */ 361 struct kmem_cache * 362 kmem_cache_create(const char *name, size_t size, size_t align, 363 unsigned long flags, void (*ctor)(void *)) 364 { 365 struct kmem_cache *s; 366 const char *cache_name; 367 int err; 368 369 get_online_cpus(); 370 get_online_mems(); 371 memcg_get_cache_ids(); 372 373 mutex_lock(&slab_mutex); 374 375 err = kmem_cache_sanity_check(name, size); 376 if (err) { 377 s = NULL; /* suppress uninit var warning */ 378 goto out_unlock; 379 } 380 381 /* 382 * Some allocators will constraint the set of valid flags to a subset 383 * of all flags. We expect them to define CACHE_CREATE_MASK in this 384 * case, and we'll just provide them with a sanitized version of the 385 * passed flags. 386 */ 387 flags &= CACHE_CREATE_MASK; 388 389 s = __kmem_cache_alias(name, size, align, flags, ctor); 390 if (s) 391 goto out_unlock; 392 393 cache_name = kstrdup_const(name, GFP_KERNEL); 394 if (!cache_name) { 395 err = -ENOMEM; 396 goto out_unlock; 397 } 398 399 s = do_kmem_cache_create(cache_name, size, size, 400 calculate_alignment(flags, align, size), 401 flags, ctor, NULL, NULL); 402 if (IS_ERR(s)) { 403 err = PTR_ERR(s); 404 kfree_const(cache_name); 405 } 406 407 out_unlock: 408 mutex_unlock(&slab_mutex); 409 410 memcg_put_cache_ids(); 411 put_online_mems(); 412 put_online_cpus(); 413 414 if (err) { 415 if (flags & SLAB_PANIC) 416 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", 417 name, err); 418 else { 419 printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d", 420 name, err); 421 dump_stack(); 422 } 423 return NULL; 424 } 425 return s; 426 } 427 EXPORT_SYMBOL(kmem_cache_create); 428 429 static int do_kmem_cache_shutdown(struct kmem_cache *s, 430 struct list_head *release, bool *need_rcu_barrier) 431 { 432 if (__kmem_cache_shutdown(s) != 0) { 433 printk(KERN_ERR "kmem_cache_destroy %s: " 434 "Slab cache still has objects\n", s->name); 435 dump_stack(); 436 return -EBUSY; 437 } 438 439 if (s->flags & SLAB_DESTROY_BY_RCU) 440 *need_rcu_barrier = true; 441 442 #ifdef CONFIG_MEMCG_KMEM 443 if (!is_root_cache(s)) 444 list_del(&s->memcg_params.list); 445 #endif 446 list_move(&s->list, release); 447 return 0; 448 } 449 450 static void do_kmem_cache_release(struct list_head *release, 451 bool need_rcu_barrier) 452 { 453 struct kmem_cache *s, *s2; 454 455 if (need_rcu_barrier) 456 rcu_barrier(); 457 458 list_for_each_entry_safe(s, s2, release, list) { 459 #ifdef SLAB_SUPPORTS_SYSFS 460 sysfs_slab_remove(s); 461 #else 462 slab_kmem_cache_release(s); 463 #endif 464 } 465 } 466 467 #ifdef CONFIG_MEMCG_KMEM 468 /* 469 * memcg_create_kmem_cache - Create a cache for a memory cgroup. 470 * @memcg: The memory cgroup the new cache is for. 471 * @root_cache: The parent of the new cache. 472 * 473 * This function attempts to create a kmem cache that will serve allocation 474 * requests going from @memcg to @root_cache. The new cache inherits properties 475 * from its parent. 476 */ 477 void memcg_create_kmem_cache(struct mem_cgroup *memcg, 478 struct kmem_cache *root_cache) 479 { 480 static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ 481 struct cgroup_subsys_state *css = mem_cgroup_css(memcg); 482 struct memcg_cache_array *arr; 483 struct kmem_cache *s = NULL; 484 char *cache_name; 485 int idx; 486 487 get_online_cpus(); 488 get_online_mems(); 489 490 mutex_lock(&slab_mutex); 491 492 /* 493 * The memory cgroup could have been deactivated while the cache 494 * creation work was pending. 495 */ 496 if (!memcg_kmem_is_active(memcg)) 497 goto out_unlock; 498 499 idx = memcg_cache_id(memcg); 500 arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, 501 lockdep_is_held(&slab_mutex)); 502 503 /* 504 * Since per-memcg caches are created asynchronously on first 505 * allocation (see memcg_kmem_get_cache()), several threads can try to 506 * create the same cache, but only one of them may succeed. 507 */ 508 if (arr->entries[idx]) 509 goto out_unlock; 510 511 cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); 512 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, 513 css->id, memcg_name_buf); 514 if (!cache_name) 515 goto out_unlock; 516 517 s = do_kmem_cache_create(cache_name, root_cache->object_size, 518 root_cache->size, root_cache->align, 519 root_cache->flags, root_cache->ctor, 520 memcg, root_cache); 521 /* 522 * If we could not create a memcg cache, do not complain, because 523 * that's not critical at all as we can always proceed with the root 524 * cache. 525 */ 526 if (IS_ERR(s)) { 527 kfree(cache_name); 528 goto out_unlock; 529 } 530 531 list_add(&s->memcg_params.list, &root_cache->memcg_params.list); 532 533 /* 534 * Since readers won't lock (see cache_from_memcg_idx()), we need a 535 * barrier here to ensure nobody will see the kmem_cache partially 536 * initialized. 537 */ 538 smp_wmb(); 539 arr->entries[idx] = s; 540 541 out_unlock: 542 mutex_unlock(&slab_mutex); 543 544 put_online_mems(); 545 put_online_cpus(); 546 } 547 548 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) 549 { 550 int idx; 551 struct memcg_cache_array *arr; 552 struct kmem_cache *s, *c; 553 554 idx = memcg_cache_id(memcg); 555 556 get_online_cpus(); 557 get_online_mems(); 558 559 mutex_lock(&slab_mutex); 560 list_for_each_entry(s, &slab_caches, list) { 561 if (!is_root_cache(s)) 562 continue; 563 564 arr = rcu_dereference_protected(s->memcg_params.memcg_caches, 565 lockdep_is_held(&slab_mutex)); 566 c = arr->entries[idx]; 567 if (!c) 568 continue; 569 570 __kmem_cache_shrink(c, true); 571 arr->entries[idx] = NULL; 572 } 573 mutex_unlock(&slab_mutex); 574 575 put_online_mems(); 576 put_online_cpus(); 577 } 578 579 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) 580 { 581 LIST_HEAD(release); 582 bool need_rcu_barrier = false; 583 struct kmem_cache *s, *s2; 584 585 get_online_cpus(); 586 get_online_mems(); 587 588 mutex_lock(&slab_mutex); 589 list_for_each_entry_safe(s, s2, &slab_caches, list) { 590 if (is_root_cache(s) || s->memcg_params.memcg != memcg) 591 continue; 592 /* 593 * The cgroup is about to be freed and therefore has no charges 594 * left. Hence, all its caches must be empty by now. 595 */ 596 BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier)); 597 } 598 mutex_unlock(&slab_mutex); 599 600 put_online_mems(); 601 put_online_cpus(); 602 603 do_kmem_cache_release(&release, need_rcu_barrier); 604 } 605 #endif /* CONFIG_MEMCG_KMEM */ 606 607 void slab_kmem_cache_release(struct kmem_cache *s) 608 { 609 destroy_memcg_params(s); 610 kfree_const(s->name); 611 kmem_cache_free(kmem_cache, s); 612 } 613 614 void kmem_cache_destroy(struct kmem_cache *s) 615 { 616 struct kmem_cache *c, *c2; 617 LIST_HEAD(release); 618 bool need_rcu_barrier = false; 619 bool busy = false; 620 621 BUG_ON(!is_root_cache(s)); 622 623 get_online_cpus(); 624 get_online_mems(); 625 626 mutex_lock(&slab_mutex); 627 628 s->refcount--; 629 if (s->refcount) 630 goto out_unlock; 631 632 for_each_memcg_cache_safe(c, c2, s) { 633 if (do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) 634 busy = true; 635 } 636 637 if (!busy) 638 do_kmem_cache_shutdown(s, &release, &need_rcu_barrier); 639 640 out_unlock: 641 mutex_unlock(&slab_mutex); 642 643 put_online_mems(); 644 put_online_cpus(); 645 646 do_kmem_cache_release(&release, need_rcu_barrier); 647 } 648 EXPORT_SYMBOL(kmem_cache_destroy); 649 650 /** 651 * kmem_cache_shrink - Shrink a cache. 652 * @cachep: The cache to shrink. 653 * 654 * Releases as many slabs as possible for a cache. 655 * To help debugging, a zero exit status indicates all slabs were released. 656 */ 657 int kmem_cache_shrink(struct kmem_cache *cachep) 658 { 659 int ret; 660 661 get_online_cpus(); 662 get_online_mems(); 663 ret = __kmem_cache_shrink(cachep, false); 664 put_online_mems(); 665 put_online_cpus(); 666 return ret; 667 } 668 EXPORT_SYMBOL(kmem_cache_shrink); 669 670 int slab_is_available(void) 671 { 672 return slab_state >= UP; 673 } 674 675 #ifndef CONFIG_SLOB 676 /* Create a cache during boot when no slab services are available yet */ 677 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, 678 unsigned long flags) 679 { 680 int err; 681 682 s->name = name; 683 s->size = s->object_size = size; 684 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); 685 686 slab_init_memcg_params(s); 687 688 err = __kmem_cache_create(s, flags); 689 690 if (err) 691 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", 692 name, size, err); 693 694 s->refcount = -1; /* Exempt from merging for now */ 695 } 696 697 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, 698 unsigned long flags) 699 { 700 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 701 702 if (!s) 703 panic("Out of memory when creating slab %s\n", name); 704 705 create_boot_cache(s, name, size, flags); 706 list_add(&s->list, &slab_caches); 707 s->refcount = 1; 708 return s; 709 } 710 711 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; 712 EXPORT_SYMBOL(kmalloc_caches); 713 714 #ifdef CONFIG_ZONE_DMA 715 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; 716 EXPORT_SYMBOL(kmalloc_dma_caches); 717 #endif 718 719 /* 720 * Conversion table for small slabs sizes / 8 to the index in the 721 * kmalloc array. This is necessary for slabs < 192 since we have non power 722 * of two cache sizes there. The size of larger slabs can be determined using 723 * fls. 724 */ 725 static s8 size_index[24] = { 726 3, /* 8 */ 727 4, /* 16 */ 728 5, /* 24 */ 729 5, /* 32 */ 730 6, /* 40 */ 731 6, /* 48 */ 732 6, /* 56 */ 733 6, /* 64 */ 734 1, /* 72 */ 735 1, /* 80 */ 736 1, /* 88 */ 737 1, /* 96 */ 738 7, /* 104 */ 739 7, /* 112 */ 740 7, /* 120 */ 741 7, /* 128 */ 742 2, /* 136 */ 743 2, /* 144 */ 744 2, /* 152 */ 745 2, /* 160 */ 746 2, /* 168 */ 747 2, /* 176 */ 748 2, /* 184 */ 749 2 /* 192 */ 750 }; 751 752 static inline int size_index_elem(size_t bytes) 753 { 754 return (bytes - 1) / 8; 755 } 756 757 /* 758 * Find the kmem_cache structure that serves a given size of 759 * allocation 760 */ 761 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) 762 { 763 int index; 764 765 if (unlikely(size > KMALLOC_MAX_SIZE)) { 766 WARN_ON_ONCE(!(flags & __GFP_NOWARN)); 767 return NULL; 768 } 769 770 if (size <= 192) { 771 if (!size) 772 return ZERO_SIZE_PTR; 773 774 index = size_index[size_index_elem(size)]; 775 } else 776 index = fls(size - 1); 777 778 #ifdef CONFIG_ZONE_DMA 779 if (unlikely((flags & GFP_DMA))) 780 return kmalloc_dma_caches[index]; 781 782 #endif 783 return kmalloc_caches[index]; 784 } 785 786 /* 787 * Create the kmalloc array. Some of the regular kmalloc arrays 788 * may already have been created because they were needed to 789 * enable allocations for slab creation. 790 */ 791 void __init create_kmalloc_caches(unsigned long flags) 792 { 793 int i; 794 795 /* 796 * Patch up the size_index table if we have strange large alignment 797 * requirements for the kmalloc array. This is only the case for 798 * MIPS it seems. The standard arches will not generate any code here. 799 * 800 * Largest permitted alignment is 256 bytes due to the way we 801 * handle the index determination for the smaller caches. 802 * 803 * Make sure that nothing crazy happens if someone starts tinkering 804 * around with ARCH_KMALLOC_MINALIGN 805 */ 806 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 807 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 808 809 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 810 int elem = size_index_elem(i); 811 812 if (elem >= ARRAY_SIZE(size_index)) 813 break; 814 size_index[elem] = KMALLOC_SHIFT_LOW; 815 } 816 817 if (KMALLOC_MIN_SIZE >= 64) { 818 /* 819 * The 96 byte size cache is not used if the alignment 820 * is 64 byte. 821 */ 822 for (i = 64 + 8; i <= 96; i += 8) 823 size_index[size_index_elem(i)] = 7; 824 825 } 826 827 if (KMALLOC_MIN_SIZE >= 128) { 828 /* 829 * The 192 byte sized cache is not used if the alignment 830 * is 128 byte. Redirect kmalloc to use the 256 byte cache 831 * instead. 832 */ 833 for (i = 128 + 8; i <= 192; i += 8) 834 size_index[size_index_elem(i)] = 8; 835 } 836 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { 837 if (!kmalloc_caches[i]) { 838 kmalloc_caches[i] = create_kmalloc_cache(NULL, 839 1 << i, flags); 840 } 841 842 /* 843 * Caches that are not of the two-to-the-power-of size. 844 * These have to be created immediately after the 845 * earlier power of two caches 846 */ 847 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) 848 kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); 849 850 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) 851 kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags); 852 } 853 854 /* Kmalloc array is now usable */ 855 slab_state = UP; 856 857 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { 858 struct kmem_cache *s = kmalloc_caches[i]; 859 char *n; 860 861 if (s) { 862 n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); 863 864 BUG_ON(!n); 865 s->name = n; 866 } 867 } 868 869 #ifdef CONFIG_ZONE_DMA 870 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { 871 struct kmem_cache *s = kmalloc_caches[i]; 872 873 if (s) { 874 int size = kmalloc_size(i); 875 char *n = kasprintf(GFP_NOWAIT, 876 "dma-kmalloc-%d", size); 877 878 BUG_ON(!n); 879 kmalloc_dma_caches[i] = create_kmalloc_cache(n, 880 size, SLAB_CACHE_DMA | flags); 881 } 882 } 883 #endif 884 } 885 #endif /* !CONFIG_SLOB */ 886 887 /* 888 * To avoid unnecessary overhead, we pass through large allocation requests 889 * directly to the page allocator. We use __GFP_COMP, because we will need to 890 * know the allocation order to free the pages properly in kfree. 891 */ 892 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) 893 { 894 void *ret; 895 struct page *page; 896 897 flags |= __GFP_COMP; 898 page = alloc_kmem_pages(flags, order); 899 ret = page ? page_address(page) : NULL; 900 kmemleak_alloc(ret, size, 1, flags); 901 kasan_kmalloc_large(ret, size); 902 return ret; 903 } 904 EXPORT_SYMBOL(kmalloc_order); 905 906 #ifdef CONFIG_TRACING 907 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 908 { 909 void *ret = kmalloc_order(size, flags, order); 910 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 911 return ret; 912 } 913 EXPORT_SYMBOL(kmalloc_order_trace); 914 #endif 915 916 #ifdef CONFIG_SLABINFO 917 918 #ifdef CONFIG_SLAB 919 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) 920 #else 921 #define SLABINFO_RIGHTS S_IRUSR 922 #endif 923 924 static void print_slabinfo_header(struct seq_file *m) 925 { 926 /* 927 * Output format version, so at least we can change it 928 * without _too_ many complaints. 929 */ 930 #ifdef CONFIG_DEBUG_SLAB 931 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 932 #else 933 seq_puts(m, "slabinfo - version: 2.1\n"); 934 #endif 935 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 936 "<objperslab> <pagesperslab>"); 937 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 938 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 939 #ifdef CONFIG_DEBUG_SLAB 940 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " 941 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); 942 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 943 #endif 944 seq_putc(m, '\n'); 945 } 946 947 void *slab_start(struct seq_file *m, loff_t *pos) 948 { 949 mutex_lock(&slab_mutex); 950 return seq_list_start(&slab_caches, *pos); 951 } 952 953 void *slab_next(struct seq_file *m, void *p, loff_t *pos) 954 { 955 return seq_list_next(p, &slab_caches, pos); 956 } 957 958 void slab_stop(struct seq_file *m, void *p) 959 { 960 mutex_unlock(&slab_mutex); 961 } 962 963 static void 964 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) 965 { 966 struct kmem_cache *c; 967 struct slabinfo sinfo; 968 969 if (!is_root_cache(s)) 970 return; 971 972 for_each_memcg_cache(c, s) { 973 memset(&sinfo, 0, sizeof(sinfo)); 974 get_slabinfo(c, &sinfo); 975 976 info->active_slabs += sinfo.active_slabs; 977 info->num_slabs += sinfo.num_slabs; 978 info->shared_avail += sinfo.shared_avail; 979 info->active_objs += sinfo.active_objs; 980 info->num_objs += sinfo.num_objs; 981 } 982 } 983 984 static void cache_show(struct kmem_cache *s, struct seq_file *m) 985 { 986 struct slabinfo sinfo; 987 988 memset(&sinfo, 0, sizeof(sinfo)); 989 get_slabinfo(s, &sinfo); 990 991 memcg_accumulate_slabinfo(s, &sinfo); 992 993 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 994 cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, 995 sinfo.objects_per_slab, (1 << sinfo.cache_order)); 996 997 seq_printf(m, " : tunables %4u %4u %4u", 998 sinfo.limit, sinfo.batchcount, sinfo.shared); 999 seq_printf(m, " : slabdata %6lu %6lu %6lu", 1000 sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); 1001 slabinfo_show_stats(m, s); 1002 seq_putc(m, '\n'); 1003 } 1004 1005 static int slab_show(struct seq_file *m, void *p) 1006 { 1007 struct kmem_cache *s = list_entry(p, struct kmem_cache, list); 1008 1009 if (p == slab_caches.next) 1010 print_slabinfo_header(m); 1011 if (is_root_cache(s)) 1012 cache_show(s, m); 1013 return 0; 1014 } 1015 1016 #ifdef CONFIG_MEMCG_KMEM 1017 int memcg_slab_show(struct seq_file *m, void *p) 1018 { 1019 struct kmem_cache *s = list_entry(p, struct kmem_cache, list); 1020 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 1021 1022 if (p == slab_caches.next) 1023 print_slabinfo_header(m); 1024 if (!is_root_cache(s) && s->memcg_params.memcg == memcg) 1025 cache_show(s, m); 1026 return 0; 1027 } 1028 #endif 1029 1030 /* 1031 * slabinfo_op - iterator that generates /proc/slabinfo 1032 * 1033 * Output layout: 1034 * cache-name 1035 * num-active-objs 1036 * total-objs 1037 * object size 1038 * num-active-slabs 1039 * total-slabs 1040 * num-pages-per-slab 1041 * + further values on SMP and with statistics enabled 1042 */ 1043 static const struct seq_operations slabinfo_op = { 1044 .start = slab_start, 1045 .next = slab_next, 1046 .stop = slab_stop, 1047 .show = slab_show, 1048 }; 1049 1050 static int slabinfo_open(struct inode *inode, struct file *file) 1051 { 1052 return seq_open(file, &slabinfo_op); 1053 } 1054 1055 static const struct file_operations proc_slabinfo_operations = { 1056 .open = slabinfo_open, 1057 .read = seq_read, 1058 .write = slabinfo_write, 1059 .llseek = seq_lseek, 1060 .release = seq_release, 1061 }; 1062 1063 static int __init slab_proc_init(void) 1064 { 1065 proc_create("slabinfo", SLABINFO_RIGHTS, NULL, 1066 &proc_slabinfo_operations); 1067 return 0; 1068 } 1069 module_init(slab_proc_init); 1070 #endif /* CONFIG_SLABINFO */ 1071 1072 static __always_inline void *__do_krealloc(const void *p, size_t new_size, 1073 gfp_t flags) 1074 { 1075 void *ret; 1076 size_t ks = 0; 1077 1078 if (p) 1079 ks = ksize(p); 1080 1081 if (ks >= new_size) { 1082 kasan_krealloc((void *)p, new_size); 1083 return (void *)p; 1084 } 1085 1086 ret = kmalloc_track_caller(new_size, flags); 1087 if (ret && p) 1088 memcpy(ret, p, ks); 1089 1090 return ret; 1091 } 1092 1093 /** 1094 * __krealloc - like krealloc() but don't free @p. 1095 * @p: object to reallocate memory for. 1096 * @new_size: how many bytes of memory are required. 1097 * @flags: the type of memory to allocate. 1098 * 1099 * This function is like krealloc() except it never frees the originally 1100 * allocated buffer. Use this if you don't want to free the buffer immediately 1101 * like, for example, with RCU. 1102 */ 1103 void *__krealloc(const void *p, size_t new_size, gfp_t flags) 1104 { 1105 if (unlikely(!new_size)) 1106 return ZERO_SIZE_PTR; 1107 1108 return __do_krealloc(p, new_size, flags); 1109 1110 } 1111 EXPORT_SYMBOL(__krealloc); 1112 1113 /** 1114 * krealloc - reallocate memory. The contents will remain unchanged. 1115 * @p: object to reallocate memory for. 1116 * @new_size: how many bytes of memory are required. 1117 * @flags: the type of memory to allocate. 1118 * 1119 * The contents of the object pointed to are preserved up to the 1120 * lesser of the new and old sizes. If @p is %NULL, krealloc() 1121 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a 1122 * %NULL pointer, the object pointed to is freed. 1123 */ 1124 void *krealloc(const void *p, size_t new_size, gfp_t flags) 1125 { 1126 void *ret; 1127 1128 if (unlikely(!new_size)) { 1129 kfree(p); 1130 return ZERO_SIZE_PTR; 1131 } 1132 1133 ret = __do_krealloc(p, new_size, flags); 1134 if (ret && p != ret) 1135 kfree(p); 1136 1137 return ret; 1138 } 1139 EXPORT_SYMBOL(krealloc); 1140 1141 /** 1142 * kzfree - like kfree but zero memory 1143 * @p: object to free memory of 1144 * 1145 * The memory of the object @p points to is zeroed before freed. 1146 * If @p is %NULL, kzfree() does nothing. 1147 * 1148 * Note: this function zeroes the whole allocated buffer which can be a good 1149 * deal bigger than the requested buffer size passed to kmalloc(). So be 1150 * careful when using this function in performance sensitive code. 1151 */ 1152 void kzfree(const void *p) 1153 { 1154 size_t ks; 1155 void *mem = (void *)p; 1156 1157 if (unlikely(ZERO_OR_NULL_PTR(mem))) 1158 return; 1159 ks = ksize(mem); 1160 memset(mem, 0, ks); 1161 kfree(mem); 1162 } 1163 EXPORT_SYMBOL(kzfree); 1164 1165 /* Tracepoints definitions. */ 1166 EXPORT_TRACEPOINT_SYMBOL(kmalloc); 1167 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 1168 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); 1169 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); 1170 EXPORT_TRACEPOINT_SYMBOL(kfree); 1171 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); 1172