1 /* 2 * Slab allocator functions that are independent of the allocator strategy 3 * 4 * (C) 2012 Christoph Lameter <cl@linux.com> 5 */ 6 #include <linux/slab.h> 7 8 #include <linux/mm.h> 9 #include <linux/poison.h> 10 #include <linux/interrupt.h> 11 #include <linux/memory.h> 12 #include <linux/compiler.h> 13 #include <linux/module.h> 14 #include <linux/cpu.h> 15 #include <linux/uaccess.h> 16 #include <linux/seq_file.h> 17 #include <linux/proc_fs.h> 18 #include <asm/cacheflush.h> 19 #include <asm/tlbflush.h> 20 #include <asm/page.h> 21 #include <linux/memcontrol.h> 22 23 #define CREATE_TRACE_POINTS 24 #include <trace/events/kmem.h> 25 26 #include "slab.h" 27 28 enum slab_state slab_state; 29 LIST_HEAD(slab_caches); 30 DEFINE_MUTEX(slab_mutex); 31 struct kmem_cache *kmem_cache; 32 33 /* 34 * Set of flags that will prevent slab merging 35 */ 36 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 37 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ 38 SLAB_FAILSLAB) 39 40 #define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 41 SLAB_CACHE_DMA | SLAB_NOTRACK) 42 43 /* 44 * Merge control. If this is set then no merging of slab caches will occur. 45 * (Could be removed. This was introduced to pacify the merge skeptics.) 46 */ 47 static int slab_nomerge; 48 49 static int __init setup_slab_nomerge(char *str) 50 { 51 slab_nomerge = 1; 52 return 1; 53 } 54 55 #ifdef CONFIG_SLUB 56 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0); 57 #endif 58 59 __setup("slab_nomerge", setup_slab_nomerge); 60 61 /* 62 * Determine the size of a slab object 63 */ 64 unsigned int kmem_cache_size(struct kmem_cache *s) 65 { 66 return s->object_size; 67 } 68 EXPORT_SYMBOL(kmem_cache_size); 69 70 #ifdef CONFIG_DEBUG_VM 71 static int kmem_cache_sanity_check(const char *name, size_t size) 72 { 73 struct kmem_cache *s = NULL; 74 75 if (!name || in_interrupt() || size < sizeof(void *) || 76 size > KMALLOC_MAX_SIZE) { 77 pr_err("kmem_cache_create(%s) integrity check failed\n", name); 78 return -EINVAL; 79 } 80 81 list_for_each_entry(s, &slab_caches, list) { 82 char tmp; 83 int res; 84 85 /* 86 * This happens when the module gets unloaded and doesn't 87 * destroy its slab cache and no-one else reuses the vmalloc 88 * area of the module. Print a warning. 89 */ 90 res = probe_kernel_address(s->name, tmp); 91 if (res) { 92 pr_err("Slab cache with size %d has lost its name\n", 93 s->object_size); 94 continue; 95 } 96 } 97 98 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 99 return 0; 100 } 101 #else 102 static inline int kmem_cache_sanity_check(const char *name, size_t size) 103 { 104 return 0; 105 } 106 #endif 107 108 #ifdef CONFIG_MEMCG_KMEM 109 void slab_init_memcg_params(struct kmem_cache *s) 110 { 111 s->memcg_params.is_root_cache = true; 112 INIT_LIST_HEAD(&s->memcg_params.list); 113 RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL); 114 } 115 116 static int init_memcg_params(struct kmem_cache *s, 117 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 118 { 119 struct memcg_cache_array *arr; 120 121 if (memcg) { 122 s->memcg_params.is_root_cache = false; 123 s->memcg_params.memcg = memcg; 124 s->memcg_params.root_cache = root_cache; 125 return 0; 126 } 127 128 slab_init_memcg_params(s); 129 130 if (!memcg_nr_cache_ids) 131 return 0; 132 133 arr = kzalloc(sizeof(struct memcg_cache_array) + 134 memcg_nr_cache_ids * sizeof(void *), 135 GFP_KERNEL); 136 if (!arr) 137 return -ENOMEM; 138 139 RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr); 140 return 0; 141 } 142 143 static void destroy_memcg_params(struct kmem_cache *s) 144 { 145 if (is_root_cache(s)) 146 kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); 147 } 148 149 static int update_memcg_params(struct kmem_cache *s, int new_array_size) 150 { 151 struct memcg_cache_array *old, *new; 152 153 if (!is_root_cache(s)) 154 return 0; 155 156 new = kzalloc(sizeof(struct memcg_cache_array) + 157 new_array_size * sizeof(void *), GFP_KERNEL); 158 if (!new) 159 return -ENOMEM; 160 161 old = rcu_dereference_protected(s->memcg_params.memcg_caches, 162 lockdep_is_held(&slab_mutex)); 163 if (old) 164 memcpy(new->entries, old->entries, 165 memcg_nr_cache_ids * sizeof(void *)); 166 167 rcu_assign_pointer(s->memcg_params.memcg_caches, new); 168 if (old) 169 kfree_rcu(old, rcu); 170 return 0; 171 } 172 173 int memcg_update_all_caches(int num_memcgs) 174 { 175 struct kmem_cache *s; 176 int ret = 0; 177 178 mutex_lock(&slab_mutex); 179 list_for_each_entry(s, &slab_caches, list) { 180 ret = update_memcg_params(s, num_memcgs); 181 /* 182 * Instead of freeing the memory, we'll just leave the caches 183 * up to this point in an updated state. 184 */ 185 if (ret) 186 break; 187 } 188 mutex_unlock(&slab_mutex); 189 return ret; 190 } 191 #else 192 static inline int init_memcg_params(struct kmem_cache *s, 193 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 194 { 195 return 0; 196 } 197 198 static inline void destroy_memcg_params(struct kmem_cache *s) 199 { 200 } 201 #endif /* CONFIG_MEMCG_KMEM */ 202 203 /* 204 * Find a mergeable slab cache 205 */ 206 int slab_unmergeable(struct kmem_cache *s) 207 { 208 if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) 209 return 1; 210 211 if (!is_root_cache(s)) 212 return 1; 213 214 if (s->ctor) 215 return 1; 216 217 /* 218 * We may have set a slab to be unmergeable during bootstrap. 219 */ 220 if (s->refcount < 0) 221 return 1; 222 223 return 0; 224 } 225 226 struct kmem_cache *find_mergeable(size_t size, size_t align, 227 unsigned long flags, const char *name, void (*ctor)(void *)) 228 { 229 struct kmem_cache *s; 230 231 if (slab_nomerge || (flags & SLAB_NEVER_MERGE)) 232 return NULL; 233 234 if (ctor) 235 return NULL; 236 237 size = ALIGN(size, sizeof(void *)); 238 align = calculate_alignment(flags, align, size); 239 size = ALIGN(size, align); 240 flags = kmem_cache_flags(size, flags, name, NULL); 241 242 list_for_each_entry_reverse(s, &slab_caches, list) { 243 if (slab_unmergeable(s)) 244 continue; 245 246 if (size > s->size) 247 continue; 248 249 if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME)) 250 continue; 251 /* 252 * Check if alignment is compatible. 253 * Courtesy of Adrian Drzewiecki 254 */ 255 if ((s->size & ~(align - 1)) != s->size) 256 continue; 257 258 if (s->size - size >= sizeof(void *)) 259 continue; 260 261 if (IS_ENABLED(CONFIG_SLAB) && align && 262 (align > s->align || s->align % align)) 263 continue; 264 265 return s; 266 } 267 return NULL; 268 } 269 270 /* 271 * Figure out what the alignment of the objects will be given a set of 272 * flags, a user specified alignment and the size of the objects. 273 */ 274 unsigned long calculate_alignment(unsigned long flags, 275 unsigned long align, unsigned long size) 276 { 277 /* 278 * If the user wants hardware cache aligned objects then follow that 279 * suggestion if the object is sufficiently large. 280 * 281 * The hardware cache alignment cannot override the specified 282 * alignment though. If that is greater then use it. 283 */ 284 if (flags & SLAB_HWCACHE_ALIGN) { 285 unsigned long ralign = cache_line_size(); 286 while (size <= ralign / 2) 287 ralign /= 2; 288 align = max(align, ralign); 289 } 290 291 if (align < ARCH_SLAB_MINALIGN) 292 align = ARCH_SLAB_MINALIGN; 293 294 return ALIGN(align, sizeof(void *)); 295 } 296 297 static struct kmem_cache * 298 do_kmem_cache_create(const char *name, size_t object_size, size_t size, 299 size_t align, unsigned long flags, void (*ctor)(void *), 300 struct mem_cgroup *memcg, struct kmem_cache *root_cache) 301 { 302 struct kmem_cache *s; 303 int err; 304 305 err = -ENOMEM; 306 s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); 307 if (!s) 308 goto out; 309 310 s->name = name; 311 s->object_size = object_size; 312 s->size = size; 313 s->align = align; 314 s->ctor = ctor; 315 316 err = init_memcg_params(s, memcg, root_cache); 317 if (err) 318 goto out_free_cache; 319 320 err = __kmem_cache_create(s, flags); 321 if (err) 322 goto out_free_cache; 323 324 s->refcount = 1; 325 list_add(&s->list, &slab_caches); 326 out: 327 if (err) 328 return ERR_PTR(err); 329 return s; 330 331 out_free_cache: 332 destroy_memcg_params(s); 333 kmem_cache_free(kmem_cache, s); 334 goto out; 335 } 336 337 /* 338 * kmem_cache_create - Create a cache. 339 * @name: A string which is used in /proc/slabinfo to identify this cache. 340 * @size: The size of objects to be created in this cache. 341 * @align: The required alignment for the objects. 342 * @flags: SLAB flags 343 * @ctor: A constructor for the objects. 344 * 345 * Returns a ptr to the cache on success, NULL on failure. 346 * Cannot be called within a interrupt, but can be interrupted. 347 * The @ctor is run when new pages are allocated by the cache. 348 * 349 * The flags are 350 * 351 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 352 * to catch references to uninitialised memory. 353 * 354 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 355 * for buffer overruns. 356 * 357 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 358 * cacheline. This can be beneficial if you're counting cycles as closely 359 * as davem. 360 */ 361 struct kmem_cache * 362 kmem_cache_create(const char *name, size_t size, size_t align, 363 unsigned long flags, void (*ctor)(void *)) 364 { 365 struct kmem_cache *s; 366 const char *cache_name; 367 int err; 368 369 get_online_cpus(); 370 get_online_mems(); 371 memcg_get_cache_ids(); 372 373 mutex_lock(&slab_mutex); 374 375 err = kmem_cache_sanity_check(name, size); 376 if (err) { 377 s = NULL; /* suppress uninit var warning */ 378 goto out_unlock; 379 } 380 381 /* 382 * Some allocators will constraint the set of valid flags to a subset 383 * of all flags. We expect them to define CACHE_CREATE_MASK in this 384 * case, and we'll just provide them with a sanitized version of the 385 * passed flags. 386 */ 387 flags &= CACHE_CREATE_MASK; 388 389 s = __kmem_cache_alias(name, size, align, flags, ctor); 390 if (s) 391 goto out_unlock; 392 393 cache_name = kstrdup_const(name, GFP_KERNEL); 394 if (!cache_name) { 395 err = -ENOMEM; 396 goto out_unlock; 397 } 398 399 s = do_kmem_cache_create(cache_name, size, size, 400 calculate_alignment(flags, align, size), 401 flags, ctor, NULL, NULL); 402 if (IS_ERR(s)) { 403 err = PTR_ERR(s); 404 kfree_const(cache_name); 405 } 406 407 out_unlock: 408 mutex_unlock(&slab_mutex); 409 410 memcg_put_cache_ids(); 411 put_online_mems(); 412 put_online_cpus(); 413 414 if (err) { 415 if (flags & SLAB_PANIC) 416 panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", 417 name, err); 418 else { 419 printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d", 420 name, err); 421 dump_stack(); 422 } 423 return NULL; 424 } 425 return s; 426 } 427 EXPORT_SYMBOL(kmem_cache_create); 428 429 static int do_kmem_cache_shutdown(struct kmem_cache *s, 430 struct list_head *release, bool *need_rcu_barrier) 431 { 432 if (__kmem_cache_shutdown(s) != 0) { 433 printk(KERN_ERR "kmem_cache_destroy %s: " 434 "Slab cache still has objects\n", s->name); 435 dump_stack(); 436 return -EBUSY; 437 } 438 439 if (s->flags & SLAB_DESTROY_BY_RCU) 440 *need_rcu_barrier = true; 441 442 #ifdef CONFIG_MEMCG_KMEM 443 if (!is_root_cache(s)) 444 list_del(&s->memcg_params.list); 445 #endif 446 list_move(&s->list, release); 447 return 0; 448 } 449 450 static void do_kmem_cache_release(struct list_head *release, 451 bool need_rcu_barrier) 452 { 453 struct kmem_cache *s, *s2; 454 455 if (need_rcu_barrier) 456 rcu_barrier(); 457 458 list_for_each_entry_safe(s, s2, release, list) { 459 #ifdef SLAB_SUPPORTS_SYSFS 460 sysfs_slab_remove(s); 461 #else 462 slab_kmem_cache_release(s); 463 #endif 464 } 465 } 466 467 #ifdef CONFIG_MEMCG_KMEM 468 /* 469 * memcg_create_kmem_cache - Create a cache for a memory cgroup. 470 * @memcg: The memory cgroup the new cache is for. 471 * @root_cache: The parent of the new cache. 472 * 473 * This function attempts to create a kmem cache that will serve allocation 474 * requests going from @memcg to @root_cache. The new cache inherits properties 475 * from its parent. 476 */ 477 void memcg_create_kmem_cache(struct mem_cgroup *memcg, 478 struct kmem_cache *root_cache) 479 { 480 static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ 481 struct cgroup_subsys_state *css = mem_cgroup_css(memcg); 482 struct memcg_cache_array *arr; 483 struct kmem_cache *s = NULL; 484 char *cache_name; 485 int idx; 486 487 get_online_cpus(); 488 get_online_mems(); 489 490 mutex_lock(&slab_mutex); 491 492 /* 493 * The memory cgroup could have been deactivated while the cache 494 * creation work was pending. 495 */ 496 if (!memcg_kmem_is_active(memcg)) 497 goto out_unlock; 498 499 idx = memcg_cache_id(memcg); 500 arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches, 501 lockdep_is_held(&slab_mutex)); 502 503 /* 504 * Since per-memcg caches are created asynchronously on first 505 * allocation (see memcg_kmem_get_cache()), several threads can try to 506 * create the same cache, but only one of them may succeed. 507 */ 508 if (arr->entries[idx]) 509 goto out_unlock; 510 511 cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); 512 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, 513 css->id, memcg_name_buf); 514 if (!cache_name) 515 goto out_unlock; 516 517 s = do_kmem_cache_create(cache_name, root_cache->object_size, 518 root_cache->size, root_cache->align, 519 root_cache->flags, root_cache->ctor, 520 memcg, root_cache); 521 /* 522 * If we could not create a memcg cache, do not complain, because 523 * that's not critical at all as we can always proceed with the root 524 * cache. 525 */ 526 if (IS_ERR(s)) { 527 kfree(cache_name); 528 goto out_unlock; 529 } 530 531 list_add(&s->memcg_params.list, &root_cache->memcg_params.list); 532 533 /* 534 * Since readers won't lock (see cache_from_memcg_idx()), we need a 535 * barrier here to ensure nobody will see the kmem_cache partially 536 * initialized. 537 */ 538 smp_wmb(); 539 arr->entries[idx] = s; 540 541 out_unlock: 542 mutex_unlock(&slab_mutex); 543 544 put_online_mems(); 545 put_online_cpus(); 546 } 547 548 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) 549 { 550 int idx; 551 struct memcg_cache_array *arr; 552 struct kmem_cache *s, *c; 553 554 idx = memcg_cache_id(memcg); 555 556 get_online_cpus(); 557 get_online_mems(); 558 559 mutex_lock(&slab_mutex); 560 list_for_each_entry(s, &slab_caches, list) { 561 if (!is_root_cache(s)) 562 continue; 563 564 arr = rcu_dereference_protected(s->memcg_params.memcg_caches, 565 lockdep_is_held(&slab_mutex)); 566 c = arr->entries[idx]; 567 if (!c) 568 continue; 569 570 __kmem_cache_shrink(c, true); 571 arr->entries[idx] = NULL; 572 } 573 mutex_unlock(&slab_mutex); 574 575 put_online_mems(); 576 put_online_cpus(); 577 } 578 579 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) 580 { 581 LIST_HEAD(release); 582 bool need_rcu_barrier = false; 583 struct kmem_cache *s, *s2; 584 585 get_online_cpus(); 586 get_online_mems(); 587 588 mutex_lock(&slab_mutex); 589 list_for_each_entry_safe(s, s2, &slab_caches, list) { 590 if (is_root_cache(s) || s->memcg_params.memcg != memcg) 591 continue; 592 /* 593 * The cgroup is about to be freed and therefore has no charges 594 * left. Hence, all its caches must be empty by now. 595 */ 596 BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier)); 597 } 598 mutex_unlock(&slab_mutex); 599 600 put_online_mems(); 601 put_online_cpus(); 602 603 do_kmem_cache_release(&release, need_rcu_barrier); 604 } 605 #endif /* CONFIG_MEMCG_KMEM */ 606 607 void slab_kmem_cache_release(struct kmem_cache *s) 608 { 609 destroy_memcg_params(s); 610 kfree_const(s->name); 611 kmem_cache_free(kmem_cache, s); 612 } 613 614 void kmem_cache_destroy(struct kmem_cache *s) 615 { 616 struct kmem_cache *c, *c2; 617 LIST_HEAD(release); 618 bool need_rcu_barrier = false; 619 bool busy = false; 620 621 BUG_ON(!is_root_cache(s)); 622 623 get_online_cpus(); 624 get_online_mems(); 625 626 mutex_lock(&slab_mutex); 627 628 s->refcount--; 629 if (s->refcount) 630 goto out_unlock; 631 632 for_each_memcg_cache_safe(c, c2, s) { 633 if (do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) 634 busy = true; 635 } 636 637 if (!busy) 638 do_kmem_cache_shutdown(s, &release, &need_rcu_barrier); 639 640 out_unlock: 641 mutex_unlock(&slab_mutex); 642 643 put_online_mems(); 644 put_online_cpus(); 645 646 do_kmem_cache_release(&release, need_rcu_barrier); 647 } 648 EXPORT_SYMBOL(kmem_cache_destroy); 649 650 /** 651 * kmem_cache_shrink - Shrink a cache. 652 * @cachep: The cache to shrink. 653 * 654 * Releases as many slabs as possible for a cache. 655 * To help debugging, a zero exit status indicates all slabs were released. 656 */ 657 int kmem_cache_shrink(struct kmem_cache *cachep) 658 { 659 int ret; 660 661 get_online_cpus(); 662 get_online_mems(); 663 ret = __kmem_cache_shrink(cachep, false); 664 put_online_mems(); 665 put_online_cpus(); 666 return ret; 667 } 668 EXPORT_SYMBOL(kmem_cache_shrink); 669 670 int slab_is_available(void) 671 { 672 return slab_state >= UP; 673 } 674 675 #ifndef CONFIG_SLOB 676 /* Create a cache during boot when no slab services are available yet */ 677 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, 678 unsigned long flags) 679 { 680 int err; 681 682 s->name = name; 683 s->size = s->object_size = size; 684 s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); 685 686 slab_init_memcg_params(s); 687 688 err = __kmem_cache_create(s, flags); 689 690 if (err) 691 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", 692 name, size, err); 693 694 s->refcount = -1; /* Exempt from merging for now */ 695 } 696 697 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, 698 unsigned long flags) 699 { 700 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); 701 702 if (!s) 703 panic("Out of memory when creating slab %s\n", name); 704 705 create_boot_cache(s, name, size, flags); 706 list_add(&s->list, &slab_caches); 707 s->refcount = 1; 708 return s; 709 } 710 711 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; 712 EXPORT_SYMBOL(kmalloc_caches); 713 714 #ifdef CONFIG_ZONE_DMA 715 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; 716 EXPORT_SYMBOL(kmalloc_dma_caches); 717 #endif 718 719 /* 720 * Conversion table for small slabs sizes / 8 to the index in the 721 * kmalloc array. This is necessary for slabs < 192 since we have non power 722 * of two cache sizes there. The size of larger slabs can be determined using 723 * fls. 724 */ 725 static s8 size_index[24] = { 726 3, /* 8 */ 727 4, /* 16 */ 728 5, /* 24 */ 729 5, /* 32 */ 730 6, /* 40 */ 731 6, /* 48 */ 732 6, /* 56 */ 733 6, /* 64 */ 734 1, /* 72 */ 735 1, /* 80 */ 736 1, /* 88 */ 737 1, /* 96 */ 738 7, /* 104 */ 739 7, /* 112 */ 740 7, /* 120 */ 741 7, /* 128 */ 742 2, /* 136 */ 743 2, /* 144 */ 744 2, /* 152 */ 745 2, /* 160 */ 746 2, /* 168 */ 747 2, /* 176 */ 748 2, /* 184 */ 749 2 /* 192 */ 750 }; 751 752 static inline int size_index_elem(size_t bytes) 753 { 754 return (bytes - 1) / 8; 755 } 756 757 /* 758 * Find the kmem_cache structure that serves a given size of 759 * allocation 760 */ 761 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) 762 { 763 int index; 764 765 if (unlikely(size > KMALLOC_MAX_SIZE)) { 766 WARN_ON_ONCE(!(flags & __GFP_NOWARN)); 767 return NULL; 768 } 769 770 if (size <= 192) { 771 if (!size) 772 return ZERO_SIZE_PTR; 773 774 index = size_index[size_index_elem(size)]; 775 } else 776 index = fls(size - 1); 777 778 #ifdef CONFIG_ZONE_DMA 779 if (unlikely((flags & GFP_DMA))) 780 return kmalloc_dma_caches[index]; 781 782 #endif 783 return kmalloc_caches[index]; 784 } 785 786 /* 787 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time. 788 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is 789 * kmalloc-67108864. 790 */ 791 static struct { 792 const char *name; 793 unsigned long size; 794 } const kmalloc_info[] __initconst = { 795 {NULL, 0}, {"kmalloc-96", 96}, 796 {"kmalloc-192", 192}, {"kmalloc-8", 8}, 797 {"kmalloc-16", 16}, {"kmalloc-32", 32}, 798 {"kmalloc-64", 64}, {"kmalloc-128", 128}, 799 {"kmalloc-256", 256}, {"kmalloc-512", 512}, 800 {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048}, 801 {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192}, 802 {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768}, 803 {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072}, 804 {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288}, 805 {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152}, 806 {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608}, 807 {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432}, 808 {"kmalloc-67108864", 67108864} 809 }; 810 811 /* 812 * Patch up the size_index table if we have strange large alignment 813 * requirements for the kmalloc array. This is only the case for 814 * MIPS it seems. The standard arches will not generate any code here. 815 * 816 * Largest permitted alignment is 256 bytes due to the way we 817 * handle the index determination for the smaller caches. 818 * 819 * Make sure that nothing crazy happens if someone starts tinkering 820 * around with ARCH_KMALLOC_MINALIGN 821 */ 822 void __init setup_kmalloc_cache_index_table(void) 823 { 824 int i; 825 826 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 827 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 828 829 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { 830 int elem = size_index_elem(i); 831 832 if (elem >= ARRAY_SIZE(size_index)) 833 break; 834 size_index[elem] = KMALLOC_SHIFT_LOW; 835 } 836 837 if (KMALLOC_MIN_SIZE >= 64) { 838 /* 839 * The 96 byte size cache is not used if the alignment 840 * is 64 byte. 841 */ 842 for (i = 64 + 8; i <= 96; i += 8) 843 size_index[size_index_elem(i)] = 7; 844 845 } 846 847 if (KMALLOC_MIN_SIZE >= 128) { 848 /* 849 * The 192 byte sized cache is not used if the alignment 850 * is 128 byte. Redirect kmalloc to use the 256 byte cache 851 * instead. 852 */ 853 for (i = 128 + 8; i <= 192; i += 8) 854 size_index[size_index_elem(i)] = 8; 855 } 856 } 857 858 static void __init new_kmalloc_cache(int idx, unsigned long flags) 859 { 860 kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, 861 kmalloc_info[idx].size, flags); 862 } 863 864 /* 865 * Create the kmalloc array. Some of the regular kmalloc arrays 866 * may already have been created because they were needed to 867 * enable allocations for slab creation. 868 */ 869 void __init create_kmalloc_caches(unsigned long flags) 870 { 871 int i; 872 873 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { 874 if (!kmalloc_caches[i]) 875 new_kmalloc_cache(i, flags); 876 877 /* 878 * Caches that are not of the two-to-the-power-of size. 879 * These have to be created immediately after the 880 * earlier power of two caches 881 */ 882 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) 883 new_kmalloc_cache(1, flags); 884 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) 885 new_kmalloc_cache(2, flags); 886 } 887 888 /* Kmalloc array is now usable */ 889 slab_state = UP; 890 891 #ifdef CONFIG_ZONE_DMA 892 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { 893 struct kmem_cache *s = kmalloc_caches[i]; 894 895 if (s) { 896 int size = kmalloc_size(i); 897 char *n = kasprintf(GFP_NOWAIT, 898 "dma-kmalloc-%d", size); 899 900 BUG_ON(!n); 901 kmalloc_dma_caches[i] = create_kmalloc_cache(n, 902 size, SLAB_CACHE_DMA | flags); 903 } 904 } 905 #endif 906 } 907 #endif /* !CONFIG_SLOB */ 908 909 /* 910 * To avoid unnecessary overhead, we pass through large allocation requests 911 * directly to the page allocator. We use __GFP_COMP, because we will need to 912 * know the allocation order to free the pages properly in kfree. 913 */ 914 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) 915 { 916 void *ret; 917 struct page *page; 918 919 flags |= __GFP_COMP; 920 page = alloc_kmem_pages(flags, order); 921 ret = page ? page_address(page) : NULL; 922 kmemleak_alloc(ret, size, 1, flags); 923 kasan_kmalloc_large(ret, size); 924 return ret; 925 } 926 EXPORT_SYMBOL(kmalloc_order); 927 928 #ifdef CONFIG_TRACING 929 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) 930 { 931 void *ret = kmalloc_order(size, flags, order); 932 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); 933 return ret; 934 } 935 EXPORT_SYMBOL(kmalloc_order_trace); 936 #endif 937 938 #ifdef CONFIG_SLABINFO 939 940 #ifdef CONFIG_SLAB 941 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) 942 #else 943 #define SLABINFO_RIGHTS S_IRUSR 944 #endif 945 946 static void print_slabinfo_header(struct seq_file *m) 947 { 948 /* 949 * Output format version, so at least we can change it 950 * without _too_ many complaints. 951 */ 952 #ifdef CONFIG_DEBUG_SLAB 953 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n"); 954 #else 955 seq_puts(m, "slabinfo - version: 2.1\n"); 956 #endif 957 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 958 "<objperslab> <pagesperslab>"); 959 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 960 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 961 #ifdef CONFIG_DEBUG_SLAB 962 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> " 963 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>"); 964 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>"); 965 #endif 966 seq_putc(m, '\n'); 967 } 968 969 void *slab_start(struct seq_file *m, loff_t *pos) 970 { 971 mutex_lock(&slab_mutex); 972 return seq_list_start(&slab_caches, *pos); 973 } 974 975 void *slab_next(struct seq_file *m, void *p, loff_t *pos) 976 { 977 return seq_list_next(p, &slab_caches, pos); 978 } 979 980 void slab_stop(struct seq_file *m, void *p) 981 { 982 mutex_unlock(&slab_mutex); 983 } 984 985 static void 986 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) 987 { 988 struct kmem_cache *c; 989 struct slabinfo sinfo; 990 991 if (!is_root_cache(s)) 992 return; 993 994 for_each_memcg_cache(c, s) { 995 memset(&sinfo, 0, sizeof(sinfo)); 996 get_slabinfo(c, &sinfo); 997 998 info->active_slabs += sinfo.active_slabs; 999 info->num_slabs += sinfo.num_slabs; 1000 info->shared_avail += sinfo.shared_avail; 1001 info->active_objs += sinfo.active_objs; 1002 info->num_objs += sinfo.num_objs; 1003 } 1004 } 1005 1006 static void cache_show(struct kmem_cache *s, struct seq_file *m) 1007 { 1008 struct slabinfo sinfo; 1009 1010 memset(&sinfo, 0, sizeof(sinfo)); 1011 get_slabinfo(s, &sinfo); 1012 1013 memcg_accumulate_slabinfo(s, &sinfo); 1014 1015 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 1016 cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, 1017 sinfo.objects_per_slab, (1 << sinfo.cache_order)); 1018 1019 seq_printf(m, " : tunables %4u %4u %4u", 1020 sinfo.limit, sinfo.batchcount, sinfo.shared); 1021 seq_printf(m, " : slabdata %6lu %6lu %6lu", 1022 sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail); 1023 slabinfo_show_stats(m, s); 1024 seq_putc(m, '\n'); 1025 } 1026 1027 static int slab_show(struct seq_file *m, void *p) 1028 { 1029 struct kmem_cache *s = list_entry(p, struct kmem_cache, list); 1030 1031 if (p == slab_caches.next) 1032 print_slabinfo_header(m); 1033 if (is_root_cache(s)) 1034 cache_show(s, m); 1035 return 0; 1036 } 1037 1038 #ifdef CONFIG_MEMCG_KMEM 1039 int memcg_slab_show(struct seq_file *m, void *p) 1040 { 1041 struct kmem_cache *s = list_entry(p, struct kmem_cache, list); 1042 struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); 1043 1044 if (p == slab_caches.next) 1045 print_slabinfo_header(m); 1046 if (!is_root_cache(s) && s->memcg_params.memcg == memcg) 1047 cache_show(s, m); 1048 return 0; 1049 } 1050 #endif 1051 1052 /* 1053 * slabinfo_op - iterator that generates /proc/slabinfo 1054 * 1055 * Output layout: 1056 * cache-name 1057 * num-active-objs 1058 * total-objs 1059 * object size 1060 * num-active-slabs 1061 * total-slabs 1062 * num-pages-per-slab 1063 * + further values on SMP and with statistics enabled 1064 */ 1065 static const struct seq_operations slabinfo_op = { 1066 .start = slab_start, 1067 .next = slab_next, 1068 .stop = slab_stop, 1069 .show = slab_show, 1070 }; 1071 1072 static int slabinfo_open(struct inode *inode, struct file *file) 1073 { 1074 return seq_open(file, &slabinfo_op); 1075 } 1076 1077 static const struct file_operations proc_slabinfo_operations = { 1078 .open = slabinfo_open, 1079 .read = seq_read, 1080 .write = slabinfo_write, 1081 .llseek = seq_lseek, 1082 .release = seq_release, 1083 }; 1084 1085 static int __init slab_proc_init(void) 1086 { 1087 proc_create("slabinfo", SLABINFO_RIGHTS, NULL, 1088 &proc_slabinfo_operations); 1089 return 0; 1090 } 1091 module_init(slab_proc_init); 1092 #endif /* CONFIG_SLABINFO */ 1093 1094 static __always_inline void *__do_krealloc(const void *p, size_t new_size, 1095 gfp_t flags) 1096 { 1097 void *ret; 1098 size_t ks = 0; 1099 1100 if (p) 1101 ks = ksize(p); 1102 1103 if (ks >= new_size) { 1104 kasan_krealloc((void *)p, new_size); 1105 return (void *)p; 1106 } 1107 1108 ret = kmalloc_track_caller(new_size, flags); 1109 if (ret && p) 1110 memcpy(ret, p, ks); 1111 1112 return ret; 1113 } 1114 1115 /** 1116 * __krealloc - like krealloc() but don't free @p. 1117 * @p: object to reallocate memory for. 1118 * @new_size: how many bytes of memory are required. 1119 * @flags: the type of memory to allocate. 1120 * 1121 * This function is like krealloc() except it never frees the originally 1122 * allocated buffer. Use this if you don't want to free the buffer immediately 1123 * like, for example, with RCU. 1124 */ 1125 void *__krealloc(const void *p, size_t new_size, gfp_t flags) 1126 { 1127 if (unlikely(!new_size)) 1128 return ZERO_SIZE_PTR; 1129 1130 return __do_krealloc(p, new_size, flags); 1131 1132 } 1133 EXPORT_SYMBOL(__krealloc); 1134 1135 /** 1136 * krealloc - reallocate memory. The contents will remain unchanged. 1137 * @p: object to reallocate memory for. 1138 * @new_size: how many bytes of memory are required. 1139 * @flags: the type of memory to allocate. 1140 * 1141 * The contents of the object pointed to are preserved up to the 1142 * lesser of the new and old sizes. If @p is %NULL, krealloc() 1143 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a 1144 * %NULL pointer, the object pointed to is freed. 1145 */ 1146 void *krealloc(const void *p, size_t new_size, gfp_t flags) 1147 { 1148 void *ret; 1149 1150 if (unlikely(!new_size)) { 1151 kfree(p); 1152 return ZERO_SIZE_PTR; 1153 } 1154 1155 ret = __do_krealloc(p, new_size, flags); 1156 if (ret && p != ret) 1157 kfree(p); 1158 1159 return ret; 1160 } 1161 EXPORT_SYMBOL(krealloc); 1162 1163 /** 1164 * kzfree - like kfree but zero memory 1165 * @p: object to free memory of 1166 * 1167 * The memory of the object @p points to is zeroed before freed. 1168 * If @p is %NULL, kzfree() does nothing. 1169 * 1170 * Note: this function zeroes the whole allocated buffer which can be a good 1171 * deal bigger than the requested buffer size passed to kmalloc(). So be 1172 * careful when using this function in performance sensitive code. 1173 */ 1174 void kzfree(const void *p) 1175 { 1176 size_t ks; 1177 void *mem = (void *)p; 1178 1179 if (unlikely(ZERO_OR_NULL_PTR(mem))) 1180 return; 1181 ks = ksize(mem); 1182 memset(mem, 0, ks); 1183 kfree(mem); 1184 } 1185 EXPORT_SYMBOL(kzfree); 1186 1187 /* Tracepoints definitions. */ 1188 EXPORT_TRACEPOINT_SYMBOL(kmalloc); 1189 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 1190 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); 1191 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); 1192 EXPORT_TRACEPOINT_SYMBOL(kfree); 1193 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); 1194