// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stack depot - a stack trace storage that avoids duplication.
 *
 * Internally, stack depot maintains a hash table of unique stacktraces. The
 * stack traces themselves are stored contiguously one after another in a set
 * of separate page allocations.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on the code by Dmitry Chernenkov.
 */

#define pr_fmt(fmt) "stackdepot: " fmt

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8)

#define DEPOT_VALID_BITS 1
#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER))
#define DEPOT_STACK_ALIGN 4
#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_VALID_BITS - \
                               DEPOT_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS)
#define DEPOT_POOLS_CAP 8192
#define DEPOT_MAX_POOLS \
        (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
         (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)

/* Compact structure that stores a reference to a stack. */
union handle_parts {
        depot_stack_handle_t handle;
        struct {
                u32 pool_index : DEPOT_POOL_INDEX_BITS;
                u32 offset : DEPOT_OFFSET_BITS;
                u32 valid : DEPOT_VALID_BITS;
                u32 extra : STACK_DEPOT_EXTRA_BITS;
        };
};

struct stack_record {
        struct stack_record *next;      /* Link in the hash table */
        u32 hash;                       /* Hash in the hash table */
        u32 size;                       /* Number of stored frames */
        union handle_parts handle;
        unsigned long entries[];        /* Variable-sized array of frames */
};

static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c

/* Hash table of pointers to stored stack traces. */
static struct stack_record **stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;

/* Array of memory regions that store stack traces. */
static void *stack_pools[DEPOT_MAX_POOLS];
/* Currently used pool in stack_pools. */
static int pool_index;
/* Offset to the unused space in the currently used pool. */
static size_t pool_offset;
/* Lock that protects the variables above. */
static DEFINE_RAW_SPINLOCK(pool_lock);
/*
 * Stack depot tries to keep an extra pool allocated even before it runs out
 * of space in the currently used pool.
 * This flag marks that this next extra pool needs to be allocated and
 * initialized. It has the value 0 when either the next pool is already
 * initialized or the limit on the number of pools is reached.
 */
static int next_pool_required = 1;
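
/*
 * Illustrative sizing, assuming 4 KB pages (PAGE_SHIFT == 12), a 32-bit
 * depot_stack_handle_t and STACK_DEPOT_EXTRA_BITS == 5; these values depend
 * on the architecture and configuration:
 *
 *      DEPOT_POOL_SIZE       = 1LL << (12 + 2)  = 16 KB per pool
 *      DEPOT_STACK_ALIGN     -> records aligned to 1 << 4 = 16 bytes
 *      DEPOT_OFFSET_BITS     = 2 + 12 - 4       = 10 (1024 slots of 16 bytes)
 *      DEPOT_POOL_INDEX_BITS = 32 - 1 - 10 - 5  = 16, so the pool count is
 *      capped by DEPOT_POOLS_CAP = 8192 pools, i.e. at most 8192 * 16 KB =
 *      128 MB of stored stack traces.
 */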

static int __init disable_stack_depot(char *str)
{
        int ret;

        ret = kstrtobool(str, &stack_depot_disabled);
        if (!ret && stack_depot_disabled) {
                pr_info("disabled\n");
                stack_table = NULL;
        }
        return 0;
}
early_param("stack_depot_disable", disable_stack_depot);

void __init stack_depot_request_early_init(void)
{
        /* Too late to request early init now. */
        WARN_ON(__stack_depot_early_init_passed);

        __stack_depot_early_init_requested = true;
}

/* Allocates a hash table via memblock. Can only be used during early boot. */
int __init stack_depot_early_init(void)
{
        unsigned long entries = 0;

        /* This function must be called only once, from mm_init(). */
        if (WARN_ON(__stack_depot_early_init_passed))
                return 0;
        __stack_depot_early_init_passed = true;

        /*
         * If KASAN is enabled, use the maximum order: KASAN is frequently used
         * in fuzzing scenarios, which leads to a large number of different
         * stack traces being stored in stack depot.
         */
        if (kasan_enabled() && !stack_bucket_number_order)
                stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

        if (!__stack_depot_early_init_requested || stack_depot_disabled)
                return 0;

        /*
         * If stack_bucket_number_order is not set, leave entries as 0 to rely
         * on the automatic calculations performed by alloc_large_system_hash.
         */
        if (stack_bucket_number_order)
                entries = 1UL << stack_bucket_number_order;
        pr_info("allocating hash table via alloc_large_system_hash\n");
        stack_table = alloc_large_system_hash("stackdepot",
                                                sizeof(struct stack_record *),
                                                entries,
                                                STACK_HASH_TABLE_SCALE,
                                                HASH_EARLY | HASH_ZERO,
                                                NULL,
                                                &stack_hash_mask,
                                                1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
                                                1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
        if (!stack_table) {
                pr_err("hash table allocation failed, disabling\n");
                stack_depot_disabled = true;
                return -ENOMEM;
        }

        return 0;
}

/* Allocates a hash table via kvcalloc. Can be used after boot. */
int stack_depot_init(void)
{
        static DEFINE_MUTEX(stack_depot_init_mutex);
        unsigned long entries;
        int ret = 0;

        mutex_lock(&stack_depot_init_mutex);

        if (stack_depot_disabled || stack_table)
                goto out_unlock;

        /*
         * Similarly to stack_depot_early_init, use stack_bucket_number_order
         * if assigned, and rely on automatic scaling otherwise.
         */
        if (stack_bucket_number_order) {
                entries = 1UL << stack_bucket_number_order;
        } else {
                int scale = STACK_HASH_TABLE_SCALE;

                entries = nr_free_buffer_pages();
                entries = roundup_pow_of_two(entries);

                if (scale > PAGE_SHIFT)
                        entries >>= (scale - PAGE_SHIFT);
                else
                        entries <<= (PAGE_SHIFT - scale);
        }

        if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
                entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
        if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
                entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;

        pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
        stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);
        if (!stack_table) {
                pr_err("hash table allocation failed, disabling\n");
                stack_depot_disabled = true;
                ret = -ENOMEM;
                goto out_unlock;
        }
        stack_hash_mask = entries - 1;

out_unlock:
        mutex_unlock(&stack_depot_init_mutex);

        return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
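
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * subsystem that stores stack traces either requests early initialization
 * from setup code via stack_depot_request_early_init(), or lazily calls
 * stack_depot_init() before its first stack_depot_save():
 *
 *      static int __init my_feature_init(void)
 *      {
 *              int err = stack_depot_init();
 *
 *              if (err)
 *                      return err;
 *              ...
 *      }
 */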

/* Uses preallocated memory to initialize a new stack depot pool. */
static void depot_init_pool(void **prealloc)
{
        /*
         * If the next pool is already initialized or the maximum number of
         * pools is reached, do not use the preallocated memory.
         * smp_load_acquire() here pairs with smp_store_release() below and
         * in depot_alloc_stack().
         */
        if (!smp_load_acquire(&next_pool_required))
                return;

        /* Check if the current pool is not yet allocated. */
        if (stack_pools[pool_index] == NULL) {
                /* Use the preallocated memory for the current pool. */
                stack_pools[pool_index] = *prealloc;
                *prealloc = NULL;
        } else {
                /*
                 * Otherwise, use the preallocated memory for the next pool
                 * as long as we do not exceed the maximum number of pools.
                 */
                if (pool_index + 1 < DEPOT_MAX_POOLS) {
                        stack_pools[pool_index + 1] = *prealloc;
                        *prealloc = NULL;
                }
                /*
                 * At this point, either the next pool is initialized or the
                 * maximum number of pools is reached. In either case, take
                 * note that initializing another pool is not required.
                 * This smp_store_release pairs with smp_load_acquire() above
                 * and in stack_depot_save().
                 */
                smp_store_release(&next_pool_required, 0);
        }
}

/* Allocates a new stack in a stack depot pool. */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
        struct stack_record *stack;
        size_t required_size = struct_size(stack, entries, size);

        required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN);

        /* Check if there is not enough space in the current pool. */
        if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) {
                /* Bail out if we reached the pool limit. */
                if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) {
                        WARN_ONCE(1, "Stack depot reached limit capacity");
                        return NULL;
                }

                /*
                 * Move on to the next pool.
                 * WRITE_ONCE pairs with potential concurrent read in
                 * stack_depot_fetch().
                 */
                WRITE_ONCE(pool_index, pool_index + 1);
                pool_offset = 0;
                /*
                 * If the maximum number of pools is not reached, take note
                 * that the next pool needs to be initialized.
                 * smp_store_release() here pairs with smp_load_acquire() in
                 * stack_depot_save() and depot_init_pool().
                 */
                if (pool_index + 1 < DEPOT_MAX_POOLS)
                        smp_store_release(&next_pool_required, 1);
        }

        /* Assign the preallocated memory to a pool if required. */
        if (*prealloc)
                depot_init_pool(prealloc);

        /* Check if we have a pool to save the stack trace. */
        if (stack_pools[pool_index] == NULL)
                return NULL;

        /* Save the stack trace. */
        stack = stack_pools[pool_index] + pool_offset;
        stack->hash = hash;
        stack->size = size;
        stack->handle.pool_index = pool_index;
        stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
        stack->handle.valid = 1;
        stack->handle.extra = 0;
        memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
        pool_offset += required_size;

        return stack;
}
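
/*
 * For illustration (a restatement of existing code, not an additional API):
 * the handle filled in by depot_alloc_stack() encodes the record's location,
 * which stack_depot_fetch() below recovers as:
 *
 *      union handle_parts parts = { .handle = handle };
 *      void *pool = stack_pools[parts.pool_index];
 *      struct stack_record *stack = pool + (parts.offset << DEPOT_STACK_ALIGN);
 */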

/* Calculates the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
        return jhash2((u32 *)entries,
                      array_size(size, sizeof(*entries)) / sizeof(u32),
                      STACK_HASH_SEED);
}

/*
 * Non-instrumented version of memcmp().
 * Does not check the lexicographical order, only the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
                        unsigned int n)
{
        for ( ; n-- ; u1++, u2++) {
                if (*u1 != *u2)
                        return 1;
        }
        return 0;
}

/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct stack_record *bucket,
                                              unsigned long *entries, int size,
                                              u32 hash)
{
        struct stack_record *found;

        for (found = bucket; found; found = found->next) {
                if (found->hash == hash &&
                    found->size == size &&
                    !stackdepot_memcmp(entries, found->entries, size))
                        return found;
        }
        return NULL;
}
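
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * typical pattern is to capture a trace with stack_trace_save(), deduplicate
 * it into a handle, and print it later. The buffer size and GFP flags are
 * arbitrary choices of this example.
 *
 *      unsigned long entries[16];
 *      unsigned int nr_entries;
 *      depot_stack_handle_t handle;
 *
 *      nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *      handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
 *      ...
 *      stack_depot_print(handle);
 */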

depot_stack_handle_t __stack_depot_save(unsigned long *entries,
                                        unsigned int nr_entries,
                                        gfp_t alloc_flags, bool can_alloc)
{
        struct stack_record *found = NULL, **bucket;
        union handle_parts retval = { .handle = 0 };
        struct page *page = NULL;
        void *prealloc = NULL;
        unsigned long flags;
        u32 hash;

        /*
         * If this stack trace is from an interrupt, including anything before
         * interrupt entry usually leads to unbounded stack depot growth.
         *
         * Since use of filter_irq_stacks() is a requirement to ensure stack
         * depot can efficiently deduplicate interrupt stacks, always call
         * filter_irq_stacks() here to simplify all callers' use of stack depot.
         */
        nr_entries = filter_irq_stacks(entries, nr_entries);

        if (unlikely(nr_entries == 0) || stack_depot_disabled)
                goto fast_exit;

        hash = hash_stack(entries, nr_entries);
        bucket = &stack_table[hash & stack_hash_mask];

        /*
         * Fast path: look the stack trace up without locking.
         * The smp_load_acquire() here pairs with smp_store_release() to
         * |bucket| below.
         */
        found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash);
        if (found)
                goto exit;

        /*
         * Check if another stack pool needs to be initialized. If so, allocate
         * the memory now - we won't be able to do that under the lock.
         *
         * The smp_load_acquire() here pairs with smp_store_release() to
         * |next_pool_required| in depot_alloc_stack() and depot_init_pool().
         */
        if (unlikely(can_alloc && smp_load_acquire(&next_pool_required))) {
                /*
                 * Zero out zone modifiers, as we don't have specific zone
                 * requirements. Keep the flags related to allocation in atomic
                 * contexts and I/O.
                 */
                alloc_flags &= ~GFP_ZONEMASK;
                alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
                alloc_flags |= __GFP_NOWARN;
                page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
                if (page)
                        prealloc = page_address(page);
        }

        raw_spin_lock_irqsave(&pool_lock, flags);

        found = find_stack(*bucket, entries, nr_entries, hash);
        if (!found) {
                struct stack_record *new =
                        depot_alloc_stack(entries, nr_entries, hash, &prealloc);

                if (new) {
                        new->next = *bucket;
                        /*
                         * This smp_store_release() pairs with
                         * smp_load_acquire() from |bucket| above.
                         */
                        smp_store_release(bucket, new);
                        found = new;
                }
        } else if (prealloc) {
                /*
                 * Stack depot already contains this stack trace, but let's
                 * keep the preallocated memory for the future.
                 */
                depot_init_pool(&prealloc);
        }

        raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
        if (prealloc) {
                /* Stack depot didn't use this memory, free it. */
                free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
        }
        if (found)
                retval.handle = found->handle.handle;
fast_exit:
        return retval.handle;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);

depot_stack_handle_t stack_depot_save(unsigned long *entries,
                                      unsigned int nr_entries,
                                      gfp_t alloc_flags)
{
        return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);

unsigned int stack_depot_fetch(depot_stack_handle_t handle,
                               unsigned long **entries)
{
        union handle_parts parts = { .handle = handle };
        /*
         * READ_ONCE pairs with potential concurrent write in
         * depot_alloc_stack.
         */
        int pool_index_cached = READ_ONCE(pool_index);
        void *pool;
        size_t offset = parts.offset << DEPOT_STACK_ALIGN;
        struct stack_record *stack;

        *entries = NULL;
        if (!handle)
                return 0;

        if (parts.pool_index > pool_index_cached) {
                WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
                     parts.pool_index, pool_index_cached, handle);
                return 0;
        }
        pool = stack_pools[parts.pool_index];
        if (!pool)
                return 0;
        stack = pool + offset;

        *entries = stack->entries;
        return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);

void stack_depot_print(depot_stack_handle_t stack)
{
        unsigned long *entries;
        unsigned int nr_entries;

        nr_entries = stack_depot_fetch(stack, &entries);
        if (nr_entries > 0)
                stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
                        int spaces)
{
        unsigned long *entries;
        unsigned int nr_entries;

        nr_entries = stack_depot_fetch(handle, &entries);
        return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
                                                spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
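
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * formatting a stored trace into a text buffer instead of printing it
 * directly. The buffer size and the two-space indent are arbitrary.
 *
 *      char buf[512];
 *
 *      if (stack_depot_snprint(handle, buf, sizeof(buf), 2))
 *              pr_info("%s", buf);
 */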

depot_stack_handle_t __must_check stack_depot_set_extra_bits(
                        depot_stack_handle_t handle, unsigned int extra_bits)
{
        union handle_parts parts = { .handle = handle };

        /* Don't set extra bits on empty handles. */
        if (!handle)
                return 0;

        parts.extra = extra_bits;
        return parts.handle;
}
EXPORT_SYMBOL(stack_depot_set_extra_bits);

unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
        union handle_parts parts = { .handle = handle };

        return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
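
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a user
 * with spare handle bits (STACK_DEPOT_EXTRA_BITS) can fold a small tag into
 * a handle and recover it later; the tag value below is made up.
 *
 *      handle = stack_depot_set_extra_bits(handle, 0x3);
 *      ...
 *      tag = stack_depot_get_extra_bits(handle);
 */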