xref: /openbmc/linux/lib/stackdepot.c (revision 90cb380f9ceb811059340d06ff5fd0c0e93ecbe1)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic stack depot for storing stack traces.
 *
 * Some debugging tools need to save stack traces of certain events which can
 * later be presented to the user. For example, KASAN needs to save alloc and
 * free stacks for each object, but storing two stack traces per object
 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
 * that).
 *
 * Instead, the stack depot maintains a hash table of unique stack traces. Since
 * alloc and free stacks repeat a lot, this saves about 100x the space.
 * Stacks are never removed from the depot, so we store them one after another
 * in contiguously allocated memory.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

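/*
 * A minimal usage sketch of the depot API declared in <linux/stackdepot.h>:
 * a debugging tool captures the current trace, stores it in the depot and
 * keeps only the 32-bit handle; the full trace can be recovered from the
 * handle later. The helper name and the 64-entry buffer below are
 * illustrative choices, not part of the depot itself.
 */
static __maybe_unused depot_stack_handle_t example_save_current_stack(gfp_t flags)
{
	unsigned long entries[64];
	unsigned int nr_entries;

	/* Capture the current call chain, then deduplicate it in the depot. */
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, flags);
}
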
#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

#define STACK_ALLOC_NULL_PROTECTION_BITS 1
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
#define STACK_ALLOC_ALIGN 4
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
					STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
		STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
	(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
	 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)

/* The compact structure to store the reference to stacks. */
union handle_parts {
	depot_stack_handle_t handle;
	struct {
		u32 slabindex : STACK_ALLOC_INDEX_BITS;
		u32 offset : STACK_ALLOC_OFFSET_BITS;
		u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
	};
};

struct stack_record {
	struct stack_record *next;	/* Link in the hashtable */
	u32 hash;			/* Hash in the hashtable */
	u32 size;			/* Number of frames in the stack */
	union handle_parts handle;
	unsigned long entries[];	/* Variable-sized array of entries. */
};

static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];

static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);

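/*
 * A minimal sketch of how a handle maps back onto the storage above; it
 * mirrors the decoding done in stack_depot_fetch() below and is only meant
 * to illustrate the union handle_parts encoding (the helper name is
 * illustrative, not part of the depot API).
 */
static __maybe_unused struct stack_record *
example_handle_to_record(depot_stack_handle_t handle)
{
	union handle_parts parts = { .handle = handle };

	/* slabindex selects the slab; offset counts STACK_ALLOC_ALIGN units. */
	return (struct stack_record *)(stack_slabs[parts.slabindex] +
				       ((size_t)parts.offset << STACK_ALLOC_ALIGN));
}
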
static bool init_stack_slab(void **prealloc)
{
	if (!*prealloc)
		return false;
	/*
	 * This smp_load_acquire() pairs with smp_store_release() to
	 * |next_slab_inited| below and in depot_alloc_stack().
	 */
	if (smp_load_acquire(&next_slab_inited))
		return true;
	if (stack_slabs[depot_index] == NULL) {
		stack_slabs[depot_index] = *prealloc;
		*prealloc = NULL;
	} else {
		/* If this is the last depot slab, do not touch the next one. */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
			stack_slabs[depot_index + 1] = *prealloc;
			*prealloc = NULL;
		}
		/*
		 * This smp_store_release pairs with smp_load_acquire() from
		 * |next_slab_inited| above and in stack_depot_save().
		 */
		smp_store_release(&next_slab_inited, 1);
	}
	return true;
}

/* Allocation of a new stack in raw storage */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
	struct stack_record *stack;
	size_t required_size = struct_size(stack, entries, size);

	required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);

	if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
		if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
			WARN_ONCE(1, "Stack depot reached limit capacity");
			return NULL;
		}
		depot_index++;
		depot_offset = 0;
		/*
		 * smp_store_release() here pairs with smp_load_acquire() from
		 * |next_slab_inited| in stack_depot_save() and
		 * init_stack_slab().
		 */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
			smp_store_release(&next_slab_inited, 0);
	}
	init_stack_slab(prealloc);
	if (stack_slabs[depot_index] == NULL)
		return NULL;

	stack = stack_slabs[depot_index] + depot_offset;

	stack->hash = hash;
	stack->size = size;
	stack->handle.slabindex = depot_index;
	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
	stack->handle.valid = 1;
	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
	depot_offset += required_size;

	return stack;
}

/* one hash table bucket entry per 16kB of memory */
#define STACK_HASH_SCALE	14
/* limited between 4k and 1M buckets */
#define STACK_HASH_ORDER_MIN	12
#define STACK_HASH_ORDER_MAX	20
#define STACK_HASH_SEED 0x9747b28c

static unsigned int stack_hash_order;
static unsigned int stack_hash_mask;

static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
	int ret;

	ret = kstrtobool(str, &stack_depot_disable);
	if (!ret && stack_depot_disable) {
		pr_info("Stack Depot is disabled\n");
		stack_table = NULL;
	}
	return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);

void __init stack_depot_want_early_init(void)
{
	/* Too late to request early init now */
	WARN_ON(__stack_depot_early_init_passed);

	__stack_depot_want_early_init = true;
}

int __init stack_depot_early_init(void)
{
	unsigned long entries = 0;

	/* This is supposed to be called only once, from mm_init() */
	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;

	__stack_depot_early_init_passed = true;

	if (kasan_enabled() && !stack_hash_order)
		stack_hash_order = STACK_HASH_ORDER_MAX;

	if (!__stack_depot_want_early_init || stack_depot_disable)
		return 0;

	if (stack_hash_order)
		entries = 1UL << stack_hash_order;
	stack_table = alloc_large_system_hash("stackdepot",
						sizeof(struct stack_record *),
						entries,
						STACK_HASH_SCALE,
						HASH_EARLY | HASH_ZERO,
						NULL,
						&stack_hash_mask,
						1UL << STACK_HASH_ORDER_MIN,
						1UL << STACK_HASH_ORDER_MAX);

	if (!stack_table) {
		pr_err("Stack Depot hash table allocation failed, disabling\n");
		stack_depot_disable = true;
		return -ENOMEM;
	}

	return 0;
}

int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);
	int ret = 0;

	mutex_lock(&stack_depot_init_mutex);
	if (!stack_depot_disable && !stack_table) {
		unsigned long entries;
		int scale = STACK_HASH_SCALE;

		if (stack_hash_order) {
			entries = 1UL << stack_hash_order;
		} else {
			entries = nr_free_buffer_pages();
			entries = roundup_pow_of_two(entries);

			if (scale > PAGE_SHIFT)
				entries >>= (scale - PAGE_SHIFT);
			else
				entries <<= (PAGE_SHIFT - scale);
		}

		if (entries < 1UL << STACK_HASH_ORDER_MIN)
			entries = 1UL << STACK_HASH_ORDER_MIN;
		if (entries > 1UL << STACK_HASH_ORDER_MAX)
			entries = 1UL << STACK_HASH_ORDER_MAX;

		pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n",
				entries);
		stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);
		if (!stack_table) {
			pr_err("Stack Depot hash table allocation failed, disabling\n");
			stack_depot_disable = true;
			ret = -ENOMEM;
		}
		stack_hash_mask = entries - 1;
	}
	mutex_unlock(&stack_depot_init_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);

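/*
 * A minimal sketch of the two initialization paths. Early boot code that
 * knows it will need the depot can request early allocation of the hash
 * table before mm_init(); everyone else simply calls stack_depot_init()
 * once before the first save. The caller shown here is illustrative.
 */
static __maybe_unused int example_depot_user_init(void)
{
	/*
	 * From early (__init) code, before mm_init():
	 *	stack_depot_want_early_init();
	 *
	 * From a regular user, before the first stack_depot_save():
	 */
	return stack_depot_init();
}
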
/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Use our own, non-instrumented version of memcmp().
 *
 * We actually don't care about the order, just the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Find a stack that is equal to the one stored in entries in the hash */
static inline struct stack_record *find_stack(struct stack_record *bucket,
					     unsigned long *entries, int size,
					     u32 hash)
{
	struct stack_record *found;

	for (found = bucket; found; found = found->next) {
		if (found->hash == hash &&
		    found->size == size &&
		    !stackdepot_memcmp(entries, found->entries, size))
			return found;
	}
	return NULL;
}

/**
 * stack_depot_snprint - print stack entries from a depot into a buffer
 *
 * @handle:	Stack depot handle which was returned from
 *		stack_depot_save().
 * @buf:	Pointer to the print buffer
 * @size:	Size of the print buffer
 * @spaces:	Number of leading spaces to print
 *
 * Return:	Number of bytes printed.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
		       int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);

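/*
 * A minimal sketch of reporting a saved trace through a text buffer; the
 * buffer size and the two-space indentation are illustrative choices.
 */
static __maybe_unused void example_report_handle(depot_stack_handle_t handle)
{
	char buf[256];

	if (stack_depot_snprint(handle, buf, sizeof(buf), 2))
		pr_info("%s", buf);
}
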
/**
 * stack_depot_print - print stack entries from a depot
 *
 * @stack:		Stack depot handle which was returned from
 *			stack_depot_save().
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

/**
 * stack_depot_fetch - Fetch stack entries from a depot
 *
 * @handle:		Stack depot handle which was returned from
 *			stack_depot_save().
 * @entries:		Pointer to store the entries address
 *
 * Return: The number of trace entries for this depot.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	union handle_parts parts = { .handle = handle };
	void *slab;
	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
	struct stack_record *stack;

	*entries = NULL;
	if (!handle)
		return 0;

	if (parts.slabindex > depot_index) {
		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
			parts.slabindex, depot_index, handle);
		return 0;
	}
	slab = stack_slabs[parts.slabindex];
	if (!slab)
		return 0;
	stack = slab + offset;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);

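/*
 * A minimal sketch of consuming a fetched trace frame by frame; the handle
 * is assumed to come from an earlier stack_depot_save() call. For plain
 * printing, stack_depot_print() above is the simpler choice.
 */
static __maybe_unused void example_walk_handle(depot_stack_handle_t handle)
{
	unsigned long *entries;
	unsigned int nr_entries, i;

	nr_entries = stack_depot_fetch(handle, &entries);
	for (i = 0; i < nr_entries; i++)
		pr_info("  %pS\n", (void *)entries[i]);
}
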
/**
 * __stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 * @can_alloc:		Allocate stack slabs (increased chance of failure if false)
 *
 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
 * %true, the depot is allowed to replenish the stack slab pool in case no space
 * is left (allocates using GFP flags of @alloc_flags). If @can_alloc is %false,
 * avoids any allocations and will fail if no space is left to store the stack
 * trace.
 *
 * If the stack trace in @entries is from an interrupt, only the portion up to
 * interrupt entry is saved.
 *
 * Context: Any context, but setting @can_alloc to %false is required if
 *          alloc_pages() cannot be used from the current context. Currently
 *          this is the case from contexts where neither %GFP_ATOMIC nor
 *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
					unsigned int nr_entries,
					gfp_t alloc_flags, bool can_alloc)
{
	struct stack_record *found = NULL, **bucket;
	depot_stack_handle_t retval = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	unsigned long flags;
	u32 hash;

	/*
	 * If this stack trace is from an interrupt, including anything before
	 * interrupt entry usually leads to unbounded stackdepot growth.
	 *
	 * Because use of filter_irq_stacks() is a requirement to ensure
	 * stackdepot can efficiently deduplicate interrupt stacks, always
	 * call filter_irq_stacks() here to simplify all callers' use of
	 * stackdepot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disable)
		goto fast_exit;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & stack_hash_mask];

	/*
	 * Fast path: look the stack trace up without locking.
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |bucket| below.
	 */
	found = find_stack(smp_load_acquire(bucket), entries,
			   nr_entries, hash);
	if (found)
		goto exit;

	/*
	 * Check if the current or the next stack slab needs to be initialized.
	 * If so, allocate the memory - we won't be able to do that under the
	 * lock.
	 *
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
	 */
	if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
		/*
		 * Zero out zone modifiers, as we don't have specific zone
		 * requirements. Keep the flags related to allocation in atomic
		 * contexts and I/O.
		 */
		alloc_flags &= ~GFP_ZONEMASK;
		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
		alloc_flags |= __GFP_NOWARN;
		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	raw_spin_lock_irqsave(&depot_lock, flags);

	found = find_stack(*bucket, entries, nr_entries, hash);
	if (!found) {
		struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc);

		if (new) {
			new->next = *bucket;
			/*
			 * This smp_store_release() pairs with
			 * smp_load_acquire() from |bucket| above.
			 */
			smp_store_release(bucket, new);
			found = new;
		}
	} else if (prealloc) {
		/*
		 * We didn't need to store this stack trace, but let's keep
		 * the preallocated memory for the future.
		 */
		WARN_ON(!init_stack_slab(&prealloc));
	}

	raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
	if (prealloc) {
		/* Nobody used this memory, ok to free it. */
		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
	}
	if (found)
		retval = found->handle.handle;
fast_exit:
	return retval;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);

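/*
 * A minimal sketch of saving from a context where alloc_pages() must not be
 * called (e.g. under a raw spinlock or in NMI), as described in the kernel-doc
 * above: pass @can_alloc == false and accept that the save may fail if the
 * current slab has no room left. The helper name and buffer size are
 * illustrative.
 */
static __maybe_unused depot_stack_handle_t example_save_no_alloc(void)
{
	unsigned long entries[16];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	/* alloc_flags is not used when can_alloc is false. */
	return __stack_depot_save(entries, nr_entries, 0, false);
}
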
/**
 * stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 *
 * Context: Contexts where allocations via alloc_pages() are allowed.
 *          See __stack_depot_save() for more details.
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
517