// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic stack depot for storing stack traces.
 *
 * Some debugging tools need to save stack traces of certain events which can
 * be later presented to the user. For example, KASAN needs to save alloc and
 * free stacks for each object, but storing two stack traces per object
 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
 * that).
 *
 * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
 * and free stacks repeat a lot, we save about 100x space.
 * Stacks are never removed from the depot, so they are stored one after
 * another in contiguously allocated memory.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */
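
/*
 * Typical round trip (an illustrative sketch, not part of this file; the
 * local variable names below are hypothetical):
 *
 *	unsigned long entries[64];
 *	unsigned int nr_entries;
 *	depot_stack_handle_t handle;
 *
 *	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *	handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
 *	...
 *	if (handle)
 *		stack_depot_print(handle);
 */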

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

#define STACK_ALLOC_NULL_PROTECTION_BITS 1
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
#define STACK_ALLOC_ALIGN 4
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
					STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
		STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
	(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
	 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)

/* The compact structure to store the reference to stacks. */
union handle_parts {
	depot_stack_handle_t handle;
	struct {
		u32 slabindex : STACK_ALLOC_INDEX_BITS;
		u32 offset : STACK_ALLOC_OFFSET_BITS;
		u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
	};
};
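
/*
 * Worked example of the handle layout, assuming 4 KiB pages (PAGE_SHIFT == 12)
 * and the defaults above; depot_stack_handle_t is a u32, so DEPOT_STACK_BITS
 * is 32:
 *
 *	STACK_ALLOC_OFFSET_BITS = 2 + 12 - 4 = 10
 *	STACK_ALLOC_INDEX_BITS  = 32 - 1 - 10 = 21
 *
 * A handle then packs a 21-bit slab index, a 10-bit offset counted in 16-byte
 * (1 << STACK_ALLOC_ALIGN) units within the 16 KiB slab, and one "valid" bit
 * that makes every real handle non-zero. With the 8192-slab cap this bounds
 * the depot at 8192 * 16 KiB = 128 MiB of stack storage.
 */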

struct stack_record {
	struct stack_record *next;	/* Link in the hashtable */
	u32 hash;			/* Hash in the hashtable */
	u32 size;			/* Number of frames in the stack */
	union handle_parts handle;
	unsigned long entries[];	/* Variable-sized array of entries. */
};

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];

static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);

static bool init_stack_slab(void **prealloc)
{
	if (!*prealloc)
		return false;
	/*
	 * This smp_load_acquire() pairs with smp_store_release() to
	 * |next_slab_inited| below and in depot_alloc_stack().
	 */
	if (smp_load_acquire(&next_slab_inited))
		return true;
	if (stack_slabs[depot_index] == NULL) {
		stack_slabs[depot_index] = *prealloc;
		*prealloc = NULL;
	} else {
		/* If this is the last depot slab, do not touch the next one. */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
			stack_slabs[depot_index + 1] = *prealloc;
			*prealloc = NULL;
		}
		/*
		 * This smp_store_release() pairs with smp_load_acquire() from
		 * |next_slab_inited| above and in stack_depot_save().
		 */
		smp_store_release(&next_slab_inited, 1);
	}
	return true;
}

/* Allocation of a new stack in raw storage */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
	struct stack_record *stack;
	size_t required_size = struct_size(stack, entries, size);

	required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);

	if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
		if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
			WARN_ONCE(1, "Stack depot reached limit capacity");
			return NULL;
		}
		depot_index++;
		depot_offset = 0;
		/*
		 * smp_store_release() here pairs with smp_load_acquire() from
		 * |next_slab_inited| in stack_depot_save() and
		 * init_stack_slab().
		 */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
			smp_store_release(&next_slab_inited, 0);
	}
	init_stack_slab(prealloc);
	if (stack_slabs[depot_index] == NULL)
		return NULL;

	stack = stack_slabs[depot_index] + depot_offset;

	stack->hash = hash;
	stack->size = size;
	stack->handle.slabindex = depot_index;
	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
	stack->handle.valid = 1;
	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
	depot_offset += required_size;

	return stack;
}

#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
#define STACK_HASH_SEED 0x9747b28c

static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
	int ret;

	ret = kstrtobool(str, &stack_depot_disable);
	if (!ret && stack_depot_disable) {
		pr_info("Stack Depot is disabled\n");
		stack_table = NULL;
	}
	return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);

/*
 * __ref because of memblock_alloc(), which will not actually be called after
 * the __init code is gone, because at that point slab_is_available() is true.
 */
__ref int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);

	mutex_lock(&stack_depot_init_mutex);
	if (!stack_depot_disable && !stack_table) {
		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
		int i;

		if (slab_is_available()) {
			pr_info("Stack Depot allocating hash table with kvmalloc\n");
			stack_table = kvmalloc(size, GFP_KERNEL);
		} else {
			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
		}
		if (stack_table) {
			for (i = 0; i < STACK_HASH_SIZE; i++)
				stack_table[i] = NULL;
		} else {
			pr_err("Stack Depot hash table allocation failed, disabling\n");
			stack_depot_disable = true;
			mutex_unlock(&stack_depot_init_mutex);
			return -ENOMEM;
		}
	}
	mutex_unlock(&stack_depot_init_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
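
/*
 * Illustrative sketch (the caller below is hypothetical, not part of this
 * file): a subsystem that wants to store stack traces typically calls
 * stack_depot_init() once from its own init path, before the first
 * stack_depot_save():
 *
 *	static int __init my_debug_feature_init(void)
 *	{
 *		if (stack_depot_init())
 *			return -ENOMEM;
 *		...
 *		return 0;
 *	}
 */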

/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Use our own, non-instrumented version of memcmp().
 *
 * We actually don't care about the order, just the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Find a stack that is equal to the one stored in entries in the hash */
static inline struct stack_record *find_stack(struct stack_record *bucket,
					     unsigned long *entries, int size,
					     u32 hash)
{
	struct stack_record *found;

	for (found = bucket; found; found = found->next) {
		if (found->hash == hash &&
		    found->size == size &&
		    !stackdepot_memcmp(entries, found->entries, size))
			return found;
	}
	return NULL;
}

/**
 * stack_depot_snprint - print stack entries from a depot into a buffer
 *
 * @handle:	Stack depot handle which was returned from
 *		stack_depot_save().
 * @buf:	Pointer to the print buffer
 * @size:	Size of the print buffer
 * @spaces:	Number of leading spaces to print
 *
 * Return:	Number of bytes printed.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
		       int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
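
/*
 * Illustrative sketch (hypothetical handle and buffer names): rendering a
 * previously saved trace into a character buffer, e.g. for a debugfs report:
 *
 *	char buf[512];
 *	int len;
 *
 *	len = stack_depot_snprint(handle, buf, sizeof(buf), 2);
 *	pr_info("%.*s", len, buf);
 */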

/**
 * stack_depot_print - print stack entries from a depot
 *
 * @stack:		Stack depot handle which was returned from
 *			stack_depot_save().
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

/**
 * stack_depot_fetch - Fetch stack entries from a depot
 *
 * @handle:		Stack depot handle which was returned from
 *			stack_depot_save().
 * @entries:		Pointer to store the entries address
 *
 * Return: The number of trace entries for this depot.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	union handle_parts parts = { .handle = handle };
	void *slab;
	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
	struct stack_record *stack;

	*entries = NULL;
	if (!handle)
		return 0;

	if (parts.slabindex > depot_index) {
		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
			parts.slabindex, depot_index, handle);
		return 0;
	}
	slab = stack_slabs[parts.slabindex];
	if (!slab)
		return 0;
	stack = slab + offset;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
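
/*
 * Illustrative sketch (hypothetical handle variable): walking the raw entries
 * of a stored trace instead of printing it wholesale:
 *
 *	unsigned long *entries;
 *	unsigned int nr, i;
 *
 *	nr = stack_depot_fetch(handle, &entries);
 *	for (i = 0; i < nr; i++)
 *		pr_info("%pS\n", (void *)entries[i]);
 */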

/**
 * __stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 * @can_alloc:		Allocate stack slabs (increased chance of failure if false)
 *
 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc
 * is %true, the depot is allowed to replenish the stack slab pool in case no
 * space is left (allocates using GFP flags of @alloc_flags). If @can_alloc is
 * %false, avoids any allocations and will fail if no space is left to store
 * the stack trace.
 *
 * If the stack trace in @entries is from an interrupt, only the portion up to
 * interrupt entry is saved.
 *
 * Context: Any context, but setting @can_alloc to %false is required if
 *          alloc_pages() cannot be used from the current context. Currently
 *          this is the case from contexts where neither %GFP_ATOMIC nor
 *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
					unsigned int nr_entries,
					gfp_t alloc_flags, bool can_alloc)
{
	struct stack_record *found = NULL, **bucket;
	depot_stack_handle_t retval = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	unsigned long flags;
	u32 hash;

	/*
	 * If this stack trace is from an interrupt, including anything before
	 * interrupt entry usually leads to unbounded stackdepot growth.
	 *
	 * Because filter_irq_stacks() is required for stackdepot to
	 * deduplicate interrupt stacks efficiently, always call it here to
	 * simplify all callers' use of stackdepot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disable)
		goto fast_exit;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & STACK_HASH_MASK];

	/*
	 * Fast path: look the stack trace up without locking.
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |bucket| below.
	 */
	found = find_stack(smp_load_acquire(bucket), entries,
			   nr_entries, hash);
	if (found)
		goto exit;

	/*
	 * Check if the current or the next stack slab needs to be
	 * initialized. If so, allocate the memory - we won't be able to do
	 * that under the lock.
	 *
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
	 */
	if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
		/*
		 * Zero out zone modifiers, as we don't have specific zone
		 * requirements. Keep the flags related to allocation in atomic
		 * contexts and I/O.
		 */
		alloc_flags &= ~GFP_ZONEMASK;
		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
		alloc_flags |= __GFP_NOWARN;
		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	raw_spin_lock_irqsave(&depot_lock, flags);

	found = find_stack(*bucket, entries, nr_entries, hash);
	if (!found) {
		struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc);

		if (new) {
			new->next = *bucket;
			/*
			 * This smp_store_release() pairs with
			 * smp_load_acquire() from |bucket| above.
			 */
			smp_store_release(bucket, new);
			found = new;
		}
	} else if (prealloc) {
		/*
		 * We didn't need to store this stack trace, but let's keep
		 * the preallocated memory for the future.
		 */
		WARN_ON(!init_stack_slab(&prealloc));
	}

	raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
	if (prealloc) {
		/* Nobody used this memory, ok to free it. */
		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
	}
	if (found)
		retval = found->handle.handle;
fast_exit:
	return retval;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);
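
/*
 * Illustrative sketch (hypothetical caller): from a context where
 * alloc_pages() must not be called (e.g. under a raw spinlock or in NMI),
 * pass @can_alloc == false and accept that the save can fail once the
 * preallocated slab space runs out:
 *
 *	handle = __stack_depot_save(entries, nr_entries, 0, false);
 *	if (!handle)
 *		pr_debug("stack depot out of space, trace not stored\n");
 */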

/**
 * stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 *
 * Context: Contexts where allocations via alloc_pages() are allowed.
 *          See __stack_depot_save() for more details.
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
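
/*
 * Illustrative sketch: because the depot deduplicates, saving an identical
 * trace twice yields the same handle, which is what makes keeping one handle
 * per event (e.g. alloc and free stacks per object) cheap:
 *
 *	depot_stack_handle_t h1, h2;
 *
 *	h1 = stack_depot_save(entries, nr_entries, GFP_KERNEL);
 *	h2 = stack_depot_save(entries, nr_entries, GFP_KERNEL);
 *	WARN_ON(h1 && h1 != h2);
 */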