xref: /openbmc/linux/kernel/kcsan/report.c (revision 5a6b56de)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/debug_locks.h>
4 #include <linux/delay.h>
5 #include <linux/jiffies.h>
6 #include <linux/kernel.h>
7 #include <linux/lockdep.h>
8 #include <linux/preempt.h>
9 #include <linux/printk.h>
10 #include <linux/sched.h>
11 #include <linux/spinlock.h>
12 #include <linux/stacktrace.h>
13 
14 #include "kcsan.h"
15 #include "encoding.h"
16 
17 /*
18  * Max. number of stack entries to show in the report.
19  */
20 #define NUM_STACK_ENTRIES 64
21 
22 /* Common access info. */
23 struct access_info {
24 	const volatile void	*ptr;
25 	size_t			size;
26 	int			access_type;
27 	int			task_pid;
28 	int			cpu_id;
29 };
30 
31 /*
32  * Other thread info: communicated from other racing thread to thread that set
33  * up the watchpoint, which then prints the complete report atomically.
34  */
35 struct other_info {
36 	struct access_info	ai;
37 	unsigned long		stack_entries[NUM_STACK_ENTRIES];
38 	int			num_stack_entries;
39 
40 	/*
41 	 * Optionally pass @current. Typically we do not need to pass @current
42 	 * via @other_info since just @task_pid is sufficient. Passing @current
43 	 * has additional overhead.
44 	 *
45 	 * To safely pass @current, we must either use get_task_struct/
46 	 * put_task_struct, or stall the thread that populated @other_info.
47 	 *
48 	 * We cannot rely on get_task_struct/put_task_struct in case
49 	 * release_report() races with a task being released, and would have to
50 	 * free it in release_report(). This may result in deadlock if we want
51 	 * to use KCSAN on the allocators.
52 	 *
53 	 * Since we also want to reliably print held locks for
54 	 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
55 	 * that populated @other_info until it has been consumed.
56 	 */
57 	struct task_struct	*task;
58 };
59 
60 /*
61  * To never block any producers of struct other_info, we need as many elements
62  * as we have watchpoints (upper bound on concurrent races to report).
63  */
64 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
65 
66 /*
67  * Information about reported races; used to rate limit reporting.
68  */
69 struct report_time {
70 	/*
71 	 * The last time the race was reported.
72 	 */
73 	unsigned long time;
74 
75 	/*
76 	 * The frames of the 2 threads; if only 1 thread is known, one frame
77 	 * will be 0.
78 	 */
79 	unsigned long frame1;
80 	unsigned long frame2;
81 };
82 
83 /*
84  * Since we also want to be able to debug allocators with KCSAN, to avoid
85  * deadlock, report_times cannot be dynamically resized with krealloc in
86  * rate_limit_report.
87  *
88  * Therefore, we use a fixed-size array, which at most will occupy a page. This
89  * still adequately rate limits reports, assuming that a) number of unique data
90  * races is not excessive, and b) occurrence of unique races within the
91  * same time window is limited.
92  */
93 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
94 #define REPORT_TIMES_SIZE                                                      \
95 	(CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
96 		 REPORT_TIMES_MAX :                                            \
97 		 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
98 static struct report_time report_times[REPORT_TIMES_SIZE];
99 
100 /*
101  * Spinlock serializing report generation, and access to @other_infos. Although
102  * it could make sense to have a finer-grained locking story for @other_infos,
103  * report generation needs to be serialized either way, so not much is gained.
104  */
105 static DEFINE_RAW_SPINLOCK(report_lock);
106 
107 /*
108  * Checks if the race identified by thread frames frame1 and frame2 has
109  * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
110  */
111 static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
112 {
113 	struct report_time *use_entry = &report_times[0];
114 	unsigned long invalid_before;
115 	int i;
116 
117 	BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);
118 
119 	if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
120 		return false;
121 
122 	invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);
123 
124 	/* Check if a matching race report exists. */
125 	for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
126 		struct report_time *rt = &report_times[i];
127 
128 		/*
129 		 * Must always select an entry for use to store info as we
130 		 * cannot resize report_times; at the end of the scan, use_entry
131 		 * will be the oldest entry, which ideally also happened before
132 		 * KCSAN_REPORT_ONCE_IN_MS ago.
133 		 */
134 		if (time_before(rt->time, use_entry->time))
135 			use_entry = rt;
136 
137 		/*
138 		 * Initially, no need to check any further as this entry as well
139 		 * as following entries have never been used.
140 		 */
141 		if (rt->time == 0)
142 			break;
143 
144 		/* Check if entry expired. */
145 		if (time_before(rt->time, invalid_before))
146 			continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */
147 
148 		/* Reported recently, check if race matches. */
149 		if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
150 		    (rt->frame1 == frame2 && rt->frame2 == frame1))
151 			return true;
152 	}
153 
154 	use_entry->time = jiffies;
155 	use_entry->frame1 = frame1;
156 	use_entry->frame2 = frame2;
157 	return false;
158 }
159 
160 /*
161  * Special rules to skip reporting.
162  */
163 static bool
164 skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
165 {
166 	/* Should never get here if value_change==FALSE. */
167 	WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);
168 
169 	/*
170 	 * The first call to skip_report always has value_change==TRUE, since we
171 	 * cannot know the value written of an instrumented access. For the 2nd
172 	 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
173 	 *
174 	 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
175 	 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
176 	 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
177 	 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
178 	 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
179 	 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
180 	 *
181 	 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
182 	 * data races where the write may have rewritten the same value; case 6
183 	 * is possible either if the size is larger than what we check value
184 	 * changes for or the access type is KCSAN_ACCESS_ASSERT.
185 	 */
186 	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
187 	    value_change == KCSAN_VALUE_CHANGE_MAYBE) {
188 		/*
189 		 * The access is a write, but the data value did not change.
190 		 *
191 		 * We opt-out of this filter for certain functions at request of
192 		 * maintainers.
193 		 */
194 		char buf[64];
195 		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);
196 
197 		if (!strnstr(buf, "rcu_", len) &&
198 		    !strnstr(buf, "_rcu", len) &&
199 		    !strnstr(buf, "_srcu", len))
200 			return true;
201 	}
202 
203 	return kcsan_skip_report_debugfs(top_frame);
204 }
205 
206 static const char *get_access_type(int type)
207 {
208 	if (type & KCSAN_ACCESS_ASSERT) {
209 		if (type & KCSAN_ACCESS_SCOPED) {
210 			if (type & KCSAN_ACCESS_WRITE)
211 				return "assert no accesses (scoped)";
212 			else
213 				return "assert no writes (scoped)";
214 		} else {
215 			if (type & KCSAN_ACCESS_WRITE)
216 				return "assert no accesses";
217 			else
218 				return "assert no writes";
219 		}
220 	}
221 
222 	switch (type) {
223 	case 0:
224 		return "read";
225 	case KCSAN_ACCESS_ATOMIC:
226 		return "read (marked)";
227 	case KCSAN_ACCESS_WRITE:
228 		return "write";
229 	case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
230 		return "write (marked)";
231 	case KCSAN_ACCESS_SCOPED:
232 		return "read (scoped)";
233 	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
234 		return "read (marked, scoped)";
235 	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
236 		return "write (scoped)";
237 	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
238 		return "write (marked, scoped)";
239 	default:
240 		BUG();
241 	}
242 }
243 
244 static const char *get_bug_type(int type)
245 {
246 	return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
247 }
248 
249 /* Return thread description: in task or interrupt. */
250 static const char *get_thread_desc(int task_id)
251 {
252 	if (task_id != -1) {
253 		static char buf[32]; /* safe: protected by report_lock */
254 
255 		snprintf(buf, sizeof(buf), "task %i", task_id);
256 		return buf;
257 	}
258 	return "interrupt";
259 }
260 
261 /* Helper to skip KCSAN-related functions in stack-trace. */
262 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
263 {
264 	char buf[64];
265 	char *cur;
266 	int len, skip;
267 
268 	for (skip = 0; skip < num_entries; ++skip) {
269 		len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);
270 
271 		/* Never show tsan_* or {read,write}_once_size. */
272 		if (strnstr(buf, "tsan_", len) ||
273 		    strnstr(buf, "_once_size", len))
274 			continue;
275 
276 		cur = strnstr(buf, "kcsan_", len);
277 		if (cur) {
278 			cur += sizeof("kcsan_") - 1;
279 			if (strncmp(cur, "test", sizeof("test") - 1))
280 				continue; /* KCSAN runtime function. */
281 			/* KCSAN related test. */
282 		}
283 
284 		/*
285 		 * No match for runtime functions -- @skip entries to skip to
286 		 * get to first frame of interest.
287 		 */
288 		break;
289 	}
290 
291 	return skip;
292 }
293 
294 /* Compares symbolized strings of addr1 and addr2. */
295 static int sym_strcmp(void *addr1, void *addr2)
296 {
297 	char buf1[64];
298 	char buf2[64];
299 
300 	snprintf(buf1, sizeof(buf1), "%pS", addr1);
301 	snprintf(buf2, sizeof(buf2), "%pS", addr2);
302 
303 	return strncmp(buf1, buf2, sizeof(buf1));
304 }
305 
306 static void print_verbose_info(struct task_struct *task)
307 {
308 	if (!task)
309 		return;
310 
311 	pr_err("\n");
312 	debug_show_held_locks(task);
313 	print_irqtrace_events(task);
314 }
315 
316 /*
317  * Returns true if a report was generated, false otherwise.
318  */
319 static bool print_report(enum kcsan_value_change value_change,
320 			 enum kcsan_report_type type,
321 			 const struct access_info *ai,
322 			 const struct other_info *other_info)
323 {
324 	unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
325 	int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
326 	int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
327 	unsigned long this_frame = stack_entries[skipnr];
328 	unsigned long other_frame = 0;
329 	int other_skipnr = 0; /* silence uninit warnings */
330 
331 	/*
332 	 * Must check report filter rules before starting to print.
333 	 */
334 	if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
335 		return false;
336 
337 	if (type == KCSAN_REPORT_RACE_SIGNAL) {
338 		other_skipnr = get_stack_skipnr(other_info->stack_entries,
339 						other_info->num_stack_entries);
340 		other_frame = other_info->stack_entries[other_skipnr];
341 
342 		/* @value_change is only known for the other thread */
343 		if (skip_report(value_change, other_frame))
344 			return false;
345 	}
346 
347 	if (rate_limit_report(this_frame, other_frame))
348 		return false;
349 
350 	/* Print report header. */
351 	pr_err("==================================================================\n");
352 	switch (type) {
353 	case KCSAN_REPORT_RACE_SIGNAL: {
354 		int cmp;
355 
356 		/*
357 		 * Order functions lexographically for consistent bug titles.
358 		 * Do not print offset of functions to keep title short.
359 		 */
360 		cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
361 		pr_err("BUG: KCSAN: %s in %ps / %ps\n",
362 		       get_bug_type(ai->access_type | other_info->ai.access_type),
363 		       (void *)(cmp < 0 ? other_frame : this_frame),
364 		       (void *)(cmp < 0 ? this_frame : other_frame));
365 	} break;
366 
367 	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
368 		pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
369 		       (void *)this_frame);
370 		break;
371 
372 	default:
373 		BUG();
374 	}
375 
376 	pr_err("\n");
377 
378 	/* Print information about the racing accesses. */
379 	switch (type) {
380 	case KCSAN_REPORT_RACE_SIGNAL:
381 		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
382 		       get_access_type(other_info->ai.access_type), other_info->ai.ptr,
383 		       other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
384 		       other_info->ai.cpu_id);
385 
386 		/* Print the other thread's stack trace. */
387 		stack_trace_print(other_info->stack_entries + other_skipnr,
388 				  other_info->num_stack_entries - other_skipnr,
389 				  0);
390 
391 		if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
392 			print_verbose_info(other_info->task);
393 
394 		pr_err("\n");
395 		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
396 		       get_access_type(ai->access_type), ai->ptr, ai->size,
397 		       get_thread_desc(ai->task_pid), ai->cpu_id);
398 		break;
399 
400 	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
401 		pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
402 		       get_access_type(ai->access_type), ai->ptr, ai->size,
403 		       get_thread_desc(ai->task_pid), ai->cpu_id);
404 		break;
405 
406 	default:
407 		BUG();
408 	}
409 	/* Print stack trace of this thread. */
410 	stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
411 			  0);
412 
413 	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
414 		print_verbose_info(current);
415 
416 	/* Print report footer. */
417 	pr_err("\n");
418 	pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
419 	dump_stack_print_info(KERN_DEFAULT);
420 	pr_err("==================================================================\n");
421 
422 	return true;
423 }
424 
425 static void release_report(unsigned long *flags, struct other_info *other_info)
426 {
427 	if (other_info)
428 		/*
429 		 * Use size to denote valid/invalid, since KCSAN entirely
430 		 * ignores 0-sized accesses.
431 		 */
432 		other_info->ai.size = 0;
433 
434 	raw_spin_unlock_irqrestore(&report_lock, *flags);
435 }
436 
437 /*
438  * Sets @other_info->task and awaits consumption of @other_info.
439  *
440  * Precondition: report_lock is held.
441  * Postcondition: report_lock is held.
442  */
443 static void set_other_info_task_blocking(unsigned long *flags,
444 					 const struct access_info *ai,
445 					 struct other_info *other_info)
446 {
447 	/*
448 	 * We may be instrumenting a code-path where current->state is already
449 	 * something other than TASK_RUNNING.
450 	 */
451 	const bool is_running = current->state == TASK_RUNNING;
452 	/*
453 	 * To avoid deadlock in case we are in an interrupt here and this is a
454 	 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
455 	 * timeout to ensure this works in all contexts.
456 	 *
457 	 * Await approximately the worst case delay of the reporting thread (if
458 	 * we are not interrupted).
459 	 */
460 	int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);
461 
462 	other_info->task = current;
463 	do {
464 		if (is_running) {
465 			/*
466 			 * Let lockdep know the real task is sleeping, to print
467 			 * the held locks (recall we turned lockdep off, so
468 			 * locking/unlocking @report_lock won't be recorded).
469 			 */
470 			set_current_state(TASK_UNINTERRUPTIBLE);
471 		}
472 		raw_spin_unlock_irqrestore(&report_lock, *flags);
473 		/*
474 		 * We cannot call schedule() since we also cannot reliably
475 		 * determine if sleeping here is permitted -- see in_atomic().
476 		 */
477 
478 		udelay(1);
479 		raw_spin_lock_irqsave(&report_lock, *flags);
480 		if (timeout-- < 0) {
481 			/*
482 			 * Abort. Reset @other_info->task to NULL, since it
483 			 * appears the other thread is still going to consume
484 			 * it. It will result in no verbose info printed for
485 			 * this task.
486 			 */
487 			other_info->task = NULL;
488 			break;
489 		}
490 		/*
491 		 * If invalid, or @ptr nor @current matches, then @other_info
492 		 * has been consumed and we may continue. If not, retry.
493 		 */
494 	} while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
495 		 other_info->task == current);
496 	if (is_running)
497 		set_current_state(TASK_RUNNING);
498 }
499 
500 /* Populate @other_info; requires that the provided @other_info not in use. */
501 static void prepare_report_producer(unsigned long *flags,
502 				    const struct access_info *ai,
503 				    struct other_info *other_info)
504 {
505 	raw_spin_lock_irqsave(&report_lock, *flags);
506 
507 	/*
508 	 * The same @other_infos entry cannot be used concurrently, because
509 	 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
510 	 * core.c), and a watchpoint is only released for reuse after reporting
511 	 * is done by the consumer of @other_info. Therefore, it is impossible
512 	 * for another concurrent prepare_report_producer() to set the same
513 	 * @other_info, and are guaranteed exclusivity for the @other_infos
514 	 * entry pointed to by @other_info.
515 	 *
516 	 * To check this property holds, size should never be non-zero here,
517 	 * because every consumer of struct other_info resets size to 0 in
518 	 * release_report().
519 	 */
520 	WARN_ON(other_info->ai.size);
521 
522 	other_info->ai = *ai;
523 	other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
524 
525 	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
526 		set_other_info_task_blocking(flags, ai, other_info);
527 
528 	raw_spin_unlock_irqrestore(&report_lock, *flags);
529 }
530 
531 /* Awaits producer to fill @other_info and then returns. */
532 static bool prepare_report_consumer(unsigned long *flags,
533 				    const struct access_info *ai,
534 				    struct other_info *other_info)
535 {
536 
537 	raw_spin_lock_irqsave(&report_lock, *flags);
538 	while (!other_info->ai.size) { /* Await valid @other_info. */
539 		raw_spin_unlock_irqrestore(&report_lock, *flags);
540 		cpu_relax();
541 		raw_spin_lock_irqsave(&report_lock, *flags);
542 	}
543 
544 	/* Should always have a matching access based on watchpoint encoding. */
545 	if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
546 				     (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
547 		goto discard;
548 
549 	if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
550 			     (unsigned long)ai->ptr, ai->size)) {
551 		/*
552 		 * If the actual accesses to not match, this was a false
553 		 * positive due to watchpoint encoding.
554 		 */
555 		kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
556 		goto discard;
557 	}
558 
559 	return true;
560 
561 discard:
562 	release_report(flags, other_info);
563 	return false;
564 }
565 
566 /*
567  * Depending on the report type either sets @other_info and returns false, or
568  * awaits @other_info and returns true. If @other_info is not required for the
569  * report type, simply acquires @report_lock and returns true.
570  */
571 static noinline bool prepare_report(unsigned long *flags,
572 				    enum kcsan_report_type type,
573 				    const struct access_info *ai,
574 				    struct other_info *other_info)
575 {
576 	switch (type) {
577 	case KCSAN_REPORT_CONSUMED_WATCHPOINT:
578 		prepare_report_producer(flags, ai, other_info);
579 		return false;
580 	case KCSAN_REPORT_RACE_SIGNAL:
581 		return prepare_report_consumer(flags, ai, other_info);
582 	default:
583 		/* @other_info not required; just acquire @report_lock. */
584 		raw_spin_lock_irqsave(&report_lock, *flags);
585 		return true;
586 	}
587 }
588 
589 void kcsan_report(const volatile void *ptr, size_t size, int access_type,
590 		  enum kcsan_value_change value_change,
591 		  enum kcsan_report_type type, int watchpoint_idx)
592 {
593 	unsigned long flags = 0;
594 	const struct access_info ai = {
595 		.ptr		= ptr,
596 		.size		= size,
597 		.access_type	= access_type,
598 		.task_pid	= in_task() ? task_pid_nr(current) : -1,
599 		.cpu_id		= raw_smp_processor_id()
600 	};
601 	struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
602 					? NULL : &other_infos[watchpoint_idx];
603 
604 	kcsan_disable_current();
605 	if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
606 		goto out;
607 
608 	/*
609 	 * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
610 	 * we do not turn off lockdep here; this could happen due to recursion
611 	 * into lockdep via KCSAN if we detect a race in utilities used by
612 	 * lockdep.
613 	 */
614 	lockdep_off();
615 
616 	if (prepare_report(&flags, type, &ai, other_info)) {
617 		/*
618 		 * Never report if value_change is FALSE, only if we it is
619 		 * either TRUE or MAYBE. In case of MAYBE, further filtering may
620 		 * be done once we know the full stack trace in print_report().
621 		 */
622 		bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
623 				print_report(value_change, type, &ai, other_info);
624 
625 		if (reported && panic_on_warn)
626 			panic("panic_on_warn set ...\n");
627 
628 		release_report(&flags, other_info);
629 	}
630 
631 	lockdep_on();
632 out:
633 	kcsan_enable_current();
634 }
635