// SPDX-License-Identifier: GPL-2.0

#include <linux/debug_locks.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/stacktrace.h>

#include "kcsan.h"
#include "encoding.h"

/*
 * Max. number of stack entries to show in the report.
 */
#define NUM_STACK_ENTRIES 64

/* Common access info. */
struct access_info {
	const volatile void *ptr;
	size_t size;
	int access_type;
	int task_pid;
	int cpu_id;
};

/*
 * Other thread info: communicated from other racing thread to thread that set
 * up the watchpoint, which then prints the complete report atomically.
 */
struct other_info {
	struct access_info ai;
	unsigned long stack_entries[NUM_STACK_ENTRIES];
	int num_stack_entries;

	/*
	 * Optionally pass @current. Typically we do not need to pass @current
	 * via @other_info since just @task_pid is sufficient. Passing @current
	 * has additional overhead.
	 *
	 * To safely pass @current, we must either use get_task_struct/
	 * put_task_struct, or stall the thread that populated @other_info.
	 *
	 * We cannot rely on get_task_struct/put_task_struct in case
	 * release_report() races with a task being released, and would have to
	 * free it in release_report(). This may result in deadlock if we want
	 * to use KCSAN on the allocators.
	 *
	 * Since we also want to reliably print held locks for
	 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread
	 * that populated @other_info until it has been consumed.
	 */
	struct task_struct *task;
};

/*
 * To never block any producers of struct other_info, we need as many elements
 * as we have watchpoints (upper bound on concurrent races to report).
 */
static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];

/*
 * Information about reported races; used to rate limit reporting.
 */
struct report_time {
	/*
	 * The last time the race was reported.
	 */
	unsigned long time;

	/*
	 * The frames of the 2 threads; if only 1 thread is known, one frame
	 * will be 0.
	 */
	unsigned long frame1;
	unsigned long frame2;
};

/*
 * Since we also want to be able to debug allocators with KCSAN, to avoid
 * deadlock, report_times cannot be dynamically resized with krealloc in
 * rate_limit_report.
 *
 * Therefore, we use a fixed-size array, which at most will occupy a page. This
 * still adequately rate limits reports, assuming that a) number of unique data
 * races is not excessive, and b) occurrence of unique races within the
 * same time window is limited.
 */
#define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time))
#define REPORT_TIMES_SIZE                                                      \
	(CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ?                   \
		 REPORT_TIMES_MAX :                                            \
		 CONFIG_KCSAN_REPORT_ONCE_IN_MS)
static struct report_time report_times[REPORT_TIMES_SIZE];

/*
 * Spinlock serializing report generation, and access to @other_infos. Although
 * it could make sense to have a finer-grained locking story for @other_infos,
 * report generation needs to be serialized either way, so not much is gained.
 */
static DEFINE_RAW_SPINLOCK(report_lock);
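/*
 * Illustrative sanity check (a sketch, not in the original file): per the
 * comment above, report_times must fit within a single page regardless of
 * CONFIG_KCSAN_REPORT_ONCE_IN_MS; e.g. with 4 KiB pages and a 24-byte
 * struct report_time (three unsigned longs on 64-bit), REPORT_TIMES_MAX
 * evaluates to 170.
 */
static void __maybe_unused assert_report_times_fit_one_page(void)
{
	BUILD_BUG_ON(sizeof(report_times) > PAGE_SIZE);
}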
/*
 * Checks if the race identified by thread frames frame1 and frame2 has
 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS).
 */
static bool rate_limit_report(unsigned long frame1, unsigned long frame2)
{
	struct report_time *use_entry = &report_times[0];
	unsigned long invalid_before;
	int i;

	BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0);

	if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0)
		return false;

	invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS);

	/* Check if a matching race report exists. */
	for (i = 0; i < REPORT_TIMES_SIZE; ++i) {
		struct report_time *rt = &report_times[i];

		/*
		 * Must always select an entry for use to store info as we
		 * cannot resize report_times; at the end of the scan, use_entry
		 * will be the oldest entry, which ideally also happened before
		 * KCSAN_REPORT_ONCE_IN_MS ago.
		 */
		if (time_before(rt->time, use_entry->time))
			use_entry = rt;

		/*
		 * Initially, no need to check any further as this entry as well
		 * as following entries have never been used.
		 */
		if (rt->time == 0)
			break;

		/* Check if entry expired. */
		if (time_before(rt->time, invalid_before))
			continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */

		/* Reported recently, check if race matches. */
		if ((rt->frame1 == frame1 && rt->frame2 == frame2) ||
		    (rt->frame1 == frame2 && rt->frame2 == frame1))
			return true;
	}

	use_entry->time = jiffies;
	use_entry->frame1 = frame1;
	use_entry->frame2 = frame2;
	return false;
}
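/*
 * Illustrative sketch (hypothetical helper, not used by the runtime):
 * rate_limit_report() above treats the frame pair as unordered, so a race
 * between foo() and bar() is rate limited no matter which thread set up
 * the watchpoint. The matching logic is equivalent to:
 */
static bool __maybe_unused same_race_frames(unsigned long f1, unsigned long f2,
					    unsigned long g1, unsigned long g2)
{
	return (f1 == g1 && f2 == g2) || (f1 == g2 && f2 == g1);
}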
/*
 * Special rules to skip reporting.
 */
static bool
skip_report(enum kcsan_value_change value_change, unsigned long top_frame)
{
	/* Should never get here if value_change==FALSE. */
	WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE);

	/*
	 * The first call to skip_report always has value_change==TRUE, since we
	 * cannot know the value written of an instrumented access. For the 2nd
	 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY:
	 *
	 * 1. read watchpoint, conflicting write (value_change==TRUE): report;
	 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip;
	 * 3. write watchpoint, conflicting write (value_change==TRUE): report;
	 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip;
	 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip;
	 * 6. write watchpoint, conflicting read (value_change==TRUE): report;
	 *
	 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report
	 * data races where the write may have rewritten the same value; case 6
	 * is possible either if the size is larger than what we check value
	 * changes for or the access type is KCSAN_ACCESS_ASSERT.
	 */
	if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) &&
	    value_change == KCSAN_VALUE_CHANGE_MAYBE) {
		/*
		 * The access is a write, but the data value did not change.
		 *
		 * We opt out of this filter for certain functions at request of
		 * maintainers.
		 */
		char buf[64];
		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame);

		if (!strnstr(buf, "rcu_", len) &&
		    !strnstr(buf, "_rcu", len) &&
		    !strnstr(buf, "_srcu", len))
			return true;
	}

	return kcsan_skip_report_debugfs(top_frame);
}

static const char *get_access_type(int type)
{
	if (type & KCSAN_ACCESS_ASSERT) {
		if (type & KCSAN_ACCESS_SCOPED) {
			if (type & KCSAN_ACCESS_WRITE)
				return "assert no accesses (scoped)";
			else
				return "assert no writes (scoped)";
		} else {
			if (type & KCSAN_ACCESS_WRITE)
				return "assert no accesses";
			else
				return "assert no writes";
		}
	}

	switch (type) {
	case 0:
		return "read";
	case KCSAN_ACCESS_ATOMIC:
		return "read (marked)";
	case KCSAN_ACCESS_WRITE:
		return "write";
	case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
		return "write (marked)";
	case KCSAN_ACCESS_SCOPED:
		return "read (scoped)";
	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
		return "read (marked, scoped)";
	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE:
		return "write (scoped)";
	case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
		return "write (marked, scoped)";
	default:
		BUG();
	}
}

static const char *get_bug_type(int type)
{
	return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race";
}

/* Return thread description: in task or interrupt. */
static const char *get_thread_desc(int task_id)
{
	if (task_id != -1) {
		static char buf[32]; /* safe: protected by report_lock */

		snprintf(buf, sizeof(buf), "task %i", task_id);
		return buf;
	}
	return "interrupt";
}

/* Helper to skip KCSAN-related functions in stack-trace. */
static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries)
{
	char buf[64];
	char *cur;
	int len, skip;

	for (skip = 0; skip < num_entries; ++skip) {
		len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]);

		/* Never show tsan_* or {read,write}_once_size. */
		if (strnstr(buf, "tsan_", len) ||
		    strnstr(buf, "_once_size", len))
			continue;

		cur = strnstr(buf, "kcsan_", len);
		if (cur) {
			cur += sizeof("kcsan_") - 1;
			if (strncmp(cur, "test", sizeof("test") - 1))
				continue; /* KCSAN runtime function. */
			/* KCSAN related test. */
		}

		/*
		 * No match for runtime functions -- @skip entries to skip to
		 * get to first frame of interest.
		 */
		break;
	}

	return skip;
}

/* Compares symbolized strings of addr1 and addr2. */
static int sym_strcmp(void *addr1, void *addr2)
{
	char buf1[64];
	char buf2[64];

	snprintf(buf1, sizeof(buf1), "%pS", addr1);
	snprintf(buf2, sizeof(buf2), "%pS", addr2);

	return strncmp(buf1, buf2, sizeof(buf1));
}

static void print_verbose_info(struct task_struct *task)
{
	if (!task)
		return;

	pr_err("\n");
	debug_show_held_locks(task);
	print_irqtrace_events(task);
}
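/*
 * Illustrative sketch (hypothetical helper): print_report() below uses
 * get_stack_skipnr() to find the first frame of interest, i.e. the frame
 * used for bug titles and rate limiting. Extracting it looks like this;
 * the bounds check is a defensive addition for the sketch.
 */
static unsigned long __maybe_unused
first_frame_of_interest(const unsigned long stack_entries[], int num_entries)
{
	int skipnr = get_stack_skipnr(stack_entries, num_entries);

	return skipnr < num_entries ? stack_entries[skipnr] : 0;
}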
/*
 * Returns true if a report was generated, false otherwise.
 */
static bool print_report(enum kcsan_value_change value_change,
			 enum kcsan_report_type type,
			 const struct access_info *ai,
			 const struct other_info *other_info)
{
	unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 };
	int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1);
	int skipnr = get_stack_skipnr(stack_entries, num_stack_entries);
	unsigned long this_frame = stack_entries[skipnr];
	unsigned long other_frame = 0;
	int other_skipnr = 0; /* silence uninit warnings */

	/*
	 * Must check report filter rules before starting to print.
	 */
	if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr]))
		return false;

	if (type == KCSAN_REPORT_RACE_SIGNAL) {
		other_skipnr = get_stack_skipnr(other_info->stack_entries,
						other_info->num_stack_entries);
		other_frame = other_info->stack_entries[other_skipnr];

		/* @value_change is only known for the other thread */
		if (skip_report(value_change, other_frame))
			return false;
	}

	if (rate_limit_report(this_frame, other_frame))
		return false;

	/* Print report header. */
	pr_err("==================================================================\n");
	switch (type) {
	case KCSAN_REPORT_RACE_SIGNAL: {
		int cmp;

		/*
		 * Order functions lexicographically for consistent bug titles.
		 * Do not print offset of functions to keep title short.
		 */
		cmp = sym_strcmp((void *)other_frame, (void *)this_frame);
		pr_err("BUG: KCSAN: %s in %ps / %ps\n",
		       get_bug_type(ai->access_type | other_info->ai.access_type),
		       (void *)(cmp < 0 ? other_frame : this_frame),
		       (void *)(cmp < 0 ? this_frame : other_frame));
	} break;

	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
		pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type),
		       (void *)this_frame);
		break;

	default:
		BUG();
	}

	pr_err("\n");

	/* Print information about the racing accesses. */
	switch (type) {
	case KCSAN_REPORT_RACE_SIGNAL:
		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
		       get_access_type(other_info->ai.access_type), other_info->ai.ptr,
		       other_info->ai.size, get_thread_desc(other_info->ai.task_pid),
		       other_info->ai.cpu_id);

		/* Print the other thread's stack trace. */
		stack_trace_print(other_info->stack_entries + other_skipnr,
				  other_info->num_stack_entries - other_skipnr,
				  0);

		if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
			print_verbose_info(other_info->task);

		pr_err("\n");
		pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n",
		       get_access_type(ai->access_type), ai->ptr, ai->size,
		       get_thread_desc(ai->task_pid), ai->cpu_id);
		break;

	case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN:
		pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n",
		       get_access_type(ai->access_type), ai->ptr, ai->size,
		       get_thread_desc(ai->task_pid), ai->cpu_id);
		break;

	default:
		BUG();
	}
	/* Print stack trace of this thread. */
	stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr,
			  0);

	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
		print_verbose_info(current);

	/* Print report footer. */
	pr_err("\n");
	pr_err("Reported by Kernel Concurrency Sanitizer on:\n");
	dump_stack_print_info(KERN_DEFAULT);
	pr_err("==================================================================\n");

	return true;
}
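/*
 * For reference, a report produced by the above has this shape (a sketch
 * reconstructed from the pr_err() format strings; symbols, addresses and
 * IDs below are placeholders):
 *
 *	==================================================================
 *	BUG: KCSAN: data-race in foo / bar
 *
 *	write to 0x... of 8 bytes by task 42 on cpu 0:
 *	 <other thread's stack trace>
 *
 *	read to 0x... of 8 bytes by task 43 on cpu 1:
 *	 <this thread's stack trace>
 *
 *	Reported by Kernel Concurrency Sanitizer on:
 *	 <dump_stack_print_info() output>
 *	==================================================================
 */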
static void release_report(unsigned long *flags, struct other_info *other_info)
{
	if (other_info)
		/*
		 * Use size to denote valid/invalid, since KCSAN entirely
		 * ignores 0-sized accesses.
		 */
		other_info->ai.size = 0;

	raw_spin_unlock_irqrestore(&report_lock, *flags);
}

/*
 * Sets @other_info->task and awaits consumption of @other_info.
 *
 * Precondition: report_lock is held.
 * Postcondition: report_lock is held.
 */
static void set_other_info_task_blocking(unsigned long *flags,
					 const struct access_info *ai,
					 struct other_info *other_info)
{
	/*
	 * We may be instrumenting a code-path where current->state is already
	 * something other than TASK_RUNNING.
	 */
	const bool is_running = current->state == TASK_RUNNING;
	/*
	 * To avoid deadlock in case we are in an interrupt here and this is a
	 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
	 * timeout to ensure this works in all contexts.
	 *
	 * Await approximately the worst case delay of the reporting thread (if
	 * we are not interrupted).
	 */
	int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt);

	other_info->task = current;
	do {
		if (is_running) {
			/*
			 * Let lockdep know the real task is sleeping, to print
			 * the held locks (recall we turned lockdep off, so
			 * locking/unlocking @report_lock won't be recorded).
			 */
			set_current_state(TASK_UNINTERRUPTIBLE);
		}
		raw_spin_unlock_irqrestore(&report_lock, *flags);
		/*
		 * We cannot call schedule() since we also cannot reliably
		 * determine if sleeping here is permitted -- see in_atomic().
		 */

		udelay(1);
		raw_spin_lock_irqsave(&report_lock, *flags);
		if (timeout-- < 0) {
			/*
			 * Abort. Reset @other_info->task to NULL, since it
			 * appears the other thread is still going to consume
			 * it. It will result in no verbose info printed for
			 * this task.
			 */
			other_info->task = NULL;
			break;
		}
		/*
		 * If invalid, or if @ptr or @current no longer match, then
		 * @other_info has been consumed and we may continue. If not,
		 * retry.
		 */
	} while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
		 other_info->task == current);
	if (is_running)
		set_current_state(TASK_RUNNING);
}
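/*
 * Illustrative sketch (hypothetical helper): as described above, ai.size
 * doubles as the valid/invalid flag for an @other_infos slot -- non-zero
 * once the producer has published it, reset to 0 by the consumer in
 * release_report(). A readiness check reduces to:
 */
static bool __maybe_unused other_info_is_ready(const struct other_info *other_info)
{
	/* Caller must hold @report_lock, as in prepare_report_consumer(). */
	return other_info->ai.size != 0;
}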
/* Populate @other_info; requires that the provided @other_info is not in use. */
static void prepare_report_producer(unsigned long *flags,
				    const struct access_info *ai,
				    struct other_info *other_info)
{
	raw_spin_lock_irqsave(&report_lock, *flags);

	/*
	 * The same @other_infos entry cannot be used concurrently, because
	 * there is a one-to-one mapping to watchpoint slots (@watchpoints in
	 * core.c), and a watchpoint is only released for reuse after reporting
	 * is done by the consumer of @other_info. Therefore, it is impossible
	 * for another concurrent prepare_report_producer() to set the same
	 * @other_info, and we are guaranteed exclusivity for the @other_infos
	 * entry pointed to by @other_info.
	 *
	 * To check this property holds, size should never be non-zero here,
	 * because every consumer of struct other_info resets size to 0 in
	 * release_report().
	 */
	WARN_ON(other_info->ai.size);

	other_info->ai = *ai;
	other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);

	if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
		set_other_info_task_blocking(flags, ai, other_info);

	raw_spin_unlock_irqrestore(&report_lock, *flags);
}

/* Awaits producer to fill @other_info and then returns. */
static bool prepare_report_consumer(unsigned long *flags,
				    const struct access_info *ai,
				    struct other_info *other_info)
{
	raw_spin_lock_irqsave(&report_lock, *flags);
	while (!other_info->ai.size) { /* Await valid @other_info. */
		raw_spin_unlock_irqrestore(&report_lock, *flags);
		cpu_relax();
		raw_spin_lock_irqsave(&report_lock, *flags);
	}

	/* Should always have a matching access based on watchpoint encoding. */
	if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
				     (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
		goto discard;

	if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
			     (unsigned long)ai->ptr, ai->size)) {
		/*
		 * If the actual accesses do not match, this was a false
		 * positive due to watchpoint encoding.
		 */
		kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
		goto discard;
	}

	return true;

discard:
	release_report(flags, other_info);
	return false;
}

/*
 * Depending on the report type either sets @other_info and returns false, or
 * awaits @other_info and returns true. If @other_info is not required for the
 * report type, simply acquires @report_lock and returns true.
 */
static noinline bool prepare_report(unsigned long *flags,
				    enum kcsan_report_type type,
				    const struct access_info *ai,
				    struct other_info *other_info)
{
	switch (type) {
	case KCSAN_REPORT_CONSUMED_WATCHPOINT:
		prepare_report_producer(flags, ai, other_info);
		return false;
	case KCSAN_REPORT_RACE_SIGNAL:
		return prepare_report_consumer(flags, ai, other_info);
	default:
		/* @other_info not required; just acquire @report_lock. */
		raw_spin_lock_irqsave(&report_lock, *flags);
		return true;
	}
}
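/*
 * Illustrative sketch (hypothetical helper): prepare_report_consumer()
 * above distinguishes two levels of matching -- slot-level (addresses
 * masked with WATCHPOINT_ADDR_MASK, guaranteed by the watchpoint
 * encoding) and exact overlap of the accesses. Only the latter rules out
 * an encoding false positive:
 */
static bool __maybe_unused accesses_overlap(const struct access_info *a,
					    const struct access_info *b)
{
	return matching_access((unsigned long)a->ptr, a->size,
			       (unsigned long)b->ptr, b->size);
}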
void kcsan_report(const volatile void *ptr, size_t size, int access_type,
		  enum kcsan_value_change value_change,
		  enum kcsan_report_type type, int watchpoint_idx)
{
	unsigned long flags = 0;
	const struct access_info ai = {
		.ptr = ptr,
		.size = size,
		.access_type = access_type,
		.task_pid = in_task() ? task_pid_nr(current) : -1,
		.cpu_id = raw_smp_processor_id()
	};
	struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
					? NULL : &other_infos[watchpoint_idx];

	kcsan_disable_current();
	if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
		goto out;

	/*
	 * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
	 * we do not turn off lockdep here; this could happen due to recursion
	 * into lockdep via KCSAN if we detect a race in utilities used by
	 * lockdep.
	 */
	lockdep_off();

	if (prepare_report(&flags, type, &ai, other_info)) {
		/*
		 * Never report if value_change is FALSE, only if it is
		 * either TRUE or MAYBE. In case of MAYBE, further filtering may
		 * be done once we know the full stack trace in print_report().
		 */
		bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE &&
				print_report(value_change, type, &ai, other_info);

		if (reported && panic_on_warn)
			panic("panic_on_warn set ...\n");

		release_report(&flags, other_info);
	}

	lockdep_on();
out:
	kcsan_enable_current();
}
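/*
 * Illustrative caller (a sketch with assumed values; the real call sites
 * live in core.c): a race at unknown origin, e.g. one detected only via
 * an observed value change, could be reported as follows. @slot stands in
 * for the index derived from the watchpoint encoding.
 */
static void __maybe_unused example_report_unknown_origin(const volatile void *ptr,
							 size_t size, int slot)
{
	kcsan_report(ptr, size, KCSAN_ACCESS_WRITE, KCSAN_VALUE_CHANGE_TRUE,
		     KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, slot);
}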