1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/debug_locks.h> 4 #include <linux/delay.h> 5 #include <linux/jiffies.h> 6 #include <linux/kernel.h> 7 #include <linux/lockdep.h> 8 #include <linux/preempt.h> 9 #include <linux/printk.h> 10 #include <linux/sched.h> 11 #include <linux/spinlock.h> 12 #include <linux/stacktrace.h> 13 14 #include "kcsan.h" 15 #include "encoding.h" 16 17 /* 18 * Max. number of stack entries to show in the report. 19 */ 20 #define NUM_STACK_ENTRIES 64 21 22 /* Common access info. */ 23 struct access_info { 24 const volatile void *ptr; 25 size_t size; 26 int access_type; 27 int task_pid; 28 int cpu_id; 29 }; 30 31 /* 32 * Other thread info: communicated from other racing thread to thread that set 33 * up the watchpoint, which then prints the complete report atomically. 34 */ 35 struct other_info { 36 struct access_info ai; 37 unsigned long stack_entries[NUM_STACK_ENTRIES]; 38 int num_stack_entries; 39 40 /* 41 * Optionally pass @current. Typically we do not need to pass @current 42 * via @other_info since just @task_pid is sufficient. Passing @current 43 * has additional overhead. 44 * 45 * To safely pass @current, we must either use get_task_struct/ 46 * put_task_struct, or stall the thread that populated @other_info. 47 * 48 * We cannot rely on get_task_struct/put_task_struct in case 49 * release_report() races with a task being released, and would have to 50 * free it in release_report(). This may result in deadlock if we want 51 * to use KCSAN on the allocators. 52 * 53 * Since we also want to reliably print held locks for 54 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread 55 * that populated @other_info until it has been consumed. 56 */ 57 struct task_struct *task; 58 }; 59 60 /* 61 * To never block any producers of struct other_info, we need as many elements 62 * as we have watchpoints (upper bound on concurrent races to report). 63 */ 64 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; 65 66 /* 67 * Information about reported races; used to rate limit reporting. 68 */ 69 struct report_time { 70 /* 71 * The last time the race was reported. 72 */ 73 unsigned long time; 74 75 /* 76 * The frames of the 2 threads; if only 1 thread is known, one frame 77 * will be 0. 78 */ 79 unsigned long frame1; 80 unsigned long frame2; 81 }; 82 83 /* 84 * Since we also want to be able to debug allocators with KCSAN, to avoid 85 * deadlock, report_times cannot be dynamically resized with krealloc in 86 * rate_limit_report. 87 * 88 * Therefore, we use a fixed-size array, which at most will occupy a page. This 89 * still adequately rate limits reports, assuming that a) number of unique data 90 * races is not excessive, and b) occurrence of unique races within the 91 * same time window is limited. 92 */ 93 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) 94 #define REPORT_TIMES_SIZE \ 95 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ 96 REPORT_TIMES_MAX : \ 97 CONFIG_KCSAN_REPORT_ONCE_IN_MS) 98 static struct report_time report_times[REPORT_TIMES_SIZE]; 99 100 /* 101 * Spinlock serializing report generation, and access to @other_infos. Although 102 * it could make sense to have a finer-grained locking story for @other_infos, 103 * report generation needs to be serialized either way, so not much is gained. 104 */ 105 static DEFINE_RAW_SPINLOCK(report_lock); 106 107 /* 108 * Checks if the race identified by thread frames frame1 and frame2 has 109 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). 110 */ 111 static bool rate_limit_report(unsigned long frame1, unsigned long frame2) 112 { 113 struct report_time *use_entry = &report_times[0]; 114 unsigned long invalid_before; 115 int i; 116 117 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); 118 119 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) 120 return false; 121 122 invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); 123 124 /* Check if a matching race report exists. */ 125 for (i = 0; i < REPORT_TIMES_SIZE; ++i) { 126 struct report_time *rt = &report_times[i]; 127 128 /* 129 * Must always select an entry for use to store info as we 130 * cannot resize report_times; at the end of the scan, use_entry 131 * will be the oldest entry, which ideally also happened before 132 * KCSAN_REPORT_ONCE_IN_MS ago. 133 */ 134 if (time_before(rt->time, use_entry->time)) 135 use_entry = rt; 136 137 /* 138 * Initially, no need to check any further as this entry as well 139 * as following entries have never been used. 140 */ 141 if (rt->time == 0) 142 break; 143 144 /* Check if entry expired. */ 145 if (time_before(rt->time, invalid_before)) 146 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ 147 148 /* Reported recently, check if race matches. */ 149 if ((rt->frame1 == frame1 && rt->frame2 == frame2) || 150 (rt->frame1 == frame2 && rt->frame2 == frame1)) 151 return true; 152 } 153 154 use_entry->time = jiffies; 155 use_entry->frame1 = frame1; 156 use_entry->frame2 = frame2; 157 return false; 158 } 159 160 /* 161 * Special rules to skip reporting. 162 */ 163 static bool 164 skip_report(enum kcsan_value_change value_change, unsigned long top_frame) 165 { 166 /* Should never get here if value_change==FALSE. */ 167 WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); 168 169 /* 170 * The first call to skip_report always has value_change==TRUE, since we 171 * cannot know the value written of an instrumented access. For the 2nd 172 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: 173 * 174 * 1. read watchpoint, conflicting write (value_change==TRUE): report; 175 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; 176 * 3. write watchpoint, conflicting write (value_change==TRUE): report; 177 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; 178 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; 179 * 6. write watchpoint, conflicting read (value_change==TRUE): report; 180 * 181 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report 182 * data races where the write may have rewritten the same value; case 6 183 * is possible either if the size is larger than what we check value 184 * changes for or the access type is KCSAN_ACCESS_ASSERT. 185 */ 186 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && 187 value_change == KCSAN_VALUE_CHANGE_MAYBE) { 188 /* 189 * The access is a write, but the data value did not change. 190 * 191 * We opt-out of this filter for certain functions at request of 192 * maintainers. 193 */ 194 char buf[64]; 195 int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame); 196 197 if (!strnstr(buf, "rcu_", len) && 198 !strnstr(buf, "_rcu", len) && 199 !strnstr(buf, "_srcu", len)) 200 return true; 201 } 202 203 return kcsan_skip_report_debugfs(top_frame); 204 } 205 206 static const char *get_access_type(int type) 207 { 208 if (type & KCSAN_ACCESS_ASSERT) { 209 if (type & KCSAN_ACCESS_SCOPED) { 210 if (type & KCSAN_ACCESS_WRITE) 211 return "assert no accesses (scoped)"; 212 else 213 return "assert no writes (scoped)"; 214 } else { 215 if (type & KCSAN_ACCESS_WRITE) 216 return "assert no accesses"; 217 else 218 return "assert no writes"; 219 } 220 } 221 222 switch (type) { 223 case 0: 224 return "read"; 225 case KCSAN_ACCESS_ATOMIC: 226 return "read (marked)"; 227 case KCSAN_ACCESS_WRITE: 228 return "write"; 229 case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 230 return "write (marked)"; 231 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: 232 return "read-write"; 233 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 234 return "read-write (marked)"; 235 case KCSAN_ACCESS_SCOPED: 236 return "read (scoped)"; 237 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: 238 return "read (marked, scoped)"; 239 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: 240 return "write (scoped)"; 241 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 242 return "write (marked, scoped)"; 243 default: 244 BUG(); 245 } 246 } 247 248 static const char *get_bug_type(int type) 249 { 250 return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; 251 } 252 253 /* Return thread description: in task or interrupt. */ 254 static const char *get_thread_desc(int task_id) 255 { 256 if (task_id != -1) { 257 static char buf[32]; /* safe: protected by report_lock */ 258 259 snprintf(buf, sizeof(buf), "task %i", task_id); 260 return buf; 261 } 262 return "interrupt"; 263 } 264 265 /* Helper to skip KCSAN-related functions in stack-trace. */ 266 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) 267 { 268 char buf[64]; 269 char *cur; 270 int len, skip; 271 272 for (skip = 0; skip < num_entries; ++skip) { 273 len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); 274 275 /* Never show tsan_* or {read,write}_once_size. */ 276 if (strnstr(buf, "tsan_", len) || 277 strnstr(buf, "_once_size", len)) 278 continue; 279 280 cur = strnstr(buf, "kcsan_", len); 281 if (cur) { 282 cur += strlen("kcsan_"); 283 if (!str_has_prefix(cur, "test")) 284 continue; /* KCSAN runtime function. */ 285 /* KCSAN related test. */ 286 } 287 288 /* 289 * No match for runtime functions -- @skip entries to skip to 290 * get to first frame of interest. 291 */ 292 break; 293 } 294 295 return skip; 296 } 297 298 /* Compares symbolized strings of addr1 and addr2. */ 299 static int sym_strcmp(void *addr1, void *addr2) 300 { 301 char buf1[64]; 302 char buf2[64]; 303 304 snprintf(buf1, sizeof(buf1), "%pS", addr1); 305 snprintf(buf2, sizeof(buf2), "%pS", addr2); 306 307 return strncmp(buf1, buf2, sizeof(buf1)); 308 } 309 310 static void print_verbose_info(struct task_struct *task) 311 { 312 if (!task) 313 return; 314 315 /* Restore IRQ state trace for printing. */ 316 kcsan_restore_irqtrace(task); 317 318 pr_err("\n"); 319 debug_show_held_locks(task); 320 print_irqtrace_events(task); 321 } 322 323 /* 324 * Returns true if a report was generated, false otherwise. 325 */ 326 static bool print_report(enum kcsan_value_change value_change, 327 enum kcsan_report_type type, 328 const struct access_info *ai, 329 const struct other_info *other_info) 330 { 331 unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; 332 int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); 333 int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); 334 unsigned long this_frame = stack_entries[skipnr]; 335 unsigned long other_frame = 0; 336 int other_skipnr = 0; /* silence uninit warnings */ 337 338 /* 339 * Must check report filter rules before starting to print. 340 */ 341 if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) 342 return false; 343 344 if (type == KCSAN_REPORT_RACE_SIGNAL) { 345 other_skipnr = get_stack_skipnr(other_info->stack_entries, 346 other_info->num_stack_entries); 347 other_frame = other_info->stack_entries[other_skipnr]; 348 349 /* @value_change is only known for the other thread */ 350 if (skip_report(value_change, other_frame)) 351 return false; 352 } 353 354 if (rate_limit_report(this_frame, other_frame)) 355 return false; 356 357 /* Print report header. */ 358 pr_err("==================================================================\n"); 359 switch (type) { 360 case KCSAN_REPORT_RACE_SIGNAL: { 361 int cmp; 362 363 /* 364 * Order functions lexographically for consistent bug titles. 365 * Do not print offset of functions to keep title short. 366 */ 367 cmp = sym_strcmp((void *)other_frame, (void *)this_frame); 368 pr_err("BUG: KCSAN: %s in %ps / %ps\n", 369 get_bug_type(ai->access_type | other_info->ai.access_type), 370 (void *)(cmp < 0 ? other_frame : this_frame), 371 (void *)(cmp < 0 ? this_frame : other_frame)); 372 } break; 373 374 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 375 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), 376 (void *)this_frame); 377 break; 378 379 default: 380 BUG(); 381 } 382 383 pr_err("\n"); 384 385 /* Print information about the racing accesses. */ 386 switch (type) { 387 case KCSAN_REPORT_RACE_SIGNAL: 388 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 389 get_access_type(other_info->ai.access_type), other_info->ai.ptr, 390 other_info->ai.size, get_thread_desc(other_info->ai.task_pid), 391 other_info->ai.cpu_id); 392 393 /* Print the other thread's stack trace. */ 394 stack_trace_print(other_info->stack_entries + other_skipnr, 395 other_info->num_stack_entries - other_skipnr, 396 0); 397 398 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 399 print_verbose_info(other_info->task); 400 401 pr_err("\n"); 402 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 403 get_access_type(ai->access_type), ai->ptr, ai->size, 404 get_thread_desc(ai->task_pid), ai->cpu_id); 405 break; 406 407 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 408 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", 409 get_access_type(ai->access_type), ai->ptr, ai->size, 410 get_thread_desc(ai->task_pid), ai->cpu_id); 411 break; 412 413 default: 414 BUG(); 415 } 416 /* Print stack trace of this thread. */ 417 stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 418 0); 419 420 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 421 print_verbose_info(current); 422 423 /* Print report footer. */ 424 pr_err("\n"); 425 pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); 426 dump_stack_print_info(KERN_DEFAULT); 427 pr_err("==================================================================\n"); 428 429 return true; 430 } 431 432 static void release_report(unsigned long *flags, struct other_info *other_info) 433 { 434 if (other_info) 435 /* 436 * Use size to denote valid/invalid, since KCSAN entirely 437 * ignores 0-sized accesses. 438 */ 439 other_info->ai.size = 0; 440 441 raw_spin_unlock_irqrestore(&report_lock, *flags); 442 } 443 444 /* 445 * Sets @other_info->task and awaits consumption of @other_info. 446 * 447 * Precondition: report_lock is held. 448 * Postcondition: report_lock is held. 449 */ 450 static void set_other_info_task_blocking(unsigned long *flags, 451 const struct access_info *ai, 452 struct other_info *other_info) 453 { 454 /* 455 * We may be instrumenting a code-path where current->state is already 456 * something other than TASK_RUNNING. 457 */ 458 const bool is_running = current->state == TASK_RUNNING; 459 /* 460 * To avoid deadlock in case we are in an interrupt here and this is a 461 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a 462 * timeout to ensure this works in all contexts. 463 * 464 * Await approximately the worst case delay of the reporting thread (if 465 * we are not interrupted). 466 */ 467 int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); 468 469 other_info->task = current; 470 do { 471 if (is_running) { 472 /* 473 * Let lockdep know the real task is sleeping, to print 474 * the held locks (recall we turned lockdep off, so 475 * locking/unlocking @report_lock won't be recorded). 476 */ 477 set_current_state(TASK_UNINTERRUPTIBLE); 478 } 479 raw_spin_unlock_irqrestore(&report_lock, *flags); 480 /* 481 * We cannot call schedule() since we also cannot reliably 482 * determine if sleeping here is permitted -- see in_atomic(). 483 */ 484 485 udelay(1); 486 raw_spin_lock_irqsave(&report_lock, *flags); 487 if (timeout-- < 0) { 488 /* 489 * Abort. Reset @other_info->task to NULL, since it 490 * appears the other thread is still going to consume 491 * it. It will result in no verbose info printed for 492 * this task. 493 */ 494 other_info->task = NULL; 495 break; 496 } 497 /* 498 * If invalid, or @ptr nor @current matches, then @other_info 499 * has been consumed and we may continue. If not, retry. 500 */ 501 } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && 502 other_info->task == current); 503 if (is_running) 504 set_current_state(TASK_RUNNING); 505 } 506 507 /* Populate @other_info; requires that the provided @other_info not in use. */ 508 static void prepare_report_producer(unsigned long *flags, 509 const struct access_info *ai, 510 struct other_info *other_info) 511 { 512 raw_spin_lock_irqsave(&report_lock, *flags); 513 514 /* 515 * The same @other_infos entry cannot be used concurrently, because 516 * there is a one-to-one mapping to watchpoint slots (@watchpoints in 517 * core.c), and a watchpoint is only released for reuse after reporting 518 * is done by the consumer of @other_info. Therefore, it is impossible 519 * for another concurrent prepare_report_producer() to set the same 520 * @other_info, and are guaranteed exclusivity for the @other_infos 521 * entry pointed to by @other_info. 522 * 523 * To check this property holds, size should never be non-zero here, 524 * because every consumer of struct other_info resets size to 0 in 525 * release_report(). 526 */ 527 WARN_ON(other_info->ai.size); 528 529 other_info->ai = *ai; 530 other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); 531 532 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 533 set_other_info_task_blocking(flags, ai, other_info); 534 535 raw_spin_unlock_irqrestore(&report_lock, *flags); 536 } 537 538 /* Awaits producer to fill @other_info and then returns. */ 539 static bool prepare_report_consumer(unsigned long *flags, 540 const struct access_info *ai, 541 struct other_info *other_info) 542 { 543 544 raw_spin_lock_irqsave(&report_lock, *flags); 545 while (!other_info->ai.size) { /* Await valid @other_info. */ 546 raw_spin_unlock_irqrestore(&report_lock, *flags); 547 cpu_relax(); 548 raw_spin_lock_irqsave(&report_lock, *flags); 549 } 550 551 /* Should always have a matching access based on watchpoint encoding. */ 552 if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, 553 (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) 554 goto discard; 555 556 if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, 557 (unsigned long)ai->ptr, ai->size)) { 558 /* 559 * If the actual accesses to not match, this was a false 560 * positive due to watchpoint encoding. 561 */ 562 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]); 563 goto discard; 564 } 565 566 return true; 567 568 discard: 569 release_report(flags, other_info); 570 return false; 571 } 572 573 /* 574 * Depending on the report type either sets @other_info and returns false, or 575 * awaits @other_info and returns true. If @other_info is not required for the 576 * report type, simply acquires @report_lock and returns true. 577 */ 578 static noinline bool prepare_report(unsigned long *flags, 579 enum kcsan_report_type type, 580 const struct access_info *ai, 581 struct other_info *other_info) 582 { 583 switch (type) { 584 case KCSAN_REPORT_CONSUMED_WATCHPOINT: 585 prepare_report_producer(flags, ai, other_info); 586 return false; 587 case KCSAN_REPORT_RACE_SIGNAL: 588 return prepare_report_consumer(flags, ai, other_info); 589 default: 590 /* @other_info not required; just acquire @report_lock. */ 591 raw_spin_lock_irqsave(&report_lock, *flags); 592 return true; 593 } 594 } 595 596 void kcsan_report(const volatile void *ptr, size_t size, int access_type, 597 enum kcsan_value_change value_change, 598 enum kcsan_report_type type, int watchpoint_idx) 599 { 600 unsigned long flags = 0; 601 const struct access_info ai = { 602 .ptr = ptr, 603 .size = size, 604 .access_type = access_type, 605 .task_pid = in_task() ? task_pid_nr(current) : -1, 606 .cpu_id = raw_smp_processor_id() 607 }; 608 struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN 609 ? NULL : &other_infos[watchpoint_idx]; 610 611 kcsan_disable_current(); 612 if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) 613 goto out; 614 615 /* 616 * Because we may generate reports when we're in scheduler code, the use 617 * of printk() could deadlock. Until such time that all printing code 618 * called in print_report() is scheduler-safe, accept the risk, and just 619 * get our message out. As such, also disable lockdep to hide the 620 * warning, and avoid disabling lockdep for the rest of the kernel. 621 */ 622 lockdep_off(); 623 624 if (prepare_report(&flags, type, &ai, other_info)) { 625 /* 626 * Never report if value_change is FALSE, only if we it is 627 * either TRUE or MAYBE. In case of MAYBE, further filtering may 628 * be done once we know the full stack trace in print_report(). 629 */ 630 bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && 631 print_report(value_change, type, &ai, other_info); 632 633 if (reported && panic_on_warn) 634 panic("panic_on_warn set ...\n"); 635 636 release_report(&flags, other_info); 637 } 638 639 lockdep_on(); 640 out: 641 kcsan_enable_current(); 642 } 643