1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/debug_locks.h> 4 #include <linux/delay.h> 5 #include <linux/jiffies.h> 6 #include <linux/kernel.h> 7 #include <linux/lockdep.h> 8 #include <linux/preempt.h> 9 #include <linux/printk.h> 10 #include <linux/sched.h> 11 #include <linux/spinlock.h> 12 #include <linux/stacktrace.h> 13 14 #include "kcsan.h" 15 #include "encoding.h" 16 17 /* 18 * Max. number of stack entries to show in the report. 19 */ 20 #define NUM_STACK_ENTRIES 64 21 22 /* Common access info. */ 23 struct access_info { 24 const volatile void *ptr; 25 size_t size; 26 int access_type; 27 int task_pid; 28 int cpu_id; 29 }; 30 31 /* 32 * Other thread info: communicated from other racing thread to thread that set 33 * up the watchpoint, which then prints the complete report atomically. 34 */ 35 struct other_info { 36 struct access_info ai; 37 unsigned long stack_entries[NUM_STACK_ENTRIES]; 38 int num_stack_entries; 39 40 /* 41 * Optionally pass @current. Typically we do not need to pass @current 42 * via @other_info since just @task_pid is sufficient. Passing @current 43 * has additional overhead. 44 * 45 * To safely pass @current, we must either use get_task_struct/ 46 * put_task_struct, or stall the thread that populated @other_info. 47 * 48 * We cannot rely on get_task_struct/put_task_struct in case 49 * release_report() races with a task being released, and would have to 50 * free it in release_report(). This may result in deadlock if we want 51 * to use KCSAN on the allocators. 52 * 53 * Since we also want to reliably print held locks for 54 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread 55 * that populated @other_info until it has been consumed. 56 */ 57 struct task_struct *task; 58 }; 59 60 /* 61 * To never block any producers of struct other_info, we need as many elements 62 * as we have watchpoints (upper bound on concurrent races to report). 63 */ 64 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; 65 66 /* 67 * Information about reported races; used to rate limit reporting. 68 */ 69 struct report_time { 70 /* 71 * The last time the race was reported. 72 */ 73 unsigned long time; 74 75 /* 76 * The frames of the 2 threads; if only 1 thread is known, one frame 77 * will be 0. 78 */ 79 unsigned long frame1; 80 unsigned long frame2; 81 }; 82 83 /* 84 * Since we also want to be able to debug allocators with KCSAN, to avoid 85 * deadlock, report_times cannot be dynamically resized with krealloc in 86 * rate_limit_report. 87 * 88 * Therefore, we use a fixed-size array, which at most will occupy a page. This 89 * still adequately rate limits reports, assuming that a) number of unique data 90 * races is not excessive, and b) occurrence of unique races within the 91 * same time window is limited. 92 */ 93 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) 94 #define REPORT_TIMES_SIZE \ 95 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ 96 REPORT_TIMES_MAX : \ 97 CONFIG_KCSAN_REPORT_ONCE_IN_MS) 98 static struct report_time report_times[REPORT_TIMES_SIZE]; 99 100 /* 101 * Spinlock serializing report generation, and access to @other_infos. Although 102 * it could make sense to have a finer-grained locking story for @other_infos, 103 * report generation needs to be serialized either way, so not much is gained. 104 */ 105 static DEFINE_RAW_SPINLOCK(report_lock); 106 107 /* 108 * Checks if the race identified by thread frames frame1 and frame2 has 109 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). 110 */ 111 static bool rate_limit_report(unsigned long frame1, unsigned long frame2) 112 { 113 struct report_time *use_entry = &report_times[0]; 114 unsigned long invalid_before; 115 int i; 116 117 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); 118 119 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) 120 return false; 121 122 invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); 123 124 /* Check if a matching race report exists. */ 125 for (i = 0; i < REPORT_TIMES_SIZE; ++i) { 126 struct report_time *rt = &report_times[i]; 127 128 /* 129 * Must always select an entry for use to store info as we 130 * cannot resize report_times; at the end of the scan, use_entry 131 * will be the oldest entry, which ideally also happened before 132 * KCSAN_REPORT_ONCE_IN_MS ago. 133 */ 134 if (time_before(rt->time, use_entry->time)) 135 use_entry = rt; 136 137 /* 138 * Initially, no need to check any further as this entry as well 139 * as following entries have never been used. 140 */ 141 if (rt->time == 0) 142 break; 143 144 /* Check if entry expired. */ 145 if (time_before(rt->time, invalid_before)) 146 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ 147 148 /* Reported recently, check if race matches. */ 149 if ((rt->frame1 == frame1 && rt->frame2 == frame2) || 150 (rt->frame1 == frame2 && rt->frame2 == frame1)) 151 return true; 152 } 153 154 use_entry->time = jiffies; 155 use_entry->frame1 = frame1; 156 use_entry->frame2 = frame2; 157 return false; 158 } 159 160 /* 161 * Special rules to skip reporting. 162 */ 163 static bool 164 skip_report(enum kcsan_value_change value_change, unsigned long top_frame) 165 { 166 /* Should never get here if value_change==FALSE. */ 167 WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); 168 169 /* 170 * The first call to skip_report always has value_change==TRUE, since we 171 * cannot know the value written of an instrumented access. For the 2nd 172 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: 173 * 174 * 1. read watchpoint, conflicting write (value_change==TRUE): report; 175 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; 176 * 3. write watchpoint, conflicting write (value_change==TRUE): report; 177 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; 178 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; 179 * 6. write watchpoint, conflicting read (value_change==TRUE): report; 180 * 181 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report 182 * data races where the write may have rewritten the same value; case 6 183 * is possible either if the size is larger than what we check value 184 * changes for or the access type is KCSAN_ACCESS_ASSERT. 185 */ 186 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && 187 value_change == KCSAN_VALUE_CHANGE_MAYBE) { 188 /* 189 * The access is a write, but the data value did not change. 190 * 191 * We opt-out of this filter for certain functions at request of 192 * maintainers. 193 */ 194 char buf[64]; 195 int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame); 196 197 if (!strnstr(buf, "rcu_", len) && 198 !strnstr(buf, "_rcu", len) && 199 !strnstr(buf, "_srcu", len)) 200 return true; 201 } 202 203 return kcsan_skip_report_debugfs(top_frame); 204 } 205 206 static const char *get_access_type(int type) 207 { 208 if (type & KCSAN_ACCESS_ASSERT) { 209 if (type & KCSAN_ACCESS_SCOPED) { 210 if (type & KCSAN_ACCESS_WRITE) 211 return "assert no accesses (scoped)"; 212 else 213 return "assert no writes (scoped)"; 214 } else { 215 if (type & KCSAN_ACCESS_WRITE) 216 return "assert no accesses"; 217 else 218 return "assert no writes"; 219 } 220 } 221 222 switch (type) { 223 case 0: 224 return "read"; 225 case KCSAN_ACCESS_ATOMIC: 226 return "read (marked)"; 227 case KCSAN_ACCESS_WRITE: 228 return "write"; 229 case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 230 return "write (marked)"; 231 case KCSAN_ACCESS_SCOPED: 232 return "read (scoped)"; 233 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: 234 return "read (marked, scoped)"; 235 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: 236 return "write (scoped)"; 237 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 238 return "write (marked, scoped)"; 239 default: 240 BUG(); 241 } 242 } 243 244 static const char *get_bug_type(int type) 245 { 246 return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; 247 } 248 249 /* Return thread description: in task or interrupt. */ 250 static const char *get_thread_desc(int task_id) 251 { 252 if (task_id != -1) { 253 static char buf[32]; /* safe: protected by report_lock */ 254 255 snprintf(buf, sizeof(buf), "task %i", task_id); 256 return buf; 257 } 258 return "interrupt"; 259 } 260 261 /* Helper to skip KCSAN-related functions in stack-trace. */ 262 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) 263 { 264 char buf[64]; 265 char *cur; 266 int len, skip; 267 268 for (skip = 0; skip < num_entries; ++skip) { 269 len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); 270 271 /* Never show tsan_* or {read,write}_once_size. */ 272 if (strnstr(buf, "tsan_", len) || 273 strnstr(buf, "_once_size", len)) 274 continue; 275 276 cur = strnstr(buf, "kcsan_", len); 277 if (cur) { 278 cur += sizeof("kcsan_") - 1; 279 if (strncmp(cur, "test", sizeof("test") - 1)) 280 continue; /* KCSAN runtime function. */ 281 /* KCSAN related test. */ 282 } 283 284 /* 285 * No match for runtime functions -- @skip entries to skip to 286 * get to first frame of interest. 287 */ 288 break; 289 } 290 291 return skip; 292 } 293 294 /* Compares symbolized strings of addr1 and addr2. */ 295 static int sym_strcmp(void *addr1, void *addr2) 296 { 297 char buf1[64]; 298 char buf2[64]; 299 300 snprintf(buf1, sizeof(buf1), "%pS", addr1); 301 snprintf(buf2, sizeof(buf2), "%pS", addr2); 302 303 return strncmp(buf1, buf2, sizeof(buf1)); 304 } 305 306 static void print_verbose_info(struct task_struct *task) 307 { 308 if (!task) 309 return; 310 311 /* Restore IRQ state trace for printing. */ 312 kcsan_restore_irqtrace(task); 313 314 pr_err("\n"); 315 debug_show_held_locks(task); 316 print_irqtrace_events(task); 317 } 318 319 /* 320 * Returns true if a report was generated, false otherwise. 321 */ 322 static bool print_report(enum kcsan_value_change value_change, 323 enum kcsan_report_type type, 324 const struct access_info *ai, 325 const struct other_info *other_info) 326 { 327 unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; 328 int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); 329 int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); 330 unsigned long this_frame = stack_entries[skipnr]; 331 unsigned long other_frame = 0; 332 int other_skipnr = 0; /* silence uninit warnings */ 333 334 /* 335 * Must check report filter rules before starting to print. 336 */ 337 if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) 338 return false; 339 340 if (type == KCSAN_REPORT_RACE_SIGNAL) { 341 other_skipnr = get_stack_skipnr(other_info->stack_entries, 342 other_info->num_stack_entries); 343 other_frame = other_info->stack_entries[other_skipnr]; 344 345 /* @value_change is only known for the other thread */ 346 if (skip_report(value_change, other_frame)) 347 return false; 348 } 349 350 if (rate_limit_report(this_frame, other_frame)) 351 return false; 352 353 /* Print report header. */ 354 pr_err("==================================================================\n"); 355 switch (type) { 356 case KCSAN_REPORT_RACE_SIGNAL: { 357 int cmp; 358 359 /* 360 * Order functions lexographically for consistent bug titles. 361 * Do not print offset of functions to keep title short. 362 */ 363 cmp = sym_strcmp((void *)other_frame, (void *)this_frame); 364 pr_err("BUG: KCSAN: %s in %ps / %ps\n", 365 get_bug_type(ai->access_type | other_info->ai.access_type), 366 (void *)(cmp < 0 ? other_frame : this_frame), 367 (void *)(cmp < 0 ? this_frame : other_frame)); 368 } break; 369 370 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 371 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), 372 (void *)this_frame); 373 break; 374 375 default: 376 BUG(); 377 } 378 379 pr_err("\n"); 380 381 /* Print information about the racing accesses. */ 382 switch (type) { 383 case KCSAN_REPORT_RACE_SIGNAL: 384 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 385 get_access_type(other_info->ai.access_type), other_info->ai.ptr, 386 other_info->ai.size, get_thread_desc(other_info->ai.task_pid), 387 other_info->ai.cpu_id); 388 389 /* Print the other thread's stack trace. */ 390 stack_trace_print(other_info->stack_entries + other_skipnr, 391 other_info->num_stack_entries - other_skipnr, 392 0); 393 394 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 395 print_verbose_info(other_info->task); 396 397 pr_err("\n"); 398 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 399 get_access_type(ai->access_type), ai->ptr, ai->size, 400 get_thread_desc(ai->task_pid), ai->cpu_id); 401 break; 402 403 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 404 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", 405 get_access_type(ai->access_type), ai->ptr, ai->size, 406 get_thread_desc(ai->task_pid), ai->cpu_id); 407 break; 408 409 default: 410 BUG(); 411 } 412 /* Print stack trace of this thread. */ 413 stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 414 0); 415 416 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 417 print_verbose_info(current); 418 419 /* Print report footer. */ 420 pr_err("\n"); 421 pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); 422 dump_stack_print_info(KERN_DEFAULT); 423 pr_err("==================================================================\n"); 424 425 return true; 426 } 427 428 static void release_report(unsigned long *flags, struct other_info *other_info) 429 { 430 if (other_info) 431 /* 432 * Use size to denote valid/invalid, since KCSAN entirely 433 * ignores 0-sized accesses. 434 */ 435 other_info->ai.size = 0; 436 437 raw_spin_unlock_irqrestore(&report_lock, *flags); 438 } 439 440 /* 441 * Sets @other_info->task and awaits consumption of @other_info. 442 * 443 * Precondition: report_lock is held. 444 * Postcondition: report_lock is held. 445 */ 446 static void set_other_info_task_blocking(unsigned long *flags, 447 const struct access_info *ai, 448 struct other_info *other_info) 449 { 450 /* 451 * We may be instrumenting a code-path where current->state is already 452 * something other than TASK_RUNNING. 453 */ 454 const bool is_running = current->state == TASK_RUNNING; 455 /* 456 * To avoid deadlock in case we are in an interrupt here and this is a 457 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a 458 * timeout to ensure this works in all contexts. 459 * 460 * Await approximately the worst case delay of the reporting thread (if 461 * we are not interrupted). 462 */ 463 int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); 464 465 other_info->task = current; 466 do { 467 if (is_running) { 468 /* 469 * Let lockdep know the real task is sleeping, to print 470 * the held locks (recall we turned lockdep off, so 471 * locking/unlocking @report_lock won't be recorded). 472 */ 473 set_current_state(TASK_UNINTERRUPTIBLE); 474 } 475 raw_spin_unlock_irqrestore(&report_lock, *flags); 476 /* 477 * We cannot call schedule() since we also cannot reliably 478 * determine if sleeping here is permitted -- see in_atomic(). 479 */ 480 481 udelay(1); 482 raw_spin_lock_irqsave(&report_lock, *flags); 483 if (timeout-- < 0) { 484 /* 485 * Abort. Reset @other_info->task to NULL, since it 486 * appears the other thread is still going to consume 487 * it. It will result in no verbose info printed for 488 * this task. 489 */ 490 other_info->task = NULL; 491 break; 492 } 493 /* 494 * If invalid, or @ptr nor @current matches, then @other_info 495 * has been consumed and we may continue. If not, retry. 496 */ 497 } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && 498 other_info->task == current); 499 if (is_running) 500 set_current_state(TASK_RUNNING); 501 } 502 503 /* Populate @other_info; requires that the provided @other_info not in use. */ 504 static void prepare_report_producer(unsigned long *flags, 505 const struct access_info *ai, 506 struct other_info *other_info) 507 { 508 raw_spin_lock_irqsave(&report_lock, *flags); 509 510 /* 511 * The same @other_infos entry cannot be used concurrently, because 512 * there is a one-to-one mapping to watchpoint slots (@watchpoints in 513 * core.c), and a watchpoint is only released for reuse after reporting 514 * is done by the consumer of @other_info. Therefore, it is impossible 515 * for another concurrent prepare_report_producer() to set the same 516 * @other_info, and are guaranteed exclusivity for the @other_infos 517 * entry pointed to by @other_info. 518 * 519 * To check this property holds, size should never be non-zero here, 520 * because every consumer of struct other_info resets size to 0 in 521 * release_report(). 522 */ 523 WARN_ON(other_info->ai.size); 524 525 other_info->ai = *ai; 526 other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); 527 528 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 529 set_other_info_task_blocking(flags, ai, other_info); 530 531 raw_spin_unlock_irqrestore(&report_lock, *flags); 532 } 533 534 /* Awaits producer to fill @other_info and then returns. */ 535 static bool prepare_report_consumer(unsigned long *flags, 536 const struct access_info *ai, 537 struct other_info *other_info) 538 { 539 540 raw_spin_lock_irqsave(&report_lock, *flags); 541 while (!other_info->ai.size) { /* Await valid @other_info. */ 542 raw_spin_unlock_irqrestore(&report_lock, *flags); 543 cpu_relax(); 544 raw_spin_lock_irqsave(&report_lock, *flags); 545 } 546 547 /* Should always have a matching access based on watchpoint encoding. */ 548 if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, 549 (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) 550 goto discard; 551 552 if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, 553 (unsigned long)ai->ptr, ai->size)) { 554 /* 555 * If the actual accesses to not match, this was a false 556 * positive due to watchpoint encoding. 557 */ 558 kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); 559 goto discard; 560 } 561 562 return true; 563 564 discard: 565 release_report(flags, other_info); 566 return false; 567 } 568 569 /* 570 * Depending on the report type either sets @other_info and returns false, or 571 * awaits @other_info and returns true. If @other_info is not required for the 572 * report type, simply acquires @report_lock and returns true. 573 */ 574 static noinline bool prepare_report(unsigned long *flags, 575 enum kcsan_report_type type, 576 const struct access_info *ai, 577 struct other_info *other_info) 578 { 579 switch (type) { 580 case KCSAN_REPORT_CONSUMED_WATCHPOINT: 581 prepare_report_producer(flags, ai, other_info); 582 return false; 583 case KCSAN_REPORT_RACE_SIGNAL: 584 return prepare_report_consumer(flags, ai, other_info); 585 default: 586 /* @other_info not required; just acquire @report_lock. */ 587 raw_spin_lock_irqsave(&report_lock, *flags); 588 return true; 589 } 590 } 591 592 void kcsan_report(const volatile void *ptr, size_t size, int access_type, 593 enum kcsan_value_change value_change, 594 enum kcsan_report_type type, int watchpoint_idx) 595 { 596 unsigned long flags = 0; 597 const struct access_info ai = { 598 .ptr = ptr, 599 .size = size, 600 .access_type = access_type, 601 .task_pid = in_task() ? task_pid_nr(current) : -1, 602 .cpu_id = raw_smp_processor_id() 603 }; 604 struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN 605 ? NULL : &other_infos[watchpoint_idx]; 606 607 kcsan_disable_current(); 608 if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) 609 goto out; 610 611 /* 612 * Because we may generate reports when we're in scheduler code, the use 613 * of printk() could deadlock. Until such time that all printing code 614 * called in print_report() is scheduler-safe, accept the risk, and just 615 * get our message out. As such, also disable lockdep to hide the 616 * warning, and avoid disabling lockdep for the rest of the kernel. 617 */ 618 lockdep_off(); 619 620 if (prepare_report(&flags, type, &ai, other_info)) { 621 /* 622 * Never report if value_change is FALSE, only if we it is 623 * either TRUE or MAYBE. In case of MAYBE, further filtering may 624 * be done once we know the full stack trace in print_report(). 625 */ 626 bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && 627 print_report(value_change, type, &ai, other_info); 628 629 if (reported && panic_on_warn) 630 panic("panic_on_warn set ...\n"); 631 632 release_report(&flags, other_info); 633 } 634 635 lockdep_on(); 636 out: 637 kcsan_enable_current(); 638 } 639