/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/workqueue.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/kthread.h>

static DEFINE_MUTEX(watchdog_proc_mutex);

#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

/* Helper for online, unparked cpus. */
#define for_each_watchdog_cpu(cpu) \
	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

/*
 * The 'watchdog_running' variable is set to 1 when the watchdog threads
 * are registered/started and is set to 0 when the watchdog threads are
 * unregistered/stopped, so it is an indicator whether the threads exist.
 */
static int __read_mostly watchdog_running;
/*
 * If a subsystem has a need to deactivate the watchdog temporarily, it
 * can use the suspend/resume interface to achieve this. The content of
 * the 'watchdog_suspended' variable reflects this state. Existing threads
 * are parked/unparked by the lockup_detector_{suspend|resume} functions
 * (see comment blocks pertaining to those functions for further details).
 *
 * 'watchdog_suspended' also prevents threads from being registered/started
 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
 * of 'watchdog_running' cannot change while the watchdog is deactivated
 * temporarily (see related code in 'proc' handlers).
 */
static int __read_mostly watchdog_suspended;

static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_hardlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately. We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state.  This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
void touch_softlockup_watchdog_sched(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}

void touch_softlockup_watchdog(void)
{
	touch_softlockup_watchdog_sched();
	wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * this is done lockless
	 * do we care if a 0 races with a timestamp?
	 * all it means is the softlockup check starts one cycle later
	 */
	for_each_watchdog_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
	wq_watchdog_touch(-1);
}

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}

/* watchdog detector functions */
bool is_hardlockup(void)
{
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
		return true;

	__this_cpu_write(hrtimer_interrupts_saved, hrint);
	return false;
}

static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
		/* Warn about unreasonable delays. */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}

static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}

/*
 * These two functions are mostly architecture specific; define
 * them as weak here.
 */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
	return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
}

static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/* check for a softlockup
	 * This is done by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to
	 * indicate it is getting cpu time.
	 * If it hasn't then this is a good indication some task is
	 * hogging the cpu.
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup, check to see if the host
		 * stopped the vm before we issue the warning
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is already
			 * engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp.  If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();

	/*
	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
	 * failure path. Check for failures that can occur asynchronously -
	 * for example, when CPUs are on-lined - and shut down the hardware
	 * perf event on each CPU accordingly.
	 *
	 * The only non-obvious place this bit can be cleared is through
	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
	 * pr_info here would be too noisy as it would result in a message
	 * every few seconds if the hardlockup was disabled but the softlockup
	 * enabled.
	 */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		watchdog_nmi_disable(cpu);
}

static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};

/*
 * park all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function returns an error if kthread_park() of a watchdog thread
 * fails. In this situation, the watchdog threads of some CPUs can already
 * be parked and the watchdog threads of other CPUs can still be runnable.
 * Callers are expected to handle this special condition as appropriate in
 * their context.
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static int watchdog_park_threads(void)
{
	int cpu, ret = 0;

	for_each_watchdog_cpu(cpu) {
		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
		if (ret)
			break;
	}

	return ret;
}

/*
 * unpark all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static void watchdog_unpark_threads(void)
{
	int cpu;

	for_each_watchdog_cpu(cpu)
		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}

/*
 * Suspend the hard and soft lockup detector by parking the watchdog threads.
 */
int lockup_detector_suspend(void)
{
	int ret = 0;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);
	/*
	 * Multiple suspend requests can be active in parallel (counted by
	 * the 'watchdog_suspended' variable). If the watchdog threads are
	 * running, the first caller takes care that they will be parked.
	 * The state of 'watchdog_running' cannot change while a suspend
	 * request is active (see related code in 'proc' handlers).
	 */
	if (watchdog_running && !watchdog_suspended)
		ret = watchdog_park_threads();

	if (ret == 0)
		watchdog_suspended++;
	else {
		watchdog_disable_all_cpus();
		pr_err("Failed to suspend lockup detectors, disabled\n");
		watchdog_enabled = 0;
	}

	mutex_unlock(&watchdog_proc_mutex);

	return ret;
}

/*
 * Resume the hard and soft lockup detector by unparking the watchdog threads.
 */
void lockup_detector_resume(void)
{
	mutex_lock(&watchdog_proc_mutex);

	watchdog_suspended--;
	/*
	 * The watchdog threads are unparked if they were previously running
	 * and if there is no more active suspend request.
	 */
	if (watchdog_running && !watchdog_suspended)
		watchdog_unpark_threads();

	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
}
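
/*
 * Illustrative sketch (not a caller in this file): a subsystem that needs
 * the lockup detectors quiesced around a long critical operation is expected
 * to pair the two calls above, resuming only after a successful suspend,
 * roughly:
 *
 *	if (lockup_detector_suspend() == 0) {
 *		do_sensitive_work();	// placeholder for the caller's own work
 *		lockup_detector_resume();
 *	}
 *
 * Note that lockup_detector_suspend() takes the CPU hotplug lock via
 * get_online_cpus() and the matching put_online_cpus() is issued by
 * lockup_detector_resume(), so the two calls must stay balanced.
 */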

static int update_watchdog_all_cpus(void)
{
	int ret;

	ret = watchdog_park_threads();
	if (ret)
		return ret;

	watchdog_unpark_threads();

	return 0;
}

static int watchdog_enable_all_cpus(void)
{
	int err = 0;

	if (!watchdog_running) {
		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
							     &watchdog_cpumask);
		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	} else {
		/*
		 * Enable/disable the lockup detectors or
		 * change the sample period 'on the fly'.
		 */
		err = update_watchdog_all_cpus();

		if (err) {
			watchdog_disable_all_cpus();
			pr_err("Failed to update lockup detectors, disabled\n");
		}
	}

	if (err)
		watchdog_enabled = 0;

	return err;
}

static void watchdog_disable_all_cpus(void)
{
	if (watchdog_running) {
		watchdog_running = 0;
		smpboot_unregister_percpu_thread(&watchdog_threads);
	}
}

#ifdef CONFIG_SYSCTL

/*
 * Update the run state of the lockup detectors.
 */
static int proc_watchdog_update(void)
{
	int err = 0;

	/*
	 * Watchdog threads won't be started if they are already active.
	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
	 * care of this. If those threads are already active, the sample
	 * period will be updated and the lockup detectors will be enabled
	 * or disabled 'on the fly'.
	 */
	if (watchdog_enabled && watchdog_thresh)
		err = watchdog_enable_all_cpus();
	else
		watchdog_disable_all_cpus();

	return err;
}

/*
 * common function for the watchdog, nmi_watchdog and soft_watchdog parameters
 *
 * caller             | table->data points to | 'which' contains the flag(s)
 * -------------------|-----------------------|-----------------------------
 * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
 *                    |                       | with SOFT_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;
	int *watchdog_param = (int *)table->data;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	/*
	 * If the parameter is being read return the state of the corresponding
	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
	 * run state of the lockup detectors.
	 */
	if (!write) {
		*watchdog_param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (err)
			goto out;

		/*
		 * There is a race window between fetching the current value
		 * from 'watchdog_enabled' and storing the new value. During
		 * this race window, watchdog_nmi_enable() can sneak in and
		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
		 * The 'cmpxchg' detects this race and the loop retries.
		 */
		do {
			old = watchdog_enabled;
			/*
			 * If the parameter value is not zero set the
			 * corresponding bit(s), else clear it(them).
			 */
			if (*watchdog_param)
				new = old | which;
			else
				new = old & ~which;
		} while (cmpxchg(&watchdog_enabled, old, new) != old);

		/*
		 * Update the run state of the lockup detectors. There is _no_
		 * need to check the value returned by proc_watchdog_update()
		 * and to restore the previous value of 'watchdog_enabled' as
		 * both lockup detectors are disabled if proc_watchdog_update()
		 * returns an error.
		 */
		if (old == new)
			goto out;

		err = proc_watchdog_update();
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}
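
/*
 * Illustrative userspace view of the handlers below (examples only, shown to
 * make the bit handling in proc_watchdog_common() concrete):
 *
 *	echo 0 > /proc/sys/kernel/watchdog	   clears both enable bits and
 *						   disables both detectors
 *	echo 1 > /proc/sys/kernel/nmi_watchdog	   sets NMI_WATCHDOG_ENABLED
 *	echo 1 > /proc/sys/kernel/soft_watchdog	   sets SOFT_WATCHDOG_ENABLED
 */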

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	old = ACCESS_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (err || !write)
		goto out;

	/*
	 * Update the sample period. Restore on failure.
	 */
	new = ACCESS_ONCE(watchdog_thresh);
	if (old == new)
		goto out;

	set_sample_period();
	err = proc_watchdog_update();
	if (err) {
		watchdog_thresh = old;
		set_sample_period();
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write) {
		/* Remove impossible cpus to keep sysctl output cleaner. */
		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
			    cpu_possible_mask);

		if (watchdog_running) {
			/*
			 * Failure would be due to being unable to allocate
			 * a temporary cpumask, so we are likely not in a
			 * position to do much else to make things better.
			 */
			if (smpboot_update_cpumask_percpu_thread(
				    &watchdog_threads, &watchdog_cpumask) != 0)
				pr_err("cpumask update failed\n");
		}
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}
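
/*
 * Illustrative userspace usage of the cpumask interface above (a sketch,
 * assuming the range syntax accepted by proc_do_large_bitmap): restrict the
 * watchdog threads to CPUs 0-3, including any in that range that have not
 * yet been brought online:
 *
 *	echo 0-3 > /proc/sys/kernel/watchdog_cpumask
 */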

#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
	set_sample_period();

#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
	} else
		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
}