1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 258687acbSDon Zickus /* 358687acbSDon Zickus * Detect hard and soft lockups on a system 458687acbSDon Zickus * 558687acbSDon Zickus * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. 658687acbSDon Zickus * 786f5e6a7SFernando Luis Vázquez Cao * Note: Most of this code is borrowed heavily from the original softlockup 886f5e6a7SFernando Luis Vázquez Cao * detector, so thanks to Ingo for the initial implementation. 986f5e6a7SFernando Luis Vázquez Cao * Some chunks also taken from the old x86-specific nmi watchdog code, thanks 1058687acbSDon Zickus * to those contributors as well. 1158687acbSDon Zickus */ 1258687acbSDon Zickus 135f92a7b0SKefeng Wang #define pr_fmt(fmt) "watchdog: " fmt 144501980aSAndrew Morton 1558687acbSDon Zickus #include <linux/mm.h> 1658687acbSDon Zickus #include <linux/cpu.h> 1758687acbSDon Zickus #include <linux/nmi.h> 1858687acbSDon Zickus #include <linux/init.h> 1958687acbSDon Zickus #include <linux/module.h> 2058687acbSDon Zickus #include <linux/sysctl.h> 21fe4ba3c3SChris Metcalf #include <linux/tick.h> 22e6017571SIngo Molnar #include <linux/sched/clock.h> 23b17b0153SIngo Molnar #include <linux/sched/debug.h> 2478634061SFrederic Weisbecker #include <linux/sched/isolation.h> 259cf57731SPeter Zijlstra #include <linux/stop_machine.h> 2658687acbSDon Zickus 2758687acbSDon Zickus #include <asm/irq_regs.h> 285d1c0f4aSEric B Munson #include <linux/kvm_para.h> 2958687acbSDon Zickus 30946d1977SThomas Gleixner static DEFINE_MUTEX(watchdog_mutex); 31ab992dc3SPeter Zijlstra 3205a4a952SNicholas Piggin #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) 33df95d308SDouglas Anderson # define WATCHDOG_HARDLOCKUP_DEFAULT 1 3484d56e66SUlrich Obergfell #else 35df95d308SDouglas Anderson # define WATCHDOG_HARDLOCKUP_DEFAULT 0 3684d56e66SUlrich Obergfell #endif 3705a4a952SNicholas Piggin 3809154985SThomas Gleixner unsigned long __read_mostly watchdog_enabled; 
3909154985SThomas Gleixner int __read_mostly watchdog_user_enabled = 1; 40df95d308SDouglas Anderson static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; 41df95d308SDouglas Anderson static int __read_mostly watchdog_softlockup_user_enabled = 1; 427feeb9cdSThomas Gleixner int __read_mostly watchdog_thresh = 10; 43df95d308SDouglas Anderson static int __read_mostly watchdog_hardlockup_available; 447feeb9cdSThomas Gleixner 457feeb9cdSThomas Gleixner struct cpumask watchdog_cpumask __read_mostly; 467feeb9cdSThomas Gleixner unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); 477feeb9cdSThomas Gleixner 4805a4a952SNicholas Piggin #ifdef CONFIG_HARDLOCKUP_DETECTOR 49f117955aSGuilherme G. Piccoli 50f117955aSGuilherme G. Piccoli # ifdef CONFIG_SMP 51f117955aSGuilherme G. Piccoli int __read_mostly sysctl_hardlockup_all_cpu_backtrace; 52f117955aSGuilherme G. Piccoli # endif /* CONFIG_SMP */ 53f117955aSGuilherme G. Piccoli 5405a4a952SNicholas Piggin /* 5505a4a952SNicholas Piggin * Should we panic when a soft-lockup or hard-lockup occurs: 5605a4a952SNicholas Piggin */ 5705a4a952SNicholas Piggin unsigned int __read_mostly hardlockup_panic = 5867fca000SRasmus Villemoes IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC); 5905a4a952SNicholas Piggin /* 6005a4a952SNicholas Piggin * We may not want to enable hard lockup detection by default in all cases, 6105a4a952SNicholas Piggin * for example when running the kernel as a guest on a hypervisor. In these 6205a4a952SNicholas Piggin * cases this function can be called to disable hard lockup detection. This 6305a4a952SNicholas Piggin * function should only be executed once by the boot processor before the 6405a4a952SNicholas Piggin * kernel command line parameters are parsed, because otherwise it is not 6505a4a952SNicholas Piggin * possible to override this in hardlockup_panic_setup(). 
6605a4a952SNicholas Piggin */ 677a355820SThomas Gleixner void __init hardlockup_detector_disable(void) 6805a4a952SNicholas Piggin { 69df95d308SDouglas Anderson watchdog_hardlockup_user_enabled = 0; 7005a4a952SNicholas Piggin } 7105a4a952SNicholas Piggin 7205a4a952SNicholas Piggin static int __init hardlockup_panic_setup(char *str) 7305a4a952SNicholas Piggin { 7405a4a952SNicholas Piggin if (!strncmp(str, "panic", 5)) 7505a4a952SNicholas Piggin hardlockup_panic = 1; 7605a4a952SNicholas Piggin else if (!strncmp(str, "nopanic", 7)) 7705a4a952SNicholas Piggin hardlockup_panic = 0; 7805a4a952SNicholas Piggin else if (!strncmp(str, "0", 1)) 79df95d308SDouglas Anderson watchdog_hardlockup_user_enabled = 0; 8005a4a952SNicholas Piggin else if (!strncmp(str, "1", 1)) 81df95d308SDouglas Anderson watchdog_hardlockup_user_enabled = 1; 8205a4a952SNicholas Piggin return 1; 8305a4a952SNicholas Piggin } 8405a4a952SNicholas Piggin __setup("nmi_watchdog=", hardlockup_panic_setup); 8505a4a952SNicholas Piggin 86368a7e2cSThomas Gleixner #endif /* CONFIG_HARDLOCKUP_DETECTOR */ 8705a4a952SNicholas Piggin 881f423c90SDouglas Anderson #if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) 8981972551SDouglas Anderson 9077c12fc9SDouglas Anderson static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); 9177c12fc9SDouglas Anderson static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); 921610611aSDouglas Anderson static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); 93ed92e1efSDouglas Anderson static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); 941610611aSDouglas Anderson static unsigned long watchdog_hardlockup_all_cpu_dumped; 9581972551SDouglas Anderson 96ed92e1efSDouglas Anderson notrace void arch_touch_nmi_watchdog(void) 97ed92e1efSDouglas Anderson { 98ed92e1efSDouglas Anderson /* 99ed92e1efSDouglas Anderson * Using __raw here because some code paths have 100ed92e1efSDouglas Anderson * preemption enabled. 
If preemption is enabled 101ed92e1efSDouglas Anderson * then interrupts should be enabled too, in which 102ed92e1efSDouglas Anderson * case we shouldn't have to worry about the watchdog 103ed92e1efSDouglas Anderson * going off. 104ed92e1efSDouglas Anderson */ 105ed92e1efSDouglas Anderson raw_cpu_write(watchdog_hardlockup_touched, true); 106ed92e1efSDouglas Anderson } 107ed92e1efSDouglas Anderson EXPORT_SYMBOL(arch_touch_nmi_watchdog); 108ed92e1efSDouglas Anderson 1091f423c90SDouglas Anderson void watchdog_hardlockup_touch_cpu(unsigned int cpu) 1101f423c90SDouglas Anderson { 1111f423c90SDouglas Anderson per_cpu(watchdog_hardlockup_touched, cpu) = true; 1121f423c90SDouglas Anderson 1131f423c90SDouglas Anderson /* Match with smp_rmb() in watchdog_hardlockup_check() */ 1141f423c90SDouglas Anderson smp_wmb(); 1151f423c90SDouglas Anderson } 1161f423c90SDouglas Anderson 11777c12fc9SDouglas Anderson static bool is_hardlockup(unsigned int cpu) 11881972551SDouglas Anderson { 11977c12fc9SDouglas Anderson int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); 12081972551SDouglas Anderson 12177c12fc9SDouglas Anderson if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) 12281972551SDouglas Anderson return true; 12381972551SDouglas Anderson 12477c12fc9SDouglas Anderson /* 12577c12fc9SDouglas Anderson * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE 12677c12fc9SDouglas Anderson * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is 12777c12fc9SDouglas Anderson * written/read by a single CPU. 
12877c12fc9SDouglas Anderson */ 12977c12fc9SDouglas Anderson per_cpu(hrtimer_interrupts_saved, cpu) = hrint; 1301610611aSDouglas Anderson 13181972551SDouglas Anderson return false; 13281972551SDouglas Anderson } 13381972551SDouglas Anderson 1341f423c90SDouglas Anderson static unsigned long watchdog_hardlockup_kick(void) 13581972551SDouglas Anderson { 1361f423c90SDouglas Anderson return atomic_inc_return(raw_cpu_ptr(&hrtimer_interrupts)); 13781972551SDouglas Anderson } 13881972551SDouglas Anderson 13977c12fc9SDouglas Anderson void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) 14081972551SDouglas Anderson { 1411f423c90SDouglas Anderson /* Match with smp_wmb() in watchdog_hardlockup_touch_cpu() */ 1421f423c90SDouglas Anderson smp_rmb(); 1431f423c90SDouglas Anderson 144ed92e1efSDouglas Anderson if (per_cpu(watchdog_hardlockup_touched, cpu)) { 145ed92e1efSDouglas Anderson per_cpu(watchdog_hardlockup_touched, cpu) = false; 146ed92e1efSDouglas Anderson return; 147ed92e1efSDouglas Anderson } 148ed92e1efSDouglas Anderson 1491610611aSDouglas Anderson /* 1501610611aSDouglas Anderson * Check for a hardlockup by making sure the CPU's timer 1511610611aSDouglas Anderson * interrupt is incrementing. The timer interrupt should have 15281972551SDouglas Anderson * fired multiple times before we overflow'd. If it hasn't 15381972551SDouglas Anderson * then this is a good indication the cpu is stuck 15481972551SDouglas Anderson */ 15577c12fc9SDouglas Anderson if (is_hardlockup(cpu)) { 1561610611aSDouglas Anderson unsigned int this_cpu = smp_processor_id(); 15777c12fc9SDouglas Anderson struct cpumask backtrace_mask = *cpu_online_mask; 15881972551SDouglas Anderson 1591610611aSDouglas Anderson /* Only print hardlockups once. 
*/ 16077c12fc9SDouglas Anderson if (per_cpu(watchdog_hardlockup_warned, cpu)) 16181972551SDouglas Anderson return; 16281972551SDouglas Anderson 16377c12fc9SDouglas Anderson pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu); 16481972551SDouglas Anderson print_modules(); 16581972551SDouglas Anderson print_irqtrace_events(current); 16677c12fc9SDouglas Anderson if (cpu == this_cpu) { 16781972551SDouglas Anderson if (regs) 16881972551SDouglas Anderson show_regs(regs); 16981972551SDouglas Anderson else 17081972551SDouglas Anderson dump_stack(); 17177c12fc9SDouglas Anderson cpumask_clear_cpu(cpu, &backtrace_mask); 17277c12fc9SDouglas Anderson } else { 17377c12fc9SDouglas Anderson if (trigger_single_cpu_backtrace(cpu)) 17477c12fc9SDouglas Anderson cpumask_clear_cpu(cpu, &backtrace_mask); 17577c12fc9SDouglas Anderson } 17681972551SDouglas Anderson 17781972551SDouglas Anderson /* 17877c12fc9SDouglas Anderson * Perform multi-CPU dump only once to avoid multiple 17977c12fc9SDouglas Anderson * hardlockups generating interleaving traces 18081972551SDouglas Anderson */ 18181972551SDouglas Anderson if (sysctl_hardlockup_all_cpu_backtrace && 1821610611aSDouglas Anderson !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) 18377c12fc9SDouglas Anderson trigger_cpumask_backtrace(&backtrace_mask); 18481972551SDouglas Anderson 18581972551SDouglas Anderson if (hardlockup_panic) 18681972551SDouglas Anderson nmi_panic(regs, "Hard LOCKUP"); 18781972551SDouglas Anderson 18877c12fc9SDouglas Anderson per_cpu(watchdog_hardlockup_warned, cpu) = true; 1891610611aSDouglas Anderson } else { 19077c12fc9SDouglas Anderson per_cpu(watchdog_hardlockup_warned, cpu) = false; 19181972551SDouglas Anderson } 19281972551SDouglas Anderson } 19381972551SDouglas Anderson 1941f423c90SDouglas Anderson #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ 19581972551SDouglas Anderson 1961f423c90SDouglas Anderson static inline unsigned long watchdog_hardlockup_kick(void) { return 0; } 
19781972551SDouglas Anderson 1981f423c90SDouglas Anderson #endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ 19981972551SDouglas Anderson 200ec6a9066SUlrich Obergfell /* 201d9b3629aSDouglas Anderson * These functions can be overridden based on the configured hardlockdup detector. 202a10a842fSNicholas Piggin * 203df95d308SDouglas Anderson * watchdog_hardlockup_enable/disable can be implemented to start and stop when 204d9b3629aSDouglas Anderson * softlockup watchdog start and stop. The detector must select the 205a10a842fSNicholas Piggin * SOFTLOCKUP_DETECTOR Kconfig. 20605a4a952SNicholas Piggin */ 207d9b3629aSDouglas Anderson void __weak watchdog_hardlockup_enable(unsigned int cpu) { } 208941154bdSThomas Gleixner 209d9b3629aSDouglas Anderson void __weak watchdog_hardlockup_disable(unsigned int cpu) { } 21005a4a952SNicholas Piggin 211930d8f8dSLecopzer Chen /* 212930d8f8dSLecopzer Chen * Watchdog-detector specific API. 213930d8f8dSLecopzer Chen * 214930d8f8dSLecopzer Chen * Return 0 when hardlockup watchdog is available, negative value otherwise. 215930d8f8dSLecopzer Chen * Note that the negative value means that a delayed probe might 216930d8f8dSLecopzer Chen * succeed later. 217930d8f8dSLecopzer Chen */ 218df95d308SDouglas Anderson int __weak __init watchdog_hardlockup_probe(void) 219a994a314SThomas Gleixner { 220d9b3629aSDouglas Anderson /* 221d9b3629aSDouglas Anderson * If CONFIG_HAVE_NMI_WATCHDOG is defined then an architecture 222d9b3629aSDouglas Anderson * is assumed to have the hard watchdog available and we return 0. 
223d9b3629aSDouglas Anderson */ 224d9b3629aSDouglas Anderson if (IS_ENABLED(CONFIG_HAVE_NMI_WATCHDOG)) 225d9b3629aSDouglas Anderson return 0; 226d9b3629aSDouglas Anderson 227d9b3629aSDouglas Anderson /* 228d9b3629aSDouglas Anderson * Hardlockup detectors other than those using CONFIG_HAVE_NMI_WATCHDOG 229d9b3629aSDouglas Anderson * are required to implement a non-weak version of this probe function 230d9b3629aSDouglas Anderson * to tell whether they are available. If they don't override then 231d9b3629aSDouglas Anderson * we'll return -ENODEV. 232d9b3629aSDouglas Anderson */ 233d9b3629aSDouglas Anderson return -ENODEV; 234a994a314SThomas Gleixner } 235a994a314SThomas Gleixner 2366592ad2fSThomas Gleixner /** 237df95d308SDouglas Anderson * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration 2386592ad2fSThomas Gleixner * 2396b9dc480SThomas Gleixner * The reconfiguration steps are: 240df95d308SDouglas Anderson * watchdog_hardlockup_stop(); 2416592ad2fSThomas Gleixner * update_variables(); 242df95d308SDouglas Anderson * watchdog_hardlockup_start(); 2436b9dc480SThomas Gleixner */ 244df95d308SDouglas Anderson void __weak watchdog_hardlockup_stop(void) { } 2456b9dc480SThomas Gleixner 2466b9dc480SThomas Gleixner /** 247df95d308SDouglas Anderson * watchdog_hardlockup_start - Start the watchdog after reconfiguration 2486592ad2fSThomas Gleixner * 249df95d308SDouglas Anderson * Counterpart to watchdog_hardlockup_stop(). 
2506b9dc480SThomas Gleixner * 2516b9dc480SThomas Gleixner * The following variables have been updated in update_variables() and 2526b9dc480SThomas Gleixner * contain the currently valid configuration: 2537feeb9cdSThomas Gleixner * - watchdog_enabled 254a10a842fSNicholas Piggin * - watchdog_thresh 255a10a842fSNicholas Piggin * - watchdog_cpumask 256a10a842fSNicholas Piggin */ 257df95d308SDouglas Anderson void __weak watchdog_hardlockup_start(void) { } 258a10a842fSNicholas Piggin 25909154985SThomas Gleixner /** 26009154985SThomas Gleixner * lockup_detector_update_enable - Update the sysctl enable bit 26109154985SThomas Gleixner * 262df95d308SDouglas Anderson * Caller needs to make sure that the hard watchdogs are off, so this 263df95d308SDouglas Anderson * can't race with watchdog_hardlockup_disable(). 26409154985SThomas Gleixner */ 26509154985SThomas Gleixner static void lockup_detector_update_enable(void) 26609154985SThomas Gleixner { 26709154985SThomas Gleixner watchdog_enabled = 0; 26809154985SThomas Gleixner if (!watchdog_user_enabled) 26909154985SThomas Gleixner return; 270df95d308SDouglas Anderson if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled) 271df95d308SDouglas Anderson watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED; 272df95d308SDouglas Anderson if (watchdog_softlockup_user_enabled) 273df95d308SDouglas Anderson watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED; 27409154985SThomas Gleixner } 27509154985SThomas Gleixner 27605a4a952SNicholas Piggin #ifdef CONFIG_SOFTLOCKUP_DETECTOR 27705a4a952SNicholas Piggin 278fef06efcSPetr Mladek /* 279fef06efcSPetr Mladek * Delay the soflockup report when running a known slow code. 280fef06efcSPetr Mladek * It does _not_ affect the timestamp of the last successdul reschedule. 281fef06efcSPetr Mladek */ 282fef06efcSPetr Mladek #define SOFTLOCKUP_DELAY_REPORT ULONG_MAX 28311e31f60SThomas Gleixner 284f117955aSGuilherme G. Piccoli #ifdef CONFIG_SMP 285f117955aSGuilherme G. 
Piccoli int __read_mostly sysctl_softlockup_all_cpu_backtrace; 286f117955aSGuilherme G. Piccoli #endif 287f117955aSGuilherme G. Piccoli 288e7e04615SSantosh Sivaraj static struct cpumask watchdog_allowed_mask __read_mostly; 289e7e04615SSantosh Sivaraj 2902b9d7f23SThomas Gleixner /* Global variables, exported for sysctl */ 2912b9d7f23SThomas Gleixner unsigned int __read_mostly softlockup_panic = 29267fca000SRasmus Villemoes IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC); 2932eb2527fSThomas Gleixner 2949cf57731SPeter Zijlstra static bool softlockup_initialized __read_mostly; 2950f34c400SChuansheng Liu static u64 __read_mostly sample_period; 29658687acbSDon Zickus 297fef06efcSPetr Mladek /* Timestamp taken after the last successful reschedule. */ 29858687acbSDon Zickus static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 299fef06efcSPetr Mladek /* Timestamp of the last softlockup report. */ 300fef06efcSPetr Mladek static DEFINE_PER_CPU(unsigned long, watchdog_report_ts); 30158687acbSDon Zickus static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); 30258687acbSDon Zickus static DEFINE_PER_CPU(bool, softlockup_touch_sync); 303ed235875SAaron Tomlin static unsigned long soft_lockup_nmi_warn; 30458687acbSDon Zickus 30558687acbSDon Zickus static int __init nowatchdog_setup(char *str) 30658687acbSDon Zickus { 30709154985SThomas Gleixner watchdog_user_enabled = 0; 30858687acbSDon Zickus return 1; 30958687acbSDon Zickus } 31058687acbSDon Zickus __setup("nowatchdog", nowatchdog_setup); 31158687acbSDon Zickus 31258687acbSDon Zickus static int __init nosoftlockup_setup(char *str) 31358687acbSDon Zickus { 314df95d308SDouglas Anderson watchdog_softlockup_user_enabled = 0; 31558687acbSDon Zickus return 1; 31658687acbSDon Zickus } 31758687acbSDon Zickus __setup("nosoftlockup", nosoftlockup_setup); 318195daf66SUlrich Obergfell 31911295055SLaurence Oberman static int __init watchdog_thresh_setup(char *str) 32011295055SLaurence Oberman { 32111295055SLaurence Oberman get_option(&str, 
&watchdog_thresh); 32211295055SLaurence Oberman return 1; 32311295055SLaurence Oberman } 32411295055SLaurence Oberman __setup("watchdog_thresh=", watchdog_thresh_setup); 32511295055SLaurence Oberman 326941154bdSThomas Gleixner static void __lockup_detector_cleanup(void); 327941154bdSThomas Gleixner 3284eec42f3SMandeep Singh Baines /* 3294eec42f3SMandeep Singh Baines * Hard-lockup warnings should be triggered after just a few seconds. Soft- 3304eec42f3SMandeep Singh Baines * lockups can have false positives under extreme conditions. So we generally 3314eec42f3SMandeep Singh Baines * want a higher threshold for soft lockups than for hard lockups. So we couple 3324eec42f3SMandeep Singh Baines * the thresholds with a factor: we make the soft threshold twice the amount of 3334eec42f3SMandeep Singh Baines * time the hard threshold is. 3344eec42f3SMandeep Singh Baines */ 3356e9101aeSIngo Molnar static int get_softlockup_thresh(void) 3364eec42f3SMandeep Singh Baines { 3374eec42f3SMandeep Singh Baines return watchdog_thresh * 2; 3384eec42f3SMandeep Singh Baines } 33958687acbSDon Zickus 34058687acbSDon Zickus /* 34158687acbSDon Zickus * Returns seconds, approximately. We don't need nanosecond 34258687acbSDon Zickus * resolution, and we don't need to waste time with a big divide when 34358687acbSDon Zickus * 2^30ns == 1.074s. 
34458687acbSDon Zickus */ 345c06b4f19SNamhyung Kim static unsigned long get_timestamp(void) 34658687acbSDon Zickus { 347545a2bf7SCyril Bur return running_clock() >> 30LL; /* 2^30 ~= 10^9 */ 34858687acbSDon Zickus } 34958687acbSDon Zickus 3500f34c400SChuansheng Liu static void set_sample_period(void) 35158687acbSDon Zickus { 35258687acbSDon Zickus /* 353586692a5SMandeep Singh Baines * convert watchdog_thresh from seconds to ns 35486f5e6a7SFernando Luis Vázquez Cao * the divide by 5 is to give hrtimer several chances (two 35586f5e6a7SFernando Luis Vázquez Cao * or three with the current relation between the soft 35686f5e6a7SFernando Luis Vázquez Cao * and hard thresholds) to increment before the 35786f5e6a7SFernando Luis Vázquez Cao * hardlockup detector generates a warning 35858687acbSDon Zickus */ 3590f34c400SChuansheng Liu sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); 3607edaeb68SThomas Gleixner watchdog_update_hrtimer_threshold(sample_period); 36158687acbSDon Zickus } 36258687acbSDon Zickus 363fef06efcSPetr Mladek static void update_report_ts(void) 364fef06efcSPetr Mladek { 365fef06efcSPetr Mladek __this_cpu_write(watchdog_report_ts, get_timestamp()); 366fef06efcSPetr Mladek } 367fef06efcSPetr Mladek 36858687acbSDon Zickus /* Commands for resetting the watchdog */ 3697c0012f5SPetr Mladek static void update_touch_ts(void) 37058687acbSDon Zickus { 371c06b4f19SNamhyung Kim __this_cpu_write(watchdog_touch_ts, get_timestamp()); 372fef06efcSPetr Mladek update_report_ts(); 37358687acbSDon Zickus } 37458687acbSDon Zickus 37503e0d461STejun Heo /** 37603e0d461STejun Heo * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls 37703e0d461STejun Heo * 37803e0d461STejun Heo * Call when the scheduler may have stalled for legitimate reasons 37903e0d461STejun Heo * preventing the watchdog task from executing - e.g. the scheduler 38003e0d461STejun Heo * entering idle state. This should only be used for scheduler events. 
38103e0d461STejun Heo * Use touch_softlockup_watchdog() for everything else. 38203e0d461STejun Heo */ 383cb9d7fd5SVincent Whitchurch notrace void touch_softlockup_watchdog_sched(void) 38458687acbSDon Zickus { 3857861144bSAndrew Morton /* 386fef06efcSPetr Mladek * Preemption can be enabled. It doesn't matter which CPU's watchdog 387fef06efcSPetr Mladek * report period gets restarted here, so use the raw_ operation. 3887861144bSAndrew Morton */ 389fef06efcSPetr Mladek raw_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); 39058687acbSDon Zickus } 39103e0d461STejun Heo 392cb9d7fd5SVincent Whitchurch notrace void touch_softlockup_watchdog(void) 39303e0d461STejun Heo { 39403e0d461STejun Heo touch_softlockup_watchdog_sched(); 39582607adcSTejun Heo wq_watchdog_touch(raw_smp_processor_id()); 39603e0d461STejun Heo } 3970167c781SIngo Molnar EXPORT_SYMBOL(touch_softlockup_watchdog); 39858687acbSDon Zickus 399332fbdbcSDon Zickus void touch_all_softlockup_watchdogs(void) 40058687acbSDon Zickus { 40158687acbSDon Zickus int cpu; 40258687acbSDon Zickus 40358687acbSDon Zickus /* 404d57108d4SThomas Gleixner * watchdog_mutex cannpt be taken here, as this might be called 405d57108d4SThomas Gleixner * from (soft)interrupt context, so the access to 406d57108d4SThomas Gleixner * watchdog_allowed_cpumask might race with a concurrent update. 407d57108d4SThomas Gleixner * 408d57108d4SThomas Gleixner * The watchdog time stamp can race against a concurrent real 409d57108d4SThomas Gleixner * update as well, the only side effect might be a cycle delay for 410d57108d4SThomas Gleixner * the softlockup check. 
41158687acbSDon Zickus */ 41289e28ce6SWang Qing for_each_cpu(cpu, &watchdog_allowed_mask) { 413fef06efcSPetr Mladek per_cpu(watchdog_report_ts, cpu) = SOFTLOCKUP_DELAY_REPORT; 41489e28ce6SWang Qing wq_watchdog_touch(cpu); 41589e28ce6SWang Qing } 41658687acbSDon Zickus } 41758687acbSDon Zickus 41858687acbSDon Zickus void touch_softlockup_watchdog_sync(void) 41958687acbSDon Zickus { 420f7f66b05SChristoph Lameter __this_cpu_write(softlockup_touch_sync, true); 421fef06efcSPetr Mladek __this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT); 42258687acbSDon Zickus } 42358687acbSDon Zickus 4240f90b88dSPetr Mladek static int is_softlockup(unsigned long touch_ts, 4250f90b88dSPetr Mladek unsigned long period_ts, 4260f90b88dSPetr Mladek unsigned long now) 42758687acbSDon Zickus { 428df95d308SDouglas Anderson if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) { 429195daf66SUlrich Obergfell /* Warn about unreasonable delays. */ 430fef06efcSPetr Mladek if (time_after(now, period_ts + get_softlockup_thresh())) 43158687acbSDon Zickus return now - touch_ts; 432195daf66SUlrich Obergfell } 43358687acbSDon Zickus return 0; 43458687acbSDon Zickus } 43558687acbSDon Zickus 43605a4a952SNicholas Piggin /* watchdog detector functions */ 437be45bf53SPeter Zijlstra static DEFINE_PER_CPU(struct completion, softlockup_completion); 438be45bf53SPeter Zijlstra static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); 439be45bf53SPeter Zijlstra 4409cf57731SPeter Zijlstra /* 441b124ac45SWang Qing * The watchdog feed function - touches the timestamp. 4429cf57731SPeter Zijlstra * 4439cf57731SPeter Zijlstra * It only runs once every sample_period seconds (4 seconds by 4449cf57731SPeter Zijlstra * default) to reset the softlockup timestamp. If this gets delayed 4459cf57731SPeter Zijlstra * for more than 2*watchdog_thresh seconds then the debug-printout 4469cf57731SPeter Zijlstra * triggers in watchdog_timer_fn(). 
4479cf57731SPeter Zijlstra */ 4489cf57731SPeter Zijlstra static int softlockup_fn(void *data) 4499cf57731SPeter Zijlstra { 4507c0012f5SPetr Mladek update_touch_ts(); 451be45bf53SPeter Zijlstra complete(this_cpu_ptr(&softlockup_completion)); 4529cf57731SPeter Zijlstra 4539cf57731SPeter Zijlstra return 0; 4549cf57731SPeter Zijlstra } 4559cf57731SPeter Zijlstra 45658687acbSDon Zickus /* watchdog kicker functions */ 45758687acbSDon Zickus static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 45858687acbSDon Zickus { 4590f90b88dSPetr Mladek unsigned long touch_ts, period_ts, now; 46058687acbSDon Zickus struct pt_regs *regs = get_irq_regs(); 46158687acbSDon Zickus int duration; 462ed235875SAaron Tomlin int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; 4631f423c90SDouglas Anderson unsigned long hrtimer_interrupts; 46458687acbSDon Zickus 46501f0a027SThomas Gleixner if (!watchdog_enabled) 466b94f5118SDon Zickus return HRTIMER_NORESTART; 467b94f5118SDon Zickus 4681f423c90SDouglas Anderson hrtimer_interrupts = watchdog_hardlockup_kick(); 4691f423c90SDouglas Anderson 4701f423c90SDouglas Anderson /* test for hardlockups */ 4711f423c90SDouglas Anderson watchdog_buddy_check_hardlockup(hrtimer_interrupts); 47258687acbSDon Zickus 47358687acbSDon Zickus /* kick the softlockup detector */ 474be45bf53SPeter Zijlstra if (completion_done(this_cpu_ptr(&softlockup_completion))) { 475be45bf53SPeter Zijlstra reinit_completion(this_cpu_ptr(&softlockup_completion)); 4769cf57731SPeter Zijlstra stop_one_cpu_nowait(smp_processor_id(), 4779cf57731SPeter Zijlstra softlockup_fn, NULL, 4789cf57731SPeter Zijlstra this_cpu_ptr(&softlockup_stop_work)); 479be45bf53SPeter Zijlstra } 48058687acbSDon Zickus 48158687acbSDon Zickus /* .. and repeat */ 4820f34c400SChuansheng Liu hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); 48358687acbSDon Zickus 4849bf3bc94SPetr Mladek /* 4850f90b88dSPetr Mladek * Read the current timestamp first. 
It might become invalid anytime 4860f90b88dSPetr Mladek * when a virtual machine is stopped by the host or when the watchog 4870f90b88dSPetr Mladek * is touched from NMI. 4880f90b88dSPetr Mladek */ 4890f90b88dSPetr Mladek now = get_timestamp(); 4900f90b88dSPetr Mladek /* 4919bf3bc94SPetr Mladek * If a virtual machine is stopped by the host it can look to 4920f90b88dSPetr Mladek * the watchdog like a soft lockup. This function touches the watchdog. 4939bf3bc94SPetr Mladek */ 4949bf3bc94SPetr Mladek kvm_check_and_clear_guest_paused(); 4950f90b88dSPetr Mladek /* 4960f90b88dSPetr Mladek * The stored timestamp is comparable with @now only when not touched. 4970f90b88dSPetr Mladek * It might get touched anytime from NMI. Make sure that is_softlockup() 4980f90b88dSPetr Mladek * uses the same (valid) value. 4990f90b88dSPetr Mladek */ 5000f90b88dSPetr Mladek period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts)); 5019bf3bc94SPetr Mladek 5029bf3bc94SPetr Mladek /* Reset the interval when touched by known problematic code. */ 503fef06efcSPetr Mladek if (period_ts == SOFTLOCKUP_DELAY_REPORT) { 504909ea964SChristoph Lameter if (unlikely(__this_cpu_read(softlockup_touch_sync))) { 50558687acbSDon Zickus /* 50658687acbSDon Zickus * If the time stamp was touched atomically 50758687acbSDon Zickus * make sure the scheduler tick is up to date. 50858687acbSDon Zickus */ 509909ea964SChristoph Lameter __this_cpu_write(softlockup_touch_sync, false); 51058687acbSDon Zickus sched_clock_tick(); 51158687acbSDon Zickus } 5125d1c0f4aSEric B Munson 513fef06efcSPetr Mladek update_report_ts(); 51458687acbSDon Zickus return HRTIMER_RESTART; 51558687acbSDon Zickus } 51658687acbSDon Zickus 5170f90b88dSPetr Mladek /* Check for a softlockup. 
	 */
	touch_ts = __this_cpu_read(watchdog_touch_ts);
	duration = is_softlockup(touch_ts, period_ts, now);
	if (unlikely(duration)) {
		/*
		 * Prevent multiple soft-lockup reports if one cpu is already
		 * engaged in dumping all cpu back traces.
		 */
		if (softlockup_all_cpu_backtrace) {
			if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn))
				return HRTIMER_RESTART;
		}

		/* Start period for the next softlockup warning. */
		update_report_ts();

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			trigger_allbutself_cpu_backtrace();
			/* Release the "one reporter at a time" lock taken above. */
			clear_bit_unlock(0, &soft_lockup_nmi_warn);
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
	}

	return HRTIMER_RESTART;
}

/*
 * Set up and arm the softlockup hrtimer on the current CPU and, if
 * enabled, the hardlockup detector.  Must run on @cpu itself (enforced
 * by the WARN_ON_ONCE below).
 */
static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
	struct completion *done = this_cpu_ptr(&softlockup_completion);

	WARN_ON_ONCE(cpu != smp_processor_id());

	/*
	 * NOTE(review): completed up front so the wait_for_completion() in
	 * watchdog_disable() cannot block before the timer has ever fired;
	 * presumably the timer path manages the completion afterwards.
	 */
	init_completion(done);
	complete(done);

	/*
	 * Start the timer first to prevent the hardlockup watchdog triggering
	 * before the timer has a chance to fire.
	 */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED_HARD);

	/* Initialize timestamp */
	update_touch_ts();
	/* Enable the hardlockup detector */
	if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)
		watchdog_hardlockup_enable(cpu);
}

/*
 * Tear down the per-CPU watchdog: hardlockup detector first, then the
 * softlockup hrtimer.  Must run on @cpu itself.
 */
static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

	WARN_ON_ONCE(cpu != smp_processor_id());

	/*
	 * Disable the hardlockup detector first. That prevents that a large
	 * delay between disabling the timer and disabling the hardlockup
	 * detector causes a false positive.
	 */
	watchdog_hardlockup_disable(cpu);
	hrtimer_cancel(hrtimer);
	wait_for_completion(this_cpu_ptr(&softlockup_completion));
}

/* smp_call_on_cpu() callback: disable the watchdog on the calling CPU. */
static int softlockup_stop_fn(void *data)
{
	watchdog_disable(smp_processor_id());
	return 0;
}

/* Stop the softlockup detector on all currently watched CPUs. */
static void softlockup_stop_all(void)
{
	int cpu;

	if (!softlockup_initialized)
		return;

	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);

	cpumask_clear(&watchdog_allowed_mask);
}

/* smp_call_on_cpu() callback: enable the watchdog on the calling CPU. */
static int softlockup_start_fn(void *data)
{
	watchdog_enable(smp_processor_id());
	return 0;
}

/* Start the softlockup detector on every CPU allowed by watchdog_cpumask. */
static void softlockup_start_all(void)
{
	int cpu;

	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
}

/* CPU hotplug online callback: enable the watchdog if the CPU is watched. */
int lockup_detector_online_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_enable(cpu);
	return 0;
}

/* CPU hotplug offline callback: disable the watchdog if the CPU was watched. */
int lockup_detector_offline_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_disable(cpu);
	return 0;
}

/*
 * Stop both detectors, recompute the sample period and enablement state,
 * then restart whatever is still enabled.  Runs under cpus_read_lock() so
 * the set of online CPUs cannot change mid-reconfiguration.
 */
static void __lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();

	softlockup_stop_all();
	set_sample_period();
	lockup_detector_update_enable();
	if (watchdog_enabled && watchdog_thresh)
		softlockup_start_all();

	watchdog_hardlockup_start();
	cpus_read_unlock();
	/*
	 * Must be called outside the cpus locked section to prevent
	 * recursive locking in the perf code.
	 */
	__lockup_detector_cleanup();
}

/* Public entry point: reconfigure under the watchdog mutex. */
void lockup_detector_reconfigure(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure();
	mutex_unlock(&watchdog_mutex);
}

/*
 * Create the watchdog infrastructure and configure the detector(s).
 */
static __init void lockup_detector_setup(void)
{
	/*
	 * If sysctl is off and watchdog got disabled on the command line,
	 * nothing to do here.
	 */
	lockup_detector_update_enable();

	if (!IS_ENABLED(CONFIG_SYSCTL) &&
	    !(watchdog_enabled && watchdog_thresh))
		return;

	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure();
	softlockup_initialized = true;
	mutex_unlock(&watchdog_mutex);
}

#else /* CONFIG_SOFTLOCKUP_DETECTOR */
/* Softlockup detector compiled out: only manage the hardlockup detector. */
static void __lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();
	lockup_detector_update_enable();
	watchdog_hardlockup_start();
	cpus_read_unlock();
}
void lockup_detector_reconfigure(void)
{
	__lockup_detector_reconfigure();
}
static inline void lockup_detector_setup(void)
{
	__lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

/* Must be called with watchdog_mutex held (asserted below). */
static void __lockup_detector_cleanup(void)
{
	lockdep_assert_held(&watchdog_mutex);
	hardlockup_detector_perf_cleanup();
}

/**
 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
 *
 * Caller must not hold the cpu hotplug rwsem.
 */
void lockup_detector_cleanup(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_cleanup();
	mutex_unlock(&watchdog_mutex);
}

/**
 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
 *
 * Special interface for parisc. It prevents lockup detector warnings from
 * the default pm_poweroff() function which busy loops forever.
 */
void lockup_detector_soft_poweroff(void)
{
	watchdog_enabled = 0;
}

#ifdef CONFIG_SYSCTL

/* Propagate any changes to the watchdog infrastructure */
static void proc_watchdog_update(void)
{
	/* Remove impossible cpus to keep sysctl output clean. */
	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
	__lockup_detector_reconfigure();
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to            | 'which'
 * -------------------|----------------------------------|-------------------------------
 * proc_watchdog      | watchdog_user_enabled            | WATCHDOG_HARDLOCKUP_ENABLED |
 *                    |                                  | WATCHDOG_SOFTOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_nmi_watchdog  | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, *param = table->data;

	mutex_lock(&watchdog_mutex);

	if (!write) {
		/*
		 * On read synchronize the userspace interface. This is a
		 * racy snapshot.
		 */
		*param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		old = READ_ONCE(*param);
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		/* Only reconfigure when the write actually changed the value. */
		if (!err && old != READ_ONCE(*param))
			proc_watchdog_update();
	}
	mutex_unlock(&watchdog_mutex);
	return err;
}

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
				    WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Reads are always allowed; writes require a hardlockup detector. */
	if (!watchdog_hardlockup_available && write)
		return -ENOTSUPP;
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old;

	mutex_lock(&watchdog_mutex);

	old = READ_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	/* Reconfigure only on a successful write that changed the threshold. */
	if (!err && write && old != READ_ONCE(watchdog_thresh))
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	mutex_lock(&watchdog_mutex);

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write)
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}

/* Upper bound for watchdog_thresh (seconds), used as .extra2 below. */
static const int sixty = 60;

static struct ctl_table watchdog_sysctls[] = {
	{
		.procname       = "watchdog",
		.data		= &watchdog_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler   = proc_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "watchdog_thresh",
		.data		= &watchdog_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_watchdog_thresh,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&sixty,
	},
	{
		.procname	= "watchdog_cpumask",
		.data		= &watchdog_cpumask_bits,
		.maxlen		= NR_CPUS,
		.mode		= 0644,
		.proc_handler	= proc_watchdog_cpumask,
	},
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
	{
		.procname       = "soft_watchdog",
		.data		= &watchdog_softlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler   = proc_soft_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "softlockup_panic",
		.data		= &softlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "softlockup_all_cpu_backtrace",
		.data		= &sysctl_softlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR
	{
		.procname	= "hardlockup_panic",
		.data		= &hardlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "hardlockup_all_cpu_backtrace",
		.data		= &sysctl_hardlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
	{}
};

/*
 * Registered read-only (0444); watchdog_sysctl_init() flips the mode to
 * 0644 when a hardlockup detector is actually available.
 */
static struct ctl_table watchdog_hardlockup_sysctl[] = {
	{
		.procname       = "nmi_watchdog",
		.data		= &watchdog_hardlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler   = proc_nmi_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{}
};

static void __init watchdog_sysctl_init(void)
{
	register_sysctl_init("kernel", watchdog_sysctls);

	if (watchdog_hardlockup_available)
		watchdog_hardlockup_sysctl[0].mode = 0644;
	register_sysctl_init("kernel", watchdog_hardlockup_sysctl);
}

#else
#define watchdog_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

static void __init lockup_detector_delay_init(struct work_struct *work);
static bool allow_lockup_detector_init_retry __initdata;
static struct work_struct detector_work __initdata =
		__WORK_INITIALIZER(detector_work, lockup_detector_delay_init);

/*
 * Deferred (workqueue) retry of the hardlockup detector probe, for
 * platforms where the probe fails at lockup_detector_init() time.
 */
static void __init lockup_detector_delay_init(struct work_struct *work)
{
	int ret;

	ret = watchdog_hardlockup_probe();
	if (ret) {
		pr_info("Delayed init of the lockup detector failed: %d\n", ret);
		pr_info("Hard watchdog permanently disabled\n");
		return;
	}

	allow_lockup_detector_init_retry = false;

	watchdog_hardlockup_available = true;
	lockup_detector_setup();
}

/*
 * lockup_detector_retry_init - retry init lockup detector if possible.
 *
 * Retry the hardlockup detector init. Useful when the detector depends on
 * functionality that is only initialized later on a particular platform.
 */
void __init lockup_detector_retry_init(void)
{
	/* Must be called before late init calls */
	if (!allow_lockup_detector_init_retry)
		return;

	schedule_work(&detector_work);
}

/*
 * Ensure that any optional delayed hardlockup init has completed before
 * the init code and memory are freed.
 */
static int __init lockup_detector_check(void)
{
	/* Prevent any later retry. */
	allow_lockup_detector_init_retry = false;

	/* Make sure no work is pending. */
	flush_work(&detector_work);

	/* Registered here so the nmi_watchdog mode reflects availability. */
	watchdog_sysctl_init();

	return 0;

}
late_initcall_sync(lockup_detector_check);

void __init lockup_detector_init(void)
{
	if (tick_nohz_full_enabled())
		pr_info("Disabling watchdog on nohz_full cores by default\n");

	/* Default to the housekeeping CPUs; nohz_full cores are excluded. */
	cpumask_copy(&watchdog_cpumask,
		     housekeeping_cpumask(HK_TYPE_TIMER));

	if (!watchdog_hardlockup_probe())
		watchdog_hardlockup_available = true;
	else
		allow_lockup_detector_init_retry = true;

	lockup_detector_setup();
}