/* xref: /openbmc/linux/kernel/watchdog.c (revision b94f5118) */
/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "NMI watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/workqueue.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
#include <linux/kthread.h>

static DEFINE_MUTEX(watchdog_proc_mutex);

#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
static struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

/* Helper for online, unparked cpus. */
#define for_each_watchdog_cpu(cpu) \
	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);

/*
 * The 'watchdog_running' variable is set to 1 when the watchdog threads
 * are registered/started and is set to 0 when the watchdog threads are
 * unregistered/stopped, so it is an indicator whether the threads exist.
 */
static int __read_mostly watchdog_running;
/*
 * If a subsystem has a need to deactivate the watchdog temporarily, it
 * can use the suspend/resume interface to achieve this. The content of
 * the 'watchdog_suspended' variable reflects this state. Existing threads
 * are parked/unparked by the lockup_detector_{suspend|resume} functions
 * (see comment blocks pertaining to those functions for further details).
 *
 * 'watchdog_suspended' also prevents threads from being registered/started
 * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
 * of 'watchdog_running' cannot change while the watchdog is deactivated
 * temporarily (see related code in 'proc' handlers).
 */
static int __read_mostly watchdog_suspended;

static u64 __read_mostly sample_period;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;

unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
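
/*
 * Example kernel command lines (all three parameters are defined above):
 *	softlockup_panic=1	panic on the first soft lockup report
 *	nowatchdog		disable both lockup detectors
 *	nosoftlockup		disable only the softlockup detector
 */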

#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_hardlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. We
 * therefore couple the thresholds with a factor: the soft threshold is
 * twice the hard threshold.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}
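
/*
 * Worked example: with the default watchdog_thresh of 10, the hardlockup
 * detector treats watchdog_thresh itself (10 seconds) as its threshold,
 * while the softlockup threshold becomes 10 * 2 = 20 seconds.
 */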

/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * Convert watchdog_thresh from seconds to ns. The divide by 5
	 * gives the hrtimer several chances (two or three with the
	 * current relation between the soft and hard thresholds) to
	 * increment before the hardlockup detector generates a warning.
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
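
/*
 * Worked example: with watchdog_thresh = 10, get_softlockup_thresh()
 * returns 20, so sample_period = 20 * (10^9 / 5) ns = 4 * 10^9 ns = 4 s.
 * The per-cpu hrtimer therefore fires five times per 20-second softlockup
 * window.
 */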

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state.  This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
void touch_softlockup_watchdog_sched(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}

void touch_softlockup_watchdog(void)
{
	touch_softlockup_watchdog_sched();
	wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
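
/*
 * Illustrative use (not part of this file): code that legitimately keeps a
 * cpu busy for a long time, e.g. a driver polling for a slow flash erase
 * to finish, is expected to kick the detector from its wait loop. The
 * erase_in_progress() helper below is a hypothetical name:
 *
 *	while (erase_in_progress(chip)) {
 *		touch_softlockup_watchdog();
 *		cpu_relax();
 *	}
 */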

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * This is done locklessly. Do we care if a 0 races with a
	 * timestamp? All it means is that the softlockup check starts
	 * one cycle later.
	 */
	for_each_watchdog_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
	wq_watchdog_touch(-1);
}

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}

/* watchdog detector functions */
bool is_hardlockup(void)
{
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
		return true;

	__this_cpu_write(hrtimer_interrupts_saved, hrint);
	return false;
}
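
/*
 * How this works: is_hardlockup() is typically called from NMI context by
 * the hardlockup detector. hrtimer_interrupts is incremented by the
 * (interrupt-driven) watchdog hrtimer, so if its value has not changed
 * since the previous check, interrupts have been blocked for a full
 * sample window and the cpu is considered hard-locked.
 */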

static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
		/* Warn about unreasonable delays. */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}
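
/*
 * is_softlockup() returns 0 while the watchdog task has run recently
 * enough; otherwise it returns the stall duration in seconds, which the
 * caller reports in its warning.
 */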

static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}

/*
 * These two functions are mostly architecture specific, so they are
 * defined as weak stubs here and architectures can override them.
 */
int __weak watchdog_nmi_enable(unsigned int cpu)
{
	return 0;
}
void __weak watchdog_nmi_disable(unsigned int cpu)
{
}
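
/*
 * A minimal sketch of an architecture override (hypothetical; real
 * implementations program a hardware facility such as a perf counter or
 * a platform NMI timer). The arch_start/stop_nmi_timer() helpers are
 * illustrative names only:
 *
 *	int watchdog_nmi_enable(unsigned int cpu)
 *	{
 *		return arch_start_nmi_timer(cpu, watchdog_thresh);
 *	}
 *
 *	void watchdog_nmi_disable(unsigned int cpu)
 *	{
 *		arch_stop_nmi_timer(cpu);
 *	}
 */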

static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	if (atomic_read(&watchdog_park_in_progress) != 0)
		return HRTIMER_NORESTART;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/*
	 * Check for a softlockup. This is done by making sure a
	 * high-priority task is being scheduled.  The task touches
	 * the watchdog to indicate it is getting cpu time.  If it
	 * hasn't, then this is a good indication that some task is
	 * hogging the cpu.
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup. Check to see if the host
		 * stopped the vm before we issue the warning.
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/*
			 * Prevent multiple soft-lockup reports if one cpu is
			 * already engaged in dumping cpu back traces.
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/*
			 * Avoid generating two back traces for current
			 * given that one is already made above.
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}

static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}

static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once per sample_period (4 seconds by default) to
 * reset the softlockup timestamp. If this gets delayed for more than
 * 2*watchdog_thresh seconds then the debug-printout triggers in
 * watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();

	/*
	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
	 * failure path. Check for failures that can occur asynchronously -
	 * for example, when CPUs are on-lined - and shut down the hardware
	 * perf event on each CPU accordingly.
	 *
	 * The only non-obvious place this bit can be cleared is through
	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
	 * pr_info here would be too noisy as it would result in a message
	 * every few seconds if the hardlockup was disabled but the softlockup
	 * enabled.
	 */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		watchdog_nmi_disable(cpu);
}

static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};
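
/*
 * Lifecycle note: the smpboot infrastructure creates one "watchdog/%u"
 * kthread per cpu in the cpumask. watchdog_enable() runs on thread setup
 * and unpark, watchdog_disable() runs on park, and the thread body calls
 * watchdog() whenever watchdog_should_run() observes a new hrtimer tick.
 */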

/*
 * park all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function returns an error if kthread_park() of a watchdog thread
 * fails. In this situation, the watchdog threads of some CPUs can already
 * be parked and the watchdog threads of other CPUs can still be runnable.
 * Callers are expected to handle this special condition as appropriate in
 * their context.
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static int watchdog_park_threads(void)
{
	int cpu, ret = 0;

	atomic_set(&watchdog_park_in_progress, 1);

	for_each_watchdog_cpu(cpu) {
		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
		if (ret)
			break;
	}

	atomic_set(&watchdog_park_in_progress, 0);

	return ret;
}

/*
 * unpark all watchdog threads that are specified in 'watchdog_cpumask'
 *
 * This function may only be called in a context that is protected against
 * races with CPU hotplug - for example, via get_online_cpus().
 */
static void watchdog_unpark_threads(void)
{
	int cpu;

	for_each_watchdog_cpu(cpu)
		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}

/*
 * Suspend the hard and soft lockup detector by parking the watchdog threads.
 */
int lockup_detector_suspend(void)
{
	int ret = 0;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);
	/*
	 * Multiple suspend requests can be active in parallel (counted by
	 * the 'watchdog_suspended' variable). If the watchdog threads are
	 * running, the first caller takes care that they will be parked.
	 * The state of 'watchdog_running' cannot change while a suspend
	 * request is active (see related code in 'proc' handlers).
	 */
	if (watchdog_running && !watchdog_suspended)
		ret = watchdog_park_threads();

	if (ret == 0)
		watchdog_suspended++;
	else {
		watchdog_disable_all_cpus();
		pr_err("Failed to suspend lockup detectors, disabled\n");
		watchdog_enabled = 0;
	}

	mutex_unlock(&watchdog_proc_mutex);

	return ret;
}

/*
 * Resume the hard and soft lockup detector by unparking the watchdog threads.
 */
void lockup_detector_resume(void)
{
	mutex_lock(&watchdog_proc_mutex);

	watchdog_suspended--;
	/*
	 * The watchdog threads are unparked if they were previously running
	 * and if there is no more active suspend request.
	 */
	if (watchdog_running && !watchdog_suspended)
		watchdog_unpark_threads();

	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
}
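
/*
 * Illustrative pairing (the quiesce_shared_hw() helper is hypothetical):
 *
 *	if (!lockup_detector_suspend()) {
 *		quiesce_shared_hw();
 *		lockup_detector_resume();
 *	}
 *
 * Suspend requests nest: the threads are only unparked again once every
 * outstanding request has called lockup_detector_resume().
 */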

static int update_watchdog_all_cpus(void)
{
	int ret;

	ret = watchdog_park_threads();
	if (ret)
		return ret;

	watchdog_unpark_threads();

	return 0;
}

static int watchdog_enable_all_cpus(void)
{
	int err = 0;

	if (!watchdog_running) {
		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
							     &watchdog_cpumask);
		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	} else {
		/*
		 * Enable/disable the lockup detectors or
		 * change the sample period 'on the fly'.
		 */
		err = update_watchdog_all_cpus();

		if (err) {
			watchdog_disable_all_cpus();
			pr_err("Failed to update lockup detectors, disabled\n");
		}
	}

	if (err)
		watchdog_enabled = 0;

	return err;
}

static void watchdog_disable_all_cpus(void)
{
	if (watchdog_running) {
		watchdog_running = 0;
		smpboot_unregister_percpu_thread(&watchdog_threads);
	}
}

#ifdef CONFIG_SYSCTL

/*
 * Update the run state of the lockup detectors.
 */
static int proc_watchdog_update(void)
{
	int err = 0;

	/*
	 * Watchdog threads won't be started if they are already active.
	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
	 * care of this. If those threads are already active, the sample
	 * period will be updated and the lockup detectors will be enabled
	 * or disabled 'on the fly'.
	 */
	if (watchdog_enabled && watchdog_thresh)
		err = watchdog_enable_all_cpus();
	else
		watchdog_disable_all_cpus();

	return err;
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to | 'which' contains the flag(s)
 * -------------------|-----------------------|-----------------------------
 * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
 *                    |                       | with SOFT_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
 * -------------------|-----------------------|-----------------------------
 * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;
	int *watchdog_param = (int *)table->data;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	/*
	 * If the parameter is being read return the state of the corresponding
	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
	 * run state of the lockup detectors.
	 */
	if (!write) {
		*watchdog_param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (err)
			goto out;

		/*
		 * There is a race window between fetching the current value
		 * from 'watchdog_enabled' and storing the new value. During
		 * this race window, watchdog_nmi_enable() can sneak in and
		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
		 * The 'cmpxchg' detects this race and the loop retries.
		 */
		do {
			old = watchdog_enabled;
			/*
			 * If the parameter value is not zero set the
			 * corresponding bit(s), else clear it(them).
			 */
			if (*watchdog_param)
				new = old | which;
			else
				new = old & ~which;
		} while (cmpxchg(&watchdog_enabled, old, new) != old);

		/*
		 * Update the run state of the lockup detectors. There is _no_
		 * need to check the value returned by proc_watchdog_update()
		 * and to restore the previous value of 'watchdog_enabled' as
		 * both lockup detectors are disabled if proc_watchdog_update()
		 * returns an error.
		 */
		if (old == new)
			goto out;

		err = proc_watchdog_update();
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}

/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
				    table, write, buffer, lenp, ppos);
}
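
/*
 * Example (from userspace):
 *	echo 0 > /proc/sys/kernel/watchdog	 # disable both detectors
 *	echo 1 > /proc/sys/kernel/soft_watchdog # softlockup detector only
 *	cat /proc/sys/kernel/nmi_watchdog	 # read the current NMI state
 */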

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, new;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	old = ACCESS_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (err || !write)
		goto out;

	/*
	 * Update the sample period. Restore on failure.
	 */
	new = ACCESS_ONCE(watchdog_thresh);
	if (old == new)
		goto out;

	set_sample_period();
	err = proc_watchdog_update();
	if (err) {
		watchdog_thresh = old;
		set_sample_period();
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	get_online_cpus();
	mutex_lock(&watchdog_proc_mutex);

	if (watchdog_suspended) {
		/* no parameter changes allowed while watchdog is suspended */
		err = -EAGAIN;
		goto out;
	}

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write) {
		/* Remove impossible cpus to keep sysctl output cleaner. */
		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
			    cpu_possible_mask);

		if (watchdog_running) {
			/*
			 * Failure would be due to being unable to allocate
			 * a temporary cpumask, so we are likely not in a
			 * position to do much else to make things better.
			 */
			if (smpboot_update_cpumask_percpu_thread(
				    &watchdog_threads, &watchdog_cpumask) != 0)
				pr_err("cpumask update failed\n");
		}
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	put_online_cpus();
	return err;
}
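
/*
 * Example (from userspace): restrict the watchdog threads to cpus 0-3,
 * leaving the remaining cpus free of the periodic hrtimer and kthread:
 *	echo 0-3 > /proc/sys/kernel/watchdog_cpumask
 */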

#endif /* CONFIG_SYSCTL */

void __init lockup_detector_init(void)
{
	set_sample_period();

#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
	} else
		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#else
	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
}