xref: /openbmc/linux/arch/powerpc/kernel/watchdog.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
22104180aSNicholas Piggin /*
32104180aSNicholas Piggin  * Watchdog support on powerpc systems.
42104180aSNicholas Piggin  *
52104180aSNicholas Piggin  * Copyright 2017, IBM Corporation.
62104180aSNicholas Piggin  *
72104180aSNicholas Piggin  * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
82104180aSNicholas Piggin  */
9d8fa82e0SMichael Ellerman 
10d8fa82e0SMichael Ellerman #define pr_fmt(fmt) "watchdog: " fmt
11d8fa82e0SMichael Ellerman 
122104180aSNicholas Piggin #include <linux/kernel.h>
132104180aSNicholas Piggin #include <linux/param.h>
142104180aSNicholas Piggin #include <linux/init.h>
152104180aSNicholas Piggin #include <linux/percpu.h>
162104180aSNicholas Piggin #include <linux/cpu.h>
172104180aSNicholas Piggin #include <linux/nmi.h>
182104180aSNicholas Piggin #include <linux/module.h>
192104180aSNicholas Piggin #include <linux/export.h>
202104180aSNicholas Piggin #include <linux/kprobes.h>
212104180aSNicholas Piggin #include <linux/hardirq.h>
222104180aSNicholas Piggin #include <linux/reboot.h>
232104180aSNicholas Piggin #include <linux/slab.h>
242104180aSNicholas Piggin #include <linux/kdebug.h>
252104180aSNicholas Piggin #include <linux/sched/debug.h>
262104180aSNicholas Piggin #include <linux/delay.h>
272400c13cSSudeep Holla #include <linux/processor.h>
282104180aSNicholas Piggin #include <linux/smp.h>
292104180aSNicholas Piggin 
303a96570fSNicholas Piggin #include <asm/interrupt.h>
312104180aSNicholas Piggin #include <asm/paca.h>
329ae440fbSCédric Le Goater #include <asm/nmi.h>
332104180aSNicholas Piggin 
342104180aSNicholas Piggin /*
35723b1133SNicholas Piggin  * The powerpc watchdog ensures that each CPU is able to service timers.
36723b1133SNicholas Piggin  * The watchdog sets up a simple timer on each CPU to run once per timer
37723b1133SNicholas Piggin  * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
38723b1133SNicholas Piggin  * the heartbeat.
392104180aSNicholas Piggin  *
40723b1133SNicholas Piggin  * Then there are two systems to check that the heartbeat is still running.
41723b1133SNicholas Piggin  * The local soft-NMI, and the SMP checker.
422104180aSNicholas Piggin  *
43723b1133SNicholas Piggin  * The soft-NMI checker can detect lockups on the local CPU. When interrupts
44723b1133SNicholas Piggin  * are disabled with local_irq_disable(), platforms that use soft-masking
45723b1133SNicholas Piggin  * can leave hardware interrupts enabled and handle them with a masked
46723b1133SNicholas Piggin  * interrupt handler. The masked handler can send the timer interrupt to the
47723b1133SNicholas Piggin  * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
48723b1133SNicholas Piggin  * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
49723b1133SNicholas Piggin  *
50723b1133SNicholas Piggin  * The soft-NMI checker will compare the heartbeat timestamp for this CPU
51723b1133SNicholas Piggin  * with the current time, and take action if the difference exceeds the
52723b1133SNicholas Piggin  * watchdog threshold.
53723b1133SNicholas Piggin  *
54723b1133SNicholas Piggin  * The limitation of the soft-NMI watchdog is that it does not work when
55723b1133SNicholas Piggin  * interrupts are hard disabled or otherwise not being serviced. This is
56723b1133SNicholas Piggin  * solved by also having a SMP watchdog where all CPUs check all other
57723b1133SNicholas Piggin  * CPUs heartbeat.
58723b1133SNicholas Piggin  *
591fd02f66SJulia Lawall  * The SMP checker can detect lockups on other CPUs. A global "pending"
60723b1133SNicholas Piggin  * cpumask is kept, containing all CPUs which enable the watchdog. Each
61723b1133SNicholas Piggin  * CPU clears their pending bit in their heartbeat timer. When the bitmask
62723b1133SNicholas Piggin  * becomes empty, the last CPU to clear its pending bit updates a global
63723b1133SNicholas Piggin  * timestamp and refills the pending bitmask.
64723b1133SNicholas Piggin  *
65723b1133SNicholas Piggin  * In the heartbeat timer, if any CPU notices that the global timestamp has
66723b1133SNicholas Piggin  * not been updated for a period exceeding the watchdog threshold, then it
67723b1133SNicholas Piggin  * means the CPU(s) with their bit still set in the pending mask have had
68723b1133SNicholas Piggin  * their heartbeat stop, and action is taken.
69723b1133SNicholas Piggin  *
707c18659dSWolfram Sang  * Some platforms implement true NMI IPIs, which can be used by the SMP
71723b1133SNicholas Piggin  * watchdog to detect an unresponsive CPU and pull it out of its stuck
72723b1133SNicholas Piggin  * state with the NMI IPI, to get crash/debug data from it. This way the
73723b1133SNicholas Piggin  * SMP watchdog can detect hardware interrupts off lockups.
742104180aSNicholas Piggin  */
752104180aSNicholas Piggin 
762104180aSNicholas Piggin static cpumask_t wd_cpus_enabled __read_mostly;
772104180aSNicholas Piggin 
782104180aSNicholas Piggin static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
792104180aSNicholas Piggin static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
802104180aSNicholas Piggin 
812104180aSNicholas Piggin static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
822104180aSNicholas Piggin 
837ae3f6e1SNicholas Piggin static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
842104180aSNicholas Piggin static DEFINE_PER_CPU(u64, wd_timer_tb);
852104180aSNicholas Piggin 
86723b1133SNicholas Piggin /* SMP checker bits */
872104180aSNicholas Piggin static unsigned long __wd_smp_lock;
8876521c4bSNicholas Piggin static unsigned long __wd_reporting;
89e012c499SNicholas Piggin static unsigned long __wd_nmi_output;
902104180aSNicholas Piggin static cpumask_t wd_smp_cpus_pending;
912104180aSNicholas Piggin static cpumask_t wd_smp_cpus_stuck;
922104180aSNicholas Piggin static u64 wd_smp_last_reset_tb;
932104180aSNicholas Piggin 
94f5e74e83SLaurent Dufour #ifdef CONFIG_PPC_PSERIES
95f5e74e83SLaurent Dufour static u64 wd_timeout_pct;
96f5e74e83SLaurent Dufour #endif
97f5e74e83SLaurent Dufour 
9876521c4bSNicholas Piggin /*
9976521c4bSNicholas Piggin  * Try to take the exclusive watchdog action / NMI IPI / printing lock.
10076521c4bSNicholas Piggin  * wd_smp_lock must be held. If this fails, we should return and wait
10176521c4bSNicholas Piggin  * for the watchdog to kick in again (or another CPU to trigger it).
10276521c4bSNicholas Piggin  *
10376521c4bSNicholas Piggin  * Importantly, if hardlockup_panic is set, wd_try_report failure should
10476521c4bSNicholas Piggin  * not delay the panic, because whichever other CPU is reporting will
10576521c4bSNicholas Piggin  * call panic.
10676521c4bSNicholas Piggin  */
static bool wd_try_report(void)
{
	if (__wd_reporting)
		return false;
	/*
	 * Plain store is sufficient: per the comment above, callers hold
	 * wd_smp_lock, so two CPUs cannot both win here.
	 */
	__wd_reporting = 1;
	return true;
}
11476521c4bSNicholas Piggin 
/* End printing after successful wd_try_report. wd_smp_lock not required. */
static void wd_end_reporting(void)
{
	smp_mb(); /* End printing "critical section" */
	WARN_ON_ONCE(__wd_reporting == 0);
	/*
	 * WRITE_ONCE because this store is made outside wd_smp_lock,
	 * concurrently with wd_try_report() readers on other CPUs.
	 */
	WRITE_ONCE(__wd_reporting, 0);
}
12276521c4bSNicholas Piggin 
/*
 * Minimal test-and-set spinlock that is safe to take from soft-NMI
 * context (soft_nmi_interrupt). Returns with irqs hard-disabled and
 * *flags holding the saved irq state for wd_smp_unlock().
 */
static inline void wd_smp_lock(unsigned long *flags)
{
	/*
	 * Avoid locking layers if possible.
	 * This may be called from low level interrupt handlers at some
	 * point in future.
	 */
	raw_local_irq_save(*flags);
	hard_irq_disable(); /* Make it soft-NMI safe */
	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
		/* Spin with irqs restored so they are not held off while waiting */
		raw_local_irq_restore(*flags);
		spin_until_cond(!test_bit(0, &__wd_smp_lock));
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}
1392104180aSNicholas Piggin 
static inline void wd_smp_unlock(unsigned long *flags)
{
	/* Release pairs with test_and_set_bit_lock() in wd_smp_lock() */
	clear_bit_unlock(0, &__wd_smp_lock);
	raw_local_irq_restore(*flags);
}
1452104180aSNicholas Piggin 
/*
 * NMI IPI callback sent by watchdog_smp_panic() to a CPU whose heartbeat
 * has stopped: dump that CPU's state (regs when the IPI provides them,
 * otherwise the current stack) for debugging.
 */
static void wd_lockup_ipi(struct pt_regs *regs)
{
	int cpu = raw_smp_processor_id();
	u64 tb = get_tb();

	pr_emerg("CPU %d Hard LOCKUP\n", cpu);
	pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, per_cpu(wd_timer_tb, cpu),
		 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
	print_modules();
	print_irqtrace_events(current);
	if (regs)
		show_regs(regs);
	else
		dump_stack();

	/*
	 * __wd_nmi_output must be set after we printk from NMI context.
	 *
	 * printk from NMI context defers printing to the console to irq_work.
	 * If that NMI was taken in some code that is hard-locked, then irqs
	 * are disabled so irq_work will never fire. That can result in the
	 * hard lockup messages being delayed (indefinitely, until something
	 * else kicks the console drivers).
	 *
	 * Setting __wd_nmi_output will cause another CPU to notice and kick
	 * the console drivers for us.
	 *
	 * xchg is not needed here (it could be a smp_mb and store), but xchg
	 * gives the memory ordering and atomicity required.
	 */
	xchg(&__wd_nmi_output, 1);

	/* Do not panic from here because that can recurse into NMI IPI layer */
}
1812104180aSNicholas Piggin 
/*
 * Mark @cpu stuck: move it from the pending mask to the stuck mask. If
 * that empties the pending mask, reset the SMP heartbeat timestamp and
 * refill pending with all enabled-but-not-stuck CPUs.
 *
 * Returns true when the pending mask was refilled (this was the last
 * pending CPU). Both callers hold wd_smp_lock.
 */
static bool set_cpu_stuck(int cpu)
{
	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
	/*
	 * See wd_smp_clear_cpu_pending()
	 */
	smp_mb();
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		wd_smp_last_reset_tb = get_tb();
		cpumask_andnot(&wd_smp_cpus_pending,
				&wd_cpus_enabled,
				&wd_smp_cpus_stuck);
		return true;
	}
	return false;
}
1992104180aSNicholas Piggin 
/*
 * SMP checker: called from the heartbeat timer when the global heartbeat
 * timestamp is older than wd_smp_panic_timeout_tb. Marks the CPUs still
 * in the pending mask as stuck, reports them, pulls crash data out of
 * them via NMI IPI (or an all-CPU backtrace), and panics if
 * hardlockup_panic is set.
 */
static void watchdog_smp_panic(int cpu)
{
	static cpumask_t wd_smp_cpus_ipi; // protected by reporting
	unsigned long flags;
	u64 tb, last_reset;
	int c;

	wd_smp_lock(&flags);
	/* Double check some things under lock */
	tb = get_tb();
	last_reset = wd_smp_last_reset_tb;
	if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
		goto out;
	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
		goto out;
	if (!wd_try_report())
		goto out;
	for_each_online_cpu(c) {
		if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
			continue;
		if (c == cpu)
			continue; // should not happen

		__cpumask_set_cpu(c, &wd_smp_cpus_ipi);
		/* true means the pending mask was emptied and refilled: done */
		if (set_cpu_stuck(c))
			break;
	}
	if (cpumask_empty(&wd_smp_cpus_ipi)) {
		wd_end_reporting();
		goto out;
	}
	wd_smp_unlock(&flags);

	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
		 cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);

	if (!sysctl_hardlockup_all_cpu_backtrace) {
		/*
		 * Try to trigger the stuck CPUs, unless we are going to
		 * get a backtrace on all of them anyway.
		 */
		for_each_cpu(c, &wd_smp_cpus_ipi) {
			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
		}
	} else {
		trigger_allbutcpu_cpu_backtrace(cpu);
		cpumask_clear(&wd_smp_cpus_ipi);
	}

	if (hardlockup_panic)
		nmi_panic(NULL, "Hard LOCKUP");

	wd_end_reporting();

	return;

out:
	wd_smp_unlock(&flags);
}
2622104180aSNicholas Piggin 
/*
 * Heartbeat for @cpu: clear its bit in the global pending mask. The last
 * CPU to clear its bit resets wd_smp_last_reset_tb and refills the mask.
 * Also handles a CPU previously marked stuck coming back to life.
 */
static void wd_smp_clear_cpu_pending(int cpu)
{
	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
			struct pt_regs *regs = get_irq_regs();
			unsigned long flags;

			pr_emerg("CPU %d became unstuck TB:%lld\n",
				 cpu, get_tb());
			print_irqtrace_events(current);
			if (regs)
				show_regs(regs);
			else
				dump_stack();

			wd_smp_lock(&flags);
			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
			wd_smp_unlock(&flags);
		} else {
			/*
			 * The last CPU to clear pending should have reset the
			 * watchdog so we generally should not find it empty
			 * here if our CPU was clear. However it could happen
			 * due to a rare race with another CPU taking the
			 * last CPU out of the mask concurrently.
			 *
			 * We can't add a warning for it. But just in case
			 * there is a problem with the watchdog that is causing
			 * the mask to not be reset, try to kick it along here.
			 */
			if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
				goto none_pending;
		}
		return;
	}

	/*
	 * All other updates to wd_smp_cpus_pending are performed under
	 * wd_smp_lock. All of them are atomic except the case where the
	 * mask becomes empty and is reset. This will not happen here because
	 * cpu was tested to be in the bitmap (above), and a CPU only clears
	 * its own bit. _Except_ in the case where another CPU has detected a
	 * hard lockup on our CPU and takes us out of the pending mask. So in
	 * normal operation there will be no race here, no problem.
	 *
	 * In the lockup case, this atomic clear-bit vs a store that refills
	 * other bits in the accessed word will not be a problem. The bit clear
	 * is atomic so it will not cause the store to get lost, and the store
	 * will never set this bit so it will not overwrite the bit clear. The
	 * only way for a stuck CPU to return to the pending bitmap is to
	 * become unstuck itself.
	 */
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);

	/*
	 * Order the store to clear pending with the load(s) to check all
	 * words in the pending mask to check they are all empty. This orders
	 * with the same barrier on another CPU. This prevents two CPUs
	 * clearing the last 2 pending bits, but neither seeing the other's
	 * store when checking if the mask is empty, and missing an empty
	 * mask, which ends with a false positive.
	 */
	smp_mb();
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		unsigned long flags;

none_pending:
		/*
		 * Double check under lock because more than one CPU could see
		 * a clear mask with the lockless check after clearing their
		 * pending bits.
		 */
		wd_smp_lock(&flags);
		if (cpumask_empty(&wd_smp_cpus_pending)) {
			wd_smp_last_reset_tb = get_tb();
			cpumask_andnot(&wd_smp_cpus_pending,
					&wd_cpus_enabled,
					&wd_smp_cpus_stuck);
		}
		wd_smp_unlock(&flags);
	}
}
3452104180aSNicholas Piggin 
/*
 * Heartbeat body, run from the per-CPU hrtimer: record this CPU's
 * timestamp, clear its pending bit, and run the SMP checker if the
 * global heartbeat looks overdue.
 */
static void watchdog_timer_interrupt(int cpu)
{
	u64 tb = get_tb();

	per_cpu(wd_timer_tb, cpu) = tb;

	wd_smp_clear_cpu_pending(cpu);

	/* Lockless check; watchdog_smp_panic() re-checks under wd_smp_lock */
	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
		watchdog_smp_panic(cpu);

	if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
		/*
		 * Something has called printk from NMI context. It might be
		 * stuck, so this triggers a flush that will get that
		 * printk output to the console.
		 *
		 * See wd_lockup_ipi.
		 */
		printk_trigger_flush();
	}
}
3682104180aSNicholas Piggin 
/*
 * Soft-NMI checker: entered via the decrementer while this CPU has irqs
 * soft-disabled (see the file comment). Detects this CPU being stuck
 * with irqs disabled for longer than wd_panic_timeout_tb and reports a
 * self-detected hard lockup.
 */
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
	unsigned long flags;
	int cpu = raw_smp_processor_id();
	u64 tb;

	/* should only arrive from kernel, with irqs disabled */
	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return 0;

	__this_cpu_inc(irq_stat.soft_nmi_irqs);

	tb = get_tb();
	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
		/*
		 * Taking wd_smp_lock here means it is a soft-NMI lock, which
		 * means we can't take any regular or irqsafe spin locks while
		 * holding this lock. This is why timers can't printk while
		 * holding the lock.
		 */
		wd_smp_lock(&flags);
		/* Already reported by another CPU's SMP checker: nothing to do */
		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
			wd_smp_unlock(&flags);
			return 0;
		}
		if (!wd_try_report()) {
			wd_smp_unlock(&flags);
			/* Couldn't report, try again in 100ms */
			mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
			return 0;
		}

		set_cpu_stuck(cpu);

		wd_smp_unlock(&flags);

		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
			 cpu, (void *)regs->nip);
		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
			 cpu, tb, per_cpu(wd_timer_tb, cpu),
			 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
		print_modules();
		print_irqtrace_events(current);
		show_regs(regs);

		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi

		if (sysctl_hardlockup_all_cpu_backtrace)
			trigger_allbutcpu_cpu_backtrace(cpu);

		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");

		wd_end_reporting();
	}
	/*
	 * We are okay to change DEC in soft_nmi_interrupt because the masked
	 * handler has marked a DEC as pending, so the timer interrupt will be
	 * replayed as soon as local irqs are enabled again.
	 */
	if (wd_panic_timeout_tb < 0x7fffffff)
		mtspr(SPRN_DEC, wd_panic_timeout_tb);

	return 0;
}
4362104180aSNicholas Piggin 
/*
 * Per-CPU hrtimer callback: run the heartbeat and re-arm, unless the
 * hard-lockup detector has been disabled or this CPU was removed from
 * watchdog_cpumask since the timer was started.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	int cpu = smp_processor_id();

	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
		return HRTIMER_NORESTART;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return HRTIMER_NORESTART;

	watchdog_timer_interrupt(cpu);

	hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));

	return HRTIMER_RESTART;
}
4532104180aSNicholas Piggin 
/*
 * Kick the watchdog for the current CPU. If at least one timer period
 * has elapsed since the last recorded heartbeat, record a fresh one and
 * clear this CPU's pending bit (the threshold avoids hammering the
 * shared pending mask on every call).
 */
void arch_touch_nmi_watchdog(void)
{
	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
	int cpu = smp_processor_id();
	u64 tb;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return;

	tb = get_tb();
	if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
		per_cpu(wd_timer_tb, cpu) = tb;
		wd_smp_clear_cpu_pending(cpu);
	}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
4702104180aSNicholas Piggin 
/*
 * Enable the watchdog on the current CPU and arm its pinned heartbeat
 * hrtimer. Runs on the target CPU via smp_call_function_single() (see
 * start_watchdog_on_cpu).
 */
static void start_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
		WARN_ON(1);
		return;
	}

	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
		return;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return;

	wd_smp_lock(&flags);
	cpumask_set_cpu(cpu, &wd_cpus_enabled);
	/* First CPU to come up initializes the SMP heartbeat state */
	if (cpumask_weight(&wd_cpus_enabled) == 1) {
		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
		wd_smp_last_reset_tb = get_tb();
	}
	wd_smp_unlock(&flags);

	*this_cpu_ptr(&wd_timer_tb) = get_tb();

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
		      HRTIMER_MODE_REL_PINNED);
}
5032104180aSNicholas Piggin 
/* Run start_watchdog() on @cpu; used as the cpuhp online callback. */
static int start_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, start_watchdog, NULL, true);
}
5087ae3f6e1SNicholas Piggin 
/*
 * Disable the watchdog on the current CPU: cancel its heartbeat timer,
 * drop it from the enabled mask, then clear its pending bit so the SMP
 * checker does not wait on a CPU that no longer heartbeats.
 */
static void stop_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return; /* Can happen in CPU unplug case */

	hrtimer_cancel(hrtimer);

	wd_smp_lock(&flags);
	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
	wd_smp_unlock(&flags);

	wd_smp_clear_cpu_pending(cpu);
}
5262104180aSNicholas Piggin 
/* Run stop_watchdog() on @cpu; used as the cpuhp teardown callback. */
static int stop_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
5312104180aSNicholas Piggin 
watchdog_calc_timeouts(void)5322104180aSNicholas Piggin static void watchdog_calc_timeouts(void)
5332104180aSNicholas Piggin {
534f5e74e83SLaurent Dufour 	u64 threshold = watchdog_thresh;
535f5e74e83SLaurent Dufour 
536f5e74e83SLaurent Dufour #ifdef CONFIG_PPC_PSERIES
537f5e74e83SLaurent Dufour 	threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100;
538f5e74e83SLaurent Dufour #endif
539f5e74e83SLaurent Dufour 
540f5e74e83SLaurent Dufour 	wd_panic_timeout_tb = threshold * ppc_tb_freq;
5412104180aSNicholas Piggin 
5422104180aSNicholas Piggin 	/* Have the SMP detector trigger a bit later */
5432104180aSNicholas Piggin 	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
5442104180aSNicholas Piggin 
5452104180aSNicholas Piggin 	/* 2/5 is the factor that the perf based detector uses */
5462104180aSNicholas Piggin 	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
5472104180aSNicholas Piggin }
5482104180aSNicholas Piggin 
/* Core watchdog callback: stop the per-CPU watchdog on every enabled CPU. */
void watchdog_hardlockup_stop(void)
{
	int cpu;

	for_each_cpu(cpu, &wd_cpus_enabled)
		stop_watchdog_on_cpu(cpu);
}
5566b9dc480SThomas Gleixner 
/*
 * Core watchdog callback: recompute timeouts (watchdog_thresh may have
 * changed) and start the watchdog on every online CPU in the allowed mask.
 */
void watchdog_hardlockup_start(void)
{
	int cpu;

	watchdog_calc_timeouts();
	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
		start_watchdog_on_cpu(cpu);
}
5652104180aSNicholas Piggin 
5662104180aSNicholas Piggin /*
56734ddaa3eSThomas Gleixner  * Invoked from core watchdog init.
5682104180aSNicholas Piggin  */
watchdog_hardlockup_probe(void)569df95d308SDouglas Anderson int __init watchdog_hardlockup_probe(void)
5702104180aSNicholas Piggin {
5712104180aSNicholas Piggin 	int err;
5722104180aSNicholas Piggin 
57334ddaa3eSThomas Gleixner 	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
57434ddaa3eSThomas Gleixner 					"powerpc/watchdog:online",
5757ae3f6e1SNicholas Piggin 					start_watchdog_on_cpu,
5767ae3f6e1SNicholas Piggin 					stop_watchdog_on_cpu);
57734ddaa3eSThomas Gleixner 	if (err < 0) {
578d8fa82e0SMichael Ellerman 		pr_warn("could not be initialized");
57934ddaa3eSThomas Gleixner 		return err;
58034ddaa3eSThomas Gleixner 	}
5812104180aSNicholas Piggin 	return 0;
5822104180aSNicholas Piggin }
583f5e74e83SLaurent Dufour 
#ifdef CONFIG_PPC_PSERIES
/*
 * Record an extra percentage by which watchdog_calc_timeouts() scales
 * the threshold, then reconfigure the lockup detector so the new value
 * takes effect. WRITE_ONCE pairs with the READ_ONCE in
 * watchdog_calc_timeouts().
 */
void watchdog_hardlockup_set_timeout_pct(u64 pct)
{
	pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
	WRITE_ONCE(wd_timeout_pct, pct);
	lockup_detector_reconfigure();
}
#endif
592