1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
22104180aSNicholas Piggin /*
32104180aSNicholas Piggin * Watchdog support on powerpc systems.
42104180aSNicholas Piggin *
52104180aSNicholas Piggin * Copyright 2017, IBM Corporation.
62104180aSNicholas Piggin *
72104180aSNicholas Piggin * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
82104180aSNicholas Piggin */
9d8fa82e0SMichael Ellerman
10d8fa82e0SMichael Ellerman #define pr_fmt(fmt) "watchdog: " fmt
11d8fa82e0SMichael Ellerman
122104180aSNicholas Piggin #include <linux/kernel.h>
132104180aSNicholas Piggin #include <linux/param.h>
142104180aSNicholas Piggin #include <linux/init.h>
152104180aSNicholas Piggin #include <linux/percpu.h>
162104180aSNicholas Piggin #include <linux/cpu.h>
172104180aSNicholas Piggin #include <linux/nmi.h>
182104180aSNicholas Piggin #include <linux/module.h>
192104180aSNicholas Piggin #include <linux/export.h>
202104180aSNicholas Piggin #include <linux/kprobes.h>
212104180aSNicholas Piggin #include <linux/hardirq.h>
222104180aSNicholas Piggin #include <linux/reboot.h>
232104180aSNicholas Piggin #include <linux/slab.h>
242104180aSNicholas Piggin #include <linux/kdebug.h>
252104180aSNicholas Piggin #include <linux/sched/debug.h>
262104180aSNicholas Piggin #include <linux/delay.h>
272400c13cSSudeep Holla #include <linux/processor.h>
282104180aSNicholas Piggin #include <linux/smp.h>
292104180aSNicholas Piggin
303a96570fSNicholas Piggin #include <asm/interrupt.h>
312104180aSNicholas Piggin #include <asm/paca.h>
329ae440fbSCédric Le Goater #include <asm/nmi.h>
332104180aSNicholas Piggin
342104180aSNicholas Piggin /*
35723b1133SNicholas Piggin * The powerpc watchdog ensures that each CPU is able to service timers.
36723b1133SNicholas Piggin * The watchdog sets up a simple timer on each CPU to run once per timer
37723b1133SNicholas Piggin * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
38723b1133SNicholas Piggin * the heartbeat.
392104180aSNicholas Piggin *
40723b1133SNicholas Piggin * Then there are two systems to check that the heartbeat is still running.
41723b1133SNicholas Piggin * The local soft-NMI, and the SMP checker.
422104180aSNicholas Piggin *
43723b1133SNicholas Piggin * The soft-NMI checker can detect lockups on the local CPU. When interrupts
44723b1133SNicholas Piggin * are disabled with local_irq_disable(), platforms that use soft-masking
45723b1133SNicholas Piggin * can leave hardware interrupts enabled and handle them with a masked
46723b1133SNicholas Piggin * interrupt handler. The masked handler can send the timer interrupt to the
47723b1133SNicholas Piggin * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
48723b1133SNicholas Piggin * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
49723b1133SNicholas Piggin *
50723b1133SNicholas Piggin * The soft-NMI checker will compare the heartbeat timestamp for this CPU
51723b1133SNicholas Piggin * with the current time, and take action if the difference exceeds the
52723b1133SNicholas Piggin * watchdog threshold.
53723b1133SNicholas Piggin *
54723b1133SNicholas Piggin * The limitation of the soft-NMI watchdog is that it does not work when
55723b1133SNicholas Piggin * interrupts are hard disabled or otherwise not being serviced. This is
 * solved by also having an SMP watchdog where all CPUs check all other
 * CPUs' heartbeats.
58723b1133SNicholas Piggin *
591fd02f66SJulia Lawall * The SMP checker can detect lockups on other CPUs. A global "pending"
60723b1133SNicholas Piggin * cpumask is kept, containing all CPUs which enable the watchdog. Each
61723b1133SNicholas Piggin * CPU clears their pending bit in their heartbeat timer. When the bitmask
62723b1133SNicholas Piggin * becomes empty, the last CPU to clear its pending bit updates a global
63723b1133SNicholas Piggin * timestamp and refills the pending bitmask.
64723b1133SNicholas Piggin *
65723b1133SNicholas Piggin * In the heartbeat timer, if any CPU notices that the global timestamp has
66723b1133SNicholas Piggin * not been updated for a period exceeding the watchdog threshold, then it
67723b1133SNicholas Piggin * means the CPU(s) with their bit still set in the pending mask have had
68723b1133SNicholas Piggin * their heartbeat stop, and action is taken.
69723b1133SNicholas Piggin *
707c18659dSWolfram Sang * Some platforms implement true NMI IPIs, which can be used by the SMP
71723b1133SNicholas Piggin * watchdog to detect an unresponsive CPU and pull it out of its stuck
72723b1133SNicholas Piggin * state with the NMI IPI, to get crash/debug data from it. This way the
73723b1133SNicholas Piggin * SMP watchdog can detect hardware interrupts off lockups.
742104180aSNicholas Piggin */
752104180aSNicholas Piggin
/* CPUs that currently have the per-cpu heartbeat timer running */
static cpumask_t wd_cpus_enabled __read_mostly;

static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */

static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */

/* Per-cpu heartbeat timer and the timebase value of the last heartbeat */
static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);

/* SMP checker bits */
static unsigned long __wd_smp_lock;	/* bit 0 is the global watchdog lock */
static unsigned long __wd_reporting;	/* serializes reporting; see wd_try_report() */
static unsigned long __wd_nmi_output;	/* set after printk from NMI context; see wd_lockup_ipi() */
static cpumask_t wd_smp_cpus_pending;	/* CPUs yet to heartbeat this period */
static cpumask_t wd_smp_cpus_stuck;	/* CPUs detected as hard locked up */
static u64 wd_smp_last_reset_tb;	/* timebase when pending mask was last refilled */

#ifdef CONFIG_PPC_PSERIES
/*
 * Percentage by which the watchdog threshold is stretched; set by
 * watchdog_hardlockup_set_timeout_pct(), consumed in watchdog_calc_timeouts().
 */
static u64 wd_timeout_pct;
#endif
97f5e74e83SLaurent Dufour
9876521c4bSNicholas Piggin /*
9976521c4bSNicholas Piggin * Try to take the exclusive watchdog action / NMI IPI / printing lock.
10076521c4bSNicholas Piggin * wd_smp_lock must be held. If this fails, we should return and wait
10176521c4bSNicholas Piggin * for the watchdog to kick in again (or another CPU to trigger it).
10276521c4bSNicholas Piggin *
10376521c4bSNicholas Piggin * Importantly, if hardlockup_panic is set, wd_try_report failure should
10476521c4bSNicholas Piggin * not delay the panic, because whichever other CPU is reporting will
10576521c4bSNicholas Piggin * call panic.
10676521c4bSNicholas Piggin */
wd_try_report(void)10776521c4bSNicholas Piggin static bool wd_try_report(void)
10876521c4bSNicholas Piggin {
10976521c4bSNicholas Piggin if (__wd_reporting)
11076521c4bSNicholas Piggin return false;
11176521c4bSNicholas Piggin __wd_reporting = 1;
11276521c4bSNicholas Piggin return true;
11376521c4bSNicholas Piggin }
11476521c4bSNicholas Piggin
11576521c4bSNicholas Piggin /* End printing after successful wd_try_report. wd_smp_lock not required. */
static void wd_end_reporting(void)
{
	smp_mb(); /* End printing "critical section" */
	/* Must pair with a successful wd_try_report() */
	WARN_ON_ONCE(__wd_reporting == 0);
	WRITE_ONCE(__wd_reporting, 0);
}
12276521c4bSNicholas Piggin
/*
 * Take the global watchdog bit-lock with local irqs hard disabled, so it
 * is safe to take from the soft-NMI handler.
 */
static inline void wd_smp_lock(unsigned long *flags)
{
	/*
	 * Avoid locking layers if possible.
	 * This may be called from low level interrupt handlers at some
	 * point in future.
	 */
	raw_local_irq_save(*flags);
	hard_irq_disable(); /* Make it soft-NMI safe */
	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
		/*
		 * Contended: re-enable irqs while spinning so interrupt
		 * processing is not stalled, then re-disable before the
		 * next test_and_set_bit_lock() attempt.
		 */
		raw_local_irq_restore(*flags);
		spin_until_cond(!test_bit(0, &__wd_smp_lock));
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}
1392104180aSNicholas Piggin
static inline void wd_smp_unlock(unsigned long *flags)
{
	/* Release pairs with test_and_set_bit_lock() in wd_smp_lock() */
	clear_bit_unlock(0, &__wd_smp_lock);
	raw_local_irq_restore(*flags);
}
1452104180aSNicholas Piggin
/*
 * NMI IPI handler: runs on a CPU that another CPU detected as hard locked
 * up (see watchdog_smp_panic()), dumping its timebase, modules, irqtrace
 * state and registers/stack to the log.
 */
static void wd_lockup_ipi(struct pt_regs *regs)
{
	int cpu = raw_smp_processor_id();
	u64 tb = get_tb();

	pr_emerg("CPU %d Hard LOCKUP\n", cpu);
	pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, per_cpu(wd_timer_tb, cpu),
		 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
	print_modules();
	print_irqtrace_events(current);
	if (regs)
		show_regs(regs);
	else
		dump_stack();

	/*
	 * __wd_nmi_output must be set after we printk from NMI context.
	 *
	 * printk from NMI context defers printing to the console to irq_work.
	 * If that NMI was taken in some code that is hard-locked, then irqs
	 * are disabled so irq_work will never fire. That can result in the
	 * hard lockup messages being delayed (indefinitely, until something
	 * else kicks the console drivers).
	 *
	 * Setting __wd_nmi_output will cause another CPU to notice and kick
	 * the console drivers for us.
	 *
	 * xchg is not needed here (it could be a smp_mb and store), but xchg
	 * gives the memory ordering and atomicity required.
	 */
	xchg(&__wd_nmi_output, 1);

	/* Do not panic from here because that can recurse into NMI IPI layer */
}
1812104180aSNicholas Piggin
/*
 * Move @cpu from the pending mask to the stuck mask. Both callers hold
 * wd_smp_lock. Returns true if this emptied the pending mask, in which
 * case the SMP checker is reset here (timestamp updated, pending mask
 * refilled with all enabled, non-stuck CPUs).
 */
static bool set_cpu_stuck(int cpu)
{
	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
	/*
	 * See wd_smp_clear_cpu_pending()
	 */
	smp_mb();
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		wd_smp_last_reset_tb = get_tb();
		cpumask_andnot(&wd_smp_cpus_pending,
			       &wd_cpus_enabled,
			       &wd_smp_cpus_stuck);
		return true;
	}
	return false;
}
1992104180aSNicholas Piggin
/*
 * Called from the heartbeat timer when the global SMP timestamp has not
 * been reset for longer than wd_smp_panic_timeout_tb: report the CPUs
 * that still have their pending bit set as hard locked up.
 */
static void watchdog_smp_panic(int cpu)
{
	static cpumask_t wd_smp_cpus_ipi; // protected by reporting
	unsigned long flags;
	u64 tb, last_reset;
	int c;

	wd_smp_lock(&flags);
	/* Double check some things under lock */
	tb = get_tb();
	last_reset = wd_smp_last_reset_tb;
	if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
		goto out;
	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
		goto out;
	if (!wd_try_report())
		goto out;
	/* Mark every still-pending CPU stuck and collect it for IPI/backtrace */
	for_each_online_cpu(c) {
		if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
			continue;
		if (c == cpu)
			continue; // should not happen
		__cpumask_set_cpu(c, &wd_smp_cpus_ipi);
		/* Stop once set_cpu_stuck() has emptied and refilled the mask */
		if (set_cpu_stuck(c))
			break;
	}
	if (cpumask_empty(&wd_smp_cpus_ipi)) {
		wd_end_reporting();
		goto out;
	}
	/*
	 * Drop wd_smp_lock before printing: printk is not safe while holding
	 * it (see the comment in soft_nmi_interrupt()). The reporting "lock"
	 * taken above keeps wd_smp_cpus_ipi exclusive.
	 */
	wd_smp_unlock(&flags);

	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
		 cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
		 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);

	if (!sysctl_hardlockup_all_cpu_backtrace) {
		/*
		 * Try to trigger the stuck CPUs, unless we are going to
		 * get a backtrace on all of them anyway.
		 */
		for_each_cpu(c, &wd_smp_cpus_ipi) {
			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
		}
	} else {
		trigger_allbutcpu_cpu_backtrace(cpu);
		cpumask_clear(&wd_smp_cpus_ipi);
	}

	if (hardlockup_panic)
		nmi_panic(NULL, "Hard LOCKUP");

	wd_end_reporting();

	return;

out:
	wd_smp_unlock(&flags);
}
2622104180aSNicholas Piggin
/*
 * Clear @cpu's bit in the pending mask; the last CPU to clear its bit
 * resets the SMP checker (timestamp + refilled mask). Also handles a
 * previously stuck CPU becoming responsive again.
 */
static void wd_smp_clear_cpu_pending(int cpu)
{
	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
			struct pt_regs *regs = get_irq_regs();
			unsigned long flags;

			pr_emerg("CPU %d became unstuck TB:%lld\n",
				 cpu, get_tb());
			print_irqtrace_events(current);
			if (regs)
				show_regs(regs);
			else
				dump_stack();

			wd_smp_lock(&flags);
			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
			wd_smp_unlock(&flags);
		} else {
			/*
			 * The last CPU to clear pending should have reset the
			 * watchdog so we generally should not find it empty
			 * here if our CPU was clear. However it could happen
			 * due to a rare race with another CPU taking the
			 * last CPU out of the mask concurrently.
			 *
			 * We can't add a warning for it. But just in case
			 * there is a problem with the watchdog that is causing
			 * the mask to not be reset, try to kick it along here.
			 */
			if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
				goto none_pending;
		}
		return;
	}

	/*
	 * All other updates to wd_smp_cpus_pending are performed under
	 * wd_smp_lock. All of them are atomic except the case where the
	 * mask becomes empty and is reset. This will not happen here because
	 * cpu was tested to be in the bitmap (above), and a CPU only clears
	 * its own bit. _Except_ in the case where another CPU has detected a
	 * hard lockup on our CPU and takes us out of the pending mask. So in
	 * normal operation there will be no race here, no problem.
	 *
	 * In the lockup case, this atomic clear-bit vs a store that refills
	 * other bits in the accessed word will not be a problem. The bit clear
	 * is atomic so it will not cause the store to get lost, and the store
	 * will never set this bit so it will not overwrite the bit clear. The
	 * only way for a stuck CPU to return to the pending bitmap is to
	 * become unstuck itself.
	 */
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);

	/*
	 * Order the store to clear pending with the load(s) to check all
	 * words in the pending mask to check they are all empty. This orders
	 * with the same barrier on another CPU. This prevents two CPUs
	 * clearing the last 2 pending bits, but neither seeing the other's
	 * store when checking if the mask is empty, and missing an empty
	 * mask, which ends with a false positive.
	 */
	smp_mb();
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		unsigned long flags;

none_pending:
		/*
		 * Double check under lock because more than one CPU could see
		 * a clear mask with the lockless check after clearing their
		 * pending bits.
		 */
		wd_smp_lock(&flags);
		if (cpumask_empty(&wd_smp_cpus_pending)) {
			wd_smp_last_reset_tb = get_tb();
			cpumask_andnot(&wd_smp_cpus_pending,
				       &wd_cpus_enabled,
				       &wd_smp_cpus_stuck);
		}
		wd_smp_unlock(&flags);
	}
}
3452104180aSNicholas Piggin
/*
 * Per-cpu heartbeat: record the current timebase, clear our pending bit,
 * run the SMP checker if the global timestamp is stale, and kick the
 * console drivers if a CPU printed from NMI context.
 */
static void watchdog_timer_interrupt(int cpu)
{
	u64 tb = get_tb();

	per_cpu(wd_timer_tb, cpu) = tb;

	wd_smp_clear_cpu_pending(cpu);

	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
		watchdog_smp_panic(cpu);

	if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
		/*
		 * Something has called printk from NMI context. It might be
		 * stuck, so this triggers a flush that will get that
		 * printk output to the console.
		 *
		 * See wd_lockup_ipi.
		 */
		printk_trigger_flush();
	}
}
3682104180aSNicholas Piggin
/*
 * Soft-NMI checker: detects lockups on the local CPU by comparing the
 * current timebase with the last heartbeat (see the comment at the top
 * of this file). Runs as an NMI from Linux's point of view.
 */
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
{
	unsigned long flags;
	int cpu = raw_smp_processor_id();
	u64 tb;

	/* should only arrive from kernel, with irqs disabled */
	WARN_ON_ONCE(!arch_irq_disabled_regs(regs));

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return 0;

	__this_cpu_inc(irq_stat.soft_nmi_irqs);

	tb = get_tb();
	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
		/*
		 * Taking wd_smp_lock here means it is a soft-NMI lock, which
		 * means we can't take any regular or irqsafe spin locks while
		 * holding this lock. This is why timers can't printk while
		 * holding the lock.
		 */
		wd_smp_lock(&flags);
		/* Another CPU may have already reported us as stuck */
		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
			wd_smp_unlock(&flags);
			return 0;
		}
		if (!wd_try_report()) {
			wd_smp_unlock(&flags);
			/* Couldn't report, try again in 100ms */
			mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
			return 0;
		}

		set_cpu_stuck(cpu);

		/* Printing below must happen after dropping the lock */
		wd_smp_unlock(&flags);

		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
			 cpu, (void *)regs->nip);
		pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
			 cpu, tb, per_cpu(wd_timer_tb, cpu),
			 tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
		print_modules();
		print_irqtrace_events(current);
		show_regs(regs);

		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi

		if (sysctl_hardlockup_all_cpu_backtrace)
			trigger_allbutcpu_cpu_backtrace(cpu);

		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");

		wd_end_reporting();
	}
	/*
	 * We are okay to change DEC in soft_nmi_interrupt because the masked
	 * handler has marked a DEC as pending, so the timer interrupt will be
	 * replayed as soon as local irqs are enabled again.
	 */
	if (wd_panic_timeout_tb < 0x7fffffff)
		mtspr(SPRN_DEC, wd_panic_timeout_tb);

	return 0;
}
4362104180aSNicholas Piggin
watchdog_timer_fn(struct hrtimer * hrtimer)4377ae3f6e1SNicholas Piggin static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
4382104180aSNicholas Piggin {
4392104180aSNicholas Piggin int cpu = smp_processor_id();
4402104180aSNicholas Piggin
441df95d308SDouglas Anderson if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
4427ae3f6e1SNicholas Piggin return HRTIMER_NORESTART;
4437ae3f6e1SNicholas Piggin
4447ae3f6e1SNicholas Piggin if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
4457ae3f6e1SNicholas Piggin return HRTIMER_NORESTART;
4467ae3f6e1SNicholas Piggin
4472104180aSNicholas Piggin watchdog_timer_interrupt(cpu);
4482104180aSNicholas Piggin
4497ae3f6e1SNicholas Piggin hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
4507ae3f6e1SNicholas Piggin
4517ae3f6e1SNicholas Piggin return HRTIMER_RESTART;
4522104180aSNicholas Piggin }
4532104180aSNicholas Piggin
arch_touch_nmi_watchdog(void)4542104180aSNicholas Piggin void arch_touch_nmi_watchdog(void)
4552104180aSNicholas Piggin {
45626c5c6e1SNicholas Piggin unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
4572104180aSNicholas Piggin int cpu = smp_processor_id();
4585dad4ba6SNicholas Piggin u64 tb;
4592104180aSNicholas Piggin
4605dad4ba6SNicholas Piggin if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
4615dad4ba6SNicholas Piggin return;
4625dad4ba6SNicholas Piggin
4635dad4ba6SNicholas Piggin tb = get_tb();
46480e4d70bSNicholas Piggin if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
46580e4d70bSNicholas Piggin per_cpu(wd_timer_tb, cpu) = tb;
4661f01bf90SNicholas Piggin wd_smp_clear_cpu_pending(cpu);
46780e4d70bSNicholas Piggin }
4682104180aSNicholas Piggin }
4692104180aSNicholas Piggin EXPORT_SYMBOL(arch_touch_nmi_watchdog);
4702104180aSNicholas Piggin
/*
 * Enable the watchdog on the current CPU: add it to the enabled and
 * pending masks (the first CPU also initializes the global timestamp)
 * and start the pinned per-cpu heartbeat hrtimer. Runs on the target CPU
 * via smp_call_function_single() — see start_watchdog_on_cpu().
 */
static void start_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
		WARN_ON(1);
		return;
	}

	if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
		return;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return;

	wd_smp_lock(&flags);
	cpumask_set_cpu(cpu, &wd_cpus_enabled);
	if (cpumask_weight(&wd_cpus_enabled) == 1) {
		/* First enabled CPU initializes the SMP checker state */
		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
		wd_smp_last_reset_tb = get_tb();
	}
	wd_smp_unlock(&flags);

	/* Start the heartbeat timestamp from now */
	*this_cpu_ptr(&wd_timer_tb) = get_tb();

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
		      HRTIMER_MODE_REL_PINNED);
}
5032104180aSNicholas Piggin
/* Run start_watchdog() on @cpu; used as the CPU hotplug online callback */
static int start_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, start_watchdog, NULL, true);
}
5087ae3f6e1SNicholas Piggin
/*
 * Disable the watchdog on the current CPU: cancel the heartbeat timer,
 * remove the CPU from the enabled mask, and clear its pending bit so the
 * remaining CPUs do not wait on it. Runs on the target CPU via
 * smp_call_function_single() — see stop_watchdog_on_cpu().
 */
static void stop_watchdog(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
	int cpu = smp_processor_id();
	unsigned long flags;

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return; /* Can happen in CPU unplug case */

	hrtimer_cancel(hrtimer);

	wd_smp_lock(&flags);
	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
	wd_smp_unlock(&flags);

	wd_smp_clear_cpu_pending(cpu);
}
5262104180aSNicholas Piggin
/* Run stop_watchdog() on @cpu; used as the CPU hotplug teardown callback */
static int stop_watchdog_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
5312104180aSNicholas Piggin
/*
 * Derive the panic timeouts (in timebase ticks) and the heartbeat timer
 * period (in ms) from the core watchdog_thresh value.
 */
static void watchdog_calc_timeouts(void)
{
	u64 threshold = watchdog_thresh;

#ifdef CONFIG_PPC_PSERIES
	/* Stretch the threshold by the percentage the platform requested
	 * via watchdog_hardlockup_set_timeout_pct(). */
	threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100;
#endif

	wd_panic_timeout_tb = threshold * ppc_tb_freq;

	/* Have the SMP detector trigger a bit later */
	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;

	/* 2/5 is the factor that the perf based detector uses */
	/* NOTE(review): the timer period uses the unscaled watchdog_thresh,
	 * not the pct-adjusted threshold — presumably intentional so the
	 * heartbeat rate is unchanged; confirm. */
	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}
5482104180aSNicholas Piggin
/* Core watchdog hook: stop the watchdog on every CPU it is running on */
void watchdog_hardlockup_stop(void)
{
	int cpu;

	for_each_cpu(cpu, &wd_cpus_enabled)
		stop_watchdog_on_cpu(cpu);
}
5566b9dc480SThomas Gleixner
/*
 * Core watchdog hook: recompute the timeouts and start the watchdog on
 * every online CPU that is in watchdog_cpumask.
 */
void watchdog_hardlockup_start(void)
{
	int cpu;

	watchdog_calc_timeouts();
	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
		start_watchdog_on_cpu(cpu);
}
5652104180aSNicholas Piggin
5662104180aSNicholas Piggin /*
56734ddaa3eSThomas Gleixner * Invoked from core watchdog init.
5682104180aSNicholas Piggin */
watchdog_hardlockup_probe(void)569df95d308SDouglas Anderson int __init watchdog_hardlockup_probe(void)
5702104180aSNicholas Piggin {
5712104180aSNicholas Piggin int err;
5722104180aSNicholas Piggin
57334ddaa3eSThomas Gleixner err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
57434ddaa3eSThomas Gleixner "powerpc/watchdog:online",
5757ae3f6e1SNicholas Piggin start_watchdog_on_cpu,
5767ae3f6e1SNicholas Piggin stop_watchdog_on_cpu);
57734ddaa3eSThomas Gleixner if (err < 0) {
578d8fa82e0SMichael Ellerman pr_warn("could not be initialized");
57934ddaa3eSThomas Gleixner return err;
58034ddaa3eSThomas Gleixner }
5812104180aSNicholas Piggin return 0;
5822104180aSNicholas Piggin }
583f5e74e83SLaurent Dufour
584f5e74e83SLaurent Dufour #ifdef CONFIG_PPC_PSERIES
/*
 * pseries hook: stretch the watchdog timeout by @pct percent and
 * reconfigure the lockup detector so watchdog_calc_timeouts() picks up
 * the new value.
 */
void watchdog_hardlockup_set_timeout_pct(u64 pct)
{
	pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
	WRITE_ONCE(wd_timeout_pct, pct);
	lockup_detector_reconfigure();
}
591f5e74e83SLaurent Dufour #endif
592