/*
 * Watchdog support on powerpc systems.
 *
 * Copyright 2017, IBM Corporation.
 *
 * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/hardirq.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/paca.h>

/*
 * The watchdog has a simple timer that runs on each CPU, once per timer
 * period. This is the heartbeat.
 *
 * Then there are checks to see whether a CPU's heartbeat has failed to
 * trigger for the panic timeout period. Currently the watchdog only
 * supports an SMP check, so the heartbeat only turns on when we have
 * 2 or more CPUs.
 *
 * This is not an NMI watchdog, but Linux uses that name for a generic
 * watchdog in some cases, so NMI gets used in some places.
 */

static cpumask_t wd_cpus_enabled __read_mostly;

static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */

static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeats */

static DEFINE_PER_CPU(struct timer_list, wd_timer);
static DEFINE_PER_CPU(u64, wd_timer_tb);

/*
 * These are for the SMP checker. Every timer period, each CPU clears its
 * bit in the pending mask from its heartbeat. Once the mask becomes empty,
 * the time is noted and the mask is refilled from the enabled, non-stuck
 * CPUs. If the time since the last refill grows past the SMP panic
 * timeout, the CPUs whose bits are still set are considered stuck and we
 * can panic with that list.
 *
 * This will work best with NMI IPIs for crash code so the stuck CPUs
 * can be pulled out to get their backtraces.
 */
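/*
 * Illustrative walk-through (hypothetical scenario, not taken from this
 * file): with CPUs 0-3 enabled, wd_smp_cpus_pending starts as {0,1,2,3}.
 * Each heartbeat clears the calling CPU's bit; when the last bit clears,
 * wd_smp_last_reset_tb is updated and the mask is refilled. If CPU 2
 * stops taking its heartbeat, the mask never empties, the reset time
 * never advances, and once wd_smp_panic_timeout_tb elapses a still-live
 * CPU reports the remaining pending CPUs ({2}) as hard locked up.
 */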
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;

static inline void wd_smp_lock(unsigned long *flags)
{
	/*
	 * Avoid the generic locking layers if possible.
	 * This may be called from low level interrupt handlers at some
	 * point in the future.
	 */
	local_irq_save(*flags);
	while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
		cpu_relax();
}

static inline void wd_smp_unlock(unsigned long *flags)
{
	clear_bit_unlock(0, &__wd_smp_lock);
	local_irq_restore(*flags);
}

static void wd_lockup_ipi(struct pt_regs *regs)
{
	pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
	print_modules();
	print_irqtrace_events(current);
	if (regs)
		show_regs(regs);
	else
		dump_stack();

	if (hardlockup_panic)
		nmi_panic(regs, "Hard LOCKUP");
}

/* Caller must hold the wd_smp lock */
static void set_cpu_stuck(int cpu, u64 tb)
{
	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		wd_smp_last_reset_tb = tb;
		cpumask_andnot(&wd_smp_cpus_pending,
				&wd_cpus_enabled,
				&wd_smp_cpus_stuck);
	}
}

static void watchdog_smp_panic(int cpu, u64 tb)
{
	unsigned long flags;
	int c;

	wd_smp_lock(&flags);
	/* Double check some things under lock */
	if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
		goto out;
	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
		goto out;
	if (cpumask_weight(&wd_smp_cpus_pending) == 0)
		goto out;

	pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
		 cpu, cpumask_pr_args(&wd_smp_cpus_pending));

	/*
	 * Try to trigger the stuck CPUs.
	 */
	for_each_cpu(c, &wd_smp_cpus_pending) {
		if (c == cpu)
			continue;
		smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
	}
	smp_flush_nmi_ipi(1000000);

	/* Take the stuck CPUs out of the watch group */
	for_each_cpu(c, &wd_smp_cpus_pending)
		set_cpu_stuck(c, tb);

out:
	wd_smp_unlock(&flags);

	printk_safe_flush();
	/*
	 * printk_safe_flush() seems to require another print
	 * before anything actually goes out to the console.
	 */
	if (sysctl_hardlockup_all_cpu_backtrace)
		trigger_allbutself_cpu_backtrace();

	if (hardlockup_panic)
		nmi_panic(NULL, "Hard LOCKUP");
}
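/*
 * Heartbeat path: clears this CPU's pending bit without taking the
 * wd_smp lock in the common case. The lock is only taken when the mask
 * looks empty (to note the reset time and refill it) or when this CPU
 * finds itself marked stuck and needs to clear that state.
 */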
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
{
	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
			unsigned long flags;

			pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
			wd_smp_lock(&flags);
			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
			wd_smp_unlock(&flags);
		}
		return;
	}
	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
	if (cpumask_empty(&wd_smp_cpus_pending)) {
		unsigned long flags;

		wd_smp_lock(&flags);
		if (cpumask_empty(&wd_smp_cpus_pending)) {
			wd_smp_last_reset_tb = tb;
			cpumask_andnot(&wd_smp_cpus_pending,
					&wd_cpus_enabled,
					&wd_smp_cpus_stuck);
		}
		wd_smp_unlock(&flags);
	}
}

static void watchdog_timer_interrupt(int cpu)
{
	u64 tb = get_tb();

	per_cpu(wd_timer_tb, cpu) = tb;

	wd_smp_clear_cpu_pending(cpu, tb);

	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
		watchdog_smp_panic(cpu, tb);
}

/*
 * Soft-NMI detector for the case where this CPU is not taking its regular
 * timer heartbeat (e.g. stuck with interrupts soft-disabled).
 */
void soft_nmi_interrupt(struct pt_regs *regs)
{
	unsigned long flags;
	int cpu = raw_smp_processor_id();
	u64 tb;

	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return;

	nmi_enter();
	tb = get_tb();
	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
		per_cpu(wd_timer_tb, cpu) = tb;

		wd_smp_lock(&flags);
		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
			wd_smp_unlock(&flags);
			goto out;
		}
		set_cpu_stuck(cpu, tb);

		pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		wd_smp_unlock(&flags);

		if (sysctl_hardlockup_all_cpu_backtrace)
			trigger_allbutself_cpu_backtrace();

		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");
	}
	/* Only reprogram the decrementer if the timeout fits in its 32 bits */
	if (wd_panic_timeout_tb < 0x7fffffff)
		mtspr(SPRN_DEC, wd_panic_timeout_tb);

out:
	nmi_exit();
}

static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
{
	t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
	if (wd_timer_period_ms > 1000)
		t->expires = __round_jiffies_up(t->expires, cpu);
	add_timer_on(t, cpu);
}

static void wd_timer_fn(unsigned long data)
{
	struct timer_list *t = this_cpu_ptr(&wd_timer);
	int cpu = smp_processor_id();

	watchdog_timer_interrupt(cpu);

	wd_timer_reset(cpu, t);
}

void arch_touch_nmi_watchdog(void)
{
	int cpu = smp_processor_id();

	watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

static void start_watchdog_timer_on(unsigned int cpu)
{
	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);

	per_cpu(wd_timer_tb, cpu) = get_tb();

	setup_pinned_timer(t, wd_timer_fn, 0);
	wd_timer_reset(cpu, t);
}

static void stop_watchdog_timer_on(unsigned int cpu)
{
	struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);

	del_timer_sync(t);
}

static int start_wd_on_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
		WARN_ON(1);
		return 0;
	}

	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		return 0;

	if (watchdog_suspended)
		return 0;

	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
		return 0;

	cpumask_set_cpu(cpu, &wd_cpus_enabled);
	if (cpumask_weight(&wd_cpus_enabled) == 1) {
		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
		wd_smp_last_reset_tb = get_tb();
	}
	smp_wmb();
	start_watchdog_timer_on(cpu);

	return 0;
}

static int stop_wd_on_cpu(unsigned int cpu)
{
	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
		return 0; /* Can happen in CPU unplug case */

	stop_watchdog_timer_on(cpu);

	cpumask_clear_cpu(cpu, &wd_cpus_enabled);
	wd_smp_clear_cpu_pending(cpu, get_tb());

	return 0;
}
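/*
 * Worked example of the calculation below (illustrative figures only,
 * assuming the default watchdog_thresh of 10 seconds and a 512 MHz
 * timebase): wd_panic_timeout_tb = 5,120,000,000 ticks (10s), the SMP
 * panic timeout = 7,680,000,000 ticks (15s), and the per-CPU heartbeat
 * timer fires every 4000ms.
 */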
static void watchdog_calc_timeouts(void)
{
	wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;

	/* Have the SMP detector trigger a bit later */
	wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;

	/* 2/5 is the factor that the perf based detector uses */
	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}

void watchdog_nmi_reconfigure(void)
{
	int cpu;

	watchdog_calc_timeouts();

	for_each_cpu(cpu, &wd_cpus_enabled)
		stop_wd_on_cpu(cpu);

	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
		start_wd_on_cpu(cpu);
}

/*
 * This runs after lockup_detector_init() which sets up watchdog_cpumask.
 */
static int __init powerpc_watchdog_init(void)
{
	int err;

	watchdog_calc_timeouts();

	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
				start_wd_on_cpu, stop_wd_on_cpu);
	if (err < 0)
		pr_warn("Watchdog could not be initialized\n");

	return 0;
}
arch_initcall(powerpc_watchdog_init);

static void handle_backtrace_ipi(struct pt_regs *regs)
{
	nmi_cpu_backtrace(regs);
}

static void raise_backtrace_ipi(cpumask_t *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask) {
		if (cpu == smp_processor_id())
			handle_backtrace_ipi(NULL);
		else
			smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
	}
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}