// SPDX-License-Identifier: GPL-2.0-only
/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>

#include "kstack.h"

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void arch_touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

int __init watchdog_hardlockup_probe(void)
{
	return 0;
}

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	int this_cpu = smp_processor_id();

	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	if (do_panic || panic_on_oops)
		panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
	else
		WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}

notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	void *orig_sp;

	clear_softint(1 << irq);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	orig_sp = set_hardirq_stack();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);

	sum = local_cpu_data().irq0_irqs;
	if (__this_cpu_read(nmi_touch)) {
		__this_cpu_write(nmi_touch, 0);
		touched = 1;
	}
	if (!touched && __this_cpu_read(last_irq_sum) == sum) {
		__this_cpu_inc(alert_counter);
		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__this_cpu_write(last_irq_sum, sum);
		__this_cpu_write(alert_counter, 0);
	}
	if (__this_cpu_read(wd_enabled)) {
		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
	}

	restore_hardirq_stack(orig_sp);

	nmi_exit();
}
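
/* The helpers below implement the boot-time self-test run from
 * nmi_init(): keep the other CPUs busy in nmi_cpu_busy(), wait roughly
 * 20 watchdog ticks, and then verify that each CPU's NMI count has
 * advanced.  Any CPU whose counter barely moved is reported via
 * report_broken_nmi() and has its watchdog disabled.
 */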
static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

static __init void nmi_cpu_busy(void *data)
{
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

void stop_nmi_watchdog(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	__this_cpu_write(wd_enabled, 0);
	atomic_dec(&nmi_active);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc_array(nr_cpu_ids, sizeof(unsigned int),
				       GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

void start_nmi_watchdog(void *unused)
{
	if (__this_cpu_read(wd_enabled))
		return;

	__this_cpu_write(wd_enabled, 1);
	atomic_inc(&nmi_active);

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call	= nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}

	nmi_init_done = 1;

	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
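
/* Handle the "nmi_watchdog=" boot parameter: "nmi_watchdog=panic" makes
 * a detected hard lockup panic the machine instead of only warning.
 */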
__setup("nmi_watchdog=", setup_nmi_watchdog); 285 286 /* 287 * sparc specific NMI watchdog enable function. 288 * Enables watchdog if it is not enabled already. 289 */ 290 void watchdog_hardlockup_enable(unsigned int cpu) 291 { 292 if (atomic_read(&nmi_active) == -1) { 293 pr_warn("NMI watchdog cannot be enabled or disabled\n"); 294 return; 295 } 296 297 /* 298 * watchdog thread could start even before nmi_init is called. 299 * Just Return in that case. Let nmi_init finish the init 300 * process first. 301 */ 302 if (!nmi_init_done) 303 return; 304 305 smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); 306 } 307 /* 308 * sparc specific NMI watchdog disable function. 309 * Disables watchdog if it is not disabled already. 310 */ 311 void watchdog_hardlockup_disable(unsigned int cpu) 312 { 313 if (atomic_read(&nmi_active) == -1) 314 pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); 315 else 316 smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1); 317 } 318