// SPDX-License-Identifier: GPL-2.0-only
/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>

#include "kstack.h"

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void arch_touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	int this_cpu = smp_processor_id();

	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	if (do_panic || panic_on_oops)
		panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
	else
		WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}

/* Profiling counter overflow handler; this is the pseudo-NMI.
 * A CPU is considered hard-locked when its per-cpu timer tick count
 * (irq0_irqs) has not advanced for 30 seconds worth of overflow
 * interrupts and nobody has touched the watchdog in the meantime.
 */
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	void *orig_sp;

	clear_softint(1 << irq);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	orig_sp = set_hardirq_stack();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);

	sum = local_cpu_data().irq0_irqs;
	if (__this_cpu_read(nmi_touch)) {
		__this_cpu_write(nmi_touch, 0);
		touched = 1;
	}
	if (!touched && __this_cpu_read(last_irq_sum) == sum) {
		__this_cpu_inc(alert_counter);
		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__this_cpu_write(last_irq_sum, sum);
		__this_cpu_write(alert_counter, 0);
	}
	if (__this_cpu_read(wd_enabled)) {
		/* Re-arm the counter for the next overflow interrupt. */
		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
	}

	restore_hardirq_stack(orig_sp);

	nmi_exit();
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}
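/* Boot-time sanity check, modelled on the x86 code: nmi_cpu_busy()
 * keeps the other CPUs spinning with interrupts enabled while
 * check_nmi_watchdog() below samples each CPU's NMI count over
 * roughly 20 profiling ticks.  A CPU whose count barely advances is
 * reported as broken and has its watchdog disabled.
 */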
static __init void nmi_cpu_busy(void *data)
{
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

void stop_nmi_watchdog(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;
	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	__this_cpu_write(wd_enabled, 0);
	atomic_dec(&nmi_active);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc_array(nr_cpu_ids, sizeof(unsigned int),
				       GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

void start_nmi_watchdog(void *unused)
{
	if (__this_cpu_read(wd_enabled))
		return;

	__this_cpu_write(wd_enabled, 1);
	atomic_inc(&nmi_active);

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
	if (!__this_cpu_read(wd_enabled))
		return;

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));

	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call	= nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}

	nmi_init_done = 1;

	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
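/* Note: booting with "nmi_watchdog=panic" makes die_nmi() panic the
 * machine on a detected hard lockup instead of issuing a WARN().
 */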
/*
 * sparc-specific NMI watchdog enable function.
 * Enables the watchdog if it is not already enabled.
 */
int watchdog_nmi_enable(unsigned int cpu)
{
	if (atomic_read(&nmi_active) == -1) {
		pr_warn("NMI watchdog cannot be enabled or disabled\n");
		return -1;
	}

	/*
	 * The watchdog thread could start even before nmi_init() is
	 * called.  Just return in that case and let nmi_init() finish
	 * the init process first.
	 */
	if (!nmi_init_done)
		return 0;

	smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);

	return 0;
}
/*
 * sparc-specific NMI watchdog disable function.
 * Disables the watchdog if it is not already disabled.
 */
void watchdog_nmi_disable(unsigned int cpu)
{
	if (atomic_read(&nmi_active) == -1)
		pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
	else
		smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
}