/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/pcr.h>
#include <asm/perfctr.h>

#include "kstack.h"

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile counter overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);

static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	console_verbose();
	bust_spinlocks(1);

	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
	       smp_processor_id(), regs->tpc);
	show_regs(regs);
	dump_stack();

	bust_spinlocks(0);

	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");

	nmi_exit();
	local_irq_enable();
	do_exit(SIGBUS);
}

notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	void *orig_sp;

	clear_softint(1 << irq);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	orig_sp = set_hardirq_stack();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write(PCR_PIC_PRIV);

	/* Use the timer interrupt count as a heartbeat: if it has not
	 * advanced since the last NMI and nobody poked nmi_touch, this
	 * cpu has made no visible forward progress.
	 */
	sum = local_cpu_data().irq0_irqs;
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		__this_cpu_inc(alert_counter);
		/* The watchdog fires nmi_hz times a second, so this
		 * amounts to roughly 30 seconds without progress.
		 */
		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		__this_cpu_write(alert_counter, 0);
	}
	if (__get_cpu_var(wd_enabled)) {
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
	}

	restore_hardirq_stack(orig_sp);

	nmi_exit();
}
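
/* Code that legitimately stalls the timer tick for a long stretch,
 * for instance a busy-wait with interrupts disabled, is expected to
 * feed the watchdog with touch_nmi_watchdog(), which sets nmi_touch
 * on every present cpu and thereby resets the alert_counter check
 * above.  A minimal sketch, with a hypothetical completion test:
 *
 *	local_irq_disable();
 *	while (!hw_operation_done()) {	// hypothetical hardware poll
 *		touch_nmi_watchdog();	// keep perfctr_irq() quiet
 *		udelay(100);
 *	}
 *	local_irq_enable();
 */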

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%u->%u)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

void stop_nmi_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	__get_cpu_var(wd_enabled) = 0;
	atomic_dec(&nmi_active);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	/* Now that we know it works, drop the NMI frequency to
	 * something more reasonable.
	 */
	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

void start_nmi_watchdog(void *unused)
{
	__get_cpu_var(wd_enabled) = 1;
	atomic_inc(&nmi_active);

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
	if (!__get_cpu_var(wd_enabled))
		return;

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call = nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}

	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
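
/* nmi_adjust_hz() is the exported knob for changing how often the
 * watchdog NMI fires; it records the new rate and reprograms the
 * counter on every cpu via nmi_adjust_hz_one().  A hypothetical
 * caller that wants finer-grained lockup detection for a while
 * might do:
 *
 *	nmi_adjust_hz(100);		// 100 NMIs per second per cpu
 *	run_stress_test();		// hypothetical workload
 *	nmi_adjust_hz(1);		// back to the post-boot rate
 *
 * The rate restored at the end matches what check_nmi_watchdog()
 * leaves behind once the watchdog has been verified.
 */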