/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/ptrace.h>
#include <asm/local.h>
#include <asm/pcr.h>

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */

static int panic_on_timeout;

/* nmi_active:
 * >0: the NMI watchdog is active, but can be disabled
 * <0: the NMI watchdog has not been set up, and cannot be enabled
 *  0: the NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);

static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
	if (atomic_read(&nmi_active)) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	console_verbose();
	bust_spinlocks(1);

	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
	       smp_processor_id(), regs->tpc);
	show_regs(regs);
	dump_stack();

	bust_spinlocks(0);

	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");

	nmi_exit();
	local_irq_enable();
	do_exit(SIGBUS);
}

/* Pseudo-NMI handler, entered from the level 15 profile counter
 * overflow interrupt.  Each tick re-arms the counter and samples this
 * CPU's IRQ 0 (timer) interrupt count as a heartbeat: if the count has
 * not moved for 30 * nmi_hz consecutive ticks and nobody has touched
 * the watchdog, the CPU is declared locked up.
 */
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	int cpu = smp_processor_id();

	clear_softint(1 << irq);

	local_cpu_data().__nmi_count++;

	nmi_enter();

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;
	else
		pcr_ops->write(PCR_PIC_PRIV);

	sum = kstat_irqs_cpu(0, cpu);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		__this_cpu_inc(per_cpu_var(alert_counter));
		if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		__this_cpu_write(per_cpu_var(alert_counter), 0);
	}
	if (__get_cpu_var(wd_enabled)) {
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
	}

	nmi_exit();
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}
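/* Boot-time selftest machinery (added commentary, inferred from the
 * code below): nmi_cpu_busy() keeps the other CPUs spinning with
 * interrupts enabled while check_nmi_watchdog() waits 20 watchdog
 * periods and verifies that every CPU's __nmi_count advanced by more
 * than five ticks; a CPU that fails the check is reported as broken
 * and has its watchdog disabled.
 */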
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
	       "WARNING: CPU#%d: NMI appears to be stuck (%u->%u)!\n",
	       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
	       "Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
	       "and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

void stop_nmi_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	__get_cpu_var(wd_enabled) = 0;
	atomic_dec(&nmi_active);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;	/* release the nmi_cpu_busy() spinners */
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	/* The selftest passed, so drop to a quiet 1Hz watchdog rate. */
	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return err;
}

void start_nmi_watchdog(void *unused)
{
	__get_cpu_var(wd_enabled) = 1;
	atomic_inc(&nmi_active);

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

static void nmi_adjust_hz_one(void *unused)
{
	if (!__get_cpu_var(wd_enabled))
		return;

	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_nmi_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call = nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	on_each_cpu(start_nmi_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			atomic_set(&nmi_active, -1);
		}
	}
	if (!err)
		init_hw_perf_events();

	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
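/* Illustrative usage sketch (not part of the original file): how an
 * in-kernel profiling client might drive the exported interface above.
 * nmi_adjust_hz() and touch_nmi_watchdog() are the real exports; the
 * function name and the 100Hz sample rate below are hypothetical.
 */
#if 0	/* example only, never compiled */
static void example_profiler_session(void)
{
	/* Speed the overflow interrupt up to 100Hz for sampling. */
	nmi_adjust_hz(100);

	/* Pet the watchdog around known-long busy stretches so the
	 * alert_counter heuristic in perfctr_irq() does not fire. */
	touch_nmi_watchdog();

	/* Drop back to the quiet post-selftest 1Hz watchdog rate. */
	nmi_adjust_hz(1);
}
#endif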