/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/ptrace.h>
#include <asm/local.h>
#include <asm/pcr.h>

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile counter overflow interrupt fires at level 15, so
 * we use level 14 as our IRQ-off level.
 */

static int nmi_watchdog_active;
static int panic_on_timeout;

int nmi_usable;
EXPORT_SYMBOL_GPL(nmi_usable);

static unsigned int nmi_hz = HZ;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog_active) {
		int cpu;

		for_each_present_cpu(cpu) {
			/* Avoid dirtying the cacheline if already set. */
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	console_verbose();
	bust_spinlocks(1);

	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
	       smp_processor_id(), regs->tpc);
	show_regs(regs);
	dump_stack();

	bust_spinlocks(0);

	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");

	local_irq_enable();
	do_exit(SIGBUS);
}
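
/* The watchdog heuristic, in brief: each profile counter overflow
 * interrupt samples this CPU's timer interrupt count via
 * kstat_irqs_cpu(0, cpu).  A CPU that is still taking timer ticks is
 * making progress, so a changed sample resets the alert counter.  If
 * the sample stays frozen for 5 * nmi_hz consecutive overflows
 * (roughly five seconds), interrupts have been blocked for too long
 * and die_nmi() reports a lockup.
 */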
notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	int cpu = smp_processor_id();

	clear_softint(1 << irq);
	pcr_ops->write(PCR_PIC_PRIV);

	local_cpu_data().__nmi_count++;

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;

	sum = kstat_irqs_cpu(0, cpu);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}
	if (nmi_usable) {
		/* Re-arm the counter for the next sample period. */
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
	}
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

static int endflag __initdata;

static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
		cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	nmi_usable = 0;
}

static void stop_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!nmi_usable) {
		kfree(prev_nmi_count);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	/* The hardware works; drop to a once-per-second overflow rate.
	 * Each CPU re-arms itself at the new rate from perfctr_irq().
	 */
	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_watchdog, NULL, 1);
	return err;
}

static void start_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(start_watchdog, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
{
	on_each_cpu(stop_watchdog, NULL, 1);
	return 0;
}

static struct notifier_block nmi_reboot_notifier = {
	.notifier_call = nmi_shutdown,
};

int __init nmi_init(void)
{
	int err;

	nmi_usable = 1;

	on_each_cpu(start_watchdog, NULL, 1);

	err = check_nmi_watchdog();
	if (!err) {
		err = register_reboot_notifier(&nmi_reboot_notifier);
		if (err) {
			nmi_usable = 0;
			on_each_cpu(stop_watchdog, NULL, 1);
		} else {
			/* touch_nmi_watchdog() is a no-op until this is set. */
			nmi_watchdog_active = 1;
		}
	}
	return err;
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
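
/* Usage note: booting with "nmi_watchdog=panic" makes a detected
 * lockup panic the machine via panic_on_timeout instead of killing
 * the wedged context with do_exit(SIGBUS).  The overflow rate can be
 * retuned at run time with nmi_adjust_hz().
 */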