1 /* Pseudo NMI support on sparc64 systems. 2 * 3 * Copyright (C) 2009 David S. Miller <davem@davemloft.net> 4 * 5 * The NMI watchdog support and infrastructure is based almost 6 * entirely upon the x86 NMI support code. 7 */ 8 #include <linux/kernel.h> 9 #include <linux/param.h> 10 #include <linux/init.h> 11 #include <linux/percpu.h> 12 #include <linux/nmi.h> 13 #include <linux/module.h> 14 #include <linux/kprobes.h> 15 #include <linux/kernel_stat.h> 16 #include <linux/reboot.h> 17 #include <linux/slab.h> 18 #include <linux/kdebug.h> 19 #include <linux/delay.h> 20 #include <linux/smp.h> 21 22 #include <asm/perf_counter.h> 23 #include <asm/ptrace.h> 24 #include <asm/local.h> 25 #include <asm/pcr.h> 26 27 /* We don't have a real NMI on sparc64, but we can fake one 28 * up using profiling counter overflow interrupts and interrupt 29 * levels. 30 * 31 * The profile overflow interrupts at level 15, so we use 32 * level 14 as our IRQ off level. 33 */ 34 35 static int panic_on_timeout; 36 37 /* nmi_active: 38 * >0: the NMI watchdog is active, but can be disabled 39 * <0: the NMI watchdog has not been set up, and cannot be enabled 40 * 0: the NMI watchdog is disabled, but can be enabled 41 */ 42 atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ 43 EXPORT_SYMBOL(nmi_active); 44 45 static unsigned int nmi_hz = HZ; 46 static DEFINE_PER_CPU(short, wd_enabled); 47 static int endflag __initdata; 48 49 static DEFINE_PER_CPU(unsigned int, last_irq_sum); 50 static DEFINE_PER_CPU(local_t, alert_counter); 51 static DEFINE_PER_CPU(int, nmi_touch); 52 53 void touch_nmi_watchdog(void) 54 { 55 if (atomic_read(&nmi_active)) { 56 int cpu; 57 58 for_each_present_cpu(cpu) { 59 if (per_cpu(nmi_touch, cpu) != 1) 60 per_cpu(nmi_touch, cpu) = 1; 61 } 62 } 63 64 touch_softlockup_watchdog(); 65 } 66 EXPORT_SYMBOL(touch_nmi_watchdog); 67 68 static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) 69 { 70 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 71 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) 72 return; 73 74 console_verbose(); 75 bust_spinlocks(1); 76 77 printk(KERN_EMERG "%s", str); 78 printk(" on CPU%d, ip %08lx, registers:\n", 79 smp_processor_id(), regs->tpc); 80 show_regs(regs); 81 dump_stack(); 82 83 bust_spinlocks(0); 84 85 if (do_panic || panic_on_oops) 86 panic("Non maskable interrupt"); 87 88 nmi_exit(); 89 local_irq_enable(); 90 do_exit(SIGBUS); 91 } 92 93 notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) 94 { 95 unsigned int sum, touched = 0; 96 int cpu = smp_processor_id(); 97 98 clear_softint(1 << irq); 99 pcr_ops->write(PCR_PIC_PRIV); 100 101 local_cpu_data().__nmi_count++; 102 103 nmi_enter(); 104 105 if (notify_die(DIE_NMI, "nmi", regs, 0, 106 pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) 107 touched = 1; 108 109 sum = kstat_irqs_cpu(0, cpu); 110 if (__get_cpu_var(nmi_touch)) { 111 __get_cpu_var(nmi_touch) = 0; 112 touched = 1; 113 } 114 if (!touched && __get_cpu_var(last_irq_sum) == sum) { 115 local_inc(&__get_cpu_var(alert_counter)); 116 if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz) 117 die_nmi("BUG: NMI Watchdog detected LOCKUP", 118 regs, panic_on_timeout); 119 } else { 120 __get_cpu_var(last_irq_sum) = sum; 121 local_set(&__get_cpu_var(alert_counter), 0); 122 } 123 if (__get_cpu_var(wd_enabled)) { 124 write_pic(picl_value(nmi_hz)); 125 pcr_ops->write(pcr_enable); 126 } 127 128 nmi_exit(); 129 } 130 131 static inline unsigned int get_nmi_count(int cpu) 132 { 133 return cpu_data(cpu).__nmi_count; 134 } 135 136 static __init void nmi_cpu_busy(void *data) 137 { 138 local_irq_enable_in_hardirq(); 139 while (endflag == 0) 140 mb(); 141 } 142 143 static void report_broken_nmi(int cpu, int *prev_nmi_count) 144 { 145 printk(KERN_CONT "\n"); 146 147 printk(KERN_WARNING 148 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", 149 cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); 150 151 printk(KERN_WARNING 152 "Please report this to bugzilla.kernel.org,\n"); 153 printk(KERN_WARNING 154 "and attach the output of the 'dmesg' command.\n"); 155 156 per_cpu(wd_enabled, cpu) = 0; 157 atomic_dec(&nmi_active); 158 } 159 160 void stop_nmi_watchdog(void *unused) 161 { 162 pcr_ops->write(PCR_PIC_PRIV); 163 __get_cpu_var(wd_enabled) = 0; 164 atomic_dec(&nmi_active); 165 } 166 167 static int __init check_nmi_watchdog(void) 168 { 169 unsigned int *prev_nmi_count; 170 int cpu, err; 171 172 if (!atomic_read(&nmi_active)) 173 return 0; 174 175 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL); 176 if (!prev_nmi_count) { 177 err = -ENOMEM; 178 goto error; 179 } 180 181 printk(KERN_INFO "Testing NMI watchdog ... "); 182 183 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); 184 185 for_each_possible_cpu(cpu) 186 prev_nmi_count[cpu] = get_nmi_count(cpu); 187 local_irq_enable(); 188 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ 189 190 for_each_online_cpu(cpu) { 191 if (!per_cpu(wd_enabled, cpu)) 192 continue; 193 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) 194 report_broken_nmi(cpu, prev_nmi_count); 195 } 196 endflag = 1; 197 if (!atomic_read(&nmi_active)) { 198 kfree(prev_nmi_count); 199 atomic_set(&nmi_active, -1); 200 err = -ENODEV; 201 goto error; 202 } 203 printk("OK.\n"); 204 205 nmi_hz = 1; 206 207 kfree(prev_nmi_count); 208 return 0; 209 error: 210 on_each_cpu(stop_nmi_watchdog, NULL, 1); 211 return err; 212 } 213 214 void start_nmi_watchdog(void *unused) 215 { 216 __get_cpu_var(wd_enabled) = 1; 217 atomic_inc(&nmi_active); 218 219 pcr_ops->write(PCR_PIC_PRIV); 220 write_pic(picl_value(nmi_hz)); 221 222 pcr_ops->write(pcr_enable); 223 } 224 225 static void nmi_adjust_hz_one(void *unused) 226 { 227 if (!__get_cpu_var(wd_enabled)) 228 return; 229 230 pcr_ops->write(PCR_PIC_PRIV); 231 write_pic(picl_value(nmi_hz)); 232 233 pcr_ops->write(pcr_enable); 234 } 235 236 void nmi_adjust_hz(unsigned int new_hz) 237 { 238 nmi_hz = new_hz; 239 on_each_cpu(nmi_adjust_hz_one, NULL, 1); 240 } 241 EXPORT_SYMBOL_GPL(nmi_adjust_hz); 242 243 static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p) 244 { 245 on_each_cpu(stop_nmi_watchdog, NULL, 1); 246 return 0; 247 } 248 249 static struct notifier_block nmi_reboot_notifier = { 250 .notifier_call = nmi_shutdown, 251 }; 252 253 int __init nmi_init(void) 254 { 255 int err; 256 257 on_each_cpu(start_nmi_watchdog, NULL, 1); 258 259 err = check_nmi_watchdog(); 260 if (!err) { 261 err = register_reboot_notifier(&nmi_reboot_notifier); 262 if (err) { 263 on_each_cpu(stop_nmi_watchdog, NULL, 1); 264 atomic_set(&nmi_active, -1); 265 } 266 } 267 if (!err) 268 init_hw_perf_counters(); 269 270 return err; 271 } 272 273 static int __init setup_nmi_watchdog(char *str) 274 { 275 if (!strncmp(str, "panic", 5)) 276 panic_on_timeout = 1; 277 278 return 0; 279 } 280 __setup("nmi_watchdog=", setup_nmi_watchdog); 281