/* Pseudo NMI support on sparc64 systems.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * The NMI watchdog support and infrastructure is based almost
 * entirely upon the x86 NMI support code.
 */
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kernel_stat.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/smp.h>

#include <asm/ptrace.h>
#include <asm/local.h>
#include <asm/pcr.h>

/* We don't have a real NMI on sparc64, but we can fake one
 * up using profiling counter overflow interrupts and interrupt
 * levels.
 *
 * The profile overflow interrupts at level 15, so we use
 * level 14 as our IRQ off level.
 */

static int nmi_watchdog_active;
static int panic_on_timeout;

int nmi_usable;
EXPORT_SYMBOL_GPL(nmi_usable);

static unsigned int nmi_hz = HZ;

static DEFINE_PER_CPU(unsigned int, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog_active) {
		int cpu;

		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		return;

	console_verbose();
	bust_spinlocks(1);

	printk(KERN_EMERG "%s", str);
	printk(" on CPU%d, ip %08lx, registers:\n",
	       smp_processor_id(), regs->tpc);
	show_regs(regs);
	dump_stack();

	bust_spinlocks(0);

	if (do_panic || panic_on_oops)
		panic("Non maskable interrupt");

	local_irq_enable();
	do_exit(SIGBUS);
}

notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
{
	unsigned int sum, touched = 0;
	int cpu = smp_processor_id();

	/* Ack the softint and turn the counter off while we process
	 * the event.
	 */
	clear_softint(1 << irq);
	pcr_ops->write(PCR_PIC_PRIV);

	local_cpu_data().__nmi_count++;

	if (notify_die(DIE_NMI, "nmi", regs, 0,
		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
		touched = 1;

	/* If the interrupt count has not advanced since the last check
	 * and nobody touched the watchdog, bump the per-cpu alert
	 * counter; after 5 * nmi_hz stalled checks declare a lockup.
	 */
	sum = kstat_irqs_cpu(0, cpu);
	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* Re-arm the counter so it overflows again at the nmi_hz rate. */
	if (nmi_usable) {
		write_pic(picl_value(nmi_hz));
		pcr_ops->write(pcr_enable);
	}
}

static inline unsigned int get_nmi_count(int cpu)
{
	return cpu_data(cpu).__nmi_count;
}

static int endflag __initdata;

/* Spin with interrupts enabled until check_nmi_watchdog() sets endflag. */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	while (endflag == 0)
		mb();
}

static void report_broken_nmi(int cpu, int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	nmi_usable = 0;
}

static void stop_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
}

static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu, err;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(unsigned int), GFP_KERNEL);
	if (!prev_nmi_count) {
		err = -ENOMEM;
		goto error;
	}

	printk(KERN_INFO "Testing NMI watchdog ... ");

	/* Keep the other cpus spinning with interrupts enabled while we
	 * watch the per-cpu NMI counts advance.
	 */
	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!nmi_usable) {
		kfree(prev_nmi_count);
		err = -ENODEV;
		goto error;
	}
	printk("OK.\n");

	nmi_hz = 1;

	kfree(prev_nmi_count);
	return 0;
error:
	on_each_cpu(stop_watchdog, NULL, 1);
	return err;
}

/* Program the counter to overflow at the nmi_hz rate and enable it. */
static void start_watchdog(void *unused)
{
	pcr_ops->write(PCR_PIC_PRIV);
	write_pic(picl_value(nmi_hz));

	pcr_ops->write(pcr_enable);
}

void nmi_adjust_hz(unsigned int new_hz)
{
	nmi_hz = new_hz;
	on_each_cpu(start_watchdog, NULL, 1);
}
EXPORT_SYMBOL_GPL(nmi_adjust_hz);

int __init nmi_init(void)
{
	nmi_usable = 1;

	on_each_cpu(start_watchdog, NULL, 1);

	return check_nmi_watchdog();
}

static int __init setup_nmi_watchdog(char *str)
{
	if (!strncmp(str, "panic", 5))
		panic_on_timeout = 1;

	return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);