/*
 * NMI watchdog support on APIC systems
 *
 * Started by Ingo Molnar <mingo@redhat.com>
 *
 * Fixes:
 * Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 * Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 * Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 * Pavel Machek and
 * Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */

#include <asm/apic.h>

#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/kdebug.h>
#include <linux/smp.h>

#include <asm/i8259.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/proto.h>
#include <asm/timer.h>

#include <asm/mce.h>

#include <mach_traps.h>

int unknown_nmi_panic;
int nmi_watchdog_enabled;

static cpumask_t backtrace_mask = CPU_MASK_NONE;

/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
EXPORT_SYMBOL(nmi_active);

unsigned int nmi_watchdog = NMI_NONE;
EXPORT_SYMBOL(nmi_watchdog);

static int panic_on_timeout;

static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;

static inline unsigned int get_nmi_count(int cpu)
{
#ifdef CONFIG_X86_64
	return cpu_pda(cpu)->__nmi_count;
#else
	return nmi_count(cpu);
#endif
}

static inline int mce_in_progress(void)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
	return atomic_read(&mce_entry) > 0;
#endif
	return 0;
}

/*
 * Take the local apic timer and PIT/HPET into account. We don't
 * know which one is active when highres/dyntick is on.
 */
static inline unsigned int get_timer_irqs(int cpu)
{
#ifdef CONFIG_X86_64
	return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
#else
	return per_cpu(irq_stat, cpu).apic_timer_irqs +
		per_cpu(irq_stat, cpu).irq0_irqs;
#endif
}

#ifdef CONFIG_SMP
/*
 * The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test, make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/*
	 * Intentionally don't use cpu_relax here. This is
	 * to make sure that the performance counter really ticks,
	 * even if there is a simulator or similar that catches the
	 * pause instruction. On a real HT machine this is fine because
	 * all other CPUs are busy with "useless" delay loops and don't
	 * care if they get somewhat fewer cycles.
	 */
	while (endflag == 0)
		mb();
}
#endif
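
/*
 * Usage sketch (illustrative only; it simply mirrors what
 * check_nmi_watchdog() below already does): the self-test keeps the
 * other CPUs spinning in nmi_cpu_busy() so their performance counters
 * keep ticking, samples the per-CPU NMI counts, and finally releases
 * the busy loops by setting endflag:
 *
 *	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
 *	...sample get_nmi_count() for each CPU, wait ~20 ticks...
 *	endflag = 1;	<- lets the while (endflag == 0) loops exit
 */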

static void report_broken_nmi(int cpu, int *prev_nmi_count)
{
	printk(KERN_CONT "\n");

	printk(KERN_WARNING
		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));

	printk(KERN_WARNING
		"Please report this to bugzilla.kernel.org,\n");
	printk(KERN_WARNING
		"and attach the output of the 'dmesg' command.\n");

	per_cpu(wd_enabled, cpu) = 0;
	atomic_dec(&nmi_active);
}

int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		goto error;

	printk(KERN_INFO "Testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
#endif

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = get_nmi_count(cpu);
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
			report_broken_nmi(cpu, prev_nmi_count);
	}
	endflag = 1;
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		goto error;
	}
	printk("OK.\n");

	/*
	 * Now that we know it works we can reduce the NMI frequency to
	 * something more reasonable; makes a difference in some configs.
	 */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = lapic_adjust_nmi_hz(1);

	kfree(prev_nmi_count);
	return 0;
error:
	if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
		disable_8259A_irq(0);
#ifdef CONFIG_X86_32
	timer_ack = 0;
#endif
	return -1;
}

static int __init setup_nmi_watchdog(char *str)
{
	unsigned int nmi;

	if (!strncmp(str, "panic", 5)) {
		panic_on_timeout = 1;
		str = strchr(str, ',');
		if (!str)
			return 1;
		++str;
	}

	get_option(&str, &nmi);

	if (nmi >= NMI_INVALID)
		return 0;

	nmi_watchdog = nmi;
	return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);

/*
 * Suspend/resume support
 */
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, the other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, the other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}

static struct sysdev_class nmi_sysclass = {
	.name		= "lapic_nmi",
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/*
	 * This should really be a BUG_ON, but because this is an
	 * init call it just doesn't work.
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}

/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
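
/*
 * Note (an assumption about the sysdev model of this era, not spelled
 * out above): registering nmi_sysclass and device_lapic_nmi should make
 * the watchdog visible as /sys/devices/system/lapic_nmi/lapic_nmi0, and
 * it is that device's suspend/resume callbacks which park and restart
 * the watchdog across a power-management transition.
 */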

static void __acpi_nmi_enable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 1);
}

static void __acpi_nmi_disable(void *__unused)
{
	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 1);
}

/*
 * This function is called as soon as the LAPIC NMI watchdog driver has
 * everything in place and is ready to check whether the NMIs belong to
 * the NMI watchdog.
 */
void cpu_nmi_set_wd_enabled(void)
{
	__get_cpu_var(wd_enabled) = 1;
}

void setup_apic_nmi_watchdog(void *unused)
{
	if (__get_cpu_var(wd_enabled))
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active, neither should the other cpus be */
	if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
		return;

	switch (nmi_watchdog) {
	case NMI_LOCAL_APIC:
		if (lapic_watchdog_init(nmi_hz) < 0) {
			__get_cpu_var(wd_enabled) = 0;
			return;
		}
		/* FALL THROUGH */
	case NMI_IO_APIC:
		__get_cpu_var(wd_enabled) = 1;
		atomic_inc(&nmi_active);
	}
}

void stop_apic_nmi_watchdog(void *unused)
{
	/* only support LOCAL and IO APICs for now */
	if (!nmi_watchdog_active())
		return;
	if (__get_cpu_var(wd_enabled) == 0)
		return;
	if (nmi_watchdog == NMI_LOCAL_APIC)
		lapic_watchdog_stop();
	__get_cpu_var(wd_enabled) = 0;
	atomic_dec(&nmi_active);
}
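
/*
 * Sketch of how the hooks above are driven (an assumption about the
 * perfctr watchdog driver, which lives outside this file): the
 * enable/disable paths run the per-CPU setup/stop handlers on every
 * CPU, roughly
 *
 *	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
 *	...
 *	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 *
 * with nmi_active counting how many CPUs currently have the watchdog
 * armed.
 */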

/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * Since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [if more tty-related locks are added, break them up here too!]
 */

static DEFINE_PER_CPU(unsigned, last_irq_sum);
static DEFINE_PER_CPU(local_t, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);

void touch_nmi_watchdog(void)
{
	if (nmi_watchdog_active()) {
		unsigned cpu;

		/*
		 * Tell other CPUs to reset their alert counters. We cannot
		 * do it ourselves because the alert count increase is not
		 * atomic.
		 */
		for_each_present_cpu(cpu) {
			if (per_cpu(nmi_touch, cpu) != 1)
				per_cpu(nmi_touch, cpu) = 1;
		}
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

notrace __kprobes int
nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{
	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	sum = get_timer_irqs(cpu);

	if (__get_cpu_var(nmi_touch)) {
		__get_cpu_var(nmi_touch) = 0;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	/* Could check oops_in_progress here too, but it's safer not to */
	if (mce_in_progress())
		touched = 1;

	/* if none of the timers is firing, this cpu isn't doing much */
	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		local_inc(&__get_cpu_var(alert_counter));
		if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens.
			 */
			die_nmi("BUG: NMI Watchdog detected LOCKUP",
				regs, panic_on_timeout);
	} else {
		__get_cpu_var(last_irq_sum) = sum;
		local_set(&__get_cpu_var(alert_counter), 0);
	}

	/* see if the nmi watchdog went off */
	if (!__get_cpu_var(wd_enabled))
		return rc;
	switch (nmi_watchdog) {
	case NMI_LOCAL_APIC:
		rc |= lapic_wd_event(nmi_hz);
		break;
	case NMI_IO_APIC:
		/*
		 * We don't know how to accurately check for this.
		 * Just assume it was a watchdog timer interrupt;
		 * this matches the old behaviour.
		 */
		rc = 1;
		break;
	}
	return rc;
}
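
/*
 * Worked example for the lockup window above: the alert counter is
 * bumped once per watchdog NMI, so a threshold of 5 * nmi_hz ticks is
 * roughly five seconds of unchanged timer-IRQ counts regardless of the
 * NMI rate. After the self-test lowers nmi_hz via lapic_adjust_nmi_hz()
 * (typically down to 1, an assumption about the lapic driver), that is
 * just 5 NMIs; at the boot-time rate of HZ it would be 5 * HZ NMIs over
 * the same five seconds.
 */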

#ifdef CONFIG_SYSCTL

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1); /* Always panic here */
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
		printk(KERN_WARNING
			"NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

#endif /* CONFIG_SYSCTL */

int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}
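
/*
 * Usage note (an assumption about callers outside this file): the arch
 * headers of this era wrap the function above as
 * trigger_all_cpu_backtrace(), used for example by the sysrq 'l'
 * handler; each CPU then prints its stack from the backtrace_mask
 * check in nmi_watchdog_tick().
 */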