/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/genapic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);
	void (*unreserve)(void);
	int (*setup)(unsigned nmi_hz);
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;
	unsigned evntsel;
	u64 checkbit;
};

static const struct wd_ops *wd_ops;

/*
 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved by
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_PERFCTR0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_PERFCTR0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_PERFCTR0;
		case 15:
			return msr - MSR_P4_BPU_PERFCTR0;
		}
	}
	return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_EVNTSEL0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_EVENTSEL0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_EVNTSEL0;
		case 15:
			return msr - MSR_P4_BSU_ESCR0;
		}
	}
	return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}
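/*
 * Example (illustrative only): on AMD K7/K8 the mapping above is simply the
 * counter index, so
 *
 *	nmi_perfctr_msr_to_bit(MSR_K7_PERFCTR0 + 2) == 2
 *
 * and bit 2 of perfctr_nmi_owner guards the third performance counter.  The
 * evntsel bitmap works the same way for the matching event select MSRs.
 */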
/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);

void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

	if (wd_ops)
		wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31-bit values; bit 31
	 * must be set so that bits 32 and up sign extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;

		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

static void write_watchdog_counter(unsigned int perfctr_msr,
				   const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
				     const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
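/*
 * Rough worked example (illustrative): with cpu_khz == 3000000 (a 3 GHz CPU)
 * and nmi_hz == 1, the counter would have to be programmed to -3,000,000,000,
 * which does not fit in 31 bits.  adjust_for_32bit_ctr() then picks
 * 3000000000 / 0x7fffffff + 1 == 2, so the counter is written with
 * -1,500,000,000 and the watchdog ticks twice per second instead.
 */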
/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety
 */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_k7_watchdog,
	.rearm		= single_msr_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_K7_PERFCTR0,
	.evntsel	= MSR_K7_EVNTSEL0,
	.checkbit	= 1ULL << 47,
};
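/*
 * For reference (illustrative arithmetic): the event select value programmed
 * by setup_k7_watchdog() works out to 0x76 | (1 << 16) | (1 << 17) | (1 << 20)
 * == 0x130076, counting "cycles processor is running" in both user and kernel
 * mode with the interrupt (NMI) bit set.  The ENABLE bit (1 << 22) is only
 * OR-ed in after cpu_nmi_set_wd_enabled() and the LVTPC unmask, so the counter
 * cannot fire before the control block is valid.
 */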
/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6 based Pentium M needs to re-unmask
	 * the apic vector but it doesn't hurt
	 * other P6 variants.
	 * ArchPerfmon/Core Duo also needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON has 32 bit counter writes */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_p6_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_P6_PERFCTR0,
	.evntsel	= MSR_P6_EVNTSEL0,
	.checkbit	= 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

#define P4_CONTROLS	18
static unsigned int p4_controls[18] = {
	MSR_P4_BPU_CCCR0,
	MSR_P4_BPU_CCCR1,
	MSR_P4_BPU_CCCR2,
	MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR0,
	MSR_P4_MS_CCCR1,
	MSR_P4_MS_CCCR2,
	MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR0,
	MSR_P4_FLAME_CCCR1,
	MSR_P4_FLAME_CCCR2,
	MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,
	MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,
	MSR_P4_IQ_CCCR3,
	MSR_P4_IQ_CCCR4,
	MSR_P4_IQ_CCCR5,
};
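/*
 * The table above lists the P4 CCCRs so that setup_p4_watchdog() can walk
 * them all when reset_devices is set (e.g. in a kdump kernel) and clear any
 * stale ENABLE/OVF bits left by the previous kernel before NMI delivery is
 * re-enabled; see the P4_CCCR_OVF quirk described in p4_rearm().
 */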
/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/*
	 * performance counters are shared resources;
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

		/*
		 * If we're in a kdump kernel or a similar situation, other
		 * performance counter registers may still be set up to
		 * interrupt, and they will keep interrupting forever because
		 * of the P4_CCCR_OVF quirk. So ACK all pending interrupts
		 * and disable all the registers here, before re-enabling
		 * NMI delivery. See p4_rearm() for the P4_CCCR_OVF quirk.
		 */
		if (reset_devices) {
			unsigned int low, high;
			int i;

			for (i = 0; i < P4_CONTROLS; i++) {
				rdmsr(p4_controls[i], low, high);
				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
				wrmsr(p4_controls[i], low, high);
			}
		}
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;

		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
			cccr_val = P4_CCCR_OVF_PMI0;
		else
			cccr_val = P4_CCCR_OVF_PMI1;
		cccr_val |= P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}
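/*
 * On a hyperthreaded P4 both siblings run the watchdog, one on IQ_PERFCTR0
 * and one on IQ_PERFCTR1 (see setup_p4_watchdog() above), so the reservation
 * below grabs both counters whenever more than one sibling is present.
 */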
static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve	= p4_reserve,
	.unreserve	= p4_unreserve,
	.setup		= setup_p4_watchdog,
	.rearm		= p4_rearm,
	.stop		= stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr	= MSR_P4_BPU_PERFCTR0,
	.evntsel	= MSR_P4_BSU_ESCR0,
	.checkbit	= 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;
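/*
 * Unlike the other wd_ops, intel_arch_wd_ops is not const: checkbit is
 * filled in at runtime from the counter width reported by CPUID in
 * setup_intel_arch_watchdog(), and probe_nmi_watchdog() may redirect
 * perfctr/evntsel to counter 0 to work around the Core Duo AE49 erratum.
 */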
static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_intel_arch_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/*
		 * Work around Core Duo (Yonah) errata AE49 where perfctr1
		 * doesn't have a working enable bit.
		 */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 13)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
			raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}
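/*
 * Called from the NMI handler in nmi.c on every watchdog NMI.  Roughly: if
 * the sign/overflow bit (wd_ops->checkbit) is still set the counter has not
 * wrapped yet, so the NMI was not ours and 0 is returned; otherwise the
 * counter is rearmed for the next period and 1 tells the caller the watchdog
 * generated this tick.
 */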
int __kprobes lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit) /* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}

int lapic_watchdog_ok(void)
{
	return wd_ops != NULL;
}