/*
 * Local APIC based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/apic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);
	void (*unreserve)(void);
	int (*setup)(unsigned nmi_hz);
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;
	unsigned evntsel;
	u64 checkbit;
};

static const struct wd_ops *wd_ops;

/*
 * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved by
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, perfctr_nmi_owner));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, perfctr_nmi_owner));
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);

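/*
 * Illustrative only (not part of this file's logic): a subsystem such as
 * oprofile that wants to drive a counter itself is expected to coordinate
 * with the bitmaps above roughly like this, releasing in reverse order on
 * failure. The K7 MSR names are just an example choice:
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return 0;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return 0;
 *	}
 *	... program the event select and counter ...
 *	return 1;
 */
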
void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

	if (wd_ops)
		wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31-bit values, and
	 * bit 31 must be set so that bits 32..63 sign-extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

static void write_watchdog_counter(unsigned int perfctr_msr,
				   const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
				     const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		pr_debug("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}

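/*
 * Worked example (numbers are illustrative): the counter is programmed with
 * -(cpu_khz * 1000 / nmi_hz), so it overflows and raises an NMI nmi_hz times
 * per second. On a hypothetical 3 GHz CPU (cpu_khz = 3000000) with only
 * 31 usable counter bits, 3e9 cycles exceeds 0x7fffffff, so
 * adjust_for_32bit_ctr() raises nmi_hz to 3e9 / 0x7fffffff + 1 = 2 and the
 * watchdog ticks twice a second instead of once.
 */
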
/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety
 */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_k7_watchdog,
	.rearm		= single_msr_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_K7_PERFCTR0,
	.evntsel	= MSR_K7_EVNTSEL0,
	.checkbit	= 1ULL << 47,
};

/*
 * Intel Model 6 (PPro+, P2, P3, P-M, Core1)
 */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

	/* initialize the wd struct before enabling */
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	return 1;
}

static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6 based Pentium M needs to re-unmask
	 * the apic vector but it doesn't hurt
	 * other P6 variants.
	 * ArchPerfmon/Core Duo also needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	/* P6/ARCH_PERFMON has 32 bit counter writes */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_p6_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_P6_PERFCTR0,
	.evntsel	= MSR_P6_EVNTSEL0,
	.checkbit	= 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */

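/*
 * Background note: each P4 counter is driven by a pair of control MSRs
 * rather than a single event select. An ESCR (event selection control
 * register) picks the event and the privilege levels, while the CCCR
 * (counter configuration control register) bound to the counter selects
 * which ESCR to listen to, sets the compare/threshold logic and carries the
 * enable and overflow (OVF) bits. The watchdog below therefore has to
 * program an ESCR, a CCCR and the counter itself.
 */
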
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

#define P4_CONTROLS 18
static unsigned int p4_controls[18] = {
	MSR_P4_BPU_CCCR0,
	MSR_P4_BPU_CCCR1,
	MSR_P4_BPU_CCCR2,
	MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR0,
	MSR_P4_MS_CCCR1,
	MSR_P4_MS_CCCR2,
	MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR0,
	MSR_P4_FLAME_CCCR1,
	MSR_P4_FLAME_CCCR2,
	MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,
	MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,
	MSR_P4_IQ_CCCR3,
	MSR_P4_IQ_CCCR4,
	MSR_P4_IQ_CCCR5,
};

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */

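/*
 * How the "clock" trick above works (paraphrasing the manual reference):
 * with P4_CCCR_COMPARE set the counter only advances on cycles where the
 * event count passes the threshold test, and P4_CCCR_COMPLEMENT inverts
 * that test to "count <= threshold". With the threshold at its maximum of
 * 15 the test holds on every cycle regardless of what CRU_ESCR0 actually
 * counts, so the counter increments once per cycle and overflows after the
 * programmed number of cycles.
 */
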
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/*
	 * performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

		/*
		 * If we're on the kdump kernel or other situation, we may
		 * still have other performance counter registers set to
		 * interrupt and they'll keep interrupting forever because
		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
		 * pending interrupts and disable all the registers here,
		 * before reenabling the NMI delivery. Refer to p4_rearm()
		 * about the P4_CCCR_OVF quirk.
		 */
		if (reset_devices) {
			unsigned int low, high;
			int i;

			for (i = 0; i < P4_CONTROLS; i++) {
				rdmsr(p4_controls[i], low, high);
				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
				wrmsr(p4_controls[i], low, high);
			}
		}
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;

		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
			cccr_val = P4_CCCR_OVF_PMI0;
		else
			cccr_val = P4_CCCR_OVF_PMI1;
		cccr_val |= P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;

	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve	= p4_reserve,
	.unreserve	= p4_unreserve,
	.setup		= setup_p4_watchdog,
	.rearm		= p4_rearm,
	.stop		= stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr	= MSR_P4_BPU_PERFCTR0,
	.evntsel	= MSR_P4_BSU_ESCR0,
	.checkbit	= 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */

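/*
 * Background note: architected perfmon is enumerated through CPUID leaf 0xA.
 * EAX reports the version, the number of general purpose counters and their
 * bit width; EBX is a bit vector where a *clear* bit means the corresponding
 * architectural event (unhalted core cycles is index 0) is supported.
 * setup_intel_arch_watchdog() below relies on exactly this to decide whether
 * the unhalted-core-cycles event can be used, and to size checkbit from the
 * reported counter width.
 */
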
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * the Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates the event is present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */

	/* ok, everything is initialized, announce that we're set */
	cpu_nmi_set_wd_enabled();

	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve	= single_msr_reserve,
	.unreserve	= single_msr_unreserve,
	.setup		= setup_intel_arch_watchdog,
	.rearm		= p6_rearm,
	.stop		= single_msr_stop_watchdog,
	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
};

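/*
 * Pick the wd_ops implementation for the boot CPU. Preference is given to
 * the architected perfmon interface when CPUID advertises it; otherwise we
 * fall back to the model specific P6/P4 code, and on AMD the K7 interface
 * covers families 6, 15 and 16. Leaving wd_ops NULL means there is no
 * supported perfctr watchdog on this CPU.
 */
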
static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/*
		 * Work around Core Duo (Yonah) errata AE49 where perfctr1
		 * doesn't have a working enable bit.
		 */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 13)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

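/*
 * Rough flow, for reference: nmi.c calls lapic_watchdog_init() on each CPU;
 * the first call probes and reserves the counters, and every call programs
 * that CPU's counter via wd_ops->setup(). Each perfctr NMI then lands in
 * lapic_wd_event(): the counter was loaded with a negative value, so while
 * it has not overflowed yet its top (check) bit is still set and the NMI is
 * not ours; once it overflows the bit clears, we rearm the counter for the
 * next period and report the event as handled.
 */
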
int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
			raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

int lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit) /* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}

int lapic_watchdog_ok(void)
{
	return wd_ops != NULL;
}