/* Local APIC based NMI watchdog for various CPUs.
   This file also handles reservation of performance counters for coordination
   with other users (like oprofile).

   Note that these events normally don't tick when the CPU idles. This means
   the frequency varies with CPU load.

   Original code for K7/P6 written by Keith Owens */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/apic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);
	void (*unreserve)(void);
	int (*setup)(unsigned nmi_hz);
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;
	unsigned evntsel;
	u64 checkbit;
};

static const struct wd_ops *wd_ops;

/* This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now).
 */
#define NMI_MAX_COUNTER_BITS 66

/* perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * - different performance counters/event selects may be reserved by
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}
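
/*
 * Worked example of the mapping above (MSR values per asm/msr-index.h,
 * shown for illustration): on AMD, MSR_K7_PERFCTR0 is 0xc0010004, so
 * MSR_K7_PERFCTR1 (0xc0010005) maps to bit 1.  On a P4,
 * MSR_P4_IQ_PERFCTR0 (0x30c) is offset 12 from MSR_P4_BPU_PERFCTR0
 * (0x300) and therefore maps to bit 12.
 */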

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}

/* checks availability of a bit (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, perfctr_nmi_owner));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, perfctr_nmi_owner));
}

int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}

EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
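
/*
 * Example of how an external user such as oprofile is expected to use
 * these exports (a sketch, not code from any particular driver):
 * reserve both the counter and its event select, back out on partial
 * failure, and release both when done.
 *
 *	if (!reserve_perfctr_nmi(ctr_msr))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(sel_msr)) {
 *		release_perfctr_nmi(ctr_msr);
 *		return -EBUSY;
 *	}
 *	... program and use the counter ...
 *	release_evntsel_nmi(sel_msr);
 *	release_perfctr_nmi(ctr_msr);
 */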

void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
	wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31-bit values, and
	 * bit 31 must be set so that the value sign extends to a full
	 * negative 64-bit count.  Find an nmi_hz for which the per-period
	 * count fits.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}
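
/*
 * Worked example: on a 3 GHz CPU (cpu_khz == 3000000) with hz == 1,
 * counter_val is 3,000,000,000, which exceeds 0x7fffffff
 * (2,147,483,647), so the function returns
 * 3,000,000,000 / 0x7fffffff + 1 == 2: the watchdog must tick at
 * least twice per second for each period to fit in 31 bits.
 */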

static void
write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
		const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
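
/*
 * Both helpers program the counter with a negative value so that it
 * overflows (raising the PMI as an NMI) after cpu_khz * 1000 / nmi_hz
 * events.  For example, with cpu_khz == 2000000 and nmi_hz == 2 the
 * counter is written as -1,000,000,000; counting unhalted cycles at
 * 2 GHz it overflows roughly twice a second.
 */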

/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
   nicely stable so there is not much variety */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_k7_watchdog,
	.rearm = single_msr_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_K7_PERFCTR0,
	.evntsel = MSR_K7_EVNTSEL0,
	.checkbit = 1ULL << 47,
};

/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	return 1;
}

static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* P6-based Pentium M needs to re-unmask
	 * the apic vector but it doesn't hurt
	 * other P6 variants.
	 * ArchPerfmon/Core Duo also needs this */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* P6/ARCH_PERFMON has 32 bit counter writes */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_p6_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_P6_PERFCTR0,
	.evntsel = MSR_P6_EVNTSEL0,
	.checkbit = 1ULL << 39,
};

/* Intel P4 performance counters. By far the most complicated of all. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
#define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
#define P4_ESCR_OS		(1 << 3)
#define P4_ESCR_USR		(1 << 2)
#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_OVF_PMI1	(1 << 27)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)
#define P4_CCCR_OVF		(1 << 31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
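
/*
 * Concretely, per the SDM section cited above: with P4_CCCR_COMPARE and
 * P4_CCCR_COMPLEMENT set and P4_CCCR_THRESHOLD(15), the CCCR counts
 * every cycle in which the filtered ESCR event count is less than or
 * equal to 15, which is always true, so IQ_COUNTER0 increments once
 * per clock and behaves like a timebase.
 */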

static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources;
	 * assign each hyperthread its own set
	 * (re-using the ESCR0 register seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
 fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	unsigned dummy;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, dummy);
	dummy &= ~P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, dummy);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve = p4_reserve,
	.unreserve = p4_unreserve,
	.setup = setup_p4_watchdog,
	.rearm = p4_rearm,
	.stop = stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr = MSR_P4_BPU_PERFCTR0,
	.evntsel = MSR_P4_BSU_ESCR0,
	.checkbit = 1ULL << 39,
};

/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
   all future Intel CPUs. */

#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * the Unhalted Core Cycles event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates the event is present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/* Work around Core Duo (Yonah) errata AE49 where perfctr1
		   doesn't have a working enable bit. */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops) {
			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
			return -1;
		}

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
		       raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

int lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit) /* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}
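
/*
 * A minimal sketch of the expected caller in nmi.c (assumed shape,
 * shown for illustration only): the per-CPU NMI handler asks
 * lapic_wd_event() whether this NMI came from our counter; a still
 * running counter (checkbit, the counter's top implemented bit, still
 * set while the negative count runs) means it did not, and other NMI
 * sources should be checked instead.
 *
 *	if (lapic_wd_event(nmi_hz)) {
 *		... watchdog tick: check this CPU for lockups ...
 *	} else {
 *		... not ours: handle/report unknown NMI ...
 *	}
 */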

int lapic_watchdog_ok(void)
{
	return wd_ops != NULL;
}