xref: /openbmc/linux/arch/arm/kernel/perf_event_v6.c (revision 05bcf503)
1 /*
2  * ARMv6 Performance counter handling code.
3  *
4  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5  *
6  * ARMv6 has 2 configurable performance counters and a single cycle counter.
7  * They all share a single reset bit but can be written to zero so we can use
8  * that for a reset.
9  *
10  * The counters can't be individually enabled or disabled so when we remove
11  * one event and replace it with another we could get spurious counts from the
12  * wrong event. However, we can take advantage of the fact that the
13  * performance counters can export events to the event bus, and the event bus
14  * itself can be monitored. This requires that we *don't* export the events to
15  * the event bus. The procedure for disabling a configurable counter is:
16  *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
17  *	  effectively stops the counter from counting.
18  *	- disable the counter's interrupt generation (each counter has its
19  *	  own interrupt enable bit).
20  * Once stopped, the counter value can be written as 0 to reset.
21  *
22  * To enable a counter:
23  *	- enable the counter's interrupt generation.
24  *	- set the new event type.
25  *
26  * Note: the dedicated cycle counter only counts cycles and can't be
27  * enabled/disabled independently of the others. When we want to disable the
28  * cycle counter, we have to just disable the interrupt reporting and start
29  * ignoring that counter. When re-enabling, we have to reset the value and
30  * enable the interrupt.
31  */
32 
33 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
/*
 * Event numbers for the single-core ARMv6 PMU, listed in ascending order.
 * These are the values that end up in hw_perf_event::config_base and are
 * shifted into the PMCR event-select fields when a counter is enabled.
 */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x00,
	ARMV6_PERFCTR_IBUF_STALL	= 0x01,
	ARMV6_PERFCTR_DDEP_STALL	= 0x02,
	ARMV6_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6_PERFCTR_BR_EXEC		= 0x05,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x06,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x07,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x09,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0x0A,
	ARMV6_PERFCTR_DCACHE_MISS	= 0x0B,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0x0C,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0x0D,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0x0F,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	/* ETMEXTOUT[0] signal: selected to park a counter so it stops counting. */
	ARMV6_PERFCTR_NOP		= 0x20,
	/* Events with this number are always placed on the cycle counter. */
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
};
55 
/*
 * Counter indices used throughout this file: the dedicated cycle counter
 * comes first, followed by the two configurable counters.
 */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER	= 0,
	ARMV6_COUNTER0		= 1,
	ARMV6_COUNTER1		= 2,
};
61 
62 /*
63  * The hardware events that we support. We do support cache operations but
64  * we have harvard caches and no way to combine instruction and data
65  * accesses/misses in hardware.
66  */
67 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
69 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
70 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
71 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
72 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
73 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
74 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
75 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6_PERFCTR_IBUF_STALL,
76 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6_PERFCTR_LSU_FULL_STALL,
77 };
78 
79 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
80 					  [PERF_COUNT_HW_CACHE_OP_MAX]
81 					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
82 	[C(L1D)] = {
83 		/*
84 		 * The performance counters don't differentiate between read
85 		 * and write accesses/misses so this isn't strictly correct,
86 		 * but it's the best we can do. Writes and reads get
87 		 * combined.
88 		 */
89 		[C(OP_READ)] = {
90 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
91 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
92 		},
93 		[C(OP_WRITE)] = {
94 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
95 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
96 		},
97 		[C(OP_PREFETCH)] = {
98 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
99 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
100 		},
101 	},
102 	[C(L1I)] = {
103 		[C(OP_READ)] = {
104 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
105 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
106 		},
107 		[C(OP_WRITE)] = {
108 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
109 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
110 		},
111 		[C(OP_PREFETCH)] = {
112 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
113 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
114 		},
115 	},
116 	[C(LL)] = {
117 		[C(OP_READ)] = {
118 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
119 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
120 		},
121 		[C(OP_WRITE)] = {
122 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
123 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
124 		},
125 		[C(OP_PREFETCH)] = {
126 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
127 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
128 		},
129 	},
130 	[C(DTLB)] = {
131 		/*
132 		 * The ARM performance counters can count micro DTLB misses,
133 		 * micro ITLB misses and main TLB misses. There isn't an event
134 		 * for TLB misses, so use the micro misses here and if users
135 		 * want the main TLB misses they can use a raw counter.
136 		 */
137 		[C(OP_READ)] = {
138 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
139 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
140 		},
141 		[C(OP_WRITE)] = {
142 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
143 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
144 		},
145 		[C(OP_PREFETCH)] = {
146 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
147 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
148 		},
149 	},
150 	[C(ITLB)] = {
151 		[C(OP_READ)] = {
152 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
153 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
154 		},
155 		[C(OP_WRITE)] = {
156 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
157 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
158 		},
159 		[C(OP_PREFETCH)] = {
160 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
161 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
162 		},
163 	},
164 	[C(BPU)] = {
165 		[C(OP_READ)] = {
166 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
167 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
168 		},
169 		[C(OP_WRITE)] = {
170 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
171 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
172 		},
173 		[C(OP_PREFETCH)] = {
174 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
175 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
176 		},
177 	},
178 	[C(NODE)] = {
179 		[C(OP_READ)] = {
180 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
181 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
182 		},
183 		[C(OP_WRITE)] = {
184 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
185 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
186 		},
187 		[C(OP_PREFETCH)] = {
188 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
189 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
190 		},
191 	},
192 };
193 
/*
 * Event numbers for the ARMv6 MPCore PMU. Several events use different
 * numbers from the single-core ARMv6 enumeration, and data cache
 * accesses/misses are split into separate read and write events.
 */
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS		= 0x00,
	ARMV6MPCORE_PERFCTR_IBUF_STALL		= 0x01,
	ARMV6MPCORE_PERFCTR_DDEP_STALL		= 0x02,
	ARMV6MPCORE_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6MPCORE_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6MPCORE_PERFCTR_BR_EXEC		= 0x05,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT	= 0x06,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT	= 0x07,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC		= 0x08,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS	= 0x0A,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS	= 0x0B,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS	= 0x0C,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS	= 0x0D,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION	= 0x0E,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE	= 0x0F,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS	= 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS	= 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL	= 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED	= 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES		= 0xFF,
};
216 
217 /*
218  * The hardware events that we support. We do support cache operations but
219  * we have harvard caches and no way to combine instruction and data
220  * accesses/misses in hardware.
221  */
222 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
223 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6MPCORE_PERFCTR_CPU_CYCLES,
224 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6MPCORE_PERFCTR_INSTR_EXEC,
225 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
226 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
227 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6MPCORE_PERFCTR_BR_EXEC,
228 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
229 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
230 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6MPCORE_PERFCTR_IBUF_STALL,
231 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
232 };
233 
234 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
235 					[PERF_COUNT_HW_CACHE_OP_MAX]
236 					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
237 	[C(L1D)] = {
238 		[C(OP_READ)] = {
239 			[C(RESULT_ACCESS)]  =
240 				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
241 			[C(RESULT_MISS)]    =
242 				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
243 		},
244 		[C(OP_WRITE)] = {
245 			[C(RESULT_ACCESS)]  =
246 				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
247 			[C(RESULT_MISS)]    =
248 				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
249 		},
250 		[C(OP_PREFETCH)] = {
251 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
252 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
253 		},
254 	},
255 	[C(L1I)] = {
256 		[C(OP_READ)] = {
257 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
258 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
259 		},
260 		[C(OP_WRITE)] = {
261 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
262 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
263 		},
264 		[C(OP_PREFETCH)] = {
265 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
266 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
267 		},
268 	},
269 	[C(LL)] = {
270 		[C(OP_READ)] = {
271 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
272 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
273 		},
274 		[C(OP_WRITE)] = {
275 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
276 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
277 		},
278 		[C(OP_PREFETCH)] = {
279 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
280 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
281 		},
282 	},
283 	[C(DTLB)] = {
284 		/*
285 		 * The ARM performance counters can count micro DTLB misses,
286 		 * micro ITLB misses and main TLB misses. There isn't an event
287 		 * for TLB misses, so use the micro misses here and if users
288 		 * want the main TLB misses they can use a raw counter.
289 		 */
290 		[C(OP_READ)] = {
291 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
292 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
293 		},
294 		[C(OP_WRITE)] = {
295 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
296 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
297 		},
298 		[C(OP_PREFETCH)] = {
299 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
300 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
301 		},
302 	},
303 	[C(ITLB)] = {
304 		[C(OP_READ)] = {
305 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
306 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
307 		},
308 		[C(OP_WRITE)] = {
309 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
310 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
311 		},
312 		[C(OP_PREFETCH)] = {
313 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
314 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
315 		},
316 	},
317 	[C(BPU)] = {
318 		[C(OP_READ)] = {
319 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
320 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
321 		},
322 		[C(OP_WRITE)] = {
323 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
324 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
325 		},
326 		[C(OP_PREFETCH)] = {
327 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
328 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
329 		},
330 	},
331 	[C(NODE)] = {
332 		[C(OP_READ)] = {
333 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
334 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
335 		},
336 		[C(OP_WRITE)] = {
337 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
338 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
339 		},
340 		[C(OP_PREFETCH)] = {
341 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
342 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
343 		},
344 	},
345 };
346 
347 static inline unsigned long
348 armv6_pmcr_read(void)
349 {
350 	u32 val;
351 	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
352 	return val;
353 }
354 
355 static inline void
356 armv6_pmcr_write(unsigned long val)
357 {
358 	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
359 }
360 
/* PMCR bit 0: set by armv6pmu_start(), cleared by armv6pmu_stop(). */
#define ARMV6_PMCR_ENABLE		(1 << 0)

/* PMCR reset and cycle-counter divider bits. */
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)

/* Per-counter interrupt enable bits. */
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)

/* Per-counter overflow flags, and the mask covering all three of them. */
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)

#define ARMV6_PMCR_OVERFLOWED_MASK	\
	(ARMV6_PMCR_COUNT0_OVERFLOW |	\
	 ARMV6_PMCR_COUNT1_OVERFLOW |	\
	 ARMV6_PMCR_CCOUNT_OVERFLOW)

/* Eight-bit event-select fields for the two configurable counters. */
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
379 
380 static inline int
381 armv6_pmcr_has_overflowed(unsigned long pmcr)
382 {
383 	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
384 }
385 
386 static inline int
387 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
388 				  enum armv6_counters counter)
389 {
390 	int ret = 0;
391 
392 	if (ARMV6_CYCLE_COUNTER == counter)
393 		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
394 	else if (ARMV6_COUNTER0 == counter)
395 		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
396 	else if (ARMV6_COUNTER1 == counter)
397 		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
398 	else
399 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
400 
401 	return ret;
402 }
403 
404 static inline u32
405 armv6pmu_read_counter(int counter)
406 {
407 	unsigned long value = 0;
408 
409 	if (ARMV6_CYCLE_COUNTER == counter)
410 		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
411 	else if (ARMV6_COUNTER0 == counter)
412 		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
413 	else if (ARMV6_COUNTER1 == counter)
414 		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
415 	else
416 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
417 
418 	return value;
419 }
420 
421 static inline void
422 armv6pmu_write_counter(int counter,
423 		       u32 value)
424 {
425 	if (ARMV6_CYCLE_COUNTER == counter)
426 		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
427 	else if (ARMV6_COUNTER0 == counter)
428 		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
429 	else if (ARMV6_COUNTER1 == counter)
430 		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
431 	else
432 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
433 }
434 
435 static void
436 armv6pmu_enable_event(struct hw_perf_event *hwc,
437 		      int idx)
438 {
439 	unsigned long val, mask, evt, flags;
440 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
441 
442 	if (ARMV6_CYCLE_COUNTER == idx) {
443 		mask	= 0;
444 		evt	= ARMV6_PMCR_CCOUNT_IEN;
445 	} else if (ARMV6_COUNTER0 == idx) {
446 		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
447 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
448 			  ARMV6_PMCR_COUNT0_IEN;
449 	} else if (ARMV6_COUNTER1 == idx) {
450 		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
451 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
452 			  ARMV6_PMCR_COUNT1_IEN;
453 	} else {
454 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
455 		return;
456 	}
457 
458 	/*
459 	 * Mask out the current event and set the counter to count the event
460 	 * that we're interested in.
461 	 */
462 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
463 	val = armv6_pmcr_read();
464 	val &= ~mask;
465 	val |= evt;
466 	armv6_pmcr_write(val);
467 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
468 }
469 
470 static irqreturn_t
471 armv6pmu_handle_irq(int irq_num,
472 		    void *dev)
473 {
474 	unsigned long pmcr = armv6_pmcr_read();
475 	struct perf_sample_data data;
476 	struct pmu_hw_events *cpuc;
477 	struct pt_regs *regs;
478 	int idx;
479 
480 	if (!armv6_pmcr_has_overflowed(pmcr))
481 		return IRQ_NONE;
482 
483 	regs = get_irq_regs();
484 
485 	/*
486 	 * The interrupts are cleared by writing the overflow flags back to
487 	 * the control register. All of the other bits don't have any effect
488 	 * if they are rewritten, so write the whole value back.
489 	 */
490 	armv6_pmcr_write(pmcr);
491 
492 	cpuc = &__get_cpu_var(cpu_hw_events);
493 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
494 		struct perf_event *event = cpuc->events[idx];
495 		struct hw_perf_event *hwc;
496 
497 		/* Ignore if we don't have an event. */
498 		if (!event)
499 			continue;
500 
501 		/*
502 		 * We have a single interrupt for all counters. Check that
503 		 * each counter has overflowed before we process it.
504 		 */
505 		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
506 			continue;
507 
508 		hwc = &event->hw;
509 		armpmu_event_update(event, hwc, idx);
510 		perf_sample_data_init(&data, 0, hwc->last_period);
511 		if (!armpmu_event_set_period(event, hwc, idx))
512 			continue;
513 
514 		if (perf_event_overflow(event, &data, regs))
515 			cpu_pmu->disable(hwc, idx);
516 	}
517 
518 	/*
519 	 * Handle the pending perf events.
520 	 *
521 	 * Note: this call *must* be run with interrupts disabled. For
522 	 * platforms that can have the PMU interrupts raised as an NMI, this
523 	 * will not work.
524 	 */
525 	irq_work_run();
526 
527 	return IRQ_HANDLED;
528 }
529 
530 static void
531 armv6pmu_start(void)
532 {
533 	unsigned long flags, val;
534 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
535 
536 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
537 	val = armv6_pmcr_read();
538 	val |= ARMV6_PMCR_ENABLE;
539 	armv6_pmcr_write(val);
540 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
541 }
542 
543 static void
544 armv6pmu_stop(void)
545 {
546 	unsigned long flags, val;
547 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
548 
549 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
550 	val = armv6_pmcr_read();
551 	val &= ~ARMV6_PMCR_ENABLE;
552 	armv6_pmcr_write(val);
553 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
554 }
555 
556 static int
557 armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
558 		       struct hw_perf_event *event)
559 {
560 	/* Always place a cycle counter into the cycle counter. */
561 	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
562 		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
563 			return -EAGAIN;
564 
565 		return ARMV6_CYCLE_COUNTER;
566 	} else {
567 		/*
568 		 * For anything other than a cycle counter, try and use
569 		 * counter0 and counter1.
570 		 */
571 		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
572 			return ARMV6_COUNTER1;
573 
574 		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
575 			return ARMV6_COUNTER0;
576 
577 		/* The counters are all in use. */
578 		return -EAGAIN;
579 	}
580 }
581 
582 static void
583 armv6pmu_disable_event(struct hw_perf_event *hwc,
584 		       int idx)
585 {
586 	unsigned long val, mask, evt, flags;
587 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
588 
589 	if (ARMV6_CYCLE_COUNTER == idx) {
590 		mask	= ARMV6_PMCR_CCOUNT_IEN;
591 		evt	= 0;
592 	} else if (ARMV6_COUNTER0 == idx) {
593 		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
594 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
595 	} else if (ARMV6_COUNTER1 == idx) {
596 		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
597 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
598 	} else {
599 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
600 		return;
601 	}
602 
603 	/*
604 	 * Mask out the current event and set the counter to count the number
605 	 * of ETM bus signal assertion cycles. The external reporting should
606 	 * be disabled and so this should never increment.
607 	 */
608 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
609 	val = armv6_pmcr_read();
610 	val &= ~mask;
611 	val |= evt;
612 	armv6_pmcr_write(val);
613 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
614 }
615 
616 static void
617 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
618 			      int idx)
619 {
620 	unsigned long val, mask, flags, evt = 0;
621 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
622 
623 	if (ARMV6_CYCLE_COUNTER == idx) {
624 		mask	= ARMV6_PMCR_CCOUNT_IEN;
625 	} else if (ARMV6_COUNTER0 == idx) {
626 		mask	= ARMV6_PMCR_COUNT0_IEN;
627 	} else if (ARMV6_COUNTER1 == idx) {
628 		mask	= ARMV6_PMCR_COUNT1_IEN;
629 	} else {
630 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
631 		return;
632 	}
633 
634 	/*
635 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
636 	 * simply disable the interrupt reporting.
637 	 */
638 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
639 	val = armv6_pmcr_read();
640 	val &= ~mask;
641 	val |= evt;
642 	armv6_pmcr_write(val);
643 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
644 }
645 
646 static int armv6_map_event(struct perf_event *event)
647 {
648 	return armpmu_map_event(event, &armv6_perf_map,
649 				&armv6_perf_cache_map, 0xFF);
650 }
651 
652 static struct arm_pmu armv6pmu = {
653 	.name			= "v6",
654 	.handle_irq		= armv6pmu_handle_irq,
655 	.enable			= armv6pmu_enable_event,
656 	.disable		= armv6pmu_disable_event,
657 	.read_counter		= armv6pmu_read_counter,
658 	.write_counter		= armv6pmu_write_counter,
659 	.get_event_idx		= armv6pmu_get_event_idx,
660 	.start			= armv6pmu_start,
661 	.stop			= armv6pmu_stop,
662 	.map_event		= armv6_map_event,
663 	.num_events		= 3,
664 	.max_period		= (1LLU << 32) - 1,
665 };
666 
667 static struct arm_pmu *__devinit armv6pmu_init(void)
668 {
669 	return &armv6pmu;
670 }
671 
672 /*
673  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
674  * that some of the events have different enumerations and that there is no
675  * *hack* to stop the programmable counters. To stop the counters we simply
676  * disable the interrupt reporting and update the event. When unthrottling we
677  * reset the period and enable the interrupt reporting.
678  */
679 
680 static int armv6mpcore_map_event(struct perf_event *event)
681 {
682 	return armpmu_map_event(event, &armv6mpcore_perf_map,
683 				&armv6mpcore_perf_cache_map, 0xFF);
684 }
685 
686 static struct arm_pmu armv6mpcore_pmu = {
687 	.name			= "v6mpcore",
688 	.handle_irq		= armv6pmu_handle_irq,
689 	.enable			= armv6pmu_enable_event,
690 	.disable		= armv6mpcore_pmu_disable_event,
691 	.read_counter		= armv6pmu_read_counter,
692 	.write_counter		= armv6pmu_write_counter,
693 	.get_event_idx		= armv6pmu_get_event_idx,
694 	.start			= armv6pmu_start,
695 	.stop			= armv6pmu_stop,
696 	.map_event		= armv6mpcore_map_event,
697 	.num_events		= 3,
698 	.max_period		= (1LLU << 32) - 1,
699 };
700 
701 static struct arm_pmu *__devinit armv6mpcore_pmu_init(void)
702 {
703 	return &armv6mpcore_pmu;
704 }
705 #else
706 static struct arm_pmu *__devinit armv6pmu_init(void)
707 {
708 	return NULL;
709 }
710 
711 static struct arm_pmu *__devinit armv6mpcore_pmu_init(void)
712 {
713 	return NULL;
714 }
715 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
716