xref: /openbmc/linux/arch/arm/kernel/perf_event_v6.c (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
/*
 * ARMv6 Performance counter handling code.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 *
 * ARMv6 has 2 configurable performance counters and a single cycle counter.
 * They all share a single reset bit but can be written to zero, so we can
 * use that for a reset.
 *
 * The counters can't be individually enabled or disabled, so when we remove
 * one event and replace it with another we could get spurious counts from the
 * wrong event. However, we can take advantage of the fact that the
 * performance counters can export events to the event bus, and the event bus
 * itself can be monitored. This requires that we *don't* export the events to
 * the event bus. The procedure for disabling a configurable counter is:
 *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
 *	  effectively stops the counter from counting.
 *	- disable the counter's interrupt generation (each counter has its
 *	  own interrupt enable bit).
 * Once stopped, the counter value can be written as 0 to reset it.
 *
 * To enable a counter:
 *	- enable the counter's interrupt generation.
 *	- set the new event type.
 *
 * Note: the dedicated cycle counter only counts cycles and can't be
 * enabled/disabled independently of the others. When we want to disable the
 * cycle counter, we just have to disable the interrupt reporting and start
 * ignoring that counter. When re-enabling, we have to reset the value and
 * enable the interrupt.
 */
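
/*
 * A minimal sketch of the disable sequence described above, written in
 * terms of the ARMV6_PMCR_* definitions further down (counter 0 shown,
 * locking omitted; the real implementation is armv6pmu_disable_event()):
 *
 *	val  = armv6_pmcr_read();
 *	val &= ~(ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK);
 *	val |= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
 *	armv6_pmcr_write(val);
 */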

#ifdef CONFIG_CPU_V6
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	    = 0x0,
	ARMV6_PERFCTR_IBUF_STALL	    = 0x1,
	ARMV6_PERFCTR_DDEP_STALL	    = 0x2,
	ARMV6_PERFCTR_ITLB_MISS		    = 0x3,
	ARMV6_PERFCTR_DTLB_MISS		    = 0x4,
	ARMV6_PERFCTR_BR_EXEC		    = 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT	    = 0x6,
	ARMV6_PERFCTR_INSTR_EXEC	    = 0x7,
	ARMV6_PERFCTR_DCACHE_HIT	    = 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS	    = 0xA,
	ARMV6_PERFCTR_DCACHE_MISS	    = 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK	    = 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE	    = 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS	    = 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS	    = 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	    = 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	    = 0x12,
	ARMV6_PERFCTR_CPU_CYCLES	    = 0xFF,
	ARMV6_PERFCTR_NOP		    = 0x20,
};

enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 1,
	ARMV6_COUNTER0,
	ARMV6_COUNTER1,
};

/*
 * The hardware events that we support. We do support cache operations, but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
};
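
/*
 * Example (illustrative): a generic PERF_COUNT_HW_BRANCH_INSTRUCTIONS event
 * is translated through this table to ARMV6_PERFCTR_BR_EXEC (0x5), which
 * armv6pmu_enable_event() then programs into one of the two event-select
 * fields of the PMCR.
 */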

static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					  [PERF_COUNT_HW_CACHE_OP_MAX]
					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		/*
		 * The performance counters don't differentiate between read
		 * and write accesses/misses, so this isn't strictly correct,
		 * but it's the best we can do. Writes and reads get
		 * combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * The ARM performance counters can count micro DTLB misses,
		 * micro ITLB misses and main TLB misses. There isn't an event
		 * for TLB misses, so use the micro misses here; if users want
		 * the main TLB misses they can use a raw counter (see the
		 * note after this table).
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
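
/*
 * Note (usage sketch): main TLB misses are not represented in the cache map
 * above. With .raw_event_mask set to 0xFF they can instead be requested as a
 * raw event, e.g. "perf stat -e r0f" for ARMV6_PERFCTR_MAIN_TLB_MISS (0xF).
 */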

enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS	    = 0x0,
	ARMV6MPCORE_PERFCTR_IBUF_STALL	    = 0x1,
	ARMV6MPCORE_PERFCTR_DDEP_STALL	    = 0x2,
	ARMV6MPCORE_PERFCTR_ITLB_MISS	    = 0x3,
	ARMV6MPCORE_PERFCTR_DTLB_MISS	    = 0x4,
	ARMV6MPCORE_PERFCTR_BR_EXEC	    = 0x5,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC	    = 0x8,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES	    = 0xFF,
};

/*
 * The hardware events that we support. We do support cache operations, but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
};

static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  =
				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
			[C(RESULT_MISS)]    =
				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  =
				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
			[C(RESULT_MISS)]    =
				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * The ARM performance counters can count micro DTLB misses,
		 * micro ITLB misses and main TLB misses. There isn't an event
		 * for TLB misses, so use the micro misses here; if users want
		 * the main TLB misses they can use a raw counter.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
		},
	},
};
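
/*
 * The ARMv6 PMU is controlled through a single Performance Monitor Control
 * Register (PMCR), accessed below as CP15 c15, c12, 0. Its field layout is
 * described by the ARMV6_PMCR_* definitions that follow.
 */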
static inline unsigned long
armv6_pmcr_read(void)
{
	u32 val;
	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
	return val;
}

static inline void
armv6_pmcr_write(unsigned long val)
{
	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
}

#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
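
/*
 * PMCR field layout, as encoded by the definitions above (bits 7, 11 and
 * [31:28] are not covered by these definitions):
 *
 *	[27:20]	EVT_COUNT0 (event selection for counter 0)
 *	[19:12]	EVT_COUNT1 (event selection for counter 1)
 *	[10]	CCOUNT_OVERFLOW	  [9] COUNT1_OVERFLOW	[8] COUNT0_OVERFLOW
 *	[6]	CCOUNT_IEN	  [5] COUNT1_IEN	[4] COUNT0_IEN
 *	[3]	CCOUNT_DIV	  [2] CCOUNT_RESET	[1] CTR01_RESET
 *	[0]	ENABLE
 */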

#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
	 ARMV6_PMCR_CCOUNT_OVERFLOW)

static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
}

static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
				  enum armv6_counters counter)
{
	int ret = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
	else if (ARMV6_COUNTER0 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
	else if (ARMV6_COUNTER1 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return ret;
}
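
/*
 * The counter values themselves are read and written through CP15 c15, c12
 * with opcode2 1 (cycle counter), 2 (counter 0) and 3 (counter 1), as the
 * accessors below show.
 */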
static inline u32
armv6pmu_read_counter(int counter)
{
	unsigned long value = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return value;
}

static inline void
armv6pmu_write_counter(int counter,
		       u32 value)
{
	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}

static void
armv6pmu_enable_event(struct hw_perf_event *hwc,
		      int idx)
{
	unsigned long val, mask, evt, flags;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask	= 0;
		evt	= ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
			  ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
			  ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the event
	 * that we're interested in.
	 */
	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);
}

static irqreturn_t
armv6pmu_handle_irq(int irq_num,
		    void *dev)
{
	unsigned long pmcr = armv6_pmcr_read();
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!armv6_pmcr_has_overflowed(pmcr))
		return IRQ_NONE;

	regs = get_irq_regs();

	/*
	 * The interrupts are cleared by writing the overflow flags back to
	 * the control register. None of the other bits have any effect when
	 * rewritten, so write the whole value back.
	 */
	armv6_pmcr_write(pmcr);

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

static void
armv6pmu_start(void)
{
	unsigned long flags, val;

	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val |= ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
armv6pmu_stop(void)
{
	unsigned long flags, val;

	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);
}

static int
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
		       struct hw_perf_event *event)
{
	/* Always place a cycle-counting event on the cycle counter. */
	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return ARMV6_CYCLE_COUNTER;
	} else {
		/*
		 * For anything other than a cycle counter, try to use
		 * counter0 and counter1.
		 */
		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
			return ARMV6_COUNTER1;

		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
			return ARMV6_COUNTER0;

		/* The counters are all in use. */
		return -EAGAIN;
	}
}

static void
armv6pmu_disable_event(struct hw_perf_event *hwc,
		       int idx)
{
	unsigned long val, mask, evt, flags;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask	= ARMV6_PMCR_CCOUNT_IEN;
		evt	= 0;
	} else if (ARMV6_COUNTER0 == idx) {
		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
	} else if (ARMV6_COUNTER1 == idx) {
		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the number
	 * of ETM bus signal assertion cycles. The external reporting should
	 * be disabled and so this should never increment.
	 */
	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
			      int idx)
{
	unsigned long val, mask, flags, evt = 0;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask	= ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask	= ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask	= ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
	 * simply disable the interrupt reporting.
	 */
	raw_spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	raw_spin_unlock_irqrestore(&pmu_lock, flags);
}

static const struct arm_pmu armv6pmu = {
	.id			= ARM_PERF_PMU_ID_V6,
	.name			= "v6",
	.handle_irq		= armv6pmu_handle_irq,
	.enable			= armv6pmu_enable_event,
	.disable		= armv6pmu_disable_event,
	.read_counter		= armv6pmu_read_counter,
	.write_counter		= armv6pmu_write_counter,
	.get_event_idx		= armv6pmu_get_event_idx,
	.start			= armv6pmu_start,
	.stop			= armv6pmu_stop,
	.cache_map		= &armv6_perf_cache_map,
	.event_map		= &armv6_perf_map,
	.raw_event_mask		= 0xFF,
	.num_events		= 3,
	.max_period		= (1LLU << 32) - 1,
};

static const struct arm_pmu *__init armv6pmu_init(void)
{
	return &armv6pmu;
}

/*
 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
 * that some of the events have different enumerations and that there is no
 * *hack* to stop the programmable counters. To stop the counters we simply
 * disable the interrupt reporting and update the event. When unthrottling we
 * reset the period and enable the interrupt reporting.
 */
static const struct arm_pmu armv6mpcore_pmu = {
	.id			= ARM_PERF_PMU_ID_V6MP,
	.name			= "v6mpcore",
	.handle_irq		= armv6pmu_handle_irq,
	.enable			= armv6pmu_enable_event,
	.disable		= armv6mpcore_pmu_disable_event,
	.read_counter		= armv6pmu_read_counter,
	.write_counter		= armv6pmu_write_counter,
	.get_event_idx		= armv6pmu_get_event_idx,
	.start			= armv6pmu_start,
	.stop			= armv6pmu_stop,
	.cache_map		= &armv6mpcore_perf_cache_map,
	.event_map		= &armv6mpcore_perf_map,
	.raw_event_mask		= 0xFF,
	.num_events		= 3,
	.max_period		= (1LLU << 32) - 1,
};

static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
	return &armv6mpcore_pmu;
}
#else
static const struct arm_pmu *__init armv6pmu_init(void)
{
	return NULL;
}

static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
	return NULL;
}
#endif	/* CONFIG_CPU_V6 */
673