xref: /openbmc/linux/arch/arm/kernel/perf_event_v6.c (revision 8fdff1dc)
1 /*
2  * ARMv6 Performance counter handling code.
3  *
4  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5  *
6  * ARMv6 has 2 configurable performance counters and a single cycle counter.
7  * They all share a single reset bit but can be written to zero so we can use
8  * that for a reset.
9  *
10  * The counters can't be individually enabled or disabled so when we remove
11  * one event and replace it with another we could get spurious counts from the
12  * wrong event. However, we can take advantage of the fact that the
13  * performance counters can export events to the event bus, and the event bus
14  * itself can be monitored. This requires that we *don't* export the events to
15  * the event bus. The procedure for disabling a configurable counter is:
16  *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
17  *	  effectively stops the counter from counting.
18  *	- disable the counter's interrupt generation (each counter has its
19  *	  own interrupt enable bit).
20  * Once stopped, the counter value can be written as 0 to reset.
21  *
22  * To enable a counter:
23  *	- enable the counter's interrupt generation.
24  *	- set the new event type.
25  *
26  * Note: the dedicated cycle counter only counts cycles and can't be
27  * enabled/disabled independently of the others. When we want to disable the
28  * cycle counter, we have to just disable the interrupt reporting and start
29  * ignoring that counter. When re-enabling, we have to reset the value and
30  * enable the interrupt.
31  */
32 
33 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
/*
 * Event numbers for the single-core ARMv6 PMU, listed in numeric order.
 *
 * ARMV6_PERFCTR_NOP (0x20) is the event a programmable counter is switched
 * to when it is being disabled. ARMV6_PERFCTR_CPU_CYCLES (0xff) is the value
 * that selects the dedicated cycle counter when an event is assigned a
 * counter.
 */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x00,
	ARMV6_PERFCTR_IBUF_STALL	= 0x01,
	ARMV6_PERFCTR_DDEP_STALL	= 0x02,
	ARMV6_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6_PERFCTR_BR_EXEC		= 0x05,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x06,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x07,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x09,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0x0a,
	ARMV6_PERFCTR_DCACHE_MISS	= 0x0b,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0x0c,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0x0d,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0x0f,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	ARMV6_PERFCTR_NOP		= 0x20,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xff,
};
55 
/*
 * Counter indices: the dedicated cycle counter comes first, followed by the
 * two programmable event counters.
 */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER	= 0,
	ARMV6_COUNTER0		= 1,
	ARMV6_COUNTER1		= 2,
};
61 
62 /*
63  * The hardware events that we support. We do support cache operations but
64  * we have harvard caches and no way to combine instruction and data
65  * accesses/misses in hardware.
66  */
67 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6_PERFCTR_CPU_CYCLES,
69 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6_PERFCTR_INSTR_EXEC,
70 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
71 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
72 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6_PERFCTR_BR_EXEC,
73 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6_PERFCTR_BR_MISPREDICT,
74 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
75 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6_PERFCTR_IBUF_STALL,
76 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6_PERFCTR_LSU_FULL_STALL,
77 };
78 
79 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
80 					  [PERF_COUNT_HW_CACHE_OP_MAX]
81 					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
82 	[C(L1D)] = {
83 		/*
84 		 * The performance counters don't differentiate between read
85 		 * and write accesses/misses so this isn't strictly correct,
86 		 * but it's the best we can do. Writes and reads get
87 		 * combined.
88 		 */
89 		[C(OP_READ)] = {
90 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
91 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
92 		},
93 		[C(OP_WRITE)] = {
94 			[C(RESULT_ACCESS)]	= ARMV6_PERFCTR_DCACHE_ACCESS,
95 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DCACHE_MISS,
96 		},
97 		[C(OP_PREFETCH)] = {
98 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
99 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
100 		},
101 	},
102 	[C(L1I)] = {
103 		[C(OP_READ)] = {
104 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
105 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
106 		},
107 		[C(OP_WRITE)] = {
108 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
109 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ICACHE_MISS,
110 		},
111 		[C(OP_PREFETCH)] = {
112 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
113 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
114 		},
115 	},
116 	[C(LL)] = {
117 		[C(OP_READ)] = {
118 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
119 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
120 		},
121 		[C(OP_WRITE)] = {
122 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
123 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
124 		},
125 		[C(OP_PREFETCH)] = {
126 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
127 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
128 		},
129 	},
130 	[C(DTLB)] = {
131 		/*
132 		 * The ARM performance counters can count micro DTLB misses,
133 		 * micro ITLB misses and main TLB misses. There isn't an event
134 		 * for TLB misses, so use the micro misses here and if users
135 		 * want the main TLB misses they can use a raw counter.
136 		 */
137 		[C(OP_READ)] = {
138 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
139 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
140 		},
141 		[C(OP_WRITE)] = {
142 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
143 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_DTLB_MISS,
144 		},
145 		[C(OP_PREFETCH)] = {
146 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
147 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
148 		},
149 	},
150 	[C(ITLB)] = {
151 		[C(OP_READ)] = {
152 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
153 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
154 		},
155 		[C(OP_WRITE)] = {
156 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
157 			[C(RESULT_MISS)]	= ARMV6_PERFCTR_ITLB_MISS,
158 		},
159 		[C(OP_PREFETCH)] = {
160 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
161 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
162 		},
163 	},
164 	[C(BPU)] = {
165 		[C(OP_READ)] = {
166 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
167 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
168 		},
169 		[C(OP_WRITE)] = {
170 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
171 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
172 		},
173 		[C(OP_PREFETCH)] = {
174 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
175 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
176 		},
177 	},
178 	[C(NODE)] = {
179 		[C(OP_READ)] = {
180 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
181 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
182 		},
183 		[C(OP_WRITE)] = {
184 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
185 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
186 		},
187 		[C(OP_PREFETCH)] = {
188 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
189 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
190 		},
191 	},
192 };
193 
/*
 * Event numbers for the ARMv6 MPCore PMU. Several events carry different
 * numbers from their single-core counterparts in enum armv6_perf_types, and
 * data cache accesses/misses are split into read and write events.
 */
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS		= 0x00,
	ARMV6MPCORE_PERFCTR_IBUF_STALL		= 0x01,
	ARMV6MPCORE_PERFCTR_DDEP_STALL		= 0x02,
	ARMV6MPCORE_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6MPCORE_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6MPCORE_PERFCTR_BR_EXEC		= 0x05,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT	= 0x06,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT	= 0x07,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC		= 0x08,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS	= 0x0a,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS	= 0x0b,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS	= 0x0c,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS	= 0x0d,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION	= 0x0e,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE	= 0x0f,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS	= 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS	= 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL	= 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED	= 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES		= 0xff,
};
216 
217 /*
218  * The hardware events that we support. We do support cache operations but
219  * we have harvard caches and no way to combine instruction and data
220  * accesses/misses in hardware.
221  */
222 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
223 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV6MPCORE_PERFCTR_CPU_CYCLES,
224 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV6MPCORE_PERFCTR_INSTR_EXEC,
225 	[PERF_COUNT_HW_CACHE_REFERENCES]	= HW_OP_UNSUPPORTED,
226 	[PERF_COUNT_HW_CACHE_MISSES]		= HW_OP_UNSUPPORTED,
227 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV6MPCORE_PERFCTR_BR_EXEC,
228 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
229 	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED,
230 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV6MPCORE_PERFCTR_IBUF_STALL,
231 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
232 };
233 
234 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
235 					[PERF_COUNT_HW_CACHE_OP_MAX]
236 					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
237 	[C(L1D)] = {
238 		[C(OP_READ)] = {
239 			[C(RESULT_ACCESS)]  =
240 				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
241 			[C(RESULT_MISS)]    =
242 				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
243 		},
244 		[C(OP_WRITE)] = {
245 			[C(RESULT_ACCESS)]  =
246 				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
247 			[C(RESULT_MISS)]    =
248 				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
249 		},
250 		[C(OP_PREFETCH)] = {
251 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
252 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
253 		},
254 	},
255 	[C(L1I)] = {
256 		[C(OP_READ)] = {
257 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
258 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
259 		},
260 		[C(OP_WRITE)] = {
261 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
262 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
263 		},
264 		[C(OP_PREFETCH)] = {
265 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
266 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
267 		},
268 	},
269 	[C(LL)] = {
270 		[C(OP_READ)] = {
271 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
272 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
273 		},
274 		[C(OP_WRITE)] = {
275 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
276 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
277 		},
278 		[C(OP_PREFETCH)] = {
279 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
280 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
281 		},
282 	},
283 	[C(DTLB)] = {
284 		/*
285 		 * The ARM performance counters can count micro DTLB misses,
286 		 * micro ITLB misses and main TLB misses. There isn't an event
287 		 * for TLB misses, so use the micro misses here and if users
288 		 * want the main TLB misses they can use a raw counter.
289 		 */
290 		[C(OP_READ)] = {
291 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
292 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
293 		},
294 		[C(OP_WRITE)] = {
295 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
296 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
297 		},
298 		[C(OP_PREFETCH)] = {
299 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
300 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
301 		},
302 	},
303 	[C(ITLB)] = {
304 		[C(OP_READ)] = {
305 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
306 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
307 		},
308 		[C(OP_WRITE)] = {
309 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
310 			[C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
311 		},
312 		[C(OP_PREFETCH)] = {
313 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
314 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
315 		},
316 	},
317 	[C(BPU)] = {
318 		[C(OP_READ)] = {
319 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
320 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
321 		},
322 		[C(OP_WRITE)] = {
323 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
324 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
325 		},
326 		[C(OP_PREFETCH)] = {
327 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
328 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
329 		},
330 	},
331 	[C(NODE)] = {
332 		[C(OP_READ)] = {
333 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
334 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
335 		},
336 		[C(OP_WRITE)] = {
337 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
338 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
339 		},
340 		[C(OP_PREFETCH)] = {
341 			[C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
342 			[C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
343 		},
344 	},
345 };
346 
347 static inline unsigned long
348 armv6_pmcr_read(void)
349 {
350 	u32 val;
351 	asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
352 	return val;
353 }
354 
355 static inline void
356 armv6_pmcr_write(unsigned long val)
357 {
358 	asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
359 }
360 
/* PMCR: global enable, counter reset and cycle-counter divider bits. */
#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)

/* PMCR: one interrupt-enable bit per counter. */
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)

/* PMCR: one overflow flag per counter, plus the union of all three. */
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
#define ARMV6_PMCR_OVERFLOWED_MASK	(ARMV6_PMCR_COUNT0_OVERFLOW | \
					 ARMV6_PMCR_COUNT1_OVERFLOW | \
					 ARMV6_PMCR_CCOUNT_OVERFLOW)

/* PMCR: 8-bit event-select fields for the two programmable counters. */
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
379 
380 static inline int
381 armv6_pmcr_has_overflowed(unsigned long pmcr)
382 {
383 	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
384 }
385 
386 static inline int
387 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
388 				  enum armv6_counters counter)
389 {
390 	int ret = 0;
391 
392 	if (ARMV6_CYCLE_COUNTER == counter)
393 		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
394 	else if (ARMV6_COUNTER0 == counter)
395 		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
396 	else if (ARMV6_COUNTER1 == counter)
397 		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
398 	else
399 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
400 
401 	return ret;
402 }
403 
404 static inline u32 armv6pmu_read_counter(struct perf_event *event)
405 {
406 	struct hw_perf_event *hwc = &event->hw;
407 	int counter = hwc->idx;
408 	unsigned long value = 0;
409 
410 	if (ARMV6_CYCLE_COUNTER == counter)
411 		asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
412 	else if (ARMV6_COUNTER0 == counter)
413 		asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
414 	else if (ARMV6_COUNTER1 == counter)
415 		asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
416 	else
417 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
418 
419 	return value;
420 }
421 
422 static inline void armv6pmu_write_counter(struct perf_event *event, u32 value)
423 {
424 	struct hw_perf_event *hwc = &event->hw;
425 	int counter = hwc->idx;
426 
427 	if (ARMV6_CYCLE_COUNTER == counter)
428 		asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
429 	else if (ARMV6_COUNTER0 == counter)
430 		asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
431 	else if (ARMV6_COUNTER1 == counter)
432 		asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
433 	else
434 		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
435 }
436 
437 static void armv6pmu_enable_event(struct perf_event *event)
438 {
439 	unsigned long val, mask, evt, flags;
440 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
441 	struct hw_perf_event *hwc = &event->hw;
442 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
443 	int idx = hwc->idx;
444 
445 	if (ARMV6_CYCLE_COUNTER == idx) {
446 		mask	= 0;
447 		evt	= ARMV6_PMCR_CCOUNT_IEN;
448 	} else if (ARMV6_COUNTER0 == idx) {
449 		mask	= ARMV6_PMCR_EVT_COUNT0_MASK;
450 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
451 			  ARMV6_PMCR_COUNT0_IEN;
452 	} else if (ARMV6_COUNTER1 == idx) {
453 		mask	= ARMV6_PMCR_EVT_COUNT1_MASK;
454 		evt	= (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
455 			  ARMV6_PMCR_COUNT1_IEN;
456 	} else {
457 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
458 		return;
459 	}
460 
461 	/*
462 	 * Mask out the current event and set the counter to count the event
463 	 * that we're interested in.
464 	 */
465 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
466 	val = armv6_pmcr_read();
467 	val &= ~mask;
468 	val |= evt;
469 	armv6_pmcr_write(val);
470 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
471 }
472 
473 static irqreturn_t
474 armv6pmu_handle_irq(int irq_num,
475 		    void *dev)
476 {
477 	unsigned long pmcr = armv6_pmcr_read();
478 	struct perf_sample_data data;
479 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
480 	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
481 	struct pt_regs *regs;
482 	int idx;
483 
484 	if (!armv6_pmcr_has_overflowed(pmcr))
485 		return IRQ_NONE;
486 
487 	regs = get_irq_regs();
488 
489 	/*
490 	 * The interrupts are cleared by writing the overflow flags back to
491 	 * the control register. All of the other bits don't have any effect
492 	 * if they are rewritten, so write the whole value back.
493 	 */
494 	armv6_pmcr_write(pmcr);
495 
496 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
497 		struct perf_event *event = cpuc->events[idx];
498 		struct hw_perf_event *hwc;
499 
500 		/* Ignore if we don't have an event. */
501 		if (!event)
502 			continue;
503 
504 		/*
505 		 * We have a single interrupt for all counters. Check that
506 		 * each counter has overflowed before we process it.
507 		 */
508 		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
509 			continue;
510 
511 		hwc = &event->hw;
512 		armpmu_event_update(event);
513 		perf_sample_data_init(&data, 0, hwc->last_period);
514 		if (!armpmu_event_set_period(event))
515 			continue;
516 
517 		if (perf_event_overflow(event, &data, regs))
518 			cpu_pmu->disable(event);
519 	}
520 
521 	/*
522 	 * Handle the pending perf events.
523 	 *
524 	 * Note: this call *must* be run with interrupts disabled. For
525 	 * platforms that can have the PMU interrupts raised as an NMI, this
526 	 * will not work.
527 	 */
528 	irq_work_run();
529 
530 	return IRQ_HANDLED;
531 }
532 
533 static void armv6pmu_start(struct arm_pmu *cpu_pmu)
534 {
535 	unsigned long flags, val;
536 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
537 
538 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
539 	val = armv6_pmcr_read();
540 	val |= ARMV6_PMCR_ENABLE;
541 	armv6_pmcr_write(val);
542 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
543 }
544 
545 static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
546 {
547 	unsigned long flags, val;
548 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
549 
550 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
551 	val = armv6_pmcr_read();
552 	val &= ~ARMV6_PMCR_ENABLE;
553 	armv6_pmcr_write(val);
554 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
555 }
556 
557 static int
558 armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
559 				struct perf_event *event)
560 {
561 	struct hw_perf_event *hwc = &event->hw;
562 	/* Always place a cycle counter into the cycle counter. */
563 	if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
564 		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
565 			return -EAGAIN;
566 
567 		return ARMV6_CYCLE_COUNTER;
568 	} else {
569 		/*
570 		 * For anything other than a cycle counter, try and use
571 		 * counter0 and counter1.
572 		 */
573 		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
574 			return ARMV6_COUNTER1;
575 
576 		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
577 			return ARMV6_COUNTER0;
578 
579 		/* The counters are all in use. */
580 		return -EAGAIN;
581 	}
582 }
583 
584 static void armv6pmu_disable_event(struct perf_event *event)
585 {
586 	unsigned long val, mask, evt, flags;
587 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
588 	struct hw_perf_event *hwc = &event->hw;
589 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
590 	int idx = hwc->idx;
591 
592 	if (ARMV6_CYCLE_COUNTER == idx) {
593 		mask	= ARMV6_PMCR_CCOUNT_IEN;
594 		evt	= 0;
595 	} else if (ARMV6_COUNTER0 == idx) {
596 		mask	= ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
597 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
598 	} else if (ARMV6_COUNTER1 == idx) {
599 		mask	= ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
600 		evt	= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
601 	} else {
602 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
603 		return;
604 	}
605 
606 	/*
607 	 * Mask out the current event and set the counter to count the number
608 	 * of ETM bus signal assertion cycles. The external reporting should
609 	 * be disabled and so this should never increment.
610 	 */
611 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
612 	val = armv6_pmcr_read();
613 	val &= ~mask;
614 	val |= evt;
615 	armv6_pmcr_write(val);
616 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
617 }
618 
619 static void armv6mpcore_pmu_disable_event(struct perf_event *event)
620 {
621 	unsigned long val, mask, flags, evt = 0;
622 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
623 	struct hw_perf_event *hwc = &event->hw;
624 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
625 	int idx = hwc->idx;
626 
627 	if (ARMV6_CYCLE_COUNTER == idx) {
628 		mask	= ARMV6_PMCR_CCOUNT_IEN;
629 	} else if (ARMV6_COUNTER0 == idx) {
630 		mask	= ARMV6_PMCR_COUNT0_IEN;
631 	} else if (ARMV6_COUNTER1 == idx) {
632 		mask	= ARMV6_PMCR_COUNT1_IEN;
633 	} else {
634 		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
635 		return;
636 	}
637 
638 	/*
639 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
640 	 * simply disable the interrupt reporting.
641 	 */
642 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
643 	val = armv6_pmcr_read();
644 	val &= ~mask;
645 	val |= evt;
646 	armv6_pmcr_write(val);
647 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
648 }
649 
650 static int armv6_map_event(struct perf_event *event)
651 {
652 	return armpmu_map_event(event, &armv6_perf_map,
653 				&armv6_perf_cache_map, 0xFF);
654 }
655 
656 static int armv6pmu_init(struct arm_pmu *cpu_pmu)
657 {
658 	cpu_pmu->name		= "v6";
659 	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
660 	cpu_pmu->enable		= armv6pmu_enable_event;
661 	cpu_pmu->disable	= armv6pmu_disable_event;
662 	cpu_pmu->read_counter	= armv6pmu_read_counter;
663 	cpu_pmu->write_counter	= armv6pmu_write_counter;
664 	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
665 	cpu_pmu->start		= armv6pmu_start;
666 	cpu_pmu->stop		= armv6pmu_stop;
667 	cpu_pmu->map_event	= armv6_map_event;
668 	cpu_pmu->num_events	= 3;
669 	cpu_pmu->max_period	= (1LLU << 32) - 1;
670 
671 	return 0;
672 }
673 
674 /*
675  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
676  * that some of the events have different enumerations and that there is no
677  * *hack* to stop the programmable counters. To stop the counters we simply
678  * disable the interrupt reporting and update the event. When unthrottling we
679  * reset the period and enable the interrupt reporting.
680  */
681 
682 static int armv6mpcore_map_event(struct perf_event *event)
683 {
684 	return armpmu_map_event(event, &armv6mpcore_perf_map,
685 				&armv6mpcore_perf_cache_map, 0xFF);
686 }
687 
688 static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
689 {
690 	cpu_pmu->name		= "v6mpcore";
691 	cpu_pmu->handle_irq	= armv6pmu_handle_irq;
692 	cpu_pmu->enable		= armv6pmu_enable_event;
693 	cpu_pmu->disable	= armv6mpcore_pmu_disable_event;
694 	cpu_pmu->read_counter	= armv6pmu_read_counter;
695 	cpu_pmu->write_counter	= armv6pmu_write_counter;
696 	cpu_pmu->get_event_idx	= armv6pmu_get_event_idx;
697 	cpu_pmu->start		= armv6pmu_start;
698 	cpu_pmu->stop		= armv6pmu_stop;
699 	cpu_pmu->map_event	= armv6mpcore_map_event;
700 	cpu_pmu->num_events	= 3;
701 	cpu_pmu->max_period	= (1LLU << 32) - 1;
702 
703 	return 0;
704 }
705 #else
/*
 * Stub used when neither CONFIG_CPU_V6 nor CONFIG_CPU_V6K is set: @cpu_pmu
 * is left untouched and -ENODEV is returned.
 */
static int armv6pmu_init(struct arm_pmu *cpu_pmu)
{
	int err = -ENODEV;

	return err;
}
710 
/*
 * Stub used when neither CONFIG_CPU_V6 nor CONFIG_CPU_V6K is set: @cpu_pmu
 * is left untouched and -ENODEV is returned.
 */
static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
{
	int err = -ENODEV;

	return err;
}
715 #endif	/* CONFIG_CPU_V6 || CONFIG_CPU_V6K */
716