xref: /openbmc/linux/arch/arc/include/asm/perf_event.h (revision 0dd450fe13da4aeacc69916ecfe39d3d0b531900)
19c57564eSVineet Gupta /*
2*0dd450feSMischa Jonker  * Linux performance counter support for ARC
3*0dd450feSMischa Jonker  *
4*0dd450feSMischa Jonker  * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
59c57564eSVineet Gupta  *
69c57564eSVineet Gupta  * This program is free software; you can redistribute it and/or modify
79c57564eSVineet Gupta  * it under the terms of the GNU General Public License version 2 as
89c57564eSVineet Gupta  * published by the Free Software Foundation.
99c57564eSVineet Gupta  *
109c57564eSVineet Gupta  */
119c57564eSVineet Gupta 
129c57564eSVineet Gupta #ifndef __ASM_PERF_EVENT_H
139c57564eSVineet Gupta #define __ASM_PERF_EVENT_H
149c57564eSVineet Gupta 
/*
 * Upper bound on the number of hardware events the driver supports;
 * the real maximum varies per CPU.
 */
#define ARC_PMU_MAX_HWEVENTS	64

/*
 * CC_* register numbers.
 * NOTE(review): "CC" presumably stands for "countable conditions" -
 * confirm against the ARC programmer's reference.
 */
#define ARC_REG_CC_BUILD	0xF6
#define ARC_REG_CC_INDEX	0x240
#define ARC_REG_CC_NAME0	0x241
#define ARC_REG_CC_NAME1	0x242

/*
 * PCT_* register numbers: build register, count low/high words,
 * snapshot low/high words, config, control and index.
 */
#define ARC_REG_PCT_BUILD	0xF5
#define ARC_REG_PCT_COUNTL	0x250
#define ARC_REG_PCT_COUNTH	0x251
#define ARC_REG_PCT_SNAPL	0x252
#define ARC_REG_PCT_SNAPH	0x253
#define ARC_REG_PCT_CONFIG	0x254
#define ARC_REG_PCT_CONTROL	0x255
#define ARC_REG_PCT_INDEX	0x256

/* Bit masks for ARC_REG_PCT_CONTROL */
#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */

34*0dd450feSMischa Jonker 
/*
 * Bit-field view of the 32-bit value read from ARC_REG_PCT_BUILD.
 *
 * The two branches list the same fields in opposite order so that each
 * field covers the same register bits on either endianness.
 *
 * NOTE(review): field meanings are not stated in this header; presumably
 * v = version, s = counter size, r = reserved, c = number of counters,
 * m = mode/feature bits - confirm against the ARC programmer's reference.
 */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8;
	unsigned int c:8;
	unsigned int r:6;
	unsigned int s:2;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int s:2;
	unsigned int r:6;
	unsigned int c:8;
	unsigned int m:8;
#endif
};
42*0dd450feSMischa Jonker 
/*
 * Bit-field view of the 32-bit value read from ARC_REG_CC_BUILD.
 *
 * Field order is mirrored for big-endian builds so that each field
 * covers the same register bits on either endianness.
 *
 * NOTE(review): field meanings are not stated in this header; presumably
 * v = version, r = reserved, c = number of conditions - confirm against
 * the ARC programmer's reference.
 */
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16;
	unsigned int r:8;
	unsigned int v:8;
#else
	unsigned int v:8;
	unsigned int r:8;
	unsigned int c:16;
#endif
};
50*0dd450feSMischa Jonker 
/*
 * ARC-specific event indices, numbered directly after the generic
 * PERF_COUNT_HW_* range so that both kinds can index one table.
 * PERF_COUNT_ARC_HW_MAX is one past the last ARC-specific index.
 */
#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)
#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)
#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)
#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)

58*0dd450feSMischa Jonker 
/*
 * The "generalized" performance events seem to really be a copy
 * of the available events on x86 processors; the mapping to ARC
 * events is not always possible 1-to-1. Fortunately, there doesn't
 * seem to be an exact definition for these events, so we can cheat
 * a bit where necessary.
 *
 * In particular, the following PERF events may behave a bit differently
 * compared to other architectures:
 *
 * PERF_COUNT_HW_CPU_CYCLES
 *	Cycles not in halted state
 *
 * PERF_COUNT_HW_REF_CPU_CYCLES
 *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
 *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
 *
 * PERF_COUNT_HW_BUS_CYCLES
 *	Unclear what this means, Intel uses 0x013c, which according to
 *	their datasheet means "unhalted reference cycles". It sounds similar
 *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ (instruction cache) misses aren't counted by either of
 *	the two!
 */
93*0dd450feSMischa Jonker 
94*0dd450feSMischa Jonker static const char * const arc_pmu_ev_hw_map[] = {
95*0dd450feSMischa Jonker 	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
96*0dd450feSMischa Jonker 	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
97*0dd450feSMischa Jonker 	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
98*0dd450feSMischa Jonker 	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
99*0dd450feSMischa Jonker 	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
100*0dd450feSMischa Jonker 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
101*0dd450feSMischa Jonker 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
102*0dd450feSMischa Jonker 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
103*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_DCLM] = "dclm",
104*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_DCSM] = "dcsm",
105*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_ICM] = "icm",
106*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_BPOK] = "bpok",
107*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_EDTLB] = "edtlb",
108*0dd450feSMischa Jonker 	[PERF_COUNT_ARC_EITLB] = "eitlb",
109*0dd450feSMischa Jonker };
110*0dd450feSMischa Jonker 
/* Shorthand: C(FOO) expands to PERF_COUNT_HW_CACHE_FOO */
#define C(x)			PERF_COUNT_HW_CACHE_##x
/* Sentinel stored in arc_pmu_cache_map for combinations with no ARC event */
#define CACHE_OP_UNSUPPORTED	0xffff
113*0dd450feSMischa Jonker 
114*0dd450feSMischa Jonker static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
115*0dd450feSMischa Jonker 	[C(L1D)] = {
116*0dd450feSMischa Jonker 		[C(OP_READ)] = {
117*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
118*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
119*0dd450feSMischa Jonker 		},
120*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
121*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
122*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
123*0dd450feSMischa Jonker 		},
124*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
125*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
126*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
127*0dd450feSMischa Jonker 		},
128*0dd450feSMischa Jonker 	},
129*0dd450feSMischa Jonker 	[C(L1I)] = {
130*0dd450feSMischa Jonker 		[C(OP_READ)] = {
131*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
132*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
133*0dd450feSMischa Jonker 		},
134*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
135*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
136*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
137*0dd450feSMischa Jonker 		},
138*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
139*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
140*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
141*0dd450feSMischa Jonker 		},
142*0dd450feSMischa Jonker 	},
143*0dd450feSMischa Jonker 	[C(LL)] = {
144*0dd450feSMischa Jonker 		[C(OP_READ)] = {
145*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
146*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
147*0dd450feSMischa Jonker 		},
148*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
149*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
150*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
151*0dd450feSMischa Jonker 		},
152*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
153*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
154*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
155*0dd450feSMischa Jonker 		},
156*0dd450feSMischa Jonker 	},
157*0dd450feSMischa Jonker 	[C(DTLB)] = {
158*0dd450feSMischa Jonker 		[C(OP_READ)] = {
159*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
160*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
161*0dd450feSMischa Jonker 		},
162*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
163*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
164*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
165*0dd450feSMischa Jonker 		},
166*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
167*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
168*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
169*0dd450feSMischa Jonker 		},
170*0dd450feSMischa Jonker 	},
171*0dd450feSMischa Jonker 	[C(ITLB)] = {
172*0dd450feSMischa Jonker 		[C(OP_READ)] = {
173*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
174*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
175*0dd450feSMischa Jonker 		},
176*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
177*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
178*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
179*0dd450feSMischa Jonker 		},
180*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
181*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
182*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
183*0dd450feSMischa Jonker 		},
184*0dd450feSMischa Jonker 	},
185*0dd450feSMischa Jonker 	[C(BPU)] = {
186*0dd450feSMischa Jonker 		[C(OP_READ)] = {
187*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
188*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
189*0dd450feSMischa Jonker 		},
190*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
191*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
192*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
193*0dd450feSMischa Jonker 		},
194*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
195*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
196*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
197*0dd450feSMischa Jonker 		},
198*0dd450feSMischa Jonker 	},
199*0dd450feSMischa Jonker 	[C(NODE)] = {
200*0dd450feSMischa Jonker 		[C(OP_READ)] = {
201*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
202*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
203*0dd450feSMischa Jonker 		},
204*0dd450feSMischa Jonker 		[C(OP_WRITE)] = {
205*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
206*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
207*0dd450feSMischa Jonker 		},
208*0dd450feSMischa Jonker 		[C(OP_PREFETCH)] = {
209*0dd450feSMischa Jonker 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
210*0dd450feSMischa Jonker 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
211*0dd450feSMischa Jonker 		},
212*0dd450feSMischa Jonker 	},
213*0dd450feSMischa Jonker };
214*0dd450feSMischa Jonker 
2159c57564eSVineet Gupta #endif /* __ASM_PERF_EVENT_H */
216