/*
 * Linux performance counter support for ARC
 *
 * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/*
 * NOTE: this header uses PERF_COUNT_HW_* identifiers but does not define
 * them; whoever includes it must already have them in scope.
 */

/* real maximum varies per CPU, this is the maximum supported by the driver */
#define ARC_PMU_MAX_HWEVENTS	64

/*
 * Register numbers for the "CC" block.
 * NOTE(review): presumably auxiliary-register addresses - confirm against
 * the ARC programmer's reference.
 */
#define ARC_REG_CC_BUILD	0xF6
#define ARC_REG_CC_INDEX	0x240
#define ARC_REG_CC_NAME0	0x241
#define ARC_REG_CC_NAME1	0x242

/*
 * Register numbers for the "PCT" block: build info, count (low/high),
 * snapshot (low/high), config, control and index.
 */
#define ARC_REG_PCT_BUILD	0xF5
#define ARC_REG_PCT_COUNTL	0x250
#define ARC_REG_PCT_COUNTH	0x251
#define ARC_REG_PCT_SNAPL	0x252
#define ARC_REG_PCT_SNAPH	0x253
#define ARC_REG_PCT_CONFIG	0x254
#define ARC_REG_PCT_CONTROL	0x255
#define ARC_REG_PCT_INDEX	0x256

/* Bit masks; going by their names, these apply to ARC_REG_PCT_CONTROL */
#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */

/*
 * 32-bit bit-field view (8 + 2 + 6 + 8 + 8 bits) whose name matches
 * ARC_REG_PCT_BUILD.  The member order is mirrored for big-endian builds
 * so that each field covers the same bits of the word either way.
 * NOTE(review): the meaning of the individual fields (v, s, r, c, m) is
 * not visible in this header - confirm against the hardware documentation.
 */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8, c:8, r:6, s:2, v:8;
#else
	unsigned int v:8, s:2, r:6, c:8, m:8;
#endif
};

/*
 * 32-bit bit-field view (8 + 8 + 16 bits) whose name matches
 * ARC_REG_CC_BUILD, with the same endian-dependent member ordering as
 * struct arc_reg_pct_build.
 * NOTE(review): field meanings (v, r, c) are not visible here - confirm.
 */
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16, r:8, v:8;
#else
	unsigned int v:8, r:8, c:16;
#endif
};

/*
 * ARC-specific event ids, numbered directly after the generic
 * PERF_COUNT_HW_* range so that both kinds can index the same table.
 * PERF_COUNT_ARC_HW_MAX is one past the last ARC-specific id.
 */
#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)
#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)
#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)
#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)

/*
 * The "generalized" performance events seem to really be a copy
 * of the available events on x86 processors; the mapping to ARC
 * events is not always possible 1-to-1. Fortunately, there doesn't
 * seem to be an exact definition for these events, so we can cheat
 * a bit where necessary.
 *
 * In particular, the following PERF events may behave a bit differently
 * compared to other architectures:
 *
 * PERF_COUNT_HW_CPU_CYCLES
 *	Cycles not in halted state
 *
 * PERF_COUNT_HW_REF_CPU_CYCLES
 *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
 *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
 *
 * PERF_COUNT_HW_BUS_CYCLES
 *	Unclear what this means, Intel uses 0x013c, which according to
 *	their datasheet means "unhalted reference cycles". It sounds similar
 *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ cache misses aren't counted by either of the two!
 */

/*
 * Event id -> short event-name string.  Indexed by the generic
 * PERF_COUNT_HW_* ids and by the PERF_COUNT_ARC_* ids defined above.
 * Ids without an initializer below are left as NULL.
 * NOTE(review): the strings are presumably the names the hardware reports
 * through the ARC_REG_CC_NAME0/1 registers - confirm against the driver.
 */
static const char * const arc_pmu_ev_hw_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
	[PERF_COUNT_ARC_DCLM] = "dclm",
	[PERF_COUNT_ARC_DCSM] = "dcsm",
	[PERF_COUNT_ARC_ICM] = "icm",
	[PERF_COUNT_ARC_BPOK] = "bpok",
	[PERF_COUNT_ARC_EDTLB] = "edtlb",
	[PERF_COUNT_ARC_EITLB] = "eitlb",
};

/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(_x)			PERF_COUNT_HW_CACHE_##_x
/* Marker stored in arc_pmu_cache_map for combinations with no event */
#define CACHE_OP_UNSUPPORTED	0xffff

/*
 * Cache event lookup table, indexed as [cache type][operation][result].
 * Each entry is either an event id usable as an index into
 * arc_pmu_ev_hw_map, or CACHE_OP_UNSUPPORTED.  Every combination is
 * spelled out explicitly on purpose: an omitted entry would be
 * zero-initialized rather than CACHE_OP_UNSUPPORTED.
 */
static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)] = PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

#endif /* __ASM_PERF_EVENT_H */