1*291dcb98SKim Phillips // SPDX-License-Identifier: GPL-2.0 2*291dcb98SKim Phillips /* 3*291dcb98SKim Phillips * AMD specific. Provide textual annotation for IBS raw sample data. 4*291dcb98SKim Phillips */ 5*291dcb98SKim Phillips 6*291dcb98SKim Phillips #include <unistd.h> 7*291dcb98SKim Phillips #include <stdio.h> 8*291dcb98SKim Phillips #include <string.h> 9*291dcb98SKim Phillips #include <inttypes.h> 10*291dcb98SKim Phillips 11*291dcb98SKim Phillips #include <linux/string.h> 12*291dcb98SKim Phillips #include "../../arch/x86/include/asm/amd-ibs.h" 13*291dcb98SKim Phillips 14*291dcb98SKim Phillips #include "debug.h" 15*291dcb98SKim Phillips #include "session.h" 16*291dcb98SKim Phillips #include "evlist.h" 17*291dcb98SKim Phillips #include "sample-raw.h" 18*291dcb98SKim Phillips #include "pmu-events/pmu-events.h" 19*291dcb98SKim Phillips 20*291dcb98SKim Phillips static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; 21*291dcb98SKim Phillips 22*291dcb98SKim Phillips static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) 23*291dcb98SKim Phillips { 24*291dcb98SKim Phillips const char * const ic_miss_strs[] = { 25*291dcb98SKim Phillips " IcMiss 0", 26*291dcb98SKim Phillips " IcMiss 1", 27*291dcb98SKim Phillips }; 28*291dcb98SKim Phillips const char * const l1tlb_pgsz_strs[] = { 29*291dcb98SKim Phillips " L1TlbPgSz 4KB", 30*291dcb98SKim Phillips " L1TlbPgSz 2MB", 31*291dcb98SKim Phillips " L1TlbPgSz 1GB", 32*291dcb98SKim Phillips " L1TlbPgSz RESERVED" 33*291dcb98SKim Phillips }; 34*291dcb98SKim Phillips const char * const l1tlb_pgsz_strs_erratum1347[] = { 35*291dcb98SKim Phillips " L1TlbPgSz 4KB", 36*291dcb98SKim Phillips " L1TlbPgSz 16KB", 37*291dcb98SKim Phillips " L1TlbPgSz 2MB", 38*291dcb98SKim Phillips " L1TlbPgSz 1GB" 39*291dcb98SKim Phillips }; 40*291dcb98SKim Phillips const char *ic_miss_str = NULL; 41*291dcb98SKim Phillips const char *l1tlb_pgsz_str = NULL; 42*291dcb98SKim Phillips 43*291dcb98SKim Phillips if (cpu_family == 0x19 && cpu_model < 0x10) { 44*291dcb98SKim Phillips /* 45*291dcb98SKim Phillips * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] 46*291dcb98SKim Phillips * Erratum #1347 workaround is to use table provided in erratum 47*291dcb98SKim Phillips */ 48*291dcb98SKim Phillips if (reg.phy_addr_valid) 49*291dcb98SKim Phillips l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; 50*291dcb98SKim Phillips } else { 51*291dcb98SKim Phillips if (reg.phy_addr_valid) 52*291dcb98SKim Phillips l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; 53*291dcb98SKim Phillips ic_miss_str = ic_miss_strs[reg.ic_miss]; 54*291dcb98SKim Phillips } 55*291dcb98SKim Phillips 56*291dcb98SKim Phillips printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " 57*291dcb98SKim Phillips "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", 58*291dcb98SKim Phillips reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, 59*291dcb98SKim Phillips reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", 60*291dcb98SKim Phillips reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, 61*291dcb98SKim Phillips reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); 62*291dcb98SKim Phillips } 63*291dcb98SKim Phillips 64*291dcb98SKim Phillips static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) 65*291dcb98SKim Phillips { 66*291dcb98SKim Phillips printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); 67*291dcb98SKim Phillips } 68*291dcb98SKim Phillips 69*291dcb98SKim Phillips static void pr_ibs_op_ctl(union ibs_op_ctl reg) 70*291dcb98SKim Phillips { 71*291dcb98SKim Phillips printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", 72*291dcb98SKim Phillips reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, 73*291dcb98SKim Phillips reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); 74*291dcb98SKim Phillips } 75*291dcb98SKim Phillips 76*291dcb98SKim Phillips static void pr_ibs_op_data(union ibs_op_data reg) 77*291dcb98SKim Phillips { 78*291dcb98SKim Phillips printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " 79*291dcb98SKim Phillips " RipInvalid %d BrnFuse %d Microcode %d\n", 80*291dcb98SKim Phillips reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, 81*291dcb98SKim Phillips reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", 82*291dcb98SKim Phillips reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", 83*291dcb98SKim Phillips reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", 84*291dcb98SKim Phillips reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); 85*291dcb98SKim Phillips } 86*291dcb98SKim Phillips 87*291dcb98SKim Phillips static void pr_ibs_op_data2(union ibs_op_data2 reg) 88*291dcb98SKim Phillips { 89*291dcb98SKim Phillips static const char * const data_src_str[] = { 90*291dcb98SKim Phillips "", 91*291dcb98SKim Phillips " DataSrc 1=(reserved)", 92*291dcb98SKim Phillips " DataSrc 2=Local node cache", 93*291dcb98SKim Phillips " DataSrc 3=DRAM", 94*291dcb98SKim Phillips " DataSrc 4=Remote node cache", 95*291dcb98SKim Phillips " DataSrc 5=(reserved)", 96*291dcb98SKim Phillips " DataSrc 6=(reserved)", 97*291dcb98SKim Phillips " DataSrc 7=Other" 98*291dcb98SKim Phillips }; 99*291dcb98SKim Phillips 100*291dcb98SKim Phillips printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, 101*291dcb98SKim Phillips reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " 102*291dcb98SKim Phillips : "CacheHitSt 0=M-state ") : "", 103*291dcb98SKim Phillips reg.rmt_node, data_src_str[reg.data_src]); 104*291dcb98SKim Phillips } 105*291dcb98SKim Phillips 106*291dcb98SKim Phillips static void pr_ibs_op_data3(union ibs_op_data3 reg) 107*291dcb98SKim Phillips { 108*291dcb98SKim Phillips char l2_miss_str[sizeof(" L2Miss _")] = ""; 109*291dcb98SKim Phillips char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; 110*291dcb98SKim Phillips char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; 111*291dcb98SKim Phillips 112*291dcb98SKim Phillips /* 113*291dcb98SKim Phillips * Erratum #1293 114*291dcb98SKim Phillips * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set 115*291dcb98SKim Phillips */ 116*291dcb98SKim Phillips if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { 117*291dcb98SKim Phillips snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); 118*291dcb98SKim Phillips snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), 119*291dcb98SKim Phillips " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); 120*291dcb98SKim Phillips } 121*291dcb98SKim Phillips 122*291dcb98SKim Phillips if (reg.op_mem_width) 123*291dcb98SKim Phillips snprintf(op_mem_width_str, sizeof(op_mem_width_str), 124*291dcb98SKim Phillips " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); 125*291dcb98SKim Phillips 126*291dcb98SKim Phillips printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " 127*291dcb98SKim Phillips "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " 128*291dcb98SKim Phillips "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " 129*291dcb98SKim Phillips "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", 130*291dcb98SKim Phillips reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, 131*291dcb98SKim Phillips reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, 132*291dcb98SKim Phillips reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, 133*291dcb98SKim Phillips reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, 134*291dcb98SKim Phillips reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, 135*291dcb98SKim Phillips op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); 136*291dcb98SKim Phillips } 137*291dcb98SKim Phillips 138*291dcb98SKim Phillips /* 139*291dcb98SKim Phillips * IBS Op/Execution MSRs always saved, in order, are: 140*291dcb98SKim Phillips * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, 141*291dcb98SKim Phillips * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP 142*291dcb98SKim Phillips */ 143*291dcb98SKim Phillips static void amd_dump_ibs_op(struct perf_sample *sample) 144*291dcb98SKim Phillips { 145*291dcb98SKim Phillips struct perf_ibs_data *data = sample->raw_data; 146*291dcb98SKim Phillips union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 147*291dcb98SKim Phillips __u64 *rip = (__u64 *)op_ctl + 1; 148*291dcb98SKim Phillips union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); 149*291dcb98SKim Phillips union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); 150*291dcb98SKim Phillips 151*291dcb98SKim Phillips pr_ibs_op_ctl(*op_ctl); 152*291dcb98SKim Phillips if (!op_data->op_rip_invalid) 153*291dcb98SKim Phillips printf("IbsOpRip:\t%016llx\n", *rip); 154*291dcb98SKim Phillips pr_ibs_op_data(*op_data); 155*291dcb98SKim Phillips /* 156*291dcb98SKim Phillips * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set 157*291dcb98SKim Phillips */ 158*291dcb98SKim Phillips if (!(cpu_family == 0x19 && cpu_model < 0x10 && 159*291dcb98SKim Phillips (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) 160*291dcb98SKim Phillips pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); 161*291dcb98SKim Phillips pr_ibs_op_data3(*op_data3); 162*291dcb98SKim Phillips if (op_data3->dc_lin_addr_valid) 163*291dcb98SKim Phillips printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); 164*291dcb98SKim Phillips if (op_data3->dc_phy_addr_valid) 165*291dcb98SKim Phillips printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); 166*291dcb98SKim Phillips if (op_data->op_brn_ret && *(rip + 6)) 167*291dcb98SKim Phillips printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); 168*291dcb98SKim Phillips } 169*291dcb98SKim Phillips 170*291dcb98SKim Phillips /* 171*291dcb98SKim Phillips * IBS Fetch MSRs always saved, in order, are: 172*291dcb98SKim Phillips * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL 173*291dcb98SKim Phillips */ 174*291dcb98SKim Phillips static void amd_dump_ibs_fetch(struct perf_sample *sample) 175*291dcb98SKim Phillips { 176*291dcb98SKim Phillips struct perf_ibs_data *data = sample->raw_data; 177*291dcb98SKim Phillips union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 178*291dcb98SKim Phillips __u64 *addr = (__u64 *)fetch_ctl + 1; 179*291dcb98SKim Phillips union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; 180*291dcb98SKim Phillips 181*291dcb98SKim Phillips pr_ibs_fetch_ctl(*fetch_ctl); 182*291dcb98SKim Phillips printf("IbsFetchLinAd:\t%016llx\n", *addr++); 183*291dcb98SKim Phillips if (fetch_ctl->phy_addr_valid) 184*291dcb98SKim Phillips printf("IbsFetchPhysAd:\t%016llx\n", *addr); 185*291dcb98SKim Phillips pr_ic_ibs_extd_ctl(*extd_ctl); 186*291dcb98SKim Phillips } 187*291dcb98SKim Phillips 188*291dcb98SKim Phillips /* 189*291dcb98SKim Phillips * Test for enable and valid bits in captured control MSRs. 190*291dcb98SKim Phillips */ 191*291dcb98SKim Phillips static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) 192*291dcb98SKim Phillips { 193*291dcb98SKim Phillips struct perf_ibs_data *data = sample->raw_data; 194*291dcb98SKim Phillips union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; 195*291dcb98SKim Phillips 196*291dcb98SKim Phillips if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) 197*291dcb98SKim Phillips return true; 198*291dcb98SKim Phillips 199*291dcb98SKim Phillips return false; 200*291dcb98SKim Phillips } 201*291dcb98SKim Phillips 202*291dcb98SKim Phillips static bool is_valid_ibs_op_sample(struct perf_sample *sample) 203*291dcb98SKim Phillips { 204*291dcb98SKim Phillips struct perf_ibs_data *data = sample->raw_data; 205*291dcb98SKim Phillips union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; 206*291dcb98SKim Phillips 207*291dcb98SKim Phillips if (op_ctl->op_en && op_ctl->op_val) 208*291dcb98SKim Phillips return true; 209*291dcb98SKim Phillips 210*291dcb98SKim Phillips return false; 211*291dcb98SKim Phillips } 212*291dcb98SKim Phillips 213*291dcb98SKim Phillips /* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events 214*291dcb98SKim Phillips * and if the event was triggered by IBS, display its raw data with decoded text. 215*291dcb98SKim Phillips * The function is only invoked when the dump flag -D is set. 216*291dcb98SKim Phillips */ 217*291dcb98SKim Phillips void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, 218*291dcb98SKim Phillips struct perf_sample *sample) 219*291dcb98SKim Phillips { 220*291dcb98SKim Phillips struct evsel *evsel; 221*291dcb98SKim Phillips 222*291dcb98SKim Phillips if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) 223*291dcb98SKim Phillips return; 224*291dcb98SKim Phillips 225*291dcb98SKim Phillips evsel = evlist__event2evsel(evlist, event); 226*291dcb98SKim Phillips if (!evsel) 227*291dcb98SKim Phillips return; 228*291dcb98SKim Phillips 229*291dcb98SKim Phillips if (evsel->core.attr.type == ibs_fetch_type) { 230*291dcb98SKim Phillips if (!is_valid_ibs_fetch_sample(sample)) { 231*291dcb98SKim Phillips pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); 232*291dcb98SKim Phillips return; 233*291dcb98SKim Phillips } 234*291dcb98SKim Phillips amd_dump_ibs_fetch(sample); 235*291dcb98SKim Phillips } else if (evsel->core.attr.type == ibs_op_type) { 236*291dcb98SKim Phillips if (!is_valid_ibs_op_sample(sample)) { 237*291dcb98SKim Phillips pr_debug("Invalid raw IBS Op MSR data encountered\n"); 238*291dcb98SKim Phillips return; 239*291dcb98SKim Phillips } 240*291dcb98SKim Phillips amd_dump_ibs_op(sample); 241*291dcb98SKim Phillips } 242*291dcb98SKim Phillips } 243*291dcb98SKim Phillips 244*291dcb98SKim Phillips static void parse_cpuid(struct perf_env *env) 245*291dcb98SKim Phillips { 246*291dcb98SKim Phillips const char *cpuid; 247*291dcb98SKim Phillips int ret; 248*291dcb98SKim Phillips 249*291dcb98SKim Phillips cpuid = perf_env__cpuid(env); 250*291dcb98SKim Phillips /* 251*291dcb98SKim Phillips * cpuid = "AuthenticAMD,family,model,stepping" 252*291dcb98SKim Phillips */ 253*291dcb98SKim Phillips ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); 254*291dcb98SKim Phillips if (ret != 2) 255*291dcb98SKim Phillips pr_debug("problem parsing cpuid\n"); 256*291dcb98SKim Phillips } 257*291dcb98SKim Phillips 258*291dcb98SKim Phillips /* 259*291dcb98SKim Phillips * Find and assign the type number used for ibs_op or ibs_fetch samples. 260*291dcb98SKim Phillips * Device names can be large - we are only interested in the first 9 characters, 261*291dcb98SKim Phillips * to match "ibs_fetch". 262*291dcb98SKim Phillips */ 263*291dcb98SKim Phillips bool evlist__has_amd_ibs(struct evlist *evlist) 264*291dcb98SKim Phillips { 265*291dcb98SKim Phillips struct perf_env *env = evlist->env; 266*291dcb98SKim Phillips int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); 267*291dcb98SKim Phillips const char *pmu_mapping = perf_env__pmu_mappings(env); 268*291dcb98SKim Phillips char name[sizeof("ibs_fetch")]; 269*291dcb98SKim Phillips u32 type; 270*291dcb98SKim Phillips 271*291dcb98SKim Phillips while (nr_pmu_mappings--) { 272*291dcb98SKim Phillips ret = sscanf(pmu_mapping, "%u:%9s", &type, name); 273*291dcb98SKim Phillips if (ret == 2) { 274*291dcb98SKim Phillips if (strstarts(name, "ibs_op")) 275*291dcb98SKim Phillips ibs_op_type = type; 276*291dcb98SKim Phillips else if (strstarts(name, "ibs_fetch")) 277*291dcb98SKim Phillips ibs_fetch_type = type; 278*291dcb98SKim Phillips } 279*291dcb98SKim Phillips pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; 280*291dcb98SKim Phillips } 281*291dcb98SKim Phillips 282*291dcb98SKim Phillips if (ibs_fetch_type || ibs_op_type) { 283*291dcb98SKim Phillips if (!cpu_family) 284*291dcb98SKim Phillips parse_cpuid(env); 285*291dcb98SKim Phillips return true; 286*291dcb98SKim Phillips } 287*291dcb98SKim Phillips 288*291dcb98SKim Phillips return false; 289*291dcb98SKim Phillips } 290