1ecd94f1bSKan Liang[
2ecd94f1bSKan Liang    {
3*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
4*49898fefSIan Rogers        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
5*49898fefSIan Rogers        "MetricGroup": "TopdownL1",
6*49898fefSIan Rogers        "MetricName": "Frontend_Bound",
7*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
8*49898fefSIan Rogers    },
9*49898fefSIan Rogers    {
10*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
11*49898fefSIan Rogers        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
12*49898fefSIan Rogers        "MetricGroup": "TopdownL1_SMT",
13*49898fefSIan Rogers        "MetricName": "Frontend_Bound_SMT",
14*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
15*49898fefSIan Rogers    },
16*49898fefSIan Rogers    {
17*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
18*49898fefSIan Rogers        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
19*49898fefSIan Rogers        "MetricGroup": "TopdownL1",
20*49898fefSIan Rogers        "MetricName": "Bad_Speculation",
21*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
22*49898fefSIan Rogers    },
23*49898fefSIan Rogers    {
24*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
25*49898fefSIan Rogers        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
26*49898fefSIan Rogers        "MetricGroup": "TopdownL1_SMT",
27*49898fefSIan Rogers        "MetricName": "Bad_Speculation_SMT",
28*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
29*49898fefSIan Rogers    },
30*49898fefSIan Rogers    {
31*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
32*49898fefSIan Rogers        "MetricConstraint": "NO_NMI_WATCHDOG",
33*49898fefSIan Rogers        "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
34*49898fefSIan Rogers        "MetricGroup": "TopdownL1",
35*49898fefSIan Rogers        "MetricName": "Backend_Bound",
36*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
37*49898fefSIan Rogers    },
38*49898fefSIan Rogers    {
39*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
40*49898fefSIan Rogers        "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
41*49898fefSIan Rogers        "MetricGroup": "TopdownL1_SMT",
42*49898fefSIan Rogers        "MetricName": "Backend_Bound_SMT",
43*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
44*49898fefSIan Rogers    },
45*49898fefSIan Rogers    {
46*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
47*49898fefSIan Rogers        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
48*49898fefSIan Rogers        "MetricGroup": "TopdownL1",
49*49898fefSIan Rogers        "MetricName": "Retiring",
50*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
51*49898fefSIan Rogers    },
52*49898fefSIan Rogers    {
53*49898fefSIan Rogers        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
54*49898fefSIan Rogers        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
55*49898fefSIan Rogers        "MetricGroup": "TopdownL1_SMT",
56*49898fefSIan Rogers        "MetricName": "Retiring_SMT",
57*49898fefSIan Rogers        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
58*49898fefSIan Rogers    },
59*49898fefSIan Rogers    {
60*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
61*49898fefSIan Rogers        "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )",
62*49898fefSIan Rogers        "MetricGroup": "Bad;BadSpec;BrMispredicts",
63*49898fefSIan Rogers        "MetricName": "Mispredictions"
64*49898fefSIan Rogers    },
65*49898fefSIan Rogers    {
66*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
67*49898fefSIan Rogers        "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
68*49898fefSIan Rogers        "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT",
69*49898fefSIan Rogers        "MetricName": "Mispredictions_SMT"
70*49898fefSIan Rogers    },
71*49898fefSIan Rogers    {
72*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
73*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
74*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;Offcore",
75*49898fefSIan Rogers        "MetricName": "Memory_Bandwidth"
76*49898fefSIan Rogers    },
77*49898fefSIan Rogers    {
78*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
79*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
80*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;Offcore_SMT",
81*49898fefSIan Rogers        "MetricName": "Memory_Bandwidth_SMT"
82*49898fefSIan Rogers    },
83*49898fefSIan Rogers    {
84*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
85*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )",
86*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryLat;Offcore",
87*49898fefSIan Rogers        "MetricName": "Memory_Latency"
88*49898fefSIan Rogers    },
89*49898fefSIan Rogers    {
90*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
91*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) - ( ( ( 1 - ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) / ( ( 19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + 10 * ( (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + (MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) ) ) + ( 25 * ( MEM_LOAD_RETIRED.LOCAL_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) + 33 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) ) ) ) ) ) ) * (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) if ( 1000000 * ( MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM ) > MEM_LOAD_RETIRED.L1_MISS ) else 0 ) ) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )",
92*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryLat;Offcore_SMT",
93*49898fefSIan Rogers        "MetricName": "Memory_Latency_SMT"
94*49898fefSIan Rogers    },
95*49898fefSIan Rogers    {
96*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
97*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
98*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryTLB",
99*49898fefSIan Rogers        "MetricName": "Memory_Data_TLBs"
100*49898fefSIan Rogers    },
101*49898fefSIan Rogers    {
102*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
103*49898fefSIan Rogers        "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
104*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryTLB;_SMT",
105*49898fefSIan Rogers        "MetricName": "Memory_Data_TLBs_SMT"
106*49898fefSIan Rogers    },
107*49898fefSIan Rogers    {
108*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
109*49898fefSIan Rogers        "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))",
110*49898fefSIan Rogers        "MetricGroup": "Ret",
111*49898fefSIan Rogers        "MetricName": "Branching_Overhead"
112*49898fefSIan Rogers    },
113*49898fefSIan Rogers    {
114*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
115*49898fefSIan Rogers        "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
116*49898fefSIan Rogers        "MetricGroup": "Ret_SMT",
117*49898fefSIan Rogers        "MetricName": "Branching_Overhead_SMT"
118*49898fefSIan Rogers    },
119*49898fefSIan Rogers    {
120*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
121*49898fefSIan Rogers        "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))",
122*49898fefSIan Rogers        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB",
123*49898fefSIan Rogers        "MetricName": "Big_Code"
124*49898fefSIan Rogers    },
125*49898fefSIan Rogers    {
126*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
127*49898fefSIan Rogers        "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
128*49898fefSIan Rogers        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT",
129*49898fefSIan Rogers        "MetricName": "Big_Code_SMT"
130*49898fefSIan Rogers    },
131*49898fefSIan Rogers    {
132*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
133*49898fefSIan Rogers        "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
134*49898fefSIan Rogers        "MetricGroup": "Fed;FetchBW;Frontend",
135*49898fefSIan Rogers        "MetricName": "Instruction_Fetch_BW"
136*49898fefSIan Rogers    },
137*49898fefSIan Rogers    {
138*49898fefSIan Rogers        "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
139*49898fefSIan Rogers        "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
140*49898fefSIan Rogers        "MetricGroup": "Fed;FetchBW;Frontend_SMT",
141*49898fefSIan Rogers        "MetricName": "Instruction_Fetch_BW_SMT"
142*49898fefSIan Rogers    },
143*49898fefSIan Rogers    {
14461ec07f5SHaiyan Song        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
145ecd94f1bSKan Liang        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
146*49898fefSIan Rogers        "MetricGroup": "Ret;Summary",
147ecd94f1bSKan Liang        "MetricName": "IPC"
148ecd94f1bSKan Liang    },
149ecd94f1bSKan Liang    {
150fd550098SAndi Kleen        "BriefDescription": "Uops Per Instruction",
15161ec07f5SHaiyan Song        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
152*49898fefSIan Rogers        "MetricGroup": "Pipeline;Ret;Retire",
153ecd94f1bSKan Liang        "MetricName": "UPI"
154ecd94f1bSKan Liang    },
155ecd94f1bSKan Liang    {
156fd550098SAndi Kleen        "BriefDescription": "Instruction per taken branch",
157*49898fefSIan Rogers        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
158*49898fefSIan Rogers        "MetricGroup": "Branches;Fed;FetchBW",
159*49898fefSIan Rogers        "MetricName": "UpTB"
160fd550098SAndi Kleen    },
161fd550098SAndi Kleen    {
16261ec07f5SHaiyan Song        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
163ed97cc6cSJin Yao        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
164*49898fefSIan Rogers        "MetricGroup": "Pipeline;Mem",
165ecd94f1bSKan Liang        "MetricName": "CPI"
166ecd94f1bSKan Liang    },
167ecd94f1bSKan Liang    {
16861ec07f5SHaiyan Song        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
169ecd94f1bSKan Liang        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
170ed97cc6cSJin Yao        "MetricGroup": "Pipeline",
171ecd94f1bSKan Liang        "MetricName": "CLKS"
172ecd94f1bSKan Liang    },
173ecd94f1bSKan Liang    {
174*49898fefSIan Rogers        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
175*49898fefSIan Rogers        "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
176*49898fefSIan Rogers        "MetricGroup": "TmaL1",
177*49898fefSIan Rogers        "MetricName": "SLOTS"
178*49898fefSIan Rogers    },
179*49898fefSIan Rogers    {
180*49898fefSIan Rogers        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
181*49898fefSIan Rogers        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
182*49898fefSIan Rogers        "MetricGroup": "TmaL1_SMT",
183*49898fefSIan Rogers        "MetricName": "SLOTS_SMT"
184*49898fefSIan Rogers    },
185*49898fefSIan Rogers    {
186*49898fefSIan Rogers        "BriefDescription": "The ratio of Executed- by Issued-Uops",
187*49898fefSIan Rogers        "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
188*49898fefSIan Rogers        "MetricGroup": "Cor;Pipeline",
189*49898fefSIan Rogers        "MetricName": "Execute_per_Issue",
190*49898fefSIan Rogers        "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
191*49898fefSIan Rogers    },
192*49898fefSIan Rogers    {
193*49898fefSIan Rogers        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
194ed97cc6cSJin Yao        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
195*49898fefSIan Rogers        "MetricGroup": "Ret;SMT;TmaL1",
196ed97cc6cSJin Yao        "MetricName": "CoreIPC"
197ecd94f1bSKan Liang    },
198ecd94f1bSKan Liang    {
199*49898fefSIan Rogers        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
200ed97cc6cSJin Yao        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
201*49898fefSIan Rogers        "MetricGroup": "Ret;SMT;TmaL1_SMT",
202ed97cc6cSJin Yao        "MetricName": "CoreIPC_SMT"
203ed97cc6cSJin Yao    },
204ed97cc6cSJin Yao    {
205ed97cc6cSJin Yao        "BriefDescription": "Floating Point Operations Per Cycle",
206ed97cc6cSJin Yao        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
207*49898fefSIan Rogers        "MetricGroup": "Ret;Flops",
208ed97cc6cSJin Yao        "MetricName": "FLOPc"
209ed97cc6cSJin Yao    },
210ed97cc6cSJin Yao    {
211ed97cc6cSJin Yao        "BriefDescription": "Floating Point Operations Per Cycle",
212ed97cc6cSJin Yao        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
213*49898fefSIan Rogers        "MetricGroup": "Ret;Flops_SMT",
214ed97cc6cSJin Yao        "MetricName": "FLOPc_SMT"
215ed97cc6cSJin Yao    },
216ed97cc6cSJin Yao    {
217*49898fefSIan Rogers        "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
218*49898fefSIan Rogers        "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
219*49898fefSIan Rogers        "MetricGroup": "Cor;Flops;HPC",
220*49898fefSIan Rogers        "MetricName": "FP_Arith_Utilization",
221*49898fefSIan Rogers        "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
222*49898fefSIan Rogers    },
223*49898fefSIan Rogers    {
224*49898fefSIan Rogers        "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
225*49898fefSIan Rogers        "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
226*49898fefSIan Rogers        "MetricGroup": "Cor;Flops;HPC_SMT",
227*49898fefSIan Rogers        "MetricName": "FP_Arith_Utilization_SMT",
228*49898fefSIan Rogers        "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
229*49898fefSIan Rogers    },
230*49898fefSIan Rogers    {
231ed97cc6cSJin Yao        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
232ed97cc6cSJin Yao        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
233*49898fefSIan Rogers        "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
234ed97cc6cSJin Yao        "MetricName": "ILP"
235ed97cc6cSJin Yao    },
236ed97cc6cSJin Yao    {
237*49898fefSIan Rogers        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
238*49898fefSIan Rogers        "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
239*49898fefSIan Rogers        "MetricGroup": "Bad;BrMispredicts",
240*49898fefSIan Rogers        "MetricName": "Branch_Misprediction_Cost"
241*49898fefSIan Rogers    },
242*49898fefSIan Rogers    {
243*49898fefSIan Rogers        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
244*49898fefSIan Rogers        "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
245*49898fefSIan Rogers        "MetricGroup": "Bad;BrMispredicts_SMT",
246*49898fefSIan Rogers        "MetricName": "Branch_Misprediction_Cost_SMT"
247*49898fefSIan Rogers    },
248*49898fefSIan Rogers    {
249ed97cc6cSJin Yao        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
250ed97cc6cSJin Yao        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
251*49898fefSIan Rogers        "MetricGroup": "Bad;BadSpec;BrMispredicts",
252ed97cc6cSJin Yao        "MetricName": "IpMispredict"
253ed97cc6cSJin Yao    },
254ed97cc6cSJin Yao    {
255ed97cc6cSJin Yao        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
256ed97cc6cSJin Yao        "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
257ed97cc6cSJin Yao        "MetricGroup": "SMT",
258ed97cc6cSJin Yao        "MetricName": "CORE_CLKS"
259fd550098SAndi Kleen    },
260fd550098SAndi Kleen    {
261038d3b53SJin Yao        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
262fd550098SAndi Kleen        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
263ed97cc6cSJin Yao        "MetricGroup": "InsType",
264038d3b53SJin Yao        "MetricName": "IpLoad"
265fd550098SAndi Kleen    },
266fd550098SAndi Kleen    {
267038d3b53SJin Yao        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
268fd550098SAndi Kleen        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
269ed97cc6cSJin Yao        "MetricGroup": "InsType",
270038d3b53SJin Yao        "MetricName": "IpStore"
271fd550098SAndi Kleen    },
272fd550098SAndi Kleen    {
273038d3b53SJin Yao        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
274fd550098SAndi Kleen        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
275*49898fefSIan Rogers        "MetricGroup": "Branches;Fed;InsType",
276038d3b53SJin Yao        "MetricName": "IpBranch"
277fd550098SAndi Kleen    },
278fd550098SAndi Kleen    {
279038d3b53SJin Yao        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
280fd550098SAndi Kleen        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
281*49898fefSIan Rogers        "MetricGroup": "Branches;Fed;PGO",
282fd550098SAndi Kleen        "MetricName": "IpCall"
283fd550098SAndi Kleen    },
284fd550098SAndi Kleen    {
285*49898fefSIan Rogers        "BriefDescription": "Instruction per taken branch",
286*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
287*49898fefSIan Rogers        "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
288*49898fefSIan Rogers        "MetricName": "IpTB"
289*49898fefSIan Rogers    },
290*49898fefSIan Rogers    {
291038d3b53SJin Yao        "BriefDescription": "Branch instructions per taken branch. ",
292038d3b53SJin Yao        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
293*49898fefSIan Rogers        "MetricGroup": "Branches;Fed;PGO",
294038d3b53SJin Yao        "MetricName": "BpTkBranch"
295038d3b53SJin Yao    },
296038d3b53SJin Yao    {
297038d3b53SJin Yao        "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
298038d3b53SJin Yao        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
299*49898fefSIan Rogers        "MetricGroup": "Flops;InsType",
300038d3b53SJin Yao        "MetricName": "IpFLOP"
301038d3b53SJin Yao    },
302038d3b53SJin Yao    {
303*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
304*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )",
305*49898fefSIan Rogers        "MetricGroup": "Flops;InsType",
306*49898fefSIan Rogers        "MetricName": "IpArith",
307*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
308*49898fefSIan Rogers    },
309*49898fefSIan Rogers    {
310*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
311*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
312*49898fefSIan Rogers        "MetricGroup": "Flops;FpScalar;InsType",
313*49898fefSIan Rogers        "MetricName": "IpArith_Scalar_SP",
314*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
315*49898fefSIan Rogers    },
316*49898fefSIan Rogers    {
317*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
318*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
319*49898fefSIan Rogers        "MetricGroup": "Flops;FpScalar;InsType",
320*49898fefSIan Rogers        "MetricName": "IpArith_Scalar_DP",
321*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
322*49898fefSIan Rogers    },
323*49898fefSIan Rogers    {
324*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
325*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )",
326*49898fefSIan Rogers        "MetricGroup": "Flops;FpVector;InsType",
327*49898fefSIan Rogers        "MetricName": "IpArith_AVX128",
328*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
329*49898fefSIan Rogers    },
330*49898fefSIan Rogers    {
331*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
332*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
333*49898fefSIan Rogers        "MetricGroup": "Flops;FpVector;InsType",
334*49898fefSIan Rogers        "MetricName": "IpArith_AVX256",
335*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
336*49898fefSIan Rogers    },
337*49898fefSIan Rogers    {
338*49898fefSIan Rogers        "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
339*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
340*49898fefSIan Rogers        "MetricGroup": "Flops;FpVector;InsType",
341*49898fefSIan Rogers        "MetricName": "IpArith_AVX512",
342*49898fefSIan Rogers        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
343*49898fefSIan Rogers    },
344*49898fefSIan Rogers    {
345ed97cc6cSJin Yao        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
34661ec07f5SHaiyan Song        "MetricExpr": "INST_RETIRED.ANY",
347ed97cc6cSJin Yao        "MetricGroup": "Summary;TmaL1",
348ecd94f1bSKan Liang        "MetricName": "Instructions"
349ecd94f1bSKan Liang    },
350ecd94f1bSKan Liang    {
351*49898fefSIan Rogers        "BriefDescription": "Average number of Uops issued by front-end when it issued something",
352*49898fefSIan Rogers        "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
353*49898fefSIan Rogers        "MetricGroup": "Fed;FetchBW",
354*49898fefSIan Rogers        "MetricName": "Fetch_UpC"
355*49898fefSIan Rogers    },
356*49898fefSIan Rogers    {
357038d3b53SJin Yao        "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)",
358038d3b53SJin Yao        "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
359*49898fefSIan Rogers        "MetricGroup": "Fed;LSD",
360038d3b53SJin Yao        "MetricName": "LSD_Coverage"
361038d3b53SJin Yao    },
362038d3b53SJin Yao    {
363038d3b53SJin Yao        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
364038d3b53SJin Yao        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
365*49898fefSIan Rogers        "MetricGroup": "DSB;Fed;FetchBW",
366038d3b53SJin Yao        "MetricName": "DSB_Coverage"
367038d3b53SJin Yao    },
368038d3b53SJin Yao    {
369*49898fefSIan Rogers        "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
370*49898fefSIan Rogers        "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
371*49898fefSIan Rogers        "MetricGroup": "DSBmiss;Fed",
372*49898fefSIan Rogers        "MetricName": "DSB_Misses_Cost"
373*49898fefSIan Rogers    },
374*49898fefSIan Rogers    {
375*49898fefSIan Rogers        "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
376*49898fefSIan Rogers        "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
377*49898fefSIan Rogers        "MetricGroup": "DSBmiss;Fed_SMT",
378*49898fefSIan Rogers        "MetricName": "DSB_Misses_Cost_SMT"
379*49898fefSIan Rogers    },
380*49898fefSIan Rogers    {
381*49898fefSIan Rogers        "BriefDescription": "Number of Instructions per non-speculative DSB miss",
382*49898fefSIan Rogers        "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
383*49898fefSIan Rogers        "MetricGroup": "DSBmiss;Fed",
384*49898fefSIan Rogers        "MetricName": "IpDSB_Miss_Ret"
385*49898fefSIan Rogers    },
386*49898fefSIan Rogers    {
387*49898fefSIan Rogers        "BriefDescription": "Fraction of branches that are non-taken conditionals",
388*49898fefSIan Rogers        "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
389*49898fefSIan Rogers        "MetricGroup": "Bad;Branches;CodeGen;PGO",
390*49898fefSIan Rogers        "MetricName": "Cond_NT"
391*49898fefSIan Rogers    },
392*49898fefSIan Rogers    {
393*49898fefSIan Rogers        "BriefDescription": "Fraction of branches that are taken conditionals",
394*49898fefSIan Rogers        "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN )  / BR_INST_RETIRED.ALL_BRANCHES",
395*49898fefSIan Rogers        "MetricGroup": "Bad;Branches;CodeGen;PGO",
396*49898fefSIan Rogers        "MetricName": "Cond_TK"
397*49898fefSIan Rogers    },
398*49898fefSIan Rogers    {
399*49898fefSIan Rogers        "BriefDescription": "Fraction of branches that are CALL or RET",
400*49898fefSIan Rogers        "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES",
401*49898fefSIan Rogers        "MetricGroup": "Bad;Branches",
402*49898fefSIan Rogers        "MetricName": "CallRet"
403*49898fefSIan Rogers    },
404*49898fefSIan Rogers    {
405*49898fefSIan Rogers        "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
406*49898fefSIan Rogers        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
407*49898fefSIan Rogers        "MetricGroup": "Bad;Branches",
408*49898fefSIan Rogers        "MetricName": "Jump"
409*49898fefSIan Rogers    },
410*49898fefSIan Rogers    {
411*49898fefSIan Rogers        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
41261ec07f5SHaiyan Song        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
413*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBound;MemoryLat",
414*49898fefSIan Rogers        "MetricName": "Load_Miss_Real_Latency",
415*49898fefSIan Rogers        "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
416ecd94f1bSKan Liang    },
417ecd94f1bSKan Liang    {
41861ec07f5SHaiyan Song        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
419fd550098SAndi Kleen        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
420*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBound;MemoryBW",
421ecd94f1bSKan Liang        "MetricName": "MLP"
422ecd94f1bSKan Liang    },
423ecd94f1bSKan Liang    {
424fd550098SAndi Kleen        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
42561ec07f5SHaiyan Song        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
426*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW",
427fd550098SAndi Kleen        "MetricName": "L1D_Cache_Fill_BW"
428fd550098SAndi Kleen    },
429fd550098SAndi Kleen    {
430fd550098SAndi Kleen        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
43161ec07f5SHaiyan Song        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
432*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW",
433fd550098SAndi Kleen        "MetricName": "L2_Cache_Fill_BW"
434fd550098SAndi Kleen    },
435fd550098SAndi Kleen    {
436fd550098SAndi Kleen        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
43761ec07f5SHaiyan Song        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
438*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW",
439fd550098SAndi Kleen        "MetricName": "L3_Cache_Fill_BW"
440fd550098SAndi Kleen    },
441fd550098SAndi Kleen    {
442ed97cc6cSJin Yao        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
44361ec07f5SHaiyan Song        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
444*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;Offcore",
445fd550098SAndi Kleen        "MetricName": "L3_Cache_Access_BW"
446fd550098SAndi Kleen    },
447fd550098SAndi Kleen    {
448fd550098SAndi Kleen        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
44961ec07f5SHaiyan Song        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
450*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
451fd550098SAndi Kleen        "MetricName": "L1MPKI"
452fd550098SAndi Kleen    },
453fd550098SAndi Kleen    {
454*49898fefSIan Rogers        "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
455*49898fefSIan Rogers        "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
456*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
457*49898fefSIan Rogers        "MetricName": "L1MPKI_Load"
458*49898fefSIan Rogers    },
459*49898fefSIan Rogers    {
460fd550098SAndi Kleen        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
46161ec07f5SHaiyan Song        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
462*49898fefSIan Rogers        "MetricGroup": "Mem;Backend;CacheMisses",
463fd550098SAndi Kleen        "MetricName": "L2MPKI"
464fd550098SAndi Kleen    },
465fd550098SAndi Kleen    {
466fd550098SAndi Kleen        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
46761ec07f5SHaiyan Song        "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
468*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses;Offcore",
469fd550098SAndi Kleen        "MetricName": "L2MPKI_All"
470fd550098SAndi Kleen    },
471fd550098SAndi Kleen    {
472*49898fefSIan Rogers        "BriefDescription": "L2 cache misses per kilo instruction for all demand loads  (including speculative)",
473*49898fefSIan Rogers        "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
474*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
475*49898fefSIan Rogers        "MetricName": "L2MPKI_Load"
476*49898fefSIan Rogers    },
477*49898fefSIan Rogers    {
478fd550098SAndi Kleen        "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
47961ec07f5SHaiyan Song        "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
480*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
481fd550098SAndi Kleen        "MetricName": "L2HPKI_All"
482fd550098SAndi Kleen    },
483fd550098SAndi Kleen    {
484*49898fefSIan Rogers        "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
485*49898fefSIan Rogers        "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
486*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
487*49898fefSIan Rogers        "MetricName": "L2HPKI_Load"
488*49898fefSIan Rogers    },
489*49898fefSIan Rogers    {
490fd550098SAndi Kleen        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
49161ec07f5SHaiyan Song        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
492*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
493fd550098SAndi Kleen        "MetricName": "L3MPKI"
494fd550098SAndi Kleen    },
495fd550098SAndi Kleen    {
496*49898fefSIan Rogers        "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads",
497*49898fefSIan Rogers        "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
498*49898fefSIan Rogers        "MetricGroup": "Mem;CacheMisses",
499*49898fefSIan Rogers        "MetricName": "FB_HPKI"
500*49898fefSIan Rogers    },
501*49898fefSIan Rogers    {
502*49898fefSIan Rogers        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
503*49898fefSIan Rogers        "MetricConstraint": "NO_NMI_WATCHDOG",
504*49898fefSIan Rogers        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
505*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryTLB",
506*49898fefSIan Rogers        "MetricName": "Page_Walks_Utilization"
507*49898fefSIan Rogers    },
508*49898fefSIan Rogers    {
509*49898fefSIan Rogers        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
510*49898fefSIan Rogers        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
511*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryTLB_SMT",
512*49898fefSIan Rogers        "MetricName": "Page_Walks_Utilization_SMT"
513*49898fefSIan Rogers    },
514*49898fefSIan Rogers    {
51561ec07f5SHaiyan Song        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
51661ec07f5SHaiyan Song        "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
517*49898fefSIan Rogers        "MetricGroup": "L2Evicts;Mem;Server",
51861ec07f5SHaiyan Song        "MetricName": "L2_Evictions_Silent_PKI"
51961ec07f5SHaiyan Song    },
52061ec07f5SHaiyan Song    {
52161ec07f5SHaiyan Song        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
52261ec07f5SHaiyan Song        "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
523*49898fefSIan Rogers        "MetricGroup": "L2Evicts;Mem;Server",
52461ec07f5SHaiyan Song        "MetricName": "L2_Evictions_NonSilent_PKI"
52561ec07f5SHaiyan Song    },
52661ec07f5SHaiyan Song    {
527fd550098SAndi Kleen        "BriefDescription": "Average CPU Utilization",
52861ec07f5SHaiyan Song        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
529ed97cc6cSJin Yao        "MetricGroup": "HPC;Summary",
530ecd94f1bSKan Liang        "MetricName": "CPU_Utilization"
531ecd94f1bSKan Liang    },
532ecd94f1bSKan Liang    {
533ed97cc6cSJin Yao        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
534ed97cc6cSJin Yao        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
535ed97cc6cSJin Yao        "MetricGroup": "Summary;Power",
536ed97cc6cSJin Yao        "MetricName": "Average_Frequency"
537ed97cc6cSJin Yao    },
538ed97cc6cSJin Yao    {
539ecd94f1bSKan Liang        "BriefDescription": "Giga Floating Point Operations Per Second",
540038d3b53SJin Yao        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
541*49898fefSIan Rogers        "MetricGroup": "Cor;Flops;HPC",
542ecd94f1bSKan Liang        "MetricName": "GFLOPs"
543ecd94f1bSKan Liang    },
544ecd94f1bSKan Liang    {
545fd550098SAndi Kleen        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
54661ec07f5SHaiyan Song        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
547ecd94f1bSKan Liang        "MetricGroup": "Power",
548ecd94f1bSKan Liang        "MetricName": "Turbo_Utilization"
549ecd94f1bSKan Liang    },
550ecd94f1bSKan Liang    {
551*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
552*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
553*49898fefSIan Rogers        "MetricGroup": "Power",
554*49898fefSIan Rogers        "MetricName": "Power_License0_Utilization",
555*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
556*49898fefSIan Rogers    },
557*49898fefSIan Rogers    {
558*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.",
559*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
560*49898fefSIan Rogers        "MetricGroup": "Power_SMT",
561*49898fefSIan Rogers        "MetricName": "Power_License0_Utilization_SMT",
562*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0.  This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU."
563*49898fefSIan Rogers    },
564*49898fefSIan Rogers    {
565*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
566*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
567*49898fefSIan Rogers        "MetricGroup": "Power",
568*49898fefSIan Rogers        "MetricName": "Power_License1_Utilization",
569*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
570*49898fefSIan Rogers    },
571*49898fefSIan Rogers    {
572*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.",
573*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
574*49898fefSIan Rogers        "MetricGroup": "Power_SMT",
575*49898fefSIan Rogers        "MetricName": "Power_License1_Utilization_SMT",
576*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1.  This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
577*49898fefSIan Rogers    },
578*49898fefSIan Rogers    {
579*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
580*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
581*49898fefSIan Rogers        "MetricGroup": "Power",
582*49898fefSIan Rogers        "MetricName": "Power_License2_Utilization",
583*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX).  This includes high current AVX 512-bit instructions."
584*49898fefSIan Rogers    },
585*49898fefSIan Rogers    {
586*49898fefSIan Rogers        "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.",
587*49898fefSIan Rogers        "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
588*49898fefSIan Rogers        "MetricGroup": "Power_SMT",
589*49898fefSIan Rogers        "MetricName": "Power_License2_Utilization_SMT",
590*49898fefSIan Rogers        "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX).  This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
591*49898fefSIan Rogers    },
592*49898fefSIan Rogers    {
59361ec07f5SHaiyan Song        "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
594ed97cc6cSJin Yao        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
595ed97cc6cSJin Yao        "MetricGroup": "SMT",
596ecd94f1bSKan Liang        "MetricName": "SMT_2T_Utilization"
597ecd94f1bSKan Liang    },
598ecd94f1bSKan Liang    {
599038d3b53SJin Yao        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
600ed97cc6cSJin Yao        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
601038d3b53SJin Yao        "MetricGroup": "OS",
602ecd94f1bSKan Liang        "MetricName": "Kernel_Utilization"
603ecd94f1bSKan Liang    },
604ecd94f1bSKan Liang    {
605*49898fefSIan Rogers        "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
606*49898fefSIan Rogers        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
607*49898fefSIan Rogers        "MetricGroup": "OS",
608*49898fefSIan Rogers        "MetricName": "Kernel_CPI"
609*49898fefSIan Rogers    },
610*49898fefSIan Rogers    {
611fd550098SAndi Kleen        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
612ed97cc6cSJin Yao        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
613*49898fefSIan Rogers        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
614fd550098SAndi Kleen        "MetricName": "DRAM_BW_Use"
615fd550098SAndi Kleen    },
616fd550098SAndi Kleen    {
617fd550098SAndi Kleen        "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
618038d3b53SJin Yao        "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
619*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryLat;SoC",
620038d3b53SJin Yao        "MetricName": "MEM_Read_Latency"
621fd550098SAndi Kleen    },
622fd550098SAndi Kleen    {
623fd550098SAndi Kleen        "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
624038d3b53SJin Yao        "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
625*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;SoC",
626038d3b53SJin Yao        "MetricName": "MEM_Parallel_Reads"
627fd550098SAndi Kleen    },
628fd550098SAndi Kleen    {
629fd550098SAndi Kleen        "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
63092aa1c2bSIan Rogers        "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
631*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryLat;SoC;Server",
632fd550098SAndi Kleen        "MetricName": "MEM_PMM_Read_Latency"
633fd550098SAndi Kleen    },
634fd550098SAndi Kleen    {
635*49898fefSIan Rogers        "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
636*49898fefSIan Rogers        "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@",
637*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryLat;SoC;Server",
638*49898fefSIan Rogers        "MetricName": "MEM_DRAM_Read_Latency"
639*49898fefSIan Rogers    },
640*49898fefSIan Rogers    {
641fd550098SAndi Kleen        "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
64292aa1c2bSIan Rogers        "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
643*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;SoC;Server",
644fd550098SAndi Kleen        "MetricName": "PMM_Read_BW"
645fd550098SAndi Kleen    },
646fd550098SAndi Kleen    {
647fd550098SAndi Kleen        "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
64892aa1c2bSIan Rogers        "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
649*49898fefSIan Rogers        "MetricGroup": "Mem;MemoryBW;SoC;Server",
650fd550098SAndi Kleen        "MetricName": "PMM_Write_BW"
651fd550098SAndi Kleen    },
652fd550098SAndi Kleen    {
653038d3b53SJin Yao        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
654038d3b53SJin Yao        "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
655*49898fefSIan Rogers        "MetricGroup": "IoBW;Mem;SoC;Server",
656038d3b53SJin Yao        "MetricName": "IO_Write_BW"
657038d3b53SJin Yao    },
658038d3b53SJin Yao    {
659038d3b53SJin Yao        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
660038d3b53SJin Yao        "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
661*49898fefSIan Rogers        "MetricGroup": "IoBW;Mem;SoC;Server",
662038d3b53SJin Yao        "MetricName": "IO_Read_BW"
663038d3b53SJin Yao    },
664038d3b53SJin Yao    {
665fd550098SAndi Kleen        "BriefDescription": "Socket actual clocks when any core is active on that socket",
66661ec07f5SHaiyan Song        "MetricExpr": "cha_0@event\\=0x0@",
667038d3b53SJin Yao        "MetricGroup": "SoC",
668fd550098SAndi Kleen        "MetricName": "Socket_CLKS"
669fd550098SAndi Kleen    },
670fd550098SAndi Kleen    {
671038d3b53SJin Yao        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
672ed97cc6cSJin Yao        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
673038d3b53SJin Yao        "MetricGroup": "Branches;OS",
67461ec07f5SHaiyan Song        "MetricName": "IpFarBranch"
67561ec07f5SHaiyan Song    },
67661ec07f5SHaiyan Song    {
67761ec07f5SHaiyan Song        "BriefDescription": "C3 residency percent per core",
678ecd94f1bSKan Liang        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
679ecd94f1bSKan Liang        "MetricGroup": "Power",
680ecd94f1bSKan Liang        "MetricName": "C3_Core_Residency"
681ecd94f1bSKan Liang    },
682ecd94f1bSKan Liang    {
68361ec07f5SHaiyan Song        "BriefDescription": "C6 residency percent per core",
684ecd94f1bSKan Liang        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
685ecd94f1bSKan Liang        "MetricGroup": "Power",
686ecd94f1bSKan Liang        "MetricName": "C6_Core_Residency"
687ecd94f1bSKan Liang    },
688ecd94f1bSKan Liang    {
68961ec07f5SHaiyan Song        "BriefDescription": "C7 residency percent per core",
690ecd94f1bSKan Liang        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
691ecd94f1bSKan Liang        "MetricGroup": "Power",
692ecd94f1bSKan Liang        "MetricName": "C7_Core_Residency"
693ecd94f1bSKan Liang    },
694ecd94f1bSKan Liang    {
69561ec07f5SHaiyan Song        "BriefDescription": "C2 residency percent per package",
696ecd94f1bSKan Liang        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
697ecd94f1bSKan Liang        "MetricGroup": "Power",
698ecd94f1bSKan Liang        "MetricName": "C2_Pkg_Residency"
699ecd94f1bSKan Liang    },
700ecd94f1bSKan Liang    {
70161ec07f5SHaiyan Song        "BriefDescription": "C3 residency percent per package",
702ecd94f1bSKan Liang        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
703ecd94f1bSKan Liang        "MetricGroup": "Power",
704ecd94f1bSKan Liang        "MetricName": "C3_Pkg_Residency"
705ecd94f1bSKan Liang    },
706ecd94f1bSKan Liang    {
70761ec07f5SHaiyan Song        "BriefDescription": "C6 residency percent per package",
708ecd94f1bSKan Liang        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
709ecd94f1bSKan Liang        "MetricGroup": "Power",
710ecd94f1bSKan Liang        "MetricName": "C6_Pkg_Residency"
711ecd94f1bSKan Liang    },
712ecd94f1bSKan Liang    {
71361ec07f5SHaiyan Song        "BriefDescription": "C7 residency percent per package",
714ecd94f1bSKan Liang        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
715ecd94f1bSKan Liang        "MetricGroup": "Power",
716ecd94f1bSKan Liang        "MetricName": "C7_Pkg_Residency"
717ecd94f1bSKan Liang    }
718ecd94f1bSKan Liang]
719