1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/compiler.h> 3 #include <string.h> 4 #include <perf/cpumap.h> 5 #include <perf/evlist.h> 6 #include "metricgroup.h" 7 #include "tests.h" 8 #include "pmu-events/pmu-events.h" 9 #include "evlist.h" 10 #include "rblist.h" 11 #include "debug.h" 12 #include "expr.h" 13 #include "stat.h" 14 15 static struct pmu_event pme_test[] = { 16 { 17 .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread", 18 .metric_name = "IPC", 19 .metric_group = "group1", 20 }, 21 { 22 .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * " 23 "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))", 24 .metric_name = "Frontend_Bound_SMT", 25 }, 26 { 27 .metric_expr = "l1d\\-loads\\-misses / inst_retired.any", 28 .metric_name = "dcache_miss_cpi", 29 }, 30 { 31 .metric_expr = "l1i\\-loads\\-misses / inst_retired.any", 32 .metric_name = "icache_miss_cycles", 33 }, 34 { 35 .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)", 36 .metric_name = "cache_miss_cycles", 37 .metric_group = "group1", 38 }, 39 { 40 .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit", 41 .metric_name = "DCache_L2_All_Hits", 42 }, 43 { 44 .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + " 45 "l2_rqsts.pf_miss + l2_rqsts.rfo_miss", 46 .metric_name = "DCache_L2_All_Miss", 47 }, 48 { 49 .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss", 50 .metric_name = "DCache_L2_All", 51 }, 52 { 53 .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)", 54 .metric_name = "DCache_L2_Hits", 55 }, 56 { 57 .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)", 58 .metric_name = "DCache_L2_Misses", 59 }, 60 { 61 .metric_expr = "ipc + m2", 62 .metric_name = "M1", 63 }, 64 { 65 .metric_expr = "ipc + m1", 66 .metric_name = "M2", 67 }, 68 { 69 .metric_expr = "1/m3", 70 .metric_name = "M3", 71 }, 72 { 73 .metric_expr = "64 * l1d.replacement / 1000000000 / duration_time", 74 .metric_name = "L1D_Cache_Fill_BW", 75 }, 76 { 77 .name = NULL, 78 } 79 }; 80 81 static struct pmu_events_map map = { 82 .cpuid = "test", 83 .version = "1", 84 .type = "core", 85 .table = pme_test, 86 }; 87 88 struct value { 89 const char *event; 90 u64 val; 91 }; 92 93 static u64 find_value(const char *name, struct value *values) 94 { 95 struct value *v = values; 96 97 while (v->event) { 98 if (!strcmp(name, v->event)) 99 return v->val; 100 v++; 101 }; 102 return 0; 103 } 104 105 static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, 106 struct value *vals) 107 { 108 struct evsel *evsel; 109 u64 count; 110 111 evlist__for_each_entry(evlist, evsel) { 112 count = find_value(evsel->name, vals); 113 perf_stat__update_shadow_stats(evsel, count, 0, st); 114 if (!strcmp(evsel->name, "duration_time")) 115 update_stats(&walltime_nsecs_stats, count); 116 } 117 } 118 119 static double compute_single(struct rblist *metric_events, struct evlist *evlist, 120 struct runtime_stat *st, const char *name) 121 { 122 struct metric_expr *mexp; 123 struct metric_event *me; 124 struct evsel *evsel; 125 126 evlist__for_each_entry(evlist, evsel) { 127 me = metricgroup__lookup(metric_events, evsel, false); 128 if (me != NULL) { 129 list_for_each_entry (mexp, &me->head, nd) { 130 if (strcmp(mexp->metric_name, name)) 131 continue; 132 return test_generic_metric(mexp, 0, st); 133 } 134 } 135 } 136 return 0.; 137 } 138 139 static int __compute_metric(const char *name, struct value *vals, 140 const char *name1, double *ratio1, 141 const char *name2, double *ratio2) 142 { 143 struct rblist metric_events = { 144 .nr_entries = 0, 145 }; 146 struct perf_cpu_map *cpus; 147 struct runtime_stat st; 148 struct evlist *evlist; 149 int err; 150 151 /* 152 * We need to prepare evlist for stat mode running on CPU 0 153 * because that's where all the stats are going to be created. 154 */ 155 evlist = evlist__new(); 156 if (!evlist) 157 return -ENOMEM; 158 159 cpus = perf_cpu_map__new("0"); 160 if (!cpus) { 161 evlist__delete(evlist); 162 return -ENOMEM; 163 } 164 165 perf_evlist__set_maps(&evlist->core, cpus, NULL); 166 runtime_stat__init(&st); 167 168 /* Parse the metric into metric_events list. */ 169 err = metricgroup__parse_groups_test(evlist, &map, name, 170 false, false, 171 &metric_events); 172 if (err) 173 goto out; 174 175 err = evlist__alloc_stats(evlist, false); 176 if (err) 177 goto out; 178 179 /* Load the runtime stats with given numbers for events. */ 180 load_runtime_stat(&st, evlist, vals); 181 182 /* And execute the metric */ 183 if (name1 && ratio1) 184 *ratio1 = compute_single(&metric_events, evlist, &st, name1); 185 if (name2 && ratio2) 186 *ratio2 = compute_single(&metric_events, evlist, &st, name2); 187 188 out: 189 /* ... clenup. */ 190 metricgroup__rblist_exit(&metric_events); 191 runtime_stat__exit(&st); 192 evlist__free_stats(evlist); 193 perf_cpu_map__put(cpus); 194 evlist__delete(evlist); 195 return err; 196 } 197 198 static int compute_metric(const char *name, struct value *vals, double *ratio) 199 { 200 return __compute_metric(name, vals, name, ratio, NULL, NULL); 201 } 202 203 static int compute_metric_group(const char *name, struct value *vals, 204 const char *name1, double *ratio1, 205 const char *name2, double *ratio2) 206 { 207 return __compute_metric(name, vals, name1, ratio1, name2, ratio2); 208 } 209 210 static int test_ipc(void) 211 { 212 double ratio; 213 struct value vals[] = { 214 { .event = "inst_retired.any", .val = 300 }, 215 { .event = "cpu_clk_unhalted.thread", .val = 200 }, 216 { .event = NULL, }, 217 }; 218 219 TEST_ASSERT_VAL("failed to compute metric", 220 compute_metric("IPC", vals, &ratio) == 0); 221 222 TEST_ASSERT_VAL("IPC failed, wrong ratio", 223 ratio == 1.5); 224 return 0; 225 } 226 227 static int test_frontend(void) 228 { 229 double ratio; 230 struct value vals[] = { 231 { .event = "idq_uops_not_delivered.core", .val = 300 }, 232 { .event = "cpu_clk_unhalted.thread", .val = 200 }, 233 { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 }, 234 { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 }, 235 { .event = NULL, }, 236 }; 237 238 TEST_ASSERT_VAL("failed to compute metric", 239 compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0); 240 241 TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio", 242 ratio == 0.45); 243 return 0; 244 } 245 246 static int test_cache_miss_cycles(void) 247 { 248 double ratio; 249 struct value vals[] = { 250 { .event = "l1d-loads-misses", .val = 300 }, 251 { .event = "l1i-loads-misses", .val = 200 }, 252 { .event = "inst_retired.any", .val = 400 }, 253 { .event = NULL, }, 254 }; 255 256 TEST_ASSERT_VAL("failed to compute metric", 257 compute_metric("cache_miss_cycles", vals, &ratio) == 0); 258 259 TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio", 260 ratio == 1.25); 261 return 0; 262 } 263 264 265 /* 266 * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi 267 * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + 268 * l2_rqsts.pf_miss + l2_rqsts.rfo_miss 269 * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss 270 * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all) 271 * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all) 272 * 273 * l2_rqsts.demand_data_rd_hit = 100 274 * l2_rqsts.pf_hit = 200 275 * l2_rqsts.rfo_hi = 300 276 * l2_rqsts.all_demand_data_rd = 400 277 * l2_rqsts.pf_miss = 500 278 * l2_rqsts.rfo_miss = 600 279 * 280 * DCache_L2_All_Hits = 600 281 * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400 282 * DCache_L2_All = 600 + 1400 = 2000 283 * DCache_L2_Hits = 600 / 2000 = 0.3 284 * DCache_L2_Misses = 1400 / 2000 = 0.7 285 */ 286 static int test_dcache_l2(void) 287 { 288 double ratio; 289 struct value vals[] = { 290 { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 }, 291 { .event = "l2_rqsts.pf_hit", .val = 200 }, 292 { .event = "l2_rqsts.rfo_hit", .val = 300 }, 293 { .event = "l2_rqsts.all_demand_data_rd", .val = 400 }, 294 { .event = "l2_rqsts.pf_miss", .val = 500 }, 295 { .event = "l2_rqsts.rfo_miss", .val = 600 }, 296 { .event = NULL, }, 297 }; 298 299 TEST_ASSERT_VAL("failed to compute metric", 300 compute_metric("DCache_L2_Hits", vals, &ratio) == 0); 301 302 TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio", 303 ratio == 0.3); 304 305 TEST_ASSERT_VAL("failed to compute metric", 306 compute_metric("DCache_L2_Misses", vals, &ratio) == 0); 307 308 TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio", 309 ratio == 0.7); 310 return 0; 311 } 312 313 static int test_recursion_fail(void) 314 { 315 double ratio; 316 struct value vals[] = { 317 { .event = "inst_retired.any", .val = 300 }, 318 { .event = "cpu_clk_unhalted.thread", .val = 200 }, 319 { .event = NULL, }, 320 }; 321 322 TEST_ASSERT_VAL("failed to find recursion", 323 compute_metric("M1", vals, &ratio) == -1); 324 325 TEST_ASSERT_VAL("failed to find recursion", 326 compute_metric("M3", vals, &ratio) == -1); 327 return 0; 328 } 329 330 static int test_memory_bandwidth(void) 331 { 332 double ratio; 333 struct value vals[] = { 334 { .event = "l1d.replacement", .val = 4000000 }, 335 { .event = "duration_time", .val = 200000000 }, 336 { .event = NULL, }, 337 }; 338 339 TEST_ASSERT_VAL("failed to compute metric", 340 compute_metric("L1D_Cache_Fill_BW", vals, &ratio) == 0); 341 TEST_ASSERT_VAL("L1D_Cache_Fill_BW, wrong ratio", 342 1.28 == ratio); 343 344 return 0; 345 } 346 347 static int test_metric_group(void) 348 { 349 double ratio1, ratio2; 350 struct value vals[] = { 351 { .event = "cpu_clk_unhalted.thread", .val = 200 }, 352 { .event = "l1d-loads-misses", .val = 300 }, 353 { .event = "l1i-loads-misses", .val = 200 }, 354 { .event = "inst_retired.any", .val = 400 }, 355 { .event = NULL, }, 356 }; 357 358 TEST_ASSERT_VAL("failed to find recursion", 359 compute_metric_group("group1", vals, 360 "IPC", &ratio1, 361 "cache_miss_cycles", &ratio2) == 0); 362 363 TEST_ASSERT_VAL("group IPC failed, wrong ratio", 364 ratio1 == 2.0); 365 366 TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio", 367 ratio2 == 1.25); 368 return 0; 369 } 370 371 int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) 372 { 373 TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); 374 TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); 375 TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); 376 TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); 377 TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); 378 TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); 379 TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0); 380 return 0; 381 } 382