// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta*(val - stats->mean);

        if (val > stats->max)
                stats->max = val;

        if (val < stats->min)
                stats->min = val;
}

double avg_stats(struct stats *stats)
{
        return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
        double variance, variance_mean;

        if (stats->n < 2)
                return 0.0;

        variance = stats->M2 / (stats->n - 1);
        variance_mean = variance / stats->n;

        return sqrt(variance_mean);
}

double rel_stddev_stats(double stddev, double avg)
{
        double pct = 0.0;

        if (avg)
                pct = 100.0 * stddev/avg;

        return pct;
}

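/*
 * Illustrative usage sketch (editorial note, not part of the original
 * file): the helpers above implement Welford's online algorithm, so a
 * caller only pushes one value at a time:
 *
 *        struct stats st;
 *
 *        init_stats(&st);
 *        update_stats(&st, 10);
 *        update_stats(&st, 20);
 *        update_stats(&st, 30);
 *
 * At this point avg_stats(&st) == 20 and st.M2 == 200, so
 * stddev_stats(&st) returns sqrt((200 / 2) / 3) ~= 5.77, i.e. the
 * standard error of the mean rather than the plain sample standard
 * deviation (which would be 10).
 */
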
bool __perf_evsel_stat__is(struct evsel *evsel,
                           enum perf_stat_evsel_id id)
{
        struct perf_stat_evsel *ps = evsel->stats;

        return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(NONE, x),
        ID(CYCLES_IN_TX, cpu/cycles-t/),
        ID(TRANSACTION_START, cpu/tx-start/),
        ID(ELISION_START, cpu/el-start/),
        ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
        ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
        ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
        ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
        ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
        ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
        ID(SMI_NUM, msr/smi/),
        ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;
        int i;

        /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

        for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
                if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
                        ps->id = i;
                        break;
                }
        }
}

static void perf_evsel__reset_stat_priv(struct evsel *evsel)
{
        int i;
        struct perf_stat_evsel *ps = evsel->stats;

        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);

        perf_stat_evsel_id_init(evsel);
}

static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
{
        evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->stats == NULL)
                return -ENOMEM;
        perf_evsel__reset_stat_priv(evsel);
        return 0;
}

static void perf_evsel__free_stat_priv(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;

        if (ps)
                zfree(&ps->group_data);
        zfree(&evsel->stats);
}

static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
                                             int ncpus, int nthreads)
{
        struct perf_counts *counts;

        counts = perf_counts__new(ncpus, nthreads);
        if (counts)
                evsel->prev_raw_counts = counts;

        return counts ? 0 : -ENOMEM;
}

static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
{
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
}

static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
        int ncpus = perf_evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
            perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
            (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
                return -ENOMEM;

        return 0;
}

int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (perf_evsel__alloc_stats(evsel, alloc_raw))
                        goto out_free;
        }

        return 0;

out_free:
        perf_evlist__free_stats(evlist);
        return -1;
}

void perf_evlist__free_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                perf_evsel__free_stat_priv(evsel);
                perf_evsel__free_counts(evsel);
                perf_evsel__free_prev_raw_counts(evsel);
        }
}

void perf_evlist__reset_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                perf_evsel__reset_stat_priv(evsel);
                perf_evsel__reset_counts(evsel);
        }
}

static void zero_per_pkg(struct evsel *counter)
{
        if (counter->per_pkg_mask)
                memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}

static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
{
        unsigned long *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
        int s;

        *skip = false;

        if (!counter->per_pkg)
                return 0;

        if (perf_cpu_map__empty(cpus))
                return 0;

        if (!mask) {
                mask = zalloc(cpu__max_cpu());
                if (!mask)
                        return -ENOMEM;

                counter->per_pkg_mask = mask;
        }

        /*
         * We do not consider an event that has not run as a good
         * instance to mark a package as used (skip = 1). Otherwise
         * we may run into a situation where the first CPU in a package
         * is not running anything, yet the second one is, and this
         * function would mark the package as used after seeing the
         * first CPU and would then not read the values from the
         * second CPU.
         */
        if (!(vals->run && vals->ena))
                return 0;

        s = cpu_map__get_socket(cpus, cpu, NULL);
        if (s < 0)
                return -1;

        *skip = test_and_set_bit(s, mask) == 1;
        return 0;
}

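/*
 * Illustration (editorial note, not from the original source): with a
 * per-pkg event counted on two CPUs of the same socket, the first CPU
 * whose values show ena && run claims the socket bit via
 * test_and_set_bit() and is aggregated normally; the second CPU finds
 * the bit already set, gets *skip = true, and process_counter_values()
 * below substitutes a zeroed perf_counts_values for it, so the package
 * is counted only once.
 */
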
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
                       int cpu, int thread,
                       struct perf_counts_values *count)
{
        struct perf_counts_values *aggr = &evsel->counts->aggr;
        static struct perf_counts_values zero;
        bool skip = false;

        if (check_per_pkg(evsel, count, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }

        if (skip)
                count = &zero;

        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NONE:
                if (!evsel->snapshot)
                        perf_evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
                if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
                        perf_stat__update_shadow_stats(evsel, count->val,
                                                       cpu, &rt_stat);
                }

                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &config->stats[thread]);
                        else
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &rt_stat);
                }
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
                aggr->ena += count->ena;
                aggr->run += count->run;
                /* fall through */
        case AGGR_UNSET:
        default:
                break;
        }

        return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
                                struct evsel *counter)
{
        int nthreads = perf_thread_map__nr(counter->core.threads);
        int ncpus = perf_evsel__nr_cpus(counter);
        int cpu, thread;

        if (counter->system_wide)
                nthreads = 1;

        for (thread = 0; thread < nthreads; thread++) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        if (process_counter_values(config, counter, cpu, thread,
                                                   perf_counts(counter->counts, cpu, thread)))
                                return -1;
                }
        }

        return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter)
{
        struct perf_counts_values *aggr = &counter->counts->aggr;
        struct perf_stat_evsel *ps = counter->stats;
        u64 *count = counter->counts->aggr.values;
        int i, ret;

        aggr->val = aggr->ena = aggr->run = 0;

        /*
         * We calculate the counter's data every interval, and the
         * display code shows the ps->res_stats average. We need to
         * zero the stats for interval mode; otherwise, overall running
         * averages would be shown for each interval.
         */
        if (config->interval)
                init_stats(ps->res_stats);

        if (counter->per_pkg)
                zero_per_pkg(counter);

        ret = process_counter_maps(config, counter);
        if (ret)
                return ret;

        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;

        if (!counter->snapshot)
                perf_evsel__compute_deltas(counter, -1, -1, aggr);
        perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        perf_evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime to allow normalization during printout:
         */
        perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

        return 0;
}

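/*
 * Editorial note (not from the original source): for AGGR_GLOBAL,
 * count[0], count[1] and count[2] above are aggr.values[], i.e. the
 * summed value, enabled time and running time of the counter; each
 * slot feeds the matching ps->res_stats[] entry so the display code
 * can report an average and its noise across repeated measurements.
 * perf_counts_values__scale() is expected to extrapolate the value by
 * ena/run when the counter was time-multiplexed, recording that fact
 * in counts->scaled.
 */
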
int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
{
        struct perf_counts_values count;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;

        count.val = st->val;
        count.ena = st->ena;
        count.run = st->run;

        counter = perf_evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }

        *perf_counts(counter->counts, st->cpu, st->thread) = count;
        counter->supported = true;
        return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
        struct perf_record_stat *st = (struct perf_record_stat *)event;
        size_t ret;

        ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
                      st->id, st->cpu, st->thread);
        ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
                      st->val, st->ena, st->run);

        return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
        struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
        size_t ret;

        ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
                      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

        return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
        struct perf_stat_config sc;
        size_t ret;

        perf_event__read_stat_config(&sc, &event->stat_config);

        ret = fprintf(fp, "\n");
        ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
        ret += fprintf(fp, "... scale %d\n", sc.scale);
        ret += fprintf(fp, "... interval %u\n", sc.interval);

        return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target)
{
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel->leader;

        attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                            PERF_FORMAT_TOTAL_TIME_RUNNING;

        /*
         * If the event is part of a non-trivial group, enable group
         * reads (for the leader) and ID retrieval for all members.
         */
        if (leader->core.nr_members > 1)
                attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

        attr->inherit = !config->no_inherit;

        /*
         * Some events get initialized with sample_(period/type) set,
         * like tracepoints. Clear it up for counting.
         */
        attr->sample_period = 0;

        if (config->identifier)
                attr->sample_type = PERF_SAMPLE_IDENTIFIER;

        /*
         * Disable all counters initially; they will be enabled either
         * manually by us or by the kernel via enable_on_exec set later.
         */
        if (perf_evsel__is_group_leader(evsel)) {
                attr->disabled = 1;

                /*
                 * In case of initial_delay we enable tracee
                 * events manually.
                 */
                if (target__none(target) && !config->initial_delay)
                        attr->enable_on_exec = 1;
        }

        if (target__has_cpu(target) && !target__has_per_thread(target))
                return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));

        return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}

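/*
 * Illustrative sketch (editorial note, not from the original source):
 * for a plain forked workload such as "perf stat -e cycles -- sleep 1"
 * with no CPU/PID target and no --delay, the code above should leave
 * the leader's attr with disabled = 1, enable_on_exec = 1, inherit = 1
 * and read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING, and the event is then opened per
 * thread via perf_evsel__open_per_thread().
 */
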
int perf_stat_synthesize_config(struct perf_stat_config *config,
                                struct perf_tool *tool,
                                struct evlist *evlist,
                                perf_event__handler_t process,
                                bool attrs)
{
        int err;

        if (attrs) {
                err = perf_event__synthesize_attrs(tool, evlist, process);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }
        }

        err = perf_event__synthesize_extra_attr(tool, evlist, process,
                                                attrs);
        if (err < 0) {
                pr_err("Couldn't synthesize extra attrs.\n");
                return err;
        }

        err = perf_event__synthesize_thread_map2(tool, evlist->core.threads,
                                                 process, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize thread map.\n");
                return err;
        }

        err = perf_event__synthesize_cpu_map(tool, evlist->core.cpus,
                                             process, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize cpu map.\n");
                return err;
        }

        err = perf_event__synthesize_stat_config(tool, config, process, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize config.\n");
                return err;
        }

        return 0;
}