// SPDX-License-Identifier: GPL-2.0
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "parse-events.h"
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <api/fs/fs.h>
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
#include "record.h"
#include "../perf-sys.h"

typedef void (*setup_probe_fn_t)(struct evsel *evsel);

/*
 * Probe for kernel support of an attribute bit: open the event described by
 * 'str' once to make sure it works at all, let 'fn' set the attribute bit
 * under test, then try to open it again.  EACCES on a system-wide open
 * (pid == -1) means cpu-wide monitoring is not permitted, so fall back to
 * per-process; 'pid' is static so the fallback sticks for later probes.
 */
static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
{
	struct evlist *evlist;
	struct evsel *evsel;
	unsigned long flags = perf_event_open_cloexec_flag();
	int err = -EAGAIN, fd;
	static pid_t pid = -1;

	evlist = evlist__new();
	if (!evlist)
		return -ENOMEM;

	if (parse_events(evlist, str, NULL))
		goto out_delete;

	evsel = evlist__first(evlist);

	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);

	fn(evsel);

	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	if (fd < 0) {
		if (errno == EINVAL)
			err = -EINVAL;
		goto out_delete;
	}
	close(fd);
	err = 0;

out_delete:
	evlist__delete(evlist);
	return err;
}

static bool perf_probe_api(setup_probe_fn_t fn)
{
	/* Hardware events first, falling back to a software event. */
	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
	struct perf_cpu_map *cpus;
	int cpu, ret, i = 0;

	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	do {
		ret = perf_do_probe_api(fn, cpu, try[i++]);
		if (!ret)
			return true;
	} while (ret == -EAGAIN && try[i]);

	return false;
}

static void perf_probe_sample_identifier(struct evsel *evsel)
{
	evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
}

static void perf_probe_comm_exec(struct evsel *evsel)
{
	evsel->core.attr.comm_exec = 1;
}

static void perf_probe_context_switch(struct evsel *evsel)
{
	evsel->core.attr.context_switch = 1;
}

bool perf_can_sample_identifier(void)
{
	return perf_probe_api(perf_probe_sample_identifier);
}

static bool perf_can_comm_exec(void)
{
	return perf_probe_api(perf_probe_comm_exec);
}

bool perf_can_record_switch_events(void)
{
	return perf_probe_api(perf_probe_context_switch);
}

bool perf_can_record_cpu_wide(void)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
		.exclude_kernel = 1,
	};
	struct perf_cpu_map *cpus;
	int cpu, fd;

	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
	if (fd < 0)
		return false;
	close(fd);

	return true;
}

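/*
 * Example usage (a sketch, not part of this file): callers such as
 * 'perf record' consult these probes before setting attribute bits that
 * older kernels would reject with EINVAL, e.g.:
 *
 *	if (perf_can_record_switch_events())
 *		rec->opts.record_switch_events = true;
 *
 * where 'rec' stands for a hypothetical record session wrapping a
 * struct record_opts.
 */
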
/*
 * Architectures are expected to know if AUX area sampling is supported by the
 * hardware. Here we check for kernel support.
 */
bool perf_can_aux_sample(void)
{
	struct perf_event_attr attr = {
		.size = sizeof(struct perf_event_attr),
		.exclude_kernel = 1,
		/*
		 * Non-zero value causes the kernel to calculate the effective
		 * attribute size up to that byte.
		 */
		.aux_sample_size = 1,
	};
	int fd;

	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
	/*
	 * If the kernel attribute is big enough to contain aux_sample_size
	 * then we assume that it is supported. We rely on the kernel to
	 * validate the attribute size before anything else that could be
	 * wrong.
	 */
	if (fd < 0 && errno == E2BIG)
		return false;
	if (fd >= 0)
		close(fd);

	return true;
}

/*
 * perf_evsel__config_leader_sampling() uses special rules for leader sampling.
 * However, if the leader is an AUX area event, then the event to sample is
 * assumed to be the next event in the group.
 */
static struct evsel *perf_evsel__read_sampler(struct evsel *evsel,
					      struct evlist *evlist)
{
	struct evsel *leader = evsel->leader;

	if (perf_evsel__is_aux_event(leader)) {
		evlist__for_each_entry(evlist, evsel) {
			if (evsel->leader == leader && evsel != evsel->leader)
				return evsel;
		}
	}

	return leader;
}

static void perf_evsel__config_leader_sampling(struct evsel *evsel,
					       struct evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel->leader;
	struct evsel *read_sampler;

	if (!leader->sample_read)
		return;

	read_sampler = perf_evsel__read_sampler(evsel, evlist);

	if (evsel == read_sampler)
		return;

	/*
	 * Disable sampling for all group members other than the leader in
	 * case the leader 'leads' the sampling, except when the leader is an
	 * AUX area event, in which case the 2nd event in the group is the one
	 * that 'leads' the sampling.
	 */
	attr->freq = 0;
	attr->sample_freq = 0;
	attr->sample_period = 0;
	attr->write_backward = 0;

	/*
	 * We don't get a sample for slave events; we synthesize them when
	 * delivering the group leader sample. Set the slave event to follow
	 * the master sample_type to ease reporting.
	 * An AUX area event also has sample_type requirements, so also include
	 * the sample type bits from the leader's sample_type to cover that
	 * case.
	 */
	attr->sample_type = read_sampler->core.attr.sample_type |
			    leader->core.attr.sample_type;
}

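/*
 * Illustration (not code in this file): with a group spec such as
 * '{cycles,instructions}:S', sample_read is set on the group, so only the
 * leader ('cycles') generates samples; the period/frequency of
 * 'instructions' is zeroed above and its count is instead read out of the
 * leader's PERF_SAMPLE_READ data when the sample is delivered.
 */
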
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain)
{
	struct evsel *evsel;
	bool use_sample_identifier = false;
	bool use_comm_exec;
	bool sample_id = opts->sample_id;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	if (evlist->core.cpus->map[0] < 0)
		opts->no_inherit = true;

	use_comm_exec = perf_can_comm_exec();

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__config(evsel, opts, callchain);
		if (evsel->tracking && use_comm_exec)
			evsel->core.attr.comm_exec = 1;
	}

	/* Configure leader sampling here now that the sample type is known */
	evlist__for_each_entry(evlist, evsel)
		perf_evsel__config_leader_sampling(evsel, evlist);

	if (opts->full_auxtrace) {
		/*
		 * Need to be able to synthesize and parse selected events with
		 * arbitrary sample types, which requires always being able to
		 * match the id.
		 */
		use_sample_identifier = perf_can_sample_identifier();
		sample_id = true;
	} else if (evlist->core.nr_entries > 1) {
		struct evsel *first = evlist__first(evlist);

		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
				continue;
			use_sample_identifier = perf_can_sample_identifier();
			break;
		}
		sample_id = true;
	}

	if (sample_id) {
		evlist__for_each_entry(evlist, evsel)
			perf_evsel__set_sample_id(evsel, use_sample_identifier);
	}

	perf_evlist__set_id_pos(evlist);
}

static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}

static int record_opts__config_freq(struct record_opts *opts)
{
	bool user_freq = opts->user_freq != UINT_MAX;
	unsigned int max_rate;

	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (user_freq)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		pr_err("frequency and count are zero, aborting\n");
		return -1;
	}

	if (get_max_rate(&max_rate))
		return 0;

	/*
	 * User specified frequency is over current maximum.
	 */
	if (user_freq && (max_rate < opts->freq)) {
		if (opts->strict_freq) {
			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
			       "       Please use -F freq option with a lower value or consider\n"
			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
			       max_rate);
			return -1;
		} else {
			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
				   "         The kernel will lower it when perf's interrupts take too long.\n"
				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
				   max_rate, opts->freq, max_rate);

			opts->freq = max_rate;
		}
	}

	/*
	 * Default frequency is over current maximum.
	 */
	if (max_rate < opts->freq) {
		pr_warning("Lowering default frequency rate to %u.\n"
			   "Please consider tweaking "
			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
			   max_rate);
		opts->freq = max_rate;
	}

	return 0;
}

int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}

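/*
 * Worked example (illustrative): with perf_event_max_sample_rate at
 * 25000, 'perf record -F 100000' warns and throttles opts->freq to
 * 25000 Hz; adding --strict-freq makes it error out instead.
 */
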
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
{
	struct evlist *temp_evlist;
	struct evsel *evsel;
	int err, fd, cpu;
	bool ret = false;
	pid_t pid = -1;

	temp_evlist = evlist__new();
	if (!temp_evlist)
		return false;

	err = parse_events(temp_evlist, str, NULL);
	if (err)
		goto out_delete;

	evsel = evlist__last(temp_evlist);

	if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);

		cpu = cpus ? cpus->map[0] : 0;
		perf_cpu_map__put(cpus);
	} else {
		cpu = evlist->core.cpus->map[0];
	}

	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
					 perf_event_open_cloexec_flag());
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);
	ret = true;

out_delete:
	evlist__delete(temp_evlist);
	return ret;
}

int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
	} else {
		freq = atoi(str);
	}

	opts->user_freq = freq;
	return 0;
}
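
/*
 * Example wiring (a sketch; the actual option table lives in
 * builtin-record.c):
 *
 *	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
 *		     "profile at this frequency", record__parse_freq),
 *
 * which is what lets 'perf record -F max' sample at the kernel's current
 * perf_event_max_sample_rate.
 */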