1944138f0SNamhyung Kim // SPDX-License-Identifier: GPL-2.0 2944138f0SNamhyung Kim 3944138f0SNamhyung Kim /* Copyright (c) 2021 Facebook */ 4944138f0SNamhyung Kim /* Copyright (c) 2021 Google */ 5944138f0SNamhyung Kim 6944138f0SNamhyung Kim #include <assert.h> 7944138f0SNamhyung Kim #include <limits.h> 8944138f0SNamhyung Kim #include <unistd.h> 9944138f0SNamhyung Kim #include <sys/file.h> 10944138f0SNamhyung Kim #include <sys/time.h> 11944138f0SNamhyung Kim #include <sys/resource.h> 12944138f0SNamhyung Kim #include <linux/err.h> 13944138f0SNamhyung Kim #include <linux/zalloc.h> 14944138f0SNamhyung Kim #include <linux/perf_event.h> 15944138f0SNamhyung Kim #include <api/fs/fs.h> 16944138f0SNamhyung Kim #include <perf/bpf_perf.h> 17944138f0SNamhyung Kim 18944138f0SNamhyung Kim #include "affinity.h" 19944138f0SNamhyung Kim #include "bpf_counter.h" 20944138f0SNamhyung Kim #include "cgroup.h" 21944138f0SNamhyung Kim #include "counts.h" 22944138f0SNamhyung Kim #include "debug.h" 23944138f0SNamhyung Kim #include "evsel.h" 24944138f0SNamhyung Kim #include "evlist.h" 25944138f0SNamhyung Kim #include "target.h" 26944138f0SNamhyung Kim #include "cpumap.h" 27944138f0SNamhyung Kim #include "thread_map.h" 28944138f0SNamhyung Kim 29944138f0SNamhyung Kim #include "bpf_skel/bperf_cgroup.skel.h" 30944138f0SNamhyung Kim 31944138f0SNamhyung Kim static struct perf_event_attr cgrp_switch_attr = { 32944138f0SNamhyung Kim .type = PERF_TYPE_SOFTWARE, 33944138f0SNamhyung Kim .config = PERF_COUNT_SW_CGROUP_SWITCHES, 34944138f0SNamhyung Kim .size = sizeof(cgrp_switch_attr), 35944138f0SNamhyung Kim .sample_period = 1, 36944138f0SNamhyung Kim .disabled = 1, 37944138f0SNamhyung Kim }; 38944138f0SNamhyung Kim 39944138f0SNamhyung Kim static struct evsel *cgrp_switch; 40944138f0SNamhyung Kim static struct bperf_cgroup_bpf *skel; 41944138f0SNamhyung Kim 42944138f0SNamhyung Kim #define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) 43944138f0SNamhyung Kim 44944138f0SNamhyung Kim static int bperf_load_program(struct evlist *evlist) 45944138f0SNamhyung Kim { 46944138f0SNamhyung Kim struct bpf_link *link; 47944138f0SNamhyung Kim struct evsel *evsel; 48944138f0SNamhyung Kim struct cgroup *cgrp, *leader_cgrp; 49944138f0SNamhyung Kim __u32 i, cpu; 50944138f0SNamhyung Kim __u32 nr_cpus = evlist->core.all_cpus->nr; 51944138f0SNamhyung Kim int total_cpus = cpu__max_cpu(); 52944138f0SNamhyung Kim int map_size, map_fd; 53944138f0SNamhyung Kim int prog_fd, err; 54944138f0SNamhyung Kim 55944138f0SNamhyung Kim skel = bperf_cgroup_bpf__open(); 56944138f0SNamhyung Kim if (!skel) { 57944138f0SNamhyung Kim pr_err("Failed to open cgroup skeleton\n"); 58944138f0SNamhyung Kim return -1; 59944138f0SNamhyung Kim } 60944138f0SNamhyung Kim 61944138f0SNamhyung Kim skel->rodata->num_cpus = total_cpus; 62944138f0SNamhyung Kim skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; 63944138f0SNamhyung Kim 64944138f0SNamhyung Kim BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); 65944138f0SNamhyung Kim 66944138f0SNamhyung Kim /* we need one copy of events per cpu for reading */ 67944138f0SNamhyung Kim map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; 68944138f0SNamhyung Kim bpf_map__resize(skel->maps.events, map_size); 69944138f0SNamhyung Kim bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); 70944138f0SNamhyung Kim /* previous result is saved in a per-cpu array */ 71944138f0SNamhyung Kim map_size = evlist->core.nr_entries / nr_cgroups; 72944138f0SNamhyung Kim bpf_map__resize(skel->maps.prev_readings, map_size); 73944138f0SNamhyung Kim /* cgroup result needs all events (per-cpu) */ 74944138f0SNamhyung Kim map_size = evlist->core.nr_entries; 75944138f0SNamhyung Kim bpf_map__resize(skel->maps.cgrp_readings, map_size); 76944138f0SNamhyung Kim 77944138f0SNamhyung Kim set_max_rlimit(); 78944138f0SNamhyung Kim 79944138f0SNamhyung Kim err = bperf_cgroup_bpf__load(skel); 80944138f0SNamhyung Kim if (err) { 81944138f0SNamhyung Kim pr_err("Failed to load cgroup skeleton\n"); 82944138f0SNamhyung Kim goto out; 83944138f0SNamhyung Kim } 84944138f0SNamhyung Kim 85944138f0SNamhyung Kim if (cgroup_is_v2("perf_event") > 0) 86944138f0SNamhyung Kim skel->bss->use_cgroup_v2 = 1; 87944138f0SNamhyung Kim 88944138f0SNamhyung Kim err = -1; 89944138f0SNamhyung Kim 90944138f0SNamhyung Kim cgrp_switch = evsel__new(&cgrp_switch_attr); 91944138f0SNamhyung Kim if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) { 92944138f0SNamhyung Kim pr_err("Failed to open cgroup switches event\n"); 93944138f0SNamhyung Kim goto out; 94944138f0SNamhyung Kim } 95944138f0SNamhyung Kim 96944138f0SNamhyung Kim for (i = 0; i < nr_cpus; i++) { 97944138f0SNamhyung Kim link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch, 98944138f0SNamhyung Kim FD(cgrp_switch, i)); 99944138f0SNamhyung Kim if (IS_ERR(link)) { 100944138f0SNamhyung Kim pr_err("Failed to attach cgroup program\n"); 101944138f0SNamhyung Kim err = PTR_ERR(link); 102944138f0SNamhyung Kim goto out; 103944138f0SNamhyung Kim } 104944138f0SNamhyung Kim } 105944138f0SNamhyung Kim 106944138f0SNamhyung Kim /* 107944138f0SNamhyung Kim * Update cgrp_idx map from cgroup-id to event index. 108944138f0SNamhyung Kim */ 109944138f0SNamhyung Kim cgrp = NULL; 110944138f0SNamhyung Kim i = 0; 111944138f0SNamhyung Kim 112944138f0SNamhyung Kim evlist__for_each_entry(evlist, evsel) { 113944138f0SNamhyung Kim if (cgrp == NULL || evsel->cgrp == leader_cgrp) { 114944138f0SNamhyung Kim leader_cgrp = evsel->cgrp; 115944138f0SNamhyung Kim evsel->cgrp = NULL; 116944138f0SNamhyung Kim 117944138f0SNamhyung Kim /* open single copy of the events w/o cgroup */ 118944138f0SNamhyung Kim err = evsel__open_per_cpu(evsel, evlist->core.all_cpus, -1); 119944138f0SNamhyung Kim if (err) { 120944138f0SNamhyung Kim pr_err("Failed to open first cgroup events\n"); 121944138f0SNamhyung Kim goto out; 122944138f0SNamhyung Kim } 123944138f0SNamhyung Kim 124944138f0SNamhyung Kim map_fd = bpf_map__fd(skel->maps.events); 125944138f0SNamhyung Kim for (cpu = 0; cpu < nr_cpus; cpu++) { 126944138f0SNamhyung Kim int fd = FD(evsel, cpu); 127*38fe0e01SJiri Olsa __u32 idx = evsel->core.idx * total_cpus + 128944138f0SNamhyung Kim evlist->core.all_cpus->map[cpu]; 129944138f0SNamhyung Kim 130944138f0SNamhyung Kim err = bpf_map_update_elem(map_fd, &idx, &fd, 131944138f0SNamhyung Kim BPF_ANY); 132944138f0SNamhyung Kim if (err < 0) { 133944138f0SNamhyung Kim pr_err("Failed to update perf_event fd\n"); 134944138f0SNamhyung Kim goto out; 135944138f0SNamhyung Kim } 136944138f0SNamhyung Kim } 137944138f0SNamhyung Kim 138944138f0SNamhyung Kim evsel->cgrp = leader_cgrp; 139944138f0SNamhyung Kim } 140944138f0SNamhyung Kim evsel->supported = true; 141944138f0SNamhyung Kim 142944138f0SNamhyung Kim if (evsel->cgrp == cgrp) 143944138f0SNamhyung Kim continue; 144944138f0SNamhyung Kim 145944138f0SNamhyung Kim cgrp = evsel->cgrp; 146944138f0SNamhyung Kim 147944138f0SNamhyung Kim if (read_cgroup_id(cgrp) < 0) { 148944138f0SNamhyung Kim pr_err("Failed to get cgroup id\n"); 149944138f0SNamhyung Kim err = -1; 150944138f0SNamhyung Kim goto out; 151944138f0SNamhyung Kim } 152944138f0SNamhyung Kim 153944138f0SNamhyung Kim map_fd = bpf_map__fd(skel->maps.cgrp_idx); 154944138f0SNamhyung Kim err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY); 155944138f0SNamhyung Kim if (err < 0) { 156944138f0SNamhyung Kim pr_err("Failed to update cgroup index map\n"); 157944138f0SNamhyung Kim goto out; 158944138f0SNamhyung Kim } 159944138f0SNamhyung Kim 160944138f0SNamhyung Kim i++; 161944138f0SNamhyung Kim } 162944138f0SNamhyung Kim 163944138f0SNamhyung Kim /* 164944138f0SNamhyung Kim * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check 165944138f0SNamhyung Kim * whether the kernel support it 166944138f0SNamhyung Kim */ 167944138f0SNamhyung Kim prog_fd = bpf_program__fd(skel->progs.trigger_read); 168944138f0SNamhyung Kim err = bperf_trigger_reading(prog_fd, 0); 169944138f0SNamhyung Kim if (err) { 170944138f0SNamhyung Kim pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n" 171944138f0SNamhyung Kim "Therefore, --for-each-cgroup might show inaccurate readings\n"); 172944138f0SNamhyung Kim err = 0; 173944138f0SNamhyung Kim } 174944138f0SNamhyung Kim 175944138f0SNamhyung Kim out: 176944138f0SNamhyung Kim return err; 177944138f0SNamhyung Kim } 178944138f0SNamhyung Kim 179944138f0SNamhyung Kim static int bperf_cgrp__load(struct evsel *evsel, 180944138f0SNamhyung Kim struct target *target __maybe_unused) 181944138f0SNamhyung Kim { 182944138f0SNamhyung Kim static bool bperf_loaded = false; 183944138f0SNamhyung Kim 184944138f0SNamhyung Kim evsel->bperf_leader_prog_fd = -1; 185944138f0SNamhyung Kim evsel->bperf_leader_link_fd = -1; 186944138f0SNamhyung Kim 187944138f0SNamhyung Kim if (!bperf_loaded && bperf_load_program(evsel->evlist)) 188944138f0SNamhyung Kim return -1; 189944138f0SNamhyung Kim 190944138f0SNamhyung Kim bperf_loaded = true; 191944138f0SNamhyung Kim /* just to bypass bpf_counter_skip() */ 192944138f0SNamhyung Kim evsel->follower_skel = (struct bperf_follower_bpf *)skel; 193944138f0SNamhyung Kim 194944138f0SNamhyung Kim return 0; 195944138f0SNamhyung Kim } 196944138f0SNamhyung Kim 197944138f0SNamhyung Kim static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused, 198944138f0SNamhyung Kim int cpu __maybe_unused, int fd __maybe_unused) 199944138f0SNamhyung Kim { 200944138f0SNamhyung Kim /* nothing to do */ 201944138f0SNamhyung Kim return 0; 202944138f0SNamhyung Kim } 203944138f0SNamhyung Kim 204944138f0SNamhyung Kim /* 205944138f0SNamhyung Kim * trigger the leader prog on each cpu, so the cgrp_reading map could get 206944138f0SNamhyung Kim * the latest results. 207944138f0SNamhyung Kim */ 208944138f0SNamhyung Kim static int bperf_cgrp__sync_counters(struct evlist *evlist) 209944138f0SNamhyung Kim { 210944138f0SNamhyung Kim int i, cpu; 211944138f0SNamhyung Kim int nr_cpus = evlist->core.all_cpus->nr; 212944138f0SNamhyung Kim int prog_fd = bpf_program__fd(skel->progs.trigger_read); 213944138f0SNamhyung Kim 214944138f0SNamhyung Kim for (i = 0; i < nr_cpus; i++) { 215944138f0SNamhyung Kim cpu = evlist->core.all_cpus->map[i]; 216944138f0SNamhyung Kim bperf_trigger_reading(prog_fd, cpu); 217944138f0SNamhyung Kim } 218944138f0SNamhyung Kim 219944138f0SNamhyung Kim return 0; 220944138f0SNamhyung Kim } 221944138f0SNamhyung Kim 222944138f0SNamhyung Kim static int bperf_cgrp__enable(struct evsel *evsel) 223944138f0SNamhyung Kim { 224*38fe0e01SJiri Olsa if (evsel->core.idx) 225944138f0SNamhyung Kim return 0; 226944138f0SNamhyung Kim 227944138f0SNamhyung Kim bperf_cgrp__sync_counters(evsel->evlist); 228944138f0SNamhyung Kim 229944138f0SNamhyung Kim skel->bss->enabled = 1; 230944138f0SNamhyung Kim return 0; 231944138f0SNamhyung Kim } 232944138f0SNamhyung Kim 233944138f0SNamhyung Kim static int bperf_cgrp__disable(struct evsel *evsel) 234944138f0SNamhyung Kim { 235*38fe0e01SJiri Olsa if (evsel->core.idx) 236944138f0SNamhyung Kim return 0; 237944138f0SNamhyung Kim 238944138f0SNamhyung Kim bperf_cgrp__sync_counters(evsel->evlist); 239944138f0SNamhyung Kim 240944138f0SNamhyung Kim skel->bss->enabled = 0; 241944138f0SNamhyung Kim return 0; 242944138f0SNamhyung Kim } 243944138f0SNamhyung Kim 244944138f0SNamhyung Kim static int bperf_cgrp__read(struct evsel *evsel) 245944138f0SNamhyung Kim { 246944138f0SNamhyung Kim struct evlist *evlist = evsel->evlist; 247944138f0SNamhyung Kim int i, cpu, nr_cpus = evlist->core.all_cpus->nr; 248944138f0SNamhyung Kim int total_cpus = cpu__max_cpu(); 249944138f0SNamhyung Kim struct perf_counts_values *counts; 250944138f0SNamhyung Kim struct bpf_perf_event_value *values; 251944138f0SNamhyung Kim int reading_map_fd, err = 0; 252944138f0SNamhyung Kim __u32 idx; 253944138f0SNamhyung Kim 254*38fe0e01SJiri Olsa if (evsel->core.idx) 255944138f0SNamhyung Kim return 0; 256944138f0SNamhyung Kim 257944138f0SNamhyung Kim bperf_cgrp__sync_counters(evsel->evlist); 258944138f0SNamhyung Kim 259944138f0SNamhyung Kim values = calloc(total_cpus, sizeof(*values)); 260944138f0SNamhyung Kim if (values == NULL) 261944138f0SNamhyung Kim return -ENOMEM; 262944138f0SNamhyung Kim 263944138f0SNamhyung Kim reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings); 264944138f0SNamhyung Kim 265944138f0SNamhyung Kim evlist__for_each_entry(evlist, evsel) { 266*38fe0e01SJiri Olsa idx = evsel->core.idx; 267944138f0SNamhyung Kim err = bpf_map_lookup_elem(reading_map_fd, &idx, values); 268944138f0SNamhyung Kim if (err) { 269944138f0SNamhyung Kim pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", 270944138f0SNamhyung Kim idx, evsel__name(evsel), evsel->cgrp->name); 271944138f0SNamhyung Kim goto out; 272944138f0SNamhyung Kim } 273944138f0SNamhyung Kim 274944138f0SNamhyung Kim for (i = 0; i < nr_cpus; i++) { 275944138f0SNamhyung Kim cpu = evlist->core.all_cpus->map[i]; 276944138f0SNamhyung Kim 277944138f0SNamhyung Kim counts = perf_counts(evsel->counts, i, 0); 278944138f0SNamhyung Kim counts->val = values[cpu].counter; 279944138f0SNamhyung Kim counts->ena = values[cpu].enabled; 280944138f0SNamhyung Kim counts->run = values[cpu].running; 281944138f0SNamhyung Kim } 282944138f0SNamhyung Kim } 283944138f0SNamhyung Kim 284944138f0SNamhyung Kim out: 285944138f0SNamhyung Kim free(values); 286944138f0SNamhyung Kim return err; 287944138f0SNamhyung Kim } 288944138f0SNamhyung Kim 289944138f0SNamhyung Kim static int bperf_cgrp__destroy(struct evsel *evsel) 290944138f0SNamhyung Kim { 291*38fe0e01SJiri Olsa if (evsel->core.idx) 292944138f0SNamhyung Kim return 0; 293944138f0SNamhyung Kim 294944138f0SNamhyung Kim bperf_cgroup_bpf__destroy(skel); 295944138f0SNamhyung Kim evsel__delete(cgrp_switch); // it'll destroy on_switch progs too 296944138f0SNamhyung Kim 297944138f0SNamhyung Kim return 0; 298944138f0SNamhyung Kim } 299944138f0SNamhyung Kim 300944138f0SNamhyung Kim struct bpf_counter_ops bperf_cgrp_ops = { 301944138f0SNamhyung Kim .load = bperf_cgrp__load, 302944138f0SNamhyung Kim .enable = bperf_cgrp__enable, 303944138f0SNamhyung Kim .disable = bperf_cgrp__disable, 304944138f0SNamhyung Kim .read = bperf_cgrp__read, 305944138f0SNamhyung Kim .install_pe = bperf_cgrp__install_pe, 306944138f0SNamhyung Kim .destroy = bperf_cgrp__destroy, 307944138f0SNamhyung Kim }; 308