// SPDX-License-Identifier: GPL-2.0

/* Copyright (c) 2021 Facebook */
/* Copyright (c) 2021 Google */

#include <assert.h>
#include <limits.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <linux/perf_event.h>
#include <api/fs/fs.h>
#include <perf/bpf_perf.h>

#include "affinity.h"
#include "bpf_counter.h"
#include "cgroup.h"
#include "counts.h"
#include "debug.h"
#include "evsel.h"
#include "evlist.h"
#include "target.h"
#include "cpumap.h"
#include "thread_map.h"

#include "bpf_skel/bperf_cgroup.skel.h"

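/*
 * Software event counting cgroup switches.  It is opened on every cpu and
 * serves as the attach point for the on_cgrp_switch BPF program, so the
 * program runs once per cgroup switch.
 */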
static struct perf_event_attr cgrp_switch_attr = {
	.type = PERF_TYPE_SOFTWARE,
	.config = PERF_COUNT_SW_CGROUP_SWITCHES,
	.size = sizeof(cgrp_switch_attr),
	.sample_period = 1,
	.disabled = 1,
};

static struct evsel *cgrp_switch;
static struct bperf_cgroup_bpf *skel;

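/* perf event fd of @evt for the cpu at index @cpu in its cpu map */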
#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0))

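/*
 * Open and load the bperf_cgroup skeleton once for the whole evlist:
 * size the BPF maps, open the cgroup-switch event and attach the
 * on_cgrp_switch program on every cpu, open one copy of each event
 * without a cgroup and save its fd in the 'events' map, and record
 * the cgroup-id to index mapping in 'cgrp_idx'.
 */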
static int bperf_load_program(struct evlist *evlist)
{
	struct bpf_link *link;
	struct evsel *evsel;
	struct cgroup *cgrp, *leader_cgrp;
	int i, j;
	struct perf_cpu cpu;
	int total_cpus = cpu__max_cpu().cpu;
	int map_size, map_fd;
	int prog_fd, err;

	skel = bperf_cgroup_bpf__open();
	if (!skel) {
		pr_err("Failed to open cgroup skeleton\n");
		return -1;
	}

	skel->rodata->num_cpus = total_cpus;
	skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;

	BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);

	/* we need one copy of events per cpu for reading */
	map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
	bpf_map__set_max_entries(skel->maps.events, map_size);
	bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
	/* previous result is saved in a per-cpu array */
	map_size = evlist->core.nr_entries / nr_cgroups;
	bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
	/* cgroup result needs all events (per-cpu) */
	map_size = evlist->core.nr_entries;
	bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);

	set_max_rlimit();

	err = bperf_cgroup_bpf__load(skel);
	if (err) {
		pr_err("Failed to load cgroup skeleton\n");
		goto out;
	}

	if (cgroup_is_v2("perf_event") > 0)
		skel->bss->use_cgroup_v2 = 1;

	err = -1;

	cgrp_switch = evsel__new(&cgrp_switch_attr);
	if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) {
		pr_err("Failed to open cgroup switches event\n");
		goto out;
	}

	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
						      FD(cgrp_switch, i));
		if (IS_ERR(link)) {
			pr_err("Failed to attach cgroup program\n");
			err = PTR_ERR(link);
			goto out;
		}
	}

	/*
	 * Update cgrp_idx map from cgroup-id to event index.
	 */
	cgrp = NULL;
	i = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (cgrp == NULL || evsel->cgrp == leader_cgrp) {
			leader_cgrp = evsel->cgrp;
			evsel->cgrp = NULL;

			/* open single copy of the events w/o cgroup */
			err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
			if (err == 0)
				evsel->supported = true;

			map_fd = bpf_map__fd(skel->maps.events);
			perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
				int fd = FD(evsel, j);
				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;

				bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
			}

			evsel->cgrp = leader_cgrp;
		}

		if (evsel->cgrp == cgrp)
			continue;

		cgrp = evsel->cgrp;

		if (read_cgroup_id(cgrp) < 0) {
			pr_err("Failed to get cgroup id\n");
			err = -1;
			goto out;
		}

		map_fd = bpf_map__fd(skel->maps.cgrp_idx);
		err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY);
		if (err < 0) {
			pr_err("Failed to update cgroup index map\n");
			goto out;
		}

		i++;
	}

	/*
	 * bperf uses BPF_PROG_TEST_RUN to get accurate readings.  Check
	 * whether the kernel supports it.
	 */
	prog_fd = bpf_program__fd(skel->progs.trigger_read);
	err = bperf_trigger_reading(prog_fd, 0);
	if (err) {
		pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n"
			   "Therefore, --for-each-cgroup might show inaccurate readings\n");
		err = 0;
	}

out:
	return err;
}

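/*
 * Called for every evsel, but the BPF program is opened and loaded only once
 * for the evlist; later evsels just reuse the shared skeleton.
 */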
static int bperf_cgrp__load(struct evsel *evsel,
			    struct target *target __maybe_unused)
{
	static bool bperf_loaded = false;

	evsel->bperf_leader_prog_fd = -1;
	evsel->bperf_leader_link_fd = -1;

	if (!bperf_loaded && bperf_load_program(evsel->evlist))
		return -1;

	bperf_loaded = true;
	/* just to bypass bpf_counter_skip() */
	evsel->follower_skel = (struct bperf_follower_bpf *)skel;

	return 0;
}

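/*
 * The event fds were already saved in the 'events' map by
 * bperf_load_program(), so there is nothing to install here.
 */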
static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
				  int cpu __maybe_unused, int fd __maybe_unused)
{
	/* nothing to do */
	return 0;
}

/*
 * Trigger the leader prog on each cpu, so the cgrp_readings map can get
 * the latest results.
 */
static int bperf_cgrp__sync_counters(struct evlist *evlist)
{
	struct perf_cpu cpu;
	int idx;
	int prog_fd = bpf_program__fd(skel->progs.trigger_read);

	perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
		bperf_trigger_reading(prog_fd, cpu.cpu);

	return 0;
}

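/*
 * Enable/disable is done only once, from the first evsel (core.idx == 0):
 * sync the counters, then flip the 'enabled' flag in the BPF program's bss
 * to start or stop the per-cgroup accounting.
 */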
static int bperf_cgrp__enable(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	skel->bss->enabled = 1;
	return 0;
}

static int bperf_cgrp__disable(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	skel->bss->enabled = 0;
	return 0;
}

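/*
 * Copy the per-cgroup, per-cpu values from the 'cgrp_readings' map into the
 * perf_counts of every evsel.  Only the first evsel does the work; it walks
 * the whole evlist.
 */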
static int bperf_cgrp__read(struct evsel *evsel)
{
	struct evlist *evlist = evsel->evlist;
	int total_cpus = cpu__max_cpu().cpu;
	struct perf_counts_values *counts;
	struct bpf_perf_event_value *values;
	int reading_map_fd, err = 0;

	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	values = calloc(total_cpus, sizeof(*values));
	if (values == NULL)
		return -ENOMEM;

	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);

	evlist__for_each_entry(evlist, evsel) {
		__u32 idx = evsel->core.idx;
		int i;
		struct perf_cpu cpu;

		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
		if (err) {
			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
			       idx, evsel__name(evsel), evsel->cgrp->name);
			goto out;
		}

		perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
			counts = perf_counts(evsel->counts, i, 0);
			counts->val = values[cpu.cpu].counter;
			counts->ena = values[cpu.cpu].enabled;
			counts->run = values[cpu.cpu].running;
		}
	}

out:
	free(values);
	return err;
}

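/* Tear down the shared skeleton and the cgroup-switch event. */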
static int bperf_cgrp__destroy(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgroup_bpf__destroy(skel);
	evsel__delete(cgrp_switch);  // it'll destroy on_switch progs too

	return 0;
}

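/* Counter ops used when counting events per cgroup with BPF counters (--for-each-cgroup). */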
struct bpf_counter_ops bperf_cgrp_ops = {
	.load       = bperf_cgrp__load,
	.enable     = bperf_cgrp__enable,
	.disable    = bperf_cgrp__disable,
	.read       = bperf_cgrp__read,
	.install_pe = bperf_cgrp__install_pe,
	.destroy    = bperf_cgrp__destroy,
};