xref: /openbmc/linux/samples/bpf/xdp_sample.bpf.c (revision ee65728e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
3 #include "xdp_sample.bpf.h"
4 
5 #include <bpf/bpf_tracing.h>
6 #include <bpf/bpf_core_read.h>
7 #include <bpf/bpf_helpers.h>
8 
/*
 * Counter maps updated by the tracepoint programs in this file.  The
 * array_map type is not defined here (presumably it comes from
 * xdp_sample.bpf.h -- TODO confirm).
 *
 * Indexing as used by the programs in this file:
 *   redir_err_cnt, exception_cnt : key * nr_cpus + current CPU
 *   cpumap_enqueue_cnt           : to_cpu * nr_cpus + current CPU
 *   cpumap_kthread_cnt,
 *   devmap_xmit_cnt              : current CPU
 * rx_cnt is not touched by any program visible in this file.
 */
9 array_map rx_cnt SEC(".maps");
10 array_map redir_err_cnt SEC(".maps");
11 array_map cpumap_enqueue_cnt SEC(".maps");
12 array_map cpumap_kthread_cnt SEC(".maps");
13 array_map exception_cnt SEC(".maps");
14 array_map devmap_xmit_cnt SEC(".maps");
15 
/*
 * Per-CPU hash of devmap transmit counters, filled in by
 * tp_xdp_devmap_xmit_multi.  The u64 key carries the source ifindex in its
 * upper 32 bits and the destination ifindex in its lower 32 bits; the value
 * is one struct datarec.
 * NOTE(review): 32 * 32 presumably mirrors the 32-entry from_match/to_match
 * filters (every from/to pair) -- confirm.
 */
16 struct {
17 	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
18 	__uint(max_entries, 32 * 32);
19 	__type(key, u64);
20 	__type(value, struct datarec);
21 } devmap_xmit_cnt_multi SEC(".maps");
22 
/*
 * Row stride used when the programs in this file compute
 * "key * nr_cpus + cpu" indexes into the counter arrays.  Read-only from the
 * BPF side; presumably filled in by the loader before load -- TODO confirm.
 */
23 const volatile int nr_cpus = 0;
24 
25 /* These can be set before loading so that redundant comparisons can be DCE'd by
26  * the verifier, and only actual matches are tried after loading tp_btf program.
27  * This allows sample to filter tracepoint stats based on net_device.
28  */
/* Zero-terminated ifindex filters consumed via IN_SET(); all-zero matches everything. */
29 const volatile int from_match[32] = {};
30 const volatile int to_match[32] = {};
31 
/*
 * When non-zero, the cpumap tracepoint programs in this file only account
 * events whose map_id equals this value; zero disables the filter.
 */
32 int cpumap_map_id = 0;
33 
/*
 * IN_SET(a, b) - test whether value b occurs in the zero-terminated array a.
 *
 * a must be a true array, because ARRAY_SIZE() is applied to it.  Scanning
 * stops at the first zero element or at the end of the array.  An array whose
 * first element is zero is the empty set and matches every b.
 *
 * b is evaluated exactly once, and the macro's locals carry an __in_set_
 * prefix so that a caller expression such as IN_SET(set, i) is not captured
 * by the macro's own loop counter.
 */
#define IN_SET(a, b)                                                        \
	({                                                                  \
		__typeof__(b) __in_set_val = (b);                           \
		bool __in_set_res = !(a)[0];                                \
		int __in_set_idx;                                           \
		for (__in_set_idx = 0;                                      \
		     __in_set_idx < (int)ARRAY_SIZE(a) &&                   \
		     (a)[__in_set_idx];                                     \
		     __in_set_idx++) {                                      \
			__in_set_res = (a)[__in_set_idx] == __in_set_val;   \
			if (__in_set_res)                                   \
				break;                                      \
		}                                                           \
		__in_set_res;                                               \
	})
45 
/*
 * Map a redirect error code to the counter row used for it.
 *
 * Returns 0 for err == 0, 2 for -EINVAL, 3 for -ENETDOWN, 4 for -EMSGSIZE,
 * 5 for -EOPNOTSUPP and 6 for -ENOSPC.  Any other value is reported as 1.
 */
static __always_inline __u32 xdp_get_err_key(int err)
{
	if (err == 0)
		return 0;
	if (err == -EINVAL)
		return 2;
	if (err == -ENETDOWN)
		return 3;
	if (err == -EMSGSIZE)
		return 4;
	if (err == -EOPNOTSUPP)
		return 5;
	if (err == -ENOSPC)
		return 6;

	/* Everything not listed above shares the catch-all row. */
	return 1;
}
65 
66 static __always_inline int xdp_redirect_collect_stat(int from, int err)
67 {
68 	u32 cpu = bpf_get_smp_processor_id();
69 	u32 key = XDP_REDIRECT_ERROR;
70 	struct datarec *rec;
71 	u32 idx;
72 
73 	if (!IN_SET(from_match, from))
74 		return 0;
75 
76 	key = xdp_get_err_key(err);
77 
78 	idx = key * nr_cpus + cpu;
79 	rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
80 	if (!rec)
81 		return 0;
82 	if (key)
83 		NO_TEAR_INC(rec->dropped);
84 	else
85 		NO_TEAR_INC(rec->processed);
86 	return 0; /* Indicate event was filtered (no further processing)*/
87 	/*
88 	 * Returning 1 here would allow e.g. a perf-record tracepoint
89 	 * to see and record these events, but it doesn't work well
90 	 * in-practice as stopping perf-record also unload this
91 	 * bpf_prog.  Plus, there is additional overhead of doing so.
92 	 */
93 }
94 
95 SEC("tp_btf/xdp_redirect_err")
96 int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
97 	     const struct bpf_prog *xdp, const void *tgt, int err,
98 	     const struct bpf_map *map, u32 index)
99 {
100 	return xdp_redirect_collect_stat(dev->ifindex, err);
101 }
102 
103 SEC("tp_btf/xdp_redirect_map_err")
104 int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
105 	     const struct bpf_prog *xdp, const void *tgt, int err,
106 	     const struct bpf_map *map, u32 index)
107 {
108 	return xdp_redirect_collect_stat(dev->ifindex, err);
109 }
110 
111 SEC("tp_btf/xdp_redirect")
112 int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
113 	     const struct bpf_prog *xdp, const void *tgt, int err,
114 	     const struct bpf_map *map, u32 index)
115 {
116 	return xdp_redirect_collect_stat(dev->ifindex, err);
117 }
118 
119 SEC("tp_btf/xdp_redirect_map")
120 int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
121 	     const struct bpf_prog *xdp, const void *tgt, int err,
122 	     const struct bpf_map *map, u32 index)
123 {
124 	return xdp_redirect_collect_stat(dev->ifindex, err);
125 }
126 
127 SEC("tp_btf/xdp_cpumap_enqueue")
128 int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
129 	     unsigned int drops, int to_cpu)
130 {
131 	u32 cpu = bpf_get_smp_processor_id();
132 	struct datarec *rec;
133 	u32 idx;
134 
135 	if (cpumap_map_id && cpumap_map_id != map_id)
136 		return 0;
137 
138 	idx = to_cpu * nr_cpus + cpu;
139 	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
140 	if (!rec)
141 		return 0;
142 	NO_TEAR_ADD(rec->processed, processed);
143 	NO_TEAR_ADD(rec->dropped, drops);
144 	/* Record bulk events, then userspace can calc average bulk size */
145 	if (processed > 0)
146 		NO_TEAR_INC(rec->issue);
147 	/* Inception: It's possible to detect overload situations, via
148 	 * this tracepoint.  This can be used for creating a feedback
149 	 * loop to XDP, which can take appropriate actions to mitigate
150 	 * this overload situation.
151 	 */
152 	return 0;
153 }
154 
155 SEC("tp_btf/xdp_cpumap_kthread")
156 int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
157 	     unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
158 {
159 	struct datarec *rec;
160 	u32 cpu;
161 
162 	if (cpumap_map_id && cpumap_map_id != map_id)
163 		return 0;
164 
165 	cpu = bpf_get_smp_processor_id();
166 	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
167 	if (!rec)
168 		return 0;
169 	NO_TEAR_ADD(rec->processed, processed);
170 	NO_TEAR_ADD(rec->dropped, drops);
171 	NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
172 	NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
173 	NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
174 	/* Count times kthread yielded CPU via schedule call */
175 	if (sched)
176 		NO_TEAR_INC(rec->issue);
177 	return 0;
178 }
179 
180 SEC("tp_btf/xdp_exception")
181 int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
182 	     const struct bpf_prog *xdp, u32 act)
183 {
184 	u32 cpu = bpf_get_smp_processor_id();
185 	struct datarec *rec;
186 	u32 key = act, idx;
187 
188 	if (!IN_SET(from_match, dev->ifindex))
189 		return 0;
190 	if (!IN_SET(to_match, dev->ifindex))
191 		return 0;
192 
193 	if (key > XDP_REDIRECT)
194 		key = XDP_REDIRECT + 1;
195 
196 	idx = key * nr_cpus + cpu;
197 	rec = bpf_map_lookup_elem(&exception_cnt, &idx);
198 	if (!rec)
199 		return 0;
200 	NO_TEAR_INC(rec->dropped);
201 
202 	return 0;
203 }
204 
205 SEC("tp_btf/xdp_devmap_xmit")
206 int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
207 	     const struct net_device *to_dev, int sent, int drops, int err)
208 {
209 	struct datarec *rec;
210 	int idx_in, idx_out;
211 	u32 cpu;
212 
213 	idx_in = from_dev->ifindex;
214 	idx_out = to_dev->ifindex;
215 
216 	if (!IN_SET(from_match, idx_in))
217 		return 0;
218 	if (!IN_SET(to_match, idx_out))
219 		return 0;
220 
221 	cpu = bpf_get_smp_processor_id();
222 	rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
223 	if (!rec)
224 		return 0;
225 	NO_TEAR_ADD(rec->processed, sent);
226 	NO_TEAR_ADD(rec->dropped, drops);
227 	/* Record bulk events, then userspace can calc average bulk size */
228 	NO_TEAR_INC(rec->info);
229 	/* Record error cases, where no frame were sent */
230 	/* Catch API error of drv ndo_xdp_xmit sent more than count */
231 	if (err || drops < 0)
232 		NO_TEAR_INC(rec->issue);
233 	return 0;
234 }
235 
236 SEC("tp_btf/xdp_devmap_xmit")
237 int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
238 	     const struct net_device *to_dev, int sent, int drops, int err)
239 {
240 	struct datarec empty = {};
241 	struct datarec *rec;
242 	int idx_in, idx_out;
243 	u64 idx;
244 
245 	idx_in = from_dev->ifindex;
246 	idx_out = to_dev->ifindex;
247 	idx = idx_in;
248 	idx = idx << 32 | idx_out;
249 
250 	if (!IN_SET(from_match, idx_in))
251 		return 0;
252 	if (!IN_SET(to_match, idx_out))
253 		return 0;
254 
255 	bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
256 	rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
257 	if (!rec)
258 		return 0;
259 
260 	NO_TEAR_ADD(rec->processed, sent);
261 	NO_TEAR_ADD(rec->dropped, drops);
262 	NO_TEAR_INC(rec->info);
263 	if (err || drops < 0)
264 		NO_TEAR_INC(rec->issue);
265 	return 0;
266 }
267