xref: /openbmc/linux/kernel/bpf/net_namespace.c (revision a13f2ef1)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/bpf.h>
4 #include <linux/filter.h>
5 #include <net/net_namespace.h>
6 
7 /*
8  * Functions to manage BPF programs attached to netns
9  */
10 
/* BPF link that attaches a program to a network namespace. */
struct bpf_netns_link {
	/* Generic link object; the bpf_link_ops callbacks in this file
	 * recover the enclosing bpf_netns_link from it with container_of().
	 */
	struct bpf_link	link;
	/* Attach type as requested at link creation; reported by fill_info */
	enum bpf_attach_type type;
	/* Index into the per-type arrays in net->bpf (run_array, links) */
	enum netns_bpf_attach_type netns_type;

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};
24 
/* Protects updates to netns_bpf: the per-type run arrays, directly
 * attached programs and link lists hanging off net->bpf, as well as
 * the net back-pointer stored in each bpf_netns_link.
 */
DEFINE_MUTEX(netns_bpf_mutex);
27 
28 /* Must be called with netns_bpf_mutex held. */
29 static void netns_bpf_run_array_detach(struct net *net,
30 				       enum netns_bpf_attach_type type)
31 {
32 	struct bpf_prog_array *run_array;
33 
34 	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
35 					lockdep_is_held(&netns_bpf_mutex));
36 	bpf_prog_array_free(run_array);
37 }
38 
39 static void bpf_netns_link_release(struct bpf_link *link)
40 {
41 	struct bpf_netns_link *net_link =
42 		container_of(link, struct bpf_netns_link, link);
43 	enum netns_bpf_attach_type type = net_link->netns_type;
44 	struct net *net;
45 
46 	mutex_lock(&netns_bpf_mutex);
47 
48 	/* We can race with cleanup_net, but if we see a non-NULL
49 	 * struct net pointer, pre_exit has not run yet and wait for
50 	 * netns_bpf_mutex.
51 	 */
52 	net = net_link->net;
53 	if (!net)
54 		goto out_unlock;
55 
56 	netns_bpf_run_array_detach(net, type);
57 	list_del(&net_link->node);
58 
59 out_unlock:
60 	mutex_unlock(&netns_bpf_mutex);
61 }
62 
63 static void bpf_netns_link_dealloc(struct bpf_link *link)
64 {
65 	struct bpf_netns_link *net_link =
66 		container_of(link, struct bpf_netns_link, link);
67 
68 	kfree(net_link);
69 }
70 
71 static int bpf_netns_link_update_prog(struct bpf_link *link,
72 				      struct bpf_prog *new_prog,
73 				      struct bpf_prog *old_prog)
74 {
75 	struct bpf_netns_link *net_link =
76 		container_of(link, struct bpf_netns_link, link);
77 	enum netns_bpf_attach_type type = net_link->netns_type;
78 	struct bpf_prog_array *run_array;
79 	struct net *net;
80 	int ret = 0;
81 
82 	if (old_prog && old_prog != link->prog)
83 		return -EPERM;
84 	if (new_prog->type != link->prog->type)
85 		return -EINVAL;
86 
87 	mutex_lock(&netns_bpf_mutex);
88 
89 	net = net_link->net;
90 	if (!net || !check_net(net)) {
91 		/* Link auto-detached or netns dying */
92 		ret = -ENOLINK;
93 		goto out_unlock;
94 	}
95 
96 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
97 					      lockdep_is_held(&netns_bpf_mutex));
98 	WRITE_ONCE(run_array->items[0].prog, new_prog);
99 
100 	old_prog = xchg(&link->prog, new_prog);
101 	bpf_prog_put(old_prog);
102 
103 out_unlock:
104 	mutex_unlock(&netns_bpf_mutex);
105 	return ret;
106 }
107 
108 static int bpf_netns_link_fill_info(const struct bpf_link *link,
109 				    struct bpf_link_info *info)
110 {
111 	const struct bpf_netns_link *net_link =
112 		container_of(link, struct bpf_netns_link, link);
113 	unsigned int inum = 0;
114 	struct net *net;
115 
116 	mutex_lock(&netns_bpf_mutex);
117 	net = net_link->net;
118 	if (net && check_net(net))
119 		inum = net->ns.inum;
120 	mutex_unlock(&netns_bpf_mutex);
121 
122 	info->netns.netns_ino = inum;
123 	info->netns.attach_type = net_link->type;
124 	return 0;
125 }
126 
127 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
128 				       struct seq_file *seq)
129 {
130 	struct bpf_link_info info = {};
131 
132 	bpf_netns_link_fill_info(link, &info);
133 	seq_printf(seq,
134 		   "netns_ino:\t%u\n"
135 		   "attach_type:\t%u\n",
136 		   info.netns.netns_ino,
137 		   info.netns.attach_type);
138 }
139 
140 static const struct bpf_link_ops bpf_netns_link_ops = {
141 	.release = bpf_netns_link_release,
142 	.dealloc = bpf_netns_link_dealloc,
143 	.update_prog = bpf_netns_link_update_prog,
144 	.fill_link_info = bpf_netns_link_fill_info,
145 	.show_fdinfo = bpf_netns_link_show_fdinfo,
146 };
147 
148 /* Must be called with netns_bpf_mutex held. */
149 static int __netns_bpf_prog_query(const union bpf_attr *attr,
150 				  union bpf_attr __user *uattr,
151 				  struct net *net,
152 				  enum netns_bpf_attach_type type)
153 {
154 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
155 	struct bpf_prog_array *run_array;
156 	u32 prog_cnt = 0, flags = 0;
157 
158 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
159 					      lockdep_is_held(&netns_bpf_mutex));
160 	if (run_array)
161 		prog_cnt = bpf_prog_array_length(run_array);
162 
163 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
164 		return -EFAULT;
165 	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
166 		return -EFAULT;
167 	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
168 		return 0;
169 
170 	return bpf_prog_array_copy_to_user(run_array, prog_ids,
171 					   attr->query.prog_cnt);
172 }
173 
174 int netns_bpf_prog_query(const union bpf_attr *attr,
175 			 union bpf_attr __user *uattr)
176 {
177 	enum netns_bpf_attach_type type;
178 	struct net *net;
179 	int ret;
180 
181 	if (attr->query.query_flags)
182 		return -EINVAL;
183 
184 	type = to_netns_bpf_attach_type(attr->query.attach_type);
185 	if (type < 0)
186 		return -EINVAL;
187 
188 	net = get_net_ns_by_fd(attr->query.target_fd);
189 	if (IS_ERR(net))
190 		return PTR_ERR(net);
191 
192 	mutex_lock(&netns_bpf_mutex);
193 	ret = __netns_bpf_prog_query(attr, uattr, net, type);
194 	mutex_unlock(&netns_bpf_mutex);
195 
196 	put_net(net);
197 	return ret;
198 }
199 
200 int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
201 {
202 	struct bpf_prog_array *run_array;
203 	enum netns_bpf_attach_type type;
204 	struct bpf_prog *attached;
205 	struct net *net;
206 	int ret;
207 
208 	if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
209 		return -EINVAL;
210 
211 	type = to_netns_bpf_attach_type(attr->attach_type);
212 	if (type < 0)
213 		return -EINVAL;
214 
215 	net = current->nsproxy->net_ns;
216 	mutex_lock(&netns_bpf_mutex);
217 
218 	/* Attaching prog directly is not compatible with links */
219 	if (!list_empty(&net->bpf.links[type])) {
220 		ret = -EEXIST;
221 		goto out_unlock;
222 	}
223 
224 	switch (type) {
225 	case NETNS_BPF_FLOW_DISSECTOR:
226 		ret = flow_dissector_bpf_prog_attach_check(net, prog);
227 		break;
228 	default:
229 		ret = -EINVAL;
230 		break;
231 	}
232 	if (ret)
233 		goto out_unlock;
234 
235 	attached = net->bpf.progs[type];
236 	if (attached == prog) {
237 		/* The same program cannot be attached twice */
238 		ret = -EINVAL;
239 		goto out_unlock;
240 	}
241 
242 	run_array = rcu_dereference_protected(net->bpf.run_array[type],
243 					      lockdep_is_held(&netns_bpf_mutex));
244 	if (run_array) {
245 		WRITE_ONCE(run_array->items[0].prog, prog);
246 	} else {
247 		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
248 		if (!run_array) {
249 			ret = -ENOMEM;
250 			goto out_unlock;
251 		}
252 		run_array->items[0].prog = prog;
253 		rcu_assign_pointer(net->bpf.run_array[type], run_array);
254 	}
255 
256 	net->bpf.progs[type] = prog;
257 	if (attached)
258 		bpf_prog_put(attached);
259 
260 out_unlock:
261 	mutex_unlock(&netns_bpf_mutex);
262 
263 	return ret;
264 }
265 
266 /* Must be called with netns_bpf_mutex held. */
267 static int __netns_bpf_prog_detach(struct net *net,
268 				   enum netns_bpf_attach_type type,
269 				   struct bpf_prog *old)
270 {
271 	struct bpf_prog *attached;
272 
273 	/* Progs attached via links cannot be detached */
274 	if (!list_empty(&net->bpf.links[type]))
275 		return -EINVAL;
276 
277 	attached = net->bpf.progs[type];
278 	if (!attached || attached != old)
279 		return -ENOENT;
280 	netns_bpf_run_array_detach(net, type);
281 	net->bpf.progs[type] = NULL;
282 	bpf_prog_put(attached);
283 	return 0;
284 }
285 
286 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
287 {
288 	enum netns_bpf_attach_type type;
289 	struct bpf_prog *prog;
290 	int ret;
291 
292 	if (attr->target_fd)
293 		return -EINVAL;
294 
295 	type = to_netns_bpf_attach_type(attr->attach_type);
296 	if (type < 0)
297 		return -EINVAL;
298 
299 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
300 	if (IS_ERR(prog))
301 		return PTR_ERR(prog);
302 
303 	mutex_lock(&netns_bpf_mutex);
304 	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
305 	mutex_unlock(&netns_bpf_mutex);
306 
307 	bpf_prog_put(prog);
308 
309 	return ret;
310 }
311 
312 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
313 				 enum netns_bpf_attach_type type)
314 {
315 	struct bpf_netns_link *net_link =
316 		container_of(link, struct bpf_netns_link, link);
317 	struct bpf_prog_array *run_array;
318 	int err;
319 
320 	mutex_lock(&netns_bpf_mutex);
321 
322 	/* Allow attaching only one prog or link for now */
323 	if (!list_empty(&net->bpf.links[type])) {
324 		err = -E2BIG;
325 		goto out_unlock;
326 	}
327 	/* Links are not compatible with attaching prog directly */
328 	if (net->bpf.progs[type]) {
329 		err = -EEXIST;
330 		goto out_unlock;
331 	}
332 
333 	switch (type) {
334 	case NETNS_BPF_FLOW_DISSECTOR:
335 		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
336 		break;
337 	default:
338 		err = -EINVAL;
339 		break;
340 	}
341 	if (err)
342 		goto out_unlock;
343 
344 	run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
345 	if (!run_array) {
346 		err = -ENOMEM;
347 		goto out_unlock;
348 	}
349 	run_array->items[0].prog = link->prog;
350 	rcu_assign_pointer(net->bpf.run_array[type], run_array);
351 
352 	list_add_tail(&net_link->node, &net->bpf.links[type]);
353 
354 out_unlock:
355 	mutex_unlock(&netns_bpf_mutex);
356 	return err;
357 }
358 
359 int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
360 {
361 	enum netns_bpf_attach_type netns_type;
362 	struct bpf_link_primer link_primer;
363 	struct bpf_netns_link *net_link;
364 	enum bpf_attach_type type;
365 	struct net *net;
366 	int err;
367 
368 	if (attr->link_create.flags)
369 		return -EINVAL;
370 
371 	type = attr->link_create.attach_type;
372 	netns_type = to_netns_bpf_attach_type(type);
373 	if (netns_type < 0)
374 		return -EINVAL;
375 
376 	net = get_net_ns_by_fd(attr->link_create.target_fd);
377 	if (IS_ERR(net))
378 		return PTR_ERR(net);
379 
380 	net_link = kzalloc(sizeof(*net_link), GFP_USER);
381 	if (!net_link) {
382 		err = -ENOMEM;
383 		goto out_put_net;
384 	}
385 	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
386 		      &bpf_netns_link_ops, prog);
387 	net_link->net = net;
388 	net_link->type = type;
389 	net_link->netns_type = netns_type;
390 
391 	err = bpf_link_prime(&net_link->link, &link_primer);
392 	if (err) {
393 		kfree(net_link);
394 		goto out_put_net;
395 	}
396 
397 	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
398 	if (err) {
399 		bpf_link_cleanup(&link_primer);
400 		goto out_put_net;
401 	}
402 
403 	put_net(net);
404 	return bpf_link_settle(&link_primer);
405 
406 out_put_net:
407 	put_net(net);
408 	return err;
409 }
410 
411 static int __net_init netns_bpf_pernet_init(struct net *net)
412 {
413 	int type;
414 
415 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
416 		INIT_LIST_HEAD(&net->bpf.links[type]);
417 
418 	return 0;
419 }
420 
421 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
422 {
423 	enum netns_bpf_attach_type type;
424 	struct bpf_netns_link *net_link;
425 
426 	mutex_lock(&netns_bpf_mutex);
427 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
428 		netns_bpf_run_array_detach(net, type);
429 		list_for_each_entry(net_link, &net->bpf.links[type], node)
430 			net_link->net = NULL; /* auto-detach link */
431 		if (net->bpf.progs[type])
432 			bpf_prog_put(net->bpf.progs[type]);
433 	}
434 	mutex_unlock(&netns_bpf_mutex);
435 }
436 
/* Per-netns hooks: .init sets up the empty per-type link lists, and
 * .pre_exit detaches programs and auto-detaches links for a netns.
 * Registered by netns_bpf_init().
 */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};
441 
442 static int __init netns_bpf_init(void)
443 {
444 	return register_pernet_subsys(&netns_bpf_pernet_ops);
445 }
446 
447 subsys_initcall(netns_bpf_init);
448