xref: /openbmc/linux/kernel/bpf/net_namespace.c (revision b8265621)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/bpf.h>
4 #include <linux/filter.h>
5 #include <net/net_namespace.h>
6 
7 /*
8  * Functions to manage BPF programs attached to netns
9  */
10 
/* State of a BPF link attached to a network namespace. Embeds the
 * generic bpf_link and records which netns and attach point the
 * link's program is bound to.
 */
struct bpf_netns_link {
	struct bpf_link	link;
	enum bpf_attach_type type;		/* UAPI attach type from link_create */
	enum netns_bpf_attach_type netns_type;	/* internal per-netns attach point */

	/* We don't hold a ref to net in order to auto-detach the link
	 * when netns is going away. Instead we rely on pernet
	 * pre_exit callback to clear this pointer. Must be accessed
	 * with netns_bpf_mutex held.
	 */
	struct net *net;
	struct list_head node; /* node in list of links attached to net */
};
24 
/* Protects updates to netns_bpf state: the per-netns prog pointers,
 * run arrays and link lists, and serializes link release/update
 * against netns teardown (see bpf_netns_link::net).
 */
DEFINE_MUTEX(netns_bpf_mutex);
27 
28 static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type)
29 {
30 	switch (type) {
31 #ifdef CONFIG_INET
32 	case NETNS_BPF_SK_LOOKUP:
33 		static_branch_dec(&bpf_sk_lookup_enabled);
34 		break;
35 #endif
36 	default:
37 		break;
38 	}
39 }
40 
41 static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type)
42 {
43 	switch (type) {
44 #ifdef CONFIG_INET
45 	case NETNS_BPF_SK_LOOKUP:
46 		static_branch_inc(&bpf_sk_lookup_enabled);
47 		break;
48 #endif
49 	default:
50 		break;
51 	}
52 }
53 
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
				       enum netns_bpf_attach_type type)
{
	struct bpf_prog_array *run_array;

	/* Swap the published run array for NULL, then free the old one;
	 * bpf_prog_array_free tolerates a NULL argument.
	 */
	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);
}
64 
65 static int link_index(struct net *net, enum netns_bpf_attach_type type,
66 		      struct bpf_netns_link *link)
67 {
68 	struct bpf_netns_link *pos;
69 	int i = 0;
70 
71 	list_for_each_entry(pos, &net->bpf.links[type], node) {
72 		if (pos == link)
73 			return i;
74 		i++;
75 	}
76 	return -ENOENT;
77 }
78 
79 static int link_count(struct net *net, enum netns_bpf_attach_type type)
80 {
81 	struct list_head *pos;
82 	int i = 0;
83 
84 	list_for_each(pos, &net->bpf.links[type])
85 		i++;
86 	return i;
87 }
88 
89 static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type,
90 			    struct bpf_prog_array *prog_array)
91 {
92 	struct bpf_netns_link *pos;
93 	unsigned int i = 0;
94 
95 	list_for_each_entry(pos, &net->bpf.links[type], node) {
96 		prog_array->items[i].prog = pos->link.prog;
97 		i++;
98 	}
99 }
100 
/* bpf_link_ops::release - detach the link from its netns when the
 * last link reference is gone: remove its program from the run array
 * (rebuilding a smaller one), or tear the array down entirely if this
 * was the last link for the attach point.
 */
static void bpf_netns_link_release(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *old_array, *new_array;
	struct net *net;
	int cnt, idx;

	mutex_lock(&netns_bpf_mutex);

	/* We can race with cleanup_net, but if we see a non-NULL
	 * struct net pointer, pre_exit has not run yet and wait for
	 * netns_bpf_mutex.
	 */
	net = net_link->net;
	if (!net)
		goto out_unlock;

	/* Mark attach point as unused */
	netns_bpf_attach_type_unneed(type);

	/* Remember link position in case of safe delete */
	idx = link_index(net, type, net_link);
	list_del(&net_link->node);

	cnt = link_count(net, type);
	if (!cnt) {
		/* Last link for this attach point - drop the run array */
		netns_bpf_run_array_detach(net, type);
		goto out_unlock;
	}

	old_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!new_array) {
		/* Allocation failed - fall back to deleting the entry
		 * in place in the old array at the remembered index.
		 */
		WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx));
		goto out_unlock;
	}
	/* Rebuild from the link list and publish atomically */
	fill_prog_array(net, type, new_array);
	rcu_assign_pointer(net->bpf.run_array[type], new_array);
	bpf_prog_array_free(old_array);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
}
147 
148 static void bpf_netns_link_dealloc(struct bpf_link *link)
149 {
150 	struct bpf_netns_link *net_link =
151 		container_of(link, struct bpf_netns_link, link);
152 
153 	kfree(net_link);
154 }
155 
/* bpf_link_ops::update_prog - swap the link's program for @new_prog.
 * If @old_prog is given (BPF_F_REPLACE), it must match the currently
 * attached program. The new program must have the same type.
 */
static int bpf_netns_link_update_prog(struct bpf_link *link,
				      struct bpf_prog *new_prog,
				      struct bpf_prog *old_prog)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *run_array;
	struct net *net;
	int idx, ret;

	if (old_prog && old_prog != link->prog)
		return -EPERM;
	if (new_prog->type != link->prog->type)
		return -EINVAL;

	mutex_lock(&netns_bpf_mutex);

	net = net_link->net;
	if (!net || !check_net(net)) {
		/* Link auto-detached or netns dying */
		ret = -ENOLINK;
		goto out_unlock;
	}

	/* Update the run array slot this link currently occupies */
	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	idx = link_index(net, type, net_link);
	ret = bpf_prog_array_update_at(run_array, idx, new_prog);
	if (ret)
		goto out_unlock;

	/* Publish the new prog in the link and drop the old one's ref */
	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return ret;
}
195 
/* bpf_link_ops::fill_link_info - report the netns inode number and
 * attach type to userspace. The inode is reported as 0 if the link
 * was auto-detached or the netns is going away.
 */
static int bpf_netns_link_fill_info(const struct bpf_link *link,
				    struct bpf_link_info *info)
{
	const struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	unsigned int inum = 0;
	struct net *net;

	/* netns_bpf_mutex stabilizes net_link->net against auto-detach */
	mutex_lock(&netns_bpf_mutex);
	net = net_link->net;
	if (net && check_net(net))
		inum = net->ns.inum;
	mutex_unlock(&netns_bpf_mutex);

	info->netns.netns_ino = inum;
	info->netns.attach_type = net_link->type;
	return 0;
}
214 
215 static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
216 				       struct seq_file *seq)
217 {
218 	struct bpf_link_info info = {};
219 
220 	bpf_netns_link_fill_info(link, &info);
221 	seq_printf(seq,
222 		   "netns_ino:\t%u\n"
223 		   "attach_type:\t%u\n",
224 		   info.netns.netns_ino,
225 		   info.netns.attach_type);
226 }
227 
/* Operations backing BPF_LINK_TYPE_NETNS links. */
static const struct bpf_link_ops bpf_netns_link_ops = {
	.release = bpf_netns_link_release,
	.dealloc = bpf_netns_link_dealloc,
	.update_prog = bpf_netns_link_update_prog,
	.fill_link_info = bpf_netns_link_fill_info,
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};
235 
/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  struct net *net,
				  enum netns_bpf_attach_type type)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	struct bpf_prog_array *run_array;
	u32 prog_cnt = 0, flags = 0;

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array)
		prog_cnt = bpf_prog_array_length(run_array);

	/* Flags and count are always reported, even when the caller did
	 * not supply an id buffer or no programs are attached.
	 */
	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;
	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	/* Copy at most attr->query.prog_cnt program ids to userspace */
	return bpf_prog_array_copy_to_user(run_array, prog_ids,
					   attr->query.prog_cnt);
}
261 
262 int netns_bpf_prog_query(const union bpf_attr *attr,
263 			 union bpf_attr __user *uattr)
264 {
265 	enum netns_bpf_attach_type type;
266 	struct net *net;
267 	int ret;
268 
269 	if (attr->query.query_flags)
270 		return -EINVAL;
271 
272 	type = to_netns_bpf_attach_type(attr->query.attach_type);
273 	if (type < 0)
274 		return -EINVAL;
275 
276 	net = get_net_ns_by_fd(attr->query.target_fd);
277 	if (IS_ERR(net))
278 		return PTR_ERR(net);
279 
280 	mutex_lock(&netns_bpf_mutex);
281 	ret = __netns_bpf_prog_query(attr, uattr, net, type);
282 	mutex_unlock(&netns_bpf_mutex);
283 
284 	put_net(net);
285 	return ret;
286 }
287 
/* Attach @prog directly (without a link) to a netns attach point in
 * the current task's netns. Only one directly-attached program per
 * attach point is supported; attaching replaces a previous one.
 */
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_prog_array *run_array;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
	int ret;

	if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
		return -EINVAL;

	type = to_netns_bpf_attach_type(attr->attach_type);
	if (type < 0)
		return -EINVAL;

	net = current->nsproxy->net_ns;
	mutex_lock(&netns_bpf_mutex);

	/* Attaching prog directly is not compatible with links */
	if (!list_empty(&net->bpf.links[type])) {
		ret = -EEXIST;
		goto out_unlock;
	}

	/* Per-attach-point validation; only the flow dissector supports
	 * direct prog attachment here (sk_lookup is link-only).
	 */
	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		ret = flow_dissector_bpf_prog_attach_check(net, prog);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	if (ret)
		goto out_unlock;

	attached = net->bpf.progs[type];
	if (attached == prog) {
		/* The same program cannot be attached twice */
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Reuse the existing single-slot run array if there is one,
	 * otherwise allocate and publish a fresh one.
	 */
	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array) {
		WRITE_ONCE(run_array->items[0].prog, prog);
	} else {
		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
		if (!run_array) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		run_array->items[0].prog = prog;
		rcu_assign_pointer(net->bpf.run_array[type], run_array);
	}

	/* Record the new program and drop the replaced one's reference */
	net->bpf.progs[type] = prog;
	if (attached)
		bpf_prog_put(attached);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);

	return ret;
}
353 
354 /* Must be called with netns_bpf_mutex held. */
355 static int __netns_bpf_prog_detach(struct net *net,
356 				   enum netns_bpf_attach_type type,
357 				   struct bpf_prog *old)
358 {
359 	struct bpf_prog *attached;
360 
361 	/* Progs attached via links cannot be detached */
362 	if (!list_empty(&net->bpf.links[type]))
363 		return -EINVAL;
364 
365 	attached = net->bpf.progs[type];
366 	if (!attached || attached != old)
367 		return -ENOENT;
368 	netns_bpf_run_array_detach(net, type);
369 	net->bpf.progs[type] = NULL;
370 	bpf_prog_put(attached);
371 	return 0;
372 }
373 
374 int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
375 {
376 	enum netns_bpf_attach_type type;
377 	struct bpf_prog *prog;
378 	int ret;
379 
380 	if (attr->target_fd)
381 		return -EINVAL;
382 
383 	type = to_netns_bpf_attach_type(attr->attach_type);
384 	if (type < 0)
385 		return -EINVAL;
386 
387 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
388 	if (IS_ERR(prog))
389 		return PTR_ERR(prog);
390 
391 	mutex_lock(&netns_bpf_mutex);
392 	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
393 	mutex_unlock(&netns_bpf_mutex);
394 
395 	bpf_prog_put(prog);
396 
397 	return ret;
398 }
399 
400 static int netns_bpf_max_progs(enum netns_bpf_attach_type type)
401 {
402 	switch (type) {
403 	case NETNS_BPF_FLOW_DISSECTOR:
404 		return 1;
405 	case NETNS_BPF_SK_LOOKUP:
406 		return 64;
407 	default:
408 		return 0;
409 	}
410 }
411 
/* Attach @link's program to @net's attach point @type by appending
 * the link to the per-netns list and rebuilding the run array to
 * include all attached links in list order.
 */
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
				 enum netns_bpf_attach_type type)
{
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	struct bpf_prog_array *run_array;
	int cnt, err;

	mutex_lock(&netns_bpf_mutex);

	cnt = link_count(net, type);
	if (cnt >= netns_bpf_max_progs(type)) {
		err = -E2BIG;
		goto out_unlock;
	}
	/* Links are not compatible with attaching prog directly */
	if (net->bpf.progs[type]) {
		err = -EEXIST;
		goto out_unlock;
	}

	/* Per-attach-point validation of the program */
	switch (type) {
	case NETNS_BPF_FLOW_DISSECTOR:
		err = flow_dissector_bpf_prog_attach_check(net, link->prog);
		break;
	case NETNS_BPF_SK_LOOKUP:
		err = 0; /* nothing to check */
		break;
	default:
		err = -EINVAL;
		break;
	}
	if (err)
		goto out_unlock;

	run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL);
	if (!run_array) {
		err = -ENOMEM;
		goto out_unlock;
	}

	list_add_tail(&net_link->node, &net->bpf.links[type]);

	/* Rebuild the run array from the link list, swap it in, and
	 * free the previous array (if any) after the swap.
	 */
	fill_prog_array(net, type, run_array);
	run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);

	/* Mark attach point as used */
	netns_bpf_attach_type_need(type);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
	return err;
}
467 
/* Handle BPF_LINK_CREATE for netns attach types: create a bpf_link
 * bound to the netns named by link_create.target_fd and attach @prog
 * through it. Returns a new link fd on success, negative error
 * otherwise.
 */
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type netns_type;
	struct bpf_link_primer link_primer;
	struct bpf_netns_link *net_link;
	enum bpf_attach_type type;
	struct net *net;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	type = attr->link_create.attach_type;
	netns_type = to_netns_bpf_attach_type(type);
	if (netns_type < 0)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->link_create.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	net_link = kzalloc(sizeof(*net_link), GFP_USER);
	if (!net_link) {
		err = -ENOMEM;
		goto out_put_net;
	}
	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
		      &bpf_netns_link_ops, prog);
	net_link->net = net;
	net_link->type = type;
	net_link->netns_type = netns_type;

	/* Reserve the link fd before attaching so a later failure can
	 * be undone with bpf_link_cleanup().
	 */
	err = bpf_link_prime(&net_link->link, &link_primer);
	if (err) {
		kfree(net_link);
		goto out_put_net;
	}

	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_net;
	}

	/* The link does not hold a netns reference (see
	 * bpf_netns_link::net), so drop ours on every path.
	 */
	put_net(net);
	return bpf_link_settle(&link_primer);

out_put_net:
	put_net(net);
	return err;
}
519 
520 static int __net_init netns_bpf_pernet_init(struct net *net)
521 {
522 	int type;
523 
524 	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
525 		INIT_LIST_HEAD(&net->bpf.links[type]);
526 
527 	return 0;
528 }
529 
/* Pernet pre_exit: runs while the netns is being torn down. Drops all
 * run arrays, auto-detaches links by clearing their net pointer (see
 * bpf_netns_link::net), and releases directly-attached programs.
 */
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
	enum netns_bpf_attach_type type;
	struct bpf_netns_link *net_link;

	mutex_lock(&netns_bpf_mutex);
	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
		netns_bpf_run_array_detach(net, type);
		list_for_each_entry(net_link, &net->bpf.links[type], node) {
			net_link->net = NULL; /* auto-detach link */
			netns_bpf_attach_type_unneed(type);
		}
		if (net->bpf.progs[type])
			bpf_prog_put(net->bpf.progs[type]);
	}
	mutex_unlock(&netns_bpf_mutex);
}
547 
/* Pernet hooks: init builds the link lists, pre_exit auto-detaches
 * links and drops attached programs before the netns goes away.
 */
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
	.init = netns_bpf_pernet_init,
	.pre_exit = netns_bpf_pernet_pre_exit,
};
552 
/* Register the pernet operations at boot. */
static int __init netns_bpf_init(void)
{
	return register_pernet_subsys(&netns_bpf_pernet_ops);
}

subsys_initcall(netns_bpf_init);
559