xref: /openbmc/linux/kernel/bpf/cgroup.c (revision 7b146ceb)
1 /*
2  * Functions to manage eBPF programs attached to cgroups
3  *
4  * Copyright (c) 2016 Daniel Mack
5  *
6  * This file is subject to the terms and conditions of version 2 of the GNU
7  * General Public License.  See the file COPYING in the main directory of the
8  * Linux distribution for more details.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/atomic.h>
13 #include <linux/cgroup.h>
14 #include <linux/filter.h>
15 #include <linux/slab.h>
16 #include <linux/sysctl.h>
17 #include <linux/bpf.h>
18 #include <linux/bpf-cgroup.h>
19 #include <net/sock.h>
20 
21 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
22 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
23 
24 /**
25  * cgroup_bpf_put() - put the references of all attached bpf programs
26  * @cgrp: the cgroup to modify
27  */
28 void cgroup_bpf_put(struct cgroup *cgrp)
29 {
30 	enum bpf_cgroup_storage_type stype;
31 	unsigned int type;
32 
33 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
34 		struct list_head *progs = &cgrp->bpf.progs[type];
35 		struct bpf_prog_list *pl, *tmp;
36 
37 		list_for_each_entry_safe(pl, tmp, progs, node) {
38 			list_del(&pl->node);
39 			bpf_prog_put(pl->prog);
40 			for_each_cgroup_storage_type(stype) {
41 				bpf_cgroup_storage_unlink(pl->storage[stype]);
42 				bpf_cgroup_storage_free(pl->storage[stype]);
43 			}
44 			kfree(pl);
45 			static_branch_dec(&cgroup_bpf_enabled_key);
46 		}
47 		bpf_prog_array_free(cgrp->bpf.effective[type]);
48 	}
49 }
50 
51 /* count the number of elements in the list.
52  * It's slow, but the list cannot be long.
53  */
54 static u32 prog_list_length(struct list_head *head)
55 {
56 	struct bpf_prog_list *pl;
57 	u32 cnt = 0;
58 
59 	list_for_each_entry(pl, head, node) {
60 		if (!pl->prog)
61 			continue;
62 		cnt++;
63 	}
64 	return cnt;
65 }
66 
67 /* if parent has non-overridable prog attached,
68  * disallow attaching new programs to the descendant cgroup.
69  * if parent has overridable or multi-prog, allow attaching
70  */
71 static bool hierarchy_allows_attach(struct cgroup *cgrp,
72 				    enum bpf_attach_type type,
73 				    u32 new_flags)
74 {
75 	struct cgroup *p;
76 
77 	p = cgroup_parent(cgrp);
78 	if (!p)
79 		return true;
80 	do {
81 		u32 flags = p->bpf.flags[type];
82 		u32 cnt;
83 
84 		if (flags & BPF_F_ALLOW_MULTI)
85 			return true;
86 		cnt = prog_list_length(&p->bpf.progs[type]);
87 		WARN_ON_ONCE(cnt > 1);
88 		if (cnt == 1)
89 			return !!(flags & BPF_F_ALLOW_OVERRIDE);
90 		p = cgroup_parent(p);
91 	} while (p);
92 	return true;
93 }
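
From userspace this check is hit via the BPF_PROG_ATTACH command. Below is a minimal sketch of how the rule plays out, assuming libbpf's bpf_prog_attach() wrapper and hypothetical prog_fd/parent_cg_fd/child_cg_fd descriptors: a default (non-overridable) attach in the parent cgroup makes a later attach in a child fail, while BPF_F_ALLOW_MULTI or BPF_F_ALLOW_OVERRIDE in the parent would permit it.

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Hypothetical fds: prog_fd from a loaded program, *_cg_fd from open()
 * on cgroup v2 directories. Error handling trimmed; sketch only.
 */
static int attach_parent_then_child(int prog_fd, int parent_cg_fd, int child_cg_fd)
{
	int err;

	/* non-overridable attach in the parent (flags == 0) */
	err = bpf_prog_attach(prog_fd, parent_cg_fd, BPF_CGROUP_INET_EGRESS, 0);
	if (err)
		return err;

	/* hierarchy_allows_attach() now rejects attaches in descendants,
	 * so this second call is expected to fail with EPERM
	 */
	return bpf_prog_attach(prog_fd, child_cg_fd, BPF_CGROUP_INET_EGRESS, 0);
}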
94 
95 /* compute a chain of effective programs for a given cgroup:
96  * start from the list of programs in this cgroup and add
97  * all parent programs.
98  * Note that a parent's F_ALLOW_OVERRIDE-type program yields
99  * to the programs in this cgroup.
100  */
101 static int compute_effective_progs(struct cgroup *cgrp,
102 				   enum bpf_attach_type type,
103 				   struct bpf_prog_array __rcu **array)
104 {
105 	enum bpf_cgroup_storage_type stype;
106 	struct bpf_prog_array *progs;
107 	struct bpf_prog_list *pl;
108 	struct cgroup *p = cgrp;
109 	int cnt = 0;
110 
111 	/* count number of effective programs by walking parents */
112 	do {
113 		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
114 			cnt += prog_list_length(&p->bpf.progs[type]);
115 		p = cgroup_parent(p);
116 	} while (p);
117 
118 	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
119 	if (!progs)
120 		return -ENOMEM;
121 
122 	/* populate the array with effective progs */
123 	cnt = 0;
124 	p = cgrp;
125 	do {
126 		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
127 			continue;
128 
129 		list_for_each_entry(pl, &p->bpf.progs[type], node) {
130 			if (!pl->prog)
131 				continue;
132 
133 			progs->items[cnt].prog = pl->prog;
134 			for_each_cgroup_storage_type(stype)
135 				progs->items[cnt].cgroup_storage[stype] =
136 					pl->storage[stype];
137 			cnt++;
138 		}
139 	} while ((p = cgroup_parent(p)));
140 
141 	rcu_assign_pointer(*array, progs);
142 	return 0;
143 }
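
As a worked example, take a hypothetical hierarchy /A -> /A/B where /A has program P1 attached with BPF_F_ALLOW_MULTI and /A/B has P2 and P3 attached with BPF_F_ALLOW_MULTI. For /A/B the counting walk above yields cnt = 3, and the populate walk starts at /A/B itself before moving to its parent, so the effective array ends up as {P2, P3, P1}. A parent attached without BPF_F_ALLOW_MULTI would instead be skipped once cnt is non-zero, which is how an overridable parent program yields to this cgroup's own program.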
144 
145 static void activate_effective_progs(struct cgroup *cgrp,
146 				     enum bpf_attach_type type,
147 				     struct bpf_prog_array __rcu *array)
148 {
149 	struct bpf_prog_array __rcu *old_array;
150 
151 	old_array = xchg(&cgrp->bpf.effective[type], array);
152 	/* free prog array after grace period, since __cgroup_bpf_run_*()
153 	 * might still be walking the array
154 	 */
155 	bpf_prog_array_free(old_array);
156 }
157 
158 /**
159  * cgroup_bpf_inherit() - inherit effective programs from parent
160  * @cgrp: the cgroup to modify
161  */
162 int cgroup_bpf_inherit(struct cgroup *cgrp)
163 {
164 /* has to use a macro instead of a const int, since the compiler thinks
165  * that the array below is variable length
166  */
167 #define	NR ARRAY_SIZE(cgrp->bpf.effective)
168 	struct bpf_prog_array __rcu *arrays[NR] = {};
169 	int i;
170 
171 	for (i = 0; i < NR; i++)
172 		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
173 
174 	for (i = 0; i < NR; i++)
175 		if (compute_effective_progs(cgrp, i, &arrays[i]))
176 			goto cleanup;
177 
178 	for (i = 0; i < NR; i++)
179 		activate_effective_progs(cgrp, i, arrays[i]);
180 
181 	return 0;
182 cleanup:
183 	for (i = 0; i < NR; i++)
184 		bpf_prog_array_free(arrays[i]);
185 	return -ENOMEM;
186 }
187 
188 static int update_effective_progs(struct cgroup *cgrp,
189 				  enum bpf_attach_type type)
190 {
191 	struct cgroup_subsys_state *css;
192 	int err;
193 
194 	/* allocate and recompute effective prog arrays */
195 	css_for_each_descendant_pre(css, &cgrp->self) {
196 		struct cgroup *desc = container_of(css, struct cgroup, self);
197 
198 		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
199 		if (err)
200 			goto cleanup;
201 	}
202 
203 	/* all allocations were successful. Activate all prog arrays */
204 	css_for_each_descendant_pre(css, &cgrp->self) {
205 		struct cgroup *desc = container_of(css, struct cgroup, self);
206 
207 		activate_effective_progs(desc, type, desc->bpf.inactive);
208 		desc->bpf.inactive = NULL;
209 	}
210 
211 	return 0;
212 
213 cleanup:
214 	/* oom while computing effective. Free all computed effective arrays
215 	 * since they were not activated
216 	 */
217 	css_for_each_descendant_pre(css, &cgrp->self) {
218 		struct cgroup *desc = container_of(css, struct cgroup, self);
219 
220 		bpf_prog_array_free(desc->bpf.inactive);
221 		desc->bpf.inactive = NULL;
222 	}
223 
224 	return err;
225 }
226 
227 #define BPF_CGROUP_MAX_PROGS 64
228 
229 /**
230  * __cgroup_bpf_attach() - Attach the program to a cgroup, and
231  *                         propagate the change to descendants
232  * @cgrp: The cgroup whose descendants to traverse
233  * @prog: A program to attach
234  * @type: Type of attach operation
235  * @flags: Option flags
236  *
237  * Must be called with cgroup_mutex held.
238  */
239 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
240 			enum bpf_attach_type type, u32 flags)
241 {
242 	struct list_head *progs = &cgrp->bpf.progs[type];
243 	struct bpf_prog *old_prog = NULL;
244 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
245 		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
246 	enum bpf_cgroup_storage_type stype;
247 	struct bpf_prog_list *pl;
248 	bool pl_was_allocated;
249 	int err;
250 
251 	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
252 		/* invalid combination */
253 		return -EINVAL;
254 
255 	if (!hierarchy_allows_attach(cgrp, type, flags))
256 		return -EPERM;
257 
258 	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
259 		/* Disallow attaching non-overridable on top
260 		 * of existing overridable in this cgroup.
261 		 * Disallow attaching multi-prog if overridable or none
262 		 */
263 		return -EPERM;
264 
265 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
266 		return -E2BIG;
267 
268 	for_each_cgroup_storage_type(stype) {
269 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
270 		if (IS_ERR(storage[stype])) {
271 			storage[stype] = NULL;
272 			for_each_cgroup_storage_type(stype)
273 				bpf_cgroup_storage_free(storage[stype]);
274 			return -ENOMEM;
275 		}
276 	}
277 
278 	if (flags & BPF_F_ALLOW_MULTI) {
279 		list_for_each_entry(pl, progs, node) {
280 			if (pl->prog == prog) {
281 				/* disallow attaching the same prog twice */
282 				for_each_cgroup_storage_type(stype)
283 					bpf_cgroup_storage_free(storage[stype]);
284 				return -EINVAL;
285 			}
286 		}
287 
288 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
289 		if (!pl) {
290 			for_each_cgroup_storage_type(stype)
291 				bpf_cgroup_storage_free(storage[stype]);
292 			return -ENOMEM;
293 		}
294 
295 		pl_was_allocated = true;
296 		pl->prog = prog;
297 		for_each_cgroup_storage_type(stype)
298 			pl->storage[stype] = storage[stype];
299 		list_add_tail(&pl->node, progs);
300 	} else {
301 		if (list_empty(progs)) {
302 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
303 			if (!pl) {
304 				for_each_cgroup_storage_type(stype)
305 					bpf_cgroup_storage_free(storage[stype]);
306 				return -ENOMEM;
307 			}
308 			pl_was_allocated = true;
309 			list_add_tail(&pl->node, progs);
310 		} else {
311 			pl = list_first_entry(progs, typeof(*pl), node);
312 			old_prog = pl->prog;
313 			for_each_cgroup_storage_type(stype) {
314 				old_storage[stype] = pl->storage[stype];
315 				bpf_cgroup_storage_unlink(old_storage[stype]);
316 			}
317 			pl_was_allocated = false;
318 		}
319 		pl->prog = prog;
320 		for_each_cgroup_storage_type(stype)
321 			pl->storage[stype] = storage[stype];
322 	}
323 
324 	cgrp->bpf.flags[type] = flags;
325 
326 	err = update_effective_progs(cgrp, type);
327 	if (err)
328 		goto cleanup;
329 
330 	static_branch_inc(&cgroup_bpf_enabled_key);
331 	for_each_cgroup_storage_type(stype) {
332 		if (!old_storage[stype])
333 			continue;
334 		bpf_cgroup_storage_free(old_storage[stype]);
335 	}
336 	if (old_prog) {
337 		bpf_prog_put(old_prog);
338 		static_branch_dec(&cgroup_bpf_enabled_key);
339 	}
340 	for_each_cgroup_storage_type(stype)
341 		bpf_cgroup_storage_link(storage[stype], cgrp, type);
342 	return 0;
343 
344 cleanup:
345 	/* and cleanup the prog list */
346 	pl->prog = old_prog;
347 	for_each_cgroup_storage_type(stype) {
348 		bpf_cgroup_storage_free(pl->storage[stype]);
349 		pl->storage[stype] = old_storage[stype];
350 		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
351 	}
352 	if (pl_was_allocated) {
353 		list_del(&pl->node);
354 		kfree(pl);
355 	}
356 	return err;
357 }
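
For reference, the multi-attach path above is what a userspace caller exercises with BPF_F_ALLOW_MULTI. A minimal sketch, assuming libbpf's bpf_prog_attach() and two hypothetical already-loaded program fds; note that all attaches to the same cgroup and type must use the same flags, attaching the same program twice returns -EINVAL, and exceeding BPF_CGROUP_MAX_PROGS returns -E2BIG.

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Sketch only: cg_fd is an open cgroup v2 directory. */
static int attach_two_multi(int prog1_fd, int prog2_fd, int cg_fd)
{
	int err;

	err = bpf_prog_attach(prog1_fd, cg_fd, BPF_CGROUP_INET_INGRESS,
			      BPF_F_ALLOW_MULTI);
	if (err)
		return err;

	/* a second, different program is appended to the list in multi mode */
	return bpf_prog_attach(prog2_fd, cg_fd, BPF_CGROUP_INET_INGRESS,
			       BPF_F_ALLOW_MULTI);
}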
358 
359 /**
360  * __cgroup_bpf_detach() - Detach the program from a cgroup, and
361  *                         propagate the change to descendants
362  * @cgrp: The cgroup whose descendants to traverse
363  * @prog: A program to detach or NULL
364  * @type: Type of detach operation
365  *
366  * Must be called with cgroup_mutex held.
367  */
368 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
369 			enum bpf_attach_type type)
370 {
371 	struct list_head *progs = &cgrp->bpf.progs[type];
372 	enum bpf_cgroup_storage_type stype;
373 	u32 flags = cgrp->bpf.flags[type];
374 	struct bpf_prog *old_prog = NULL;
375 	struct bpf_prog_list *pl;
376 	int err;
377 
378 	if (flags & BPF_F_ALLOW_MULTI) {
379 		if (!prog)
380 			/* to detach a MULTI prog the user has to specify a valid FD
381 			 * of the program to be detached
382 			 */
383 			return -EINVAL;
384 	} else {
385 		if (list_empty(progs))
386 			/* report error when trying to detach and nothing is attached */
387 			return -ENOENT;
388 	}
389 
390 	if (flags & BPF_F_ALLOW_MULTI) {
391 		/* find the prog and detach it */
392 		list_for_each_entry(pl, progs, node) {
393 			if (pl->prog != prog)
394 				continue;
395 			old_prog = prog;
396 			/* mark it deleted, so it's ignored while
397 			 * recomputing effective
398 			 */
399 			pl->prog = NULL;
400 			break;
401 		}
402 		if (!old_prog)
403 			return -ENOENT;
404 	} else {
405 		/* to maintain backward compatibility, NONE and OVERRIDE cgroups
406 		 * allow detaching with invalid FD (prog==NULL)
407 		 */
408 		pl = list_first_entry(progs, typeof(*pl), node);
409 		old_prog = pl->prog;
410 		pl->prog = NULL;
411 	}
412 
413 	err = update_effective_progs(cgrp, type);
414 	if (err)
415 		goto cleanup;
416 
417 	/* now we can actually delete it from this cgroup's list */
418 	list_del(&pl->node);
419 	for_each_cgroup_storage_type(stype) {
420 		bpf_cgroup_storage_unlink(pl->storage[stype]);
421 		bpf_cgroup_storage_free(pl->storage[stype]);
422 	}
423 	kfree(pl);
424 	if (list_empty(progs))
425 		/* last program was detached, reset flags to zero */
426 		cgrp->bpf.flags[type] = 0;
427 
428 	bpf_prog_put(old_prog);
429 	static_branch_dec(&cgroup_bpf_enabled_key);
430 	return 0;
431 
432 cleanup:
433 	/* and restore back old_prog */
434 	pl->prog = old_prog;
435 	return err;
436 }
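
The corresponding userspace operation is BPF_PROG_DETACH. A minimal sketch, assuming libbpf's bpf_prog_detach2()/bpf_prog_detach() wrappers and hypothetical fds, illustrating the rule above: a multi attachment must name the program by fd, while a none/override attachment also accepts a detach without one.

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Sketch only: cg_fd is an open cgroup v2 directory. */
static int detach_examples(int prog_fd, int cg_fd)
{
	int err;

	/* BPF_F_ALLOW_MULTI attachment: the program must be named by fd */
	err = bpf_prog_detach2(prog_fd, cg_fd, BPF_CGROUP_INET_INGRESS);
	if (err)
		return err;

	/* none/override attachment: detaching without a program fd is accepted */
	return bpf_prog_detach(cg_fd, BPF_CGROUP_INET_EGRESS);
}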
437 
438 /* Must be called with cgroup_mutex held to avoid races. */
439 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
440 		       union bpf_attr __user *uattr)
441 {
442 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
443 	enum bpf_attach_type type = attr->query.attach_type;
444 	struct list_head *progs = &cgrp->bpf.progs[type];
445 	u32 flags = cgrp->bpf.flags[type];
446 	int cnt, ret = 0, i;
447 
448 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
449 		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
450 	else
451 		cnt = prog_list_length(progs);
452 
453 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
454 		return -EFAULT;
455 	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
456 		return -EFAULT;
457 	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
458 		/* return early if user requested only program count + flags */
459 		return 0;
460 	if (attr->query.prog_cnt < cnt) {
461 		cnt = attr->query.prog_cnt;
462 		ret = -ENOSPC;
463 	}
464 
465 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
466 		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
467 						   prog_ids, cnt);
468 	} else {
469 		struct bpf_prog_list *pl;
470 		u32 id;
471 
472 		i = 0;
473 		list_for_each_entry(pl, progs, node) {
474 			id = pl->prog->aux->id;
475 			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
476 				return -EFAULT;
477 			if (++i == cnt)
478 				break;
479 		}
480 	}
481 	return ret;
482 }
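
From userspace this is the BPF_PROG_QUERY command. A minimal sketch, assuming libbpf's bpf_prog_query() and a hypothetical cg_fd; passing prog_cnt == 0 (or a NULL prog_ids buffer) returns just the count and attach flags, and BPF_F_QUERY_EFFECTIVE asks for the effective array rather than only the locally attached programs.

#include <linux/bpf.h>
#include <bpf/bpf.h>

/* Sketch only: cg_fd is an open cgroup v2 directory. */
static int query_effective(int cg_fd)
{
	__u32 prog_ids[64] = {};
	__u32 prog_cnt = 64;
	__u32 attach_flags = 0;
	int err;

	err = bpf_prog_query(cg_fd, BPF_CGROUP_INET_INGRESS,
			     BPF_F_QUERY_EFFECTIVE, &attach_flags,
			     prog_ids, &prog_cnt);
	if (err)
		return err;

	/* prog_cnt now holds the number of ids written to prog_ids[] */
	return 0;
}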
483 
484 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
485 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
486 {
487 	struct cgroup *cgrp;
488 	int ret;
489 
490 	cgrp = cgroup_get_from_fd(attr->target_fd);
491 	if (IS_ERR(cgrp))
492 		return PTR_ERR(cgrp);
493 
494 	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
495 				attr->attach_flags);
496 	cgroup_put(cgrp);
497 	return ret;
498 }
499 
500 int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
501 {
502 	struct bpf_prog *prog;
503 	struct cgroup *cgrp;
504 	int ret;
505 
506 	cgrp = cgroup_get_from_fd(attr->target_fd);
507 	if (IS_ERR(cgrp))
508 		return PTR_ERR(cgrp);
509 
510 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
511 	if (IS_ERR(prog))
512 		prog = NULL;
513 
514 	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
515 	if (prog)
516 		bpf_prog_put(prog);
517 
518 	cgroup_put(cgrp);
519 	return ret;
520 }
521 
522 int cgroup_bpf_prog_query(const union bpf_attr *attr,
523 			  union bpf_attr __user *uattr)
524 {
525 	struct cgroup *cgrp;
526 	int ret;
527 
528 	cgrp = cgroup_get_from_fd(attr->query.target_fd);
529 	if (IS_ERR(cgrp))
530 		return PTR_ERR(cgrp);
531 
532 	ret = cgroup_bpf_query(cgrp, attr, uattr);
533 
534 	cgroup_put(cgrp);
535 	return ret;
536 }
537 
538 /**
539  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
540  * @sk: The socket sending or receiving traffic
541  * @skb: The skb that is being sent or received
542  * @type: The type of program to be executed
543  *
544  * If no socket is passed, or the socket is not of type INET or INET6,
545  * this function does nothing and returns 0.
546  *
547  * The program type passed in via @type must be suitable for network
548  * filtering. No further check is performed to assert that.
549  *
550  * This function will return %-EPERM if an attached program was found
551  * and it returned != 1 during execution. In all other cases, 0 is returned.
552  */
553 int __cgroup_bpf_run_filter_skb(struct sock *sk,
554 				struct sk_buff *skb,
555 				enum bpf_attach_type type)
556 {
557 	unsigned int offset = skb->data - skb_network_header(skb);
558 	struct sock *save_sk;
559 	void *saved_data_end;
560 	struct cgroup *cgrp;
561 	int ret;
562 
563 	if (!sk || !sk_fullsock(sk))
564 		return 0;
565 
566 	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
567 		return 0;
568 
569 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
570 	save_sk = skb->sk;
571 	skb->sk = sk;
572 	__skb_push(skb, offset);
573 
574 	/* compute pointers for the bpf prog */
575 	bpf_compute_and_save_data_end(skb, &saved_data_end);
576 
577 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
578 				 __bpf_prog_run_save_cb);
579 	bpf_restore_data_end(skb, saved_data_end);
580 	__skb_pull(skb, offset);
581 	skb->sk = save_sk;
582 	return ret == 1 ? 0 : -EPERM;
583 }
584 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
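
The programs run here are of type BPF_PROG_TYPE_CGROUP_SKB, attached as BPF_CGROUP_INET_INGRESS or BPF_CGROUP_INET_EGRESS. A minimal sketch of such a program, assuming libbpf's bpf_helpers.h for SEC(); returning anything other than 1 makes this function report -EPERM and the packet does not pass.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int limit_pkt_size(struct __sk_buff *skb)
{
	/* hypothetical policy: refuse oversized egress packets */
	if (skb->len > 1500)
		return 0;	/* caller sees -EPERM */
	return 1;		/* allow */
}

char _license[] SEC("license") = "GPL";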
585 
586 /**
587  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
588  * @sk: sock structure to manipulate
589  * @type: The type of program to be executed
590  *
591  * The socket passed is expected to be of type INET or INET6.
592  *
593  * The program type passed in via @type must be suitable for sock
594  * filtering. No further check is performed to assert that.
595  *
596  * This function will return %-EPERM if an attached program was found
597  * and it returned != 1 during execution. In all other cases, 0 is returned.
598  */
599 int __cgroup_bpf_run_filter_sk(struct sock *sk,
600 			       enum bpf_attach_type type)
601 {
602 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
603 	int ret;
604 
605 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
606 	return ret == 1 ? 0 : -EPERM;
607 }
608 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
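
This path runs BPF_PROG_TYPE_CGROUP_SOCK programs, e.g. for BPF_CGROUP_INET_SOCK_CREATE. A minimal sketch, assuming libbpf's bpf_helpers.h and its "cgroup/sock" section convention; the numeric address-family check stands in for a real policy.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sock")
int ipv4_sockets_only(struct bpf_sock *sk)
{
	if (sk->family != 2 /* AF_INET */)
		return 0;	/* reported as -EPERM by the caller */
	return 1;
}

char _license[] SEC("license") = "GPL";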
609 
610 /**
611  * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and a
612  *                                       sockaddr provided by the user
613  * @sk: sock struct that will use sockaddr
614  * @uaddr: sockaddr struct provided by user
615  * @type: The type of program to be executed
616  * @t_ctx: Pointer to attach type specific context
617  *
618  * The socket is expected to be of type INET or INET6.
619  *
620  * This function will return %-EPERM if an attached program is found and
621  * returned value != 1 during execution. In all other cases, 0 is returned.
622  */
623 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
624 				      struct sockaddr *uaddr,
625 				      enum bpf_attach_type type,
626 				      void *t_ctx)
627 {
628 	struct bpf_sock_addr_kern ctx = {
629 		.sk = sk,
630 		.uaddr = uaddr,
631 		.t_ctx = t_ctx,
632 	};
633 	struct sockaddr_storage unspec;
634 	struct cgroup *cgrp;
635 	int ret;
636 
637 	/* Check the socket family since not all sockets represent a network
638 	 * endpoint (e.g. AF_UNIX).
639 	 */
640 	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
641 		return 0;
642 
643 	if (!ctx.uaddr) {
644 		memset(&unspec, 0, sizeof(unspec));
645 		ctx.uaddr = (struct sockaddr *)&unspec;
646 	}
647 
648 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
649 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
650 
651 	return ret == 1 ? 0 : -EPERM;
652 }
653 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
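
These are BPF_PROG_TYPE_CGROUP_SOCK_ADDR programs, attached for types such as BPF_CGROUP_INET4_BIND or BPF_CGROUP_INET4_CONNECT; the program sees the user-supplied address through struct bpf_sock_addr. A minimal sketch of a bind4 hook with a hypothetical port policy, assuming libbpf's bpf_helpers.h and bpf_endian.h.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/bind4")
int restrict_bind4(struct bpf_sock_addr *ctx)
{
	/* user_port is in network byte order; only port 8080 may be bound */
	if (ctx->user_port != bpf_htons(8080))
		return 0;	/* the caller reports -EPERM */
	return 1;
}

char _license[] SEC("license") = "GPL";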
654 
655 /**
656  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
657  * @sk: socket to get cgroup from
658  * @sock_ops: bpf_sock_ops_kern struct to pass to the program. Contains
659  * sk with connection information (IP addresses, etc.). May not contain
660  * cgroup info if it is a req sock.
661  * @type: The type of program to be executed
662  *
663  * The socket passed is expected to be of type INET or INET6.
664  *
665  * The program type passed in via @type must be suitable for sock_ops
666  * filtering. No further check is performed to assert that.
667  *
668  * This function will return %-EPERM if an attached program was found
669  * and it returned != 1 during execution. In all other cases, 0 is returned.
670  */
671 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
672 				     struct bpf_sock_ops_kern *sock_ops,
673 				     enum bpf_attach_type type)
674 {
675 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
676 	int ret;
677 
678 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
679 				 BPF_PROG_RUN);
680 	return ret == 1 ? 0 : -EPERM;
681 }
682 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
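
These are BPF_PROG_TYPE_SOCK_OPS programs attached as BPF_CGROUP_SOCK_OPS; the TCP stack invokes them at connection events identified by sock_ops->op. A minimal observational sketch, assuming libbpf's bpf_helpers.h and its "sockops" section name.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int observe_established(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* a real program would record or tune the connection here */
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";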
683 
684 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
685 				      short access, enum bpf_attach_type type)
686 {
687 	struct cgroup *cgrp;
688 	struct bpf_cgroup_dev_ctx ctx = {
689 		.access_type = (access << 16) | dev_type,
690 		.major = major,
691 		.minor = minor,
692 	};
693 	int allow = 1;
694 
695 	rcu_read_lock();
696 	cgrp = task_dfl_cgroup(current);
697 	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
698 				   BPF_PROG_RUN);
699 	rcu_read_unlock();
700 
701 	return !allow;
702 }
703 EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
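
Device programs (BPF_PROG_TYPE_CGROUP_DEVICE, attach type BPF_CGROUP_DEVICE) receive a struct bpf_cgroup_dev_ctx in which access_type packs the device type in the low 16 bits and the BPF_DEVCG_ACC_* mask above it, matching the shift in the function above. A minimal sketch with a hypothetical deny rule, assuming libbpf's bpf_helpers.h.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/dev")
int deny_mem_write(struct bpf_cgroup_dev_ctx *ctx)
{
	__u32 dev_type = ctx->access_type & 0xffff;	/* BPF_DEVCG_DEV_* */
	__u32 access = ctx->access_type >> 16;		/* BPF_DEVCG_ACC_* */

	/* hypothetical policy: no write access to char device 1:1 (/dev/mem) */
	if (dev_type == BPF_DEVCG_DEV_CHAR && (access & BPF_DEVCG_ACC_WRITE) &&
	    ctx->major == 1 && ctx->minor == 1)
		return 0;	/* deny */
	return 1;		/* allow */
}

char _license[] SEC("license") = "GPL";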
704 
705 static const struct bpf_func_proto *
706 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
707 {
708 	switch (func_id) {
709 	case BPF_FUNC_map_lookup_elem:
710 		return &bpf_map_lookup_elem_proto;
711 	case BPF_FUNC_map_update_elem:
712 		return &bpf_map_update_elem_proto;
713 	case BPF_FUNC_map_delete_elem:
714 		return &bpf_map_delete_elem_proto;
715 	case BPF_FUNC_get_current_uid_gid:
716 		return &bpf_get_current_uid_gid_proto;
717 	case BPF_FUNC_get_local_storage:
718 		return &bpf_get_local_storage_proto;
719 	case BPF_FUNC_get_current_cgroup_id:
720 		return &bpf_get_current_cgroup_id_proto;
721 	case BPF_FUNC_trace_printk:
722 		if (capable(CAP_SYS_ADMIN))
723 			return bpf_get_trace_printk_proto();
724 		/* fall through */
725 	default:
726 		return NULL;
727 	}
728 }
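
bpf_get_local_storage() in the list above is the program-side view of the per-(program, cgroup) storage that the pl->storage[] / bpf_cgroup_storage_*() calls in this file allocate and link at attach time. A minimal sketch of a device program counting accesses in such storage, assuming current libbpf BTF map definition macros.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* one counter per (program, cgroup) pair */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} access_cnt SEC(".maps");

SEC("cgroup/dev")
int count_dev_access(struct bpf_cgroup_dev_ctx *ctx)
{
	__u64 *cnt = bpf_get_local_storage(&access_cnt, 0);

	__sync_fetch_and_add(cnt, 1);
	return 1;	/* allow; only counting */
}

char _license[] SEC("license") = "GPL";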
729 
730 static const struct bpf_func_proto *
731 cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
732 {
733 	return cgroup_base_func_proto(func_id, prog);
734 }
735 
736 static bool cgroup_dev_is_valid_access(int off, int size,
737 				       enum bpf_access_type type,
738 				       const struct bpf_prog *prog,
739 				       struct bpf_insn_access_aux *info)
740 {
741 	const int size_default = sizeof(__u32);
742 
743 	if (type == BPF_WRITE)
744 		return false;
745 
746 	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
747 		return false;
748 	/* The verifier guarantees that size > 0. */
749 	if (off % size != 0)
750 		return false;
751 
752 	switch (off) {
753 	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
754 		bpf_ctx_record_field_size(info, size_default);
755 		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
756 			return false;
757 		break;
758 	default:
759 		if (size != size_default)
760 			return false;
761 	}
762 
763 	return true;
764 }
765 
766 const struct bpf_prog_ops cg_dev_prog_ops = {
767 };
768 
769 const struct bpf_verifier_ops cg_dev_verifier_ops = {
770 	.get_func_proto		= cgroup_dev_func_proto,
771 	.is_valid_access	= cgroup_dev_is_valid_access,
772 };
773 
774 /**
775  * __cgroup_bpf_run_filter_sysctl() - Run a program on sysctl
776  *
777  * @head: sysctl table header
778  * @table: sysctl table
779  * @write: sysctl is being read (= 0) or written (= 1)
780  * @type: type of program to be executed
781  *
782  * Program is run when sysctl is being accessed, either read or written, and
783  * can allow or deny such access.
784  *
785  * This function will return %-EPERM if an attached program is found and
786  * returned value != 1 during execution. In all other cases 0 is returned.
787  */
788 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
789 				   struct ctl_table *table, int write,
790 				   enum bpf_attach_type type)
791 {
792 	struct bpf_sysctl_kern ctx = {
793 		.head = head,
794 		.table = table,
795 		.write = write,
796 	};
797 	struct cgroup *cgrp;
798 	int ret;
799 
800 	rcu_read_lock();
801 	cgrp = task_dfl_cgroup(current);
802 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
803 	rcu_read_unlock();
804 
805 	return ret == 1 ? 0 : -EPERM;
806 }
807 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
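
Sysctl programs are BPF_PROG_TYPE_CGROUP_SYSCTL, attached as BPF_CGROUP_SYSCTL. At this revision the context only exposes the write flag (see sysctl_is_valid_access() below), so a policy can, for instance, allow reads while rejecting writes. A minimal sketch, assuming libbpf's bpf_helpers.h.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int sysctl_read_only(struct bpf_sysctl *ctx)
{
	if (ctx->write)
		return 0;	/* the write fails with -EPERM */
	return 1;		/* reads are allowed */
}

char _license[] SEC("license") = "GPL";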
808 
809 static const struct bpf_func_proto *
810 sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
811 {
812 	return cgroup_base_func_proto(func_id, prog);
813 }
814 
815 static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
816 				   const struct bpf_prog *prog,
817 				   struct bpf_insn_access_aux *info)
818 {
819 	const int size_default = sizeof(__u32);
820 
821 	if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
822 	    off % size || type != BPF_READ)
823 		return false;
824 
825 	switch (off) {
826 	case offsetof(struct bpf_sysctl, write):
827 		bpf_ctx_record_field_size(info, size_default);
828 		return bpf_ctx_narrow_access_ok(off, size, size_default);
829 	default:
830 		return false;
831 	}
832 }
833 
834 static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
835 				     const struct bpf_insn *si,
836 				     struct bpf_insn *insn_buf,
837 				     struct bpf_prog *prog, u32 *target_size)
838 {
839 	struct bpf_insn *insn = insn_buf;
840 
841 	switch (si->off) {
842 	case offsetof(struct bpf_sysctl, write):
843 		*insn++ = BPF_LDX_MEM(
844 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
845 			bpf_target_off(struct bpf_sysctl_kern, write,
846 				       FIELD_SIZEOF(struct bpf_sysctl_kern,
847 						    write),
848 				       target_size));
849 		break;
850 	}
851 
852 	return insn - insn_buf;
853 }
854 
855 const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
856 	.get_func_proto		= sysctl_func_proto,
857 	.is_valid_access	= sysctl_is_valid_access,
858 	.convert_ctx_access	= sysctl_convert_ctx_access,
859 };
860 
861 const struct bpf_prog_ops cg_sysctl_prog_ops = {
862 };
863