xref: /openbmc/linux/kernel/bpf/cgroup.c (revision 5d0e4d78)
1 /*
2  * Functions to manage eBPF programs attached to cgroups
3  *
4  * Copyright (c) 2016 Daniel Mack
5  *
6  * This file is subject to the terms and conditions of version 2 of the GNU
7  * General Public License.  See the file COPYING in the main directory of the
8  * Linux distribution for more details.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/atomic.h>
13 #include <linux/cgroup.h>
14 #include <linux/slab.h>
15 #include <linux/bpf.h>
16 #include <linux/bpf-cgroup.h>
17 #include <net/sock.h>
18 
/*
 * Static branch key, defined in the "false" state. Within this file it is
 * incremented once for every program that gets attached to a cgroup (see
 * __cgroup_bpf_update()) and decremented once for every attached program
 * that is released (see __cgroup_bpf_update() and cgroup_bpf_put()).
 * The symbol is exported so that code outside this file can reference it.
 */
19 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
20 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
21 
22 /**
23  * cgroup_bpf_put() - put references of all bpf programs
24  * @cgrp: the cgroup to modify
25  */
26 void cgroup_bpf_put(struct cgroup *cgrp)
27 {
28 	unsigned int type;
29 
30 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
31 		struct bpf_prog *prog = cgrp->bpf.prog[type];
32 
33 		if (prog) {
34 			bpf_prog_put(prog);
35 			static_branch_dec(&cgroup_bpf_enabled_key);
36 		}
37 	}
38 }
39 
40 /**
41  * cgroup_bpf_inherit() - inherit effective programs from parent
42  * @cgrp: the cgroup to modify
43  * @parent: the parent to inherit from
44  */
45 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
46 {
47 	unsigned int type;
48 
49 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
50 		struct bpf_prog *e;
51 
52 		e = rcu_dereference_protected(parent->bpf.effective[type],
53 					      lockdep_is_held(&cgroup_mutex));
54 		rcu_assign_pointer(cgrp->bpf.effective[type], e);
55 		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
56 	}
57 }
58 
59 /**
60  * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
61  *                         propagate the change to descendants
62  * @cgrp: The cgroup which descendants to traverse
63  * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
64  * @prog: A new program to pin
65  * @type: Type of pinning operation (ingress/egress)
66  *
67  * Each cgroup has a set of two pointers for bpf programs; one for eBPF
68  * programs it owns, and which is effective for execution.
69  *
70  * If @prog is not %NULL, this function attaches a new program to the cgroup
71  * and releases the one that is currently attached, if any. @prog is then made
72  * the effective program of type @type in that cgroup.
73  *
74  * If @prog is %NULL, the currently attached program of type @type is released,
75  * and the effective program of the parent cgroup (if any) is inherited to
76  * @cgrp.
77  *
78  * Then, the descendants of @cgrp are walked and the effective program for
79  * each of them is set to the effective program of @cgrp unless the
80  * descendant has its own program attached, in which case the subbranch is
81  * skipped. This ensures that delegated subcgroups with own programs are left
82  * untouched.
83  *
84  * Must be called with cgroup_mutex held.
85  */
86 int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
87 			struct bpf_prog *prog, enum bpf_attach_type type,
88 			bool new_overridable)
89 {
90 	struct bpf_prog *old_prog, *effective = NULL;
91 	struct cgroup_subsys_state *pos;
92 	bool overridable = true;
93 
94 	if (parent) {
95 		overridable = !parent->bpf.disallow_override[type];
96 		effective = rcu_dereference_protected(parent->bpf.effective[type],
97 						      lockdep_is_held(&cgroup_mutex));
98 	}
99 
100 	if (prog && effective && !overridable)
101 		/* if parent has non-overridable prog attached, disallow
102 		 * attaching new programs to descendent cgroup
103 		 */
104 		return -EPERM;
105 
106 	if (prog && effective && overridable != new_overridable)
107 		/* if parent has overridable prog attached, only
108 		 * allow overridable programs in descendent cgroup
109 		 */
110 		return -EPERM;
111 
112 	old_prog = cgrp->bpf.prog[type];
113 
114 	if (prog) {
115 		overridable = new_overridable;
116 		effective = prog;
117 		if (old_prog &&
118 		    cgrp->bpf.disallow_override[type] == new_overridable)
119 			/* disallow attaching non-overridable on top
120 			 * of existing overridable in this cgroup
121 			 * and vice versa
122 			 */
123 			return -EPERM;
124 	}
125 
126 	if (!prog && !old_prog)
127 		/* report error when trying to detach and nothing is attached */
128 		return -ENOENT;
129 
130 	cgrp->bpf.prog[type] = prog;
131 
132 	css_for_each_descendant_pre(pos, &cgrp->self) {
133 		struct cgroup *desc = container_of(pos, struct cgroup, self);
134 
135 		/* skip the subtree if the descendant has its own program */
136 		if (desc->bpf.prog[type] && desc != cgrp) {
137 			pos = css_rightmost_descendant(pos);
138 		} else {
139 			rcu_assign_pointer(desc->bpf.effective[type],
140 					   effective);
141 			desc->bpf.disallow_override[type] = !overridable;
142 		}
143 	}
144 
145 	if (prog)
146 		static_branch_inc(&cgroup_bpf_enabled_key);
147 
148 	if (old_prog) {
149 		bpf_prog_put(old_prog);
150 		static_branch_dec(&cgroup_bpf_enabled_key);
151 	}
152 	return 0;
153 }
154 
155 /**
156  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
157  * @sk: The socket sending or receiving traffic
158  * @skb: The skb that is being sent or received
159  * @type: The type of program to be exectuted
160  *
161  * If no socket is passed, or the socket is not of type INET or INET6,
162  * this function does nothing and returns 0.
163  *
164  * The program type passed in via @type must be suitable for network
165  * filtering. No further check is performed to assert that.
166  *
167  * This function will return %-EPERM if any if an attached program was found
168  * and if it returned != 1 during execution. In all other cases, 0 is returned.
169  */
170 int __cgroup_bpf_run_filter_skb(struct sock *sk,
171 				struct sk_buff *skb,
172 				enum bpf_attach_type type)
173 {
174 	struct bpf_prog *prog;
175 	struct cgroup *cgrp;
176 	int ret = 0;
177 
178 	if (!sk || !sk_fullsock(sk))
179 		return 0;
180 
181 	if (sk->sk_family != AF_INET &&
182 	    sk->sk_family != AF_INET6)
183 		return 0;
184 
185 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
186 
187 	rcu_read_lock();
188 
189 	prog = rcu_dereference(cgrp->bpf.effective[type]);
190 	if (prog) {
191 		unsigned int offset = skb->data - skb_network_header(skb);
192 		struct sock *save_sk = skb->sk;
193 
194 		skb->sk = sk;
195 		__skb_push(skb, offset);
196 		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
197 		__skb_pull(skb, offset);
198 		skb->sk = save_sk;
199 	}
200 
201 	rcu_read_unlock();
202 
203 	return ret;
204 }
205 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
206 
207 /**
208  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
209  * @sk: sock structure to manipulate
210  * @type: The type of program to be exectuted
211  *
212  * socket is passed is expected to be of type INET or INET6.
213  *
214  * The program type passed in via @type must be suitable for sock
215  * filtering. No further check is performed to assert that.
216  *
217  * This function will return %-EPERM if any if an attached program was found
218  * and if it returned != 1 during execution. In all other cases, 0 is returned.
219  */
220 int __cgroup_bpf_run_filter_sk(struct sock *sk,
221 			       enum bpf_attach_type type)
222 {
223 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
224 	struct bpf_prog *prog;
225 	int ret = 0;
226 
227 
228 	rcu_read_lock();
229 
230 	prog = rcu_dereference(cgrp->bpf.effective[type]);
231 	if (prog)
232 		ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
233 
234 	rcu_read_unlock();
235 
236 	return ret;
237 }
238 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
239 
240 /**
241  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
242  * @sk: socket to get cgroup from
243  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
244  * sk with connection information (IP addresses, etc.) May not contain
245  * cgroup info if it is a req sock.
246  * @type: The type of program to be exectuted
247  *
248  * socket passed is expected to be of type INET or INET6.
249  *
250  * The program type passed in via @type must be suitable for sock_ops
251  * filtering. No further check is performed to assert that.
252  *
253  * This function will return %-EPERM if any if an attached program was found
254  * and if it returned != 1 during execution. In all other cases, 0 is returned.
255  */
256 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
257 				     struct bpf_sock_ops_kern *sock_ops,
258 				     enum bpf_attach_type type)
259 {
260 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
261 	struct bpf_prog *prog;
262 	int ret = 0;
263 
264 
265 	rcu_read_lock();
266 
267 	prog = rcu_dereference(cgrp->bpf.effective[type]);
268 	if (prog)
269 		ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
270 
271 	rcu_read_unlock();
272 
273 	return ret;
274 }
275 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
276