1 /* 2 * Functions to manage eBPF programs attached to cgroups 3 * 4 * Copyright (c) 2016 Daniel Mack 5 * 6 * This file is subject to the terms and conditions of version 2 of the GNU 7 * General Public License. See the file COPYING in the main directory of the 8 * Linux distribution for more details. 9 */ 10 11 #include <linux/kernel.h> 12 #include <linux/atomic.h> 13 #include <linux/cgroup.h> 14 #include <linux/slab.h> 15 #include <linux/bpf.h> 16 #include <linux/bpf-cgroup.h> 17 #include <net/sock.h> 18 19 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); 20 EXPORT_SYMBOL(cgroup_bpf_enabled_key); 21 22 /** 23 * cgroup_bpf_put() - put references of all bpf programs 24 * @cgrp: the cgroup to modify 25 */ 26 void cgroup_bpf_put(struct cgroup *cgrp) 27 { 28 unsigned int type; 29 30 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { 31 struct bpf_prog *prog = cgrp->bpf.prog[type]; 32 33 if (prog) { 34 bpf_prog_put(prog); 35 static_branch_dec(&cgroup_bpf_enabled_key); 36 } 37 } 38 } 39 40 /** 41 * cgroup_bpf_inherit() - inherit effective programs from parent 42 * @cgrp: the cgroup to modify 43 * @parent: the parent to inherit from 44 */ 45 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) 46 { 47 unsigned int type; 48 49 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { 50 struct bpf_prog *e; 51 52 e = rcu_dereference_protected(parent->bpf.effective[type], 53 lockdep_is_held(&cgroup_mutex)); 54 rcu_assign_pointer(cgrp->bpf.effective[type], e); 55 cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; 56 } 57 } 58 59 /** 60 * __cgroup_bpf_update() - Update the pinned program of a cgroup, and 61 * propagate the change to descendants 62 * @cgrp: The cgroup which descendants to traverse 63 * @parent: The parent of @cgrp, or %NULL if @cgrp is the root 64 * @prog: A new program to pin 65 * @type: Type of pinning operation (ingress/egress) 66 * 67 * Each cgroup has a set of two pointers for bpf programs; one for eBPF 68 * programs it owns, and which is effective for execution. 69 * 70 * If @prog is not %NULL, this function attaches a new program to the cgroup 71 * and releases the one that is currently attached, if any. @prog is then made 72 * the effective program of type @type in that cgroup. 73 * 74 * If @prog is %NULL, the currently attached program of type @type is released, 75 * and the effective program of the parent cgroup (if any) is inherited to 76 * @cgrp. 77 * 78 * Then, the descendants of @cgrp are walked and the effective program for 79 * each of them is set to the effective program of @cgrp unless the 80 * descendant has its own program attached, in which case the subbranch is 81 * skipped. This ensures that delegated subcgroups with own programs are left 82 * untouched. 83 * 84 * Must be called with cgroup_mutex held. 85 */ 86 int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, 87 struct bpf_prog *prog, enum bpf_attach_type type, 88 bool new_overridable) 89 { 90 struct bpf_prog *old_prog, *effective = NULL; 91 struct cgroup_subsys_state *pos; 92 bool overridable = true; 93 94 if (parent) { 95 overridable = !parent->bpf.disallow_override[type]; 96 effective = rcu_dereference_protected(parent->bpf.effective[type], 97 lockdep_is_held(&cgroup_mutex)); 98 } 99 100 if (prog && effective && !overridable) 101 /* if parent has non-overridable prog attached, disallow 102 * attaching new programs to descendent cgroup 103 */ 104 return -EPERM; 105 106 if (prog && effective && overridable != new_overridable) 107 /* if parent has overridable prog attached, only 108 * allow overridable programs in descendent cgroup 109 */ 110 return -EPERM; 111 112 old_prog = cgrp->bpf.prog[type]; 113 114 if (prog) { 115 overridable = new_overridable; 116 effective = prog; 117 if (old_prog && 118 cgrp->bpf.disallow_override[type] == new_overridable) 119 /* disallow attaching non-overridable on top 120 * of existing overridable in this cgroup 121 * and vice versa 122 */ 123 return -EPERM; 124 } 125 126 if (!prog && !old_prog) 127 /* report error when trying to detach and nothing is attached */ 128 return -ENOENT; 129 130 cgrp->bpf.prog[type] = prog; 131 132 css_for_each_descendant_pre(pos, &cgrp->self) { 133 struct cgroup *desc = container_of(pos, struct cgroup, self); 134 135 /* skip the subtree if the descendant has its own program */ 136 if (desc->bpf.prog[type] && desc != cgrp) { 137 pos = css_rightmost_descendant(pos); 138 } else { 139 rcu_assign_pointer(desc->bpf.effective[type], 140 effective); 141 desc->bpf.disallow_override[type] = !overridable; 142 } 143 } 144 145 if (prog) 146 static_branch_inc(&cgroup_bpf_enabled_key); 147 148 if (old_prog) { 149 bpf_prog_put(old_prog); 150 static_branch_dec(&cgroup_bpf_enabled_key); 151 } 152 return 0; 153 } 154 155 /** 156 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering 157 * @sk: The socket sending or receiving traffic 158 * @skb: The skb that is being sent or received 159 * @type: The type of program to be exectuted 160 * 161 * If no socket is passed, or the socket is not of type INET or INET6, 162 * this function does nothing and returns 0. 163 * 164 * The program type passed in via @type must be suitable for network 165 * filtering. No further check is performed to assert that. 166 * 167 * This function will return %-EPERM if any if an attached program was found 168 * and if it returned != 1 during execution. In all other cases, 0 is returned. 169 */ 170 int __cgroup_bpf_run_filter_skb(struct sock *sk, 171 struct sk_buff *skb, 172 enum bpf_attach_type type) 173 { 174 struct bpf_prog *prog; 175 struct cgroup *cgrp; 176 int ret = 0; 177 178 if (!sk || !sk_fullsock(sk)) 179 return 0; 180 181 if (sk->sk_family != AF_INET && 182 sk->sk_family != AF_INET6) 183 return 0; 184 185 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 186 187 rcu_read_lock(); 188 189 prog = rcu_dereference(cgrp->bpf.effective[type]); 190 if (prog) { 191 unsigned int offset = skb->data - skb_network_header(skb); 192 struct sock *save_sk = skb->sk; 193 194 skb->sk = sk; 195 __skb_push(skb, offset); 196 ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; 197 __skb_pull(skb, offset); 198 skb->sk = save_sk; 199 } 200 201 rcu_read_unlock(); 202 203 return ret; 204 } 205 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); 206 207 /** 208 * __cgroup_bpf_run_filter_sk() - Run a program on a sock 209 * @sk: sock structure to manipulate 210 * @type: The type of program to be exectuted 211 * 212 * socket is passed is expected to be of type INET or INET6. 213 * 214 * The program type passed in via @type must be suitable for sock 215 * filtering. No further check is performed to assert that. 216 * 217 * This function will return %-EPERM if any if an attached program was found 218 * and if it returned != 1 during execution. In all other cases, 0 is returned. 219 */ 220 int __cgroup_bpf_run_filter_sk(struct sock *sk, 221 enum bpf_attach_type type) 222 { 223 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 224 struct bpf_prog *prog; 225 int ret = 0; 226 227 228 rcu_read_lock(); 229 230 prog = rcu_dereference(cgrp->bpf.effective[type]); 231 if (prog) 232 ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; 233 234 rcu_read_unlock(); 235 236 return ret; 237 } 238 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 239 240 /** 241 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock 242 * @sk: socket to get cgroup from 243 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains 244 * sk with connection information (IP addresses, etc.) May not contain 245 * cgroup info if it is a req sock. 246 * @type: The type of program to be exectuted 247 * 248 * socket passed is expected to be of type INET or INET6. 249 * 250 * The program type passed in via @type must be suitable for sock_ops 251 * filtering. No further check is performed to assert that. 252 * 253 * This function will return %-EPERM if any if an attached program was found 254 * and if it returned != 1 during execution. In all other cases, 0 is returned. 255 */ 256 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, 257 struct bpf_sock_ops_kern *sock_ops, 258 enum bpf_attach_type type) 259 { 260 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 261 struct bpf_prog *prog; 262 int ret = 0; 263 264 265 rcu_read_lock(); 266 267 prog = rcu_dereference(cgrp->bpf.effective[type]); 268 if (prog) 269 ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; 270 271 rcu_read_unlock(); 272 273 return ret; 274 } 275 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); 276