// SPDX-License-Identifier: GPL-2.0-only
/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_lsm.h>
#include <linux/bpf_verifier.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>

#include "../cgroup/cgroup-internal.h"

DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);

/*
 * cgroup bpf destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions. Use a separate workqueue so that cgroup bpf
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_bpf_destroy_wq;

static int __init cgroup_bpf_wq_init(void)
{
	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
	if (!cgroup_bpf_destroy_wq)
		panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
	return 0;
}
core_initcall(cgroup_bpf_wq_init);

/* __always_inline is necessary to prevent indirect call through run_prog
 * function pointer.
 */
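/*
 * Each program's 32-bit return value packs a verdict and flags: bit 0 is the
 * allow/deny verdict and the remaining bits are OR-ed into *ret_flags when
 * the caller passes one. The run starts from the caller-provided @retval;
 * the first denying program turns it into -EPERM unless @retval already
 * holds an errno.
 */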
static __always_inline int
bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
		      enum cgroup_bpf_attach_type atype,
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	u32 func_ret;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(cgrp->effective[atype]);
	item = &array->items[0];
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
		if (ret_flags) {
			*(ret_flags) |= (func_ret >> 1);
			func_ret &= 1;
		}
		if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return run_ctx.retval;
}

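/*
 * The three helpers below are the bodies of BPF_LSM_CGROUP shim programs and
 * are entered from a BPF trampoline: the LSM hook arguments arrive as an
 * array of u64 in @ctx, and the owning shim bpf_prog is recovered from the
 * &insnsi pointer (an open-coded container_of(insn, struct bpf_prog,
 * insnsi)). The cgroup is then looked up and the effective program array
 * for the shim's cgroup_atype is run.
 */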
unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
				       const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct sock *sk;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sk = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
					 const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct socket *sock;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sock = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
					  const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct cgroup *cgrp;
	int ret = 0;

	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	/* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
	cgrp = task_dfl_cgroup(current);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

#ifdef CONFIG_BPF_LSM
struct cgroup_lsm_atype {
	u32 attach_btf_id;
	int refcnt;
};

static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];

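/*
 * BPF_LSM_CGROUP programs don't have a fixed attach type slot. They share
 * the CGROUP_LSM_START..CGROUP_LSM_END range: a slot is keyed by the LSM
 * hook's attach_btf_id and reference counted, so all programs attached to
 * the same hook reuse one slot, a free slot (attach_btf_id == 0) is handed
 * out for a new hook, and -E2BIG is returned once all slots are taken.
 */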
static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	int i;

	lockdep_assert_held(&cgroup_mutex);

	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
			return CGROUP_LSM_START + i;

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == 0)
			return CGROUP_LSM_START + i;

	return -E2BIG;
}

void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	lockdep_assert_held(&cgroup_mutex);

	WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
		     cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);

	cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
	cgroup_lsm_atype[i].refcnt++;
}

void bpf_cgroup_atype_put(int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	cgroup_lock();
	if (--cgroup_lsm_atype[i].refcnt <= 0)
		cgroup_lsm_atype[i].attach_btf_id = 0;
	WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
	cgroup_unlock();
}
#else
static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_LSM */

void cgroup_bpf_offline(struct cgroup *cgrp)
{
	cgroup_get(cgrp);
	percpu_ref_kill(&cgrp->bpf.refcnt);
}

static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storages[stype]);
}

static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
				     struct bpf_cgroup_storage *new_storages[],
				     enum bpf_attach_type type,
				     struct bpf_prog *prog,
				     struct cgroup *cgrp)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_cgroup_storage_key key;
	struct bpf_map *map;

	key.cgroup_inode_id = cgroup_id(cgrp);
	key.attach_type = type;

	for_each_cgroup_storage_type(stype) {
		map = prog->aux->cgroup_storage[stype];
		if (!map)
			continue;

		storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
		if (storages[stype])
			continue;

		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storages[stype])) {
			bpf_cgroup_storages_free(new_storages);
			return -ENOMEM;
		}

		new_storages[stype] = storages[stype];
	}

	return 0;
}

static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
				       struct bpf_cgroup_storage *src[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		dst[stype] = src[stype];
}

static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
				     struct cgroup *cgrp,
				     enum bpf_attach_type attach_type)
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}

/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{
	cgroup_put(link->cgroup);
	link->cgroup = NULL;
}

/**
 * cgroup_bpf_release() - put references of all bpf programs and
 * release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
static void cgroup_bpf_release(struct work_struct *work)
{
	struct cgroup *p, *cgrp = container_of(work, struct cgroup,
					       bpf.release_work);
	struct bpf_prog_array *old_array;
	struct list_head *storages = &cgrp->bpf.storages;
	struct bpf_cgroup_storage *storage, *stmp;

	unsigned int atype;

	cgroup_lock();

	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
		struct hlist_head *progs = &cgrp->bpf.progs[atype];
		struct bpf_prog_list *pl;
		struct hlist_node *pltmp;

		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
			hlist_del(&pl->node);
			if (pl->prog) {
				if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->prog);
				bpf_prog_put(pl->prog);
			}
			if (pl->link) {
				if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
				bpf_cgroup_link_auto_detach(pl->link);
			}
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
		}
		old_array = rcu_dereference_protected(
				cgrp->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		bpf_prog_array_free(old_array);
	}

	list_for_each_entry_safe(storage, stmp, storages, list_cg) {
		bpf_cgroup_storage_unlink(storage);
		bpf_cgroup_storage_free(storage);
	}

	cgroup_unlock();

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);
	cgroup_put(cgrp);
}

/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 * of bpf cgroup data
 * @ref: percpu ref counter structure
 */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
}

/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
 * link or direct prog.
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	hlist_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[atype];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[atype]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[atype]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			continue;

		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
			if (!prog_list_prog(pl))
				continue;

			item = &progs->items[cnt];
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}

static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_get(p);

	for (i = 0; i < NR; i++)
		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);

	INIT_LIST_HEAD(&cgrp->bpf.storages);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);

	return -ENOMEM;
}

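/*
 * Recompute and swap in the effective prog arrays for @cgrp and all of its
 * descendants for the given attach type. Cgroups whose bpf refcnt already
 * dropped to zero are being released and are skipped. All new arrays are
 * allocated first, so on failure nothing has been activated yet and the
 * partially computed arrays can simply be freed.
 */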
static int update_effective_progs(struct cgroup *cgrp,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
			if (unlikely(desc->bpf.inactive)) {
				bpf_prog_array_free(desc->bpf.inactive);
				desc->bpf.inactive = NULL;
			}
			continue;
		}

		activate_effective_progs(desc, atype, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

#define BPF_CGROUP_MAX_PROGS 64

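/*
 * Find the bpf_prog_list entry an attach request should operate on.
 * Returns NULL when a new entry must be appended, the existing entry when
 * the request replaces it (single-attach mode always replaces the head,
 * multi-attach mode replaces @replace_prog), or an ERR_PTR when the same
 * prog/link is already attached (-EINVAL) or the prog to replace is not
 * attached to this cgroup (-ENOENT).
 */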
static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	/* single-attach case */
	if (!allow_multi) {
		if (hlist_empty(progs))
			return NULL;
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	hlist_for_each_entry(pl, progs, node) {
		if (prog && pl->prog == prog && prog != replace_prog)
			/* disallow attaching the same prog twice */
			return ERR_PTR(-EINVAL);
		if (link && pl->link == link)
			/* disallow attaching the same link twice */
			return ERR_PTR(-EINVAL);
	}

	/* direct prog multi-attach w/ replacement case */
	if (replace_prog) {
		hlist_for_each_entry(pl, progs, node) {
			if (pl->prog == replace_prog)
				/* a match found */
				return pl;
		}
		/* prog to replace not found for cgroup */
		return ERR_PTR(-ENOENT);
	}

	return NULL;
}

/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 * propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_attach(struct cgroup *cgrp,
			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
			       struct bpf_cgroup_link *link,
			       enum bpf_attach_type type, u32 flags)
{
	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_prog *new_prog = prog ? : link->link.prog;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	int err;

	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;
	if (link && (prog || replace_prog))
		/* only either link or prog/replace_prog can be specified */
		return -EINVAL;
	if (!!replace_prog != !!(flags & BPF_F_REPLACE))
		/* replace_prog implies BPF_F_REPLACE, and vice versa */
		return -EINVAL;

	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (!hierarchy_allows_attach(cgrp, atype))
		return -EPERM;

	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	pl = find_attach_entry(progs, prog, link, replace_prog,
			       flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	if (bpf_cgroup_storages_alloc(storage, new_storage, type,
				      prog ? : link->link.prog, cgrp))
		return -ENOMEM;

	if (pl) {
		old_prog = pl->prog;
	} else {
		struct hlist_node *last = NULL;

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			bpf_cgroup_storages_free(new_storage);
			return -ENOMEM;
		}
		if (hlist_empty(progs))
			hlist_add_head(&pl->node, progs);
		else
			hlist_for_each(last, progs) {
				if (last->next)
					continue;
				hlist_add_behind(&pl->node, last);
				break;
			}
	}

	pl->prog = prog;
	pl->link = link;
	bpf_cgroup_storages_assign(pl->storage, storage);
	cgrp->bpf.flags[atype] = saved_flags;

	if (type == BPF_LSM_CGROUP) {
		err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
		if (err)
			goto cleanup;
	}

	err = update_effective_progs(cgrp, atype);
	if (err)
		goto cleanup_trampoline;

	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	} else {
		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
	}
	bpf_cgroup_storages_link(new_storage, cgrp, type);
	return 0;

cleanup_trampoline:
	if (type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(new_prog);

cleanup:
	if (old_prog) {
		pl->prog = old_prog;
		pl->link = NULL;
	}
	bpf_cgroup_storages_free(new_storage);
	if (!old_prog) {
		hlist_del(&pl->node);
		kfree(pl);
	}
	return err;
}

static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
	cgroup_unlock();
	return ret;
}

/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
static void replace_effective_prog(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_cgroup_link *link)
{
	struct bpf_prog_array_item *item;
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->link == link)
					goto found;
				pos++;
			}
		}
found:
		BUG_ON(!cg);
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		item = &progs->items[pos];
		WRITE_ONCE(item->prog, link->link.prog);
	}
}

/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 * to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
 * incremented
 *
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_replace(struct cgroup *cgrp,
				struct bpf_cgroup_link *link,
				struct bpf_prog *new_prog)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	bool found = false;

	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (link->link.prog->type != new_prog->type)
		return -EINVAL;

	hlist_for_each_entry(pl, progs, node) {
		if (pl->link == link) {
			found = true;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	old_prog = xchg(&link->link.prog, new_prog);
	replace_effective_prog(cgrp, atype, link);
	bpf_prog_put(old_prog);
	return 0;
}

static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
			      struct bpf_prog *old_prog)
{
	struct bpf_cgroup_link *cg_link;
	int ret;

	cg_link = container_of(link, struct bpf_cgroup_link, link);

	cgroup_lock();
	/* link might have been auto-released by dying cgroup, so fail */
	if (!cg_link->cgroup) {
		ret = -ENOLINK;
		goto out_unlock;
	}
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}
	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
out_unlock:
	cgroup_unlock();
	return ret;
}

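/*
 * Find the bpf_prog_list entry a detach request refers to. In legacy
 * (non-multi) mode the single attached entry is returned even when no prog
 * or link is given; in multi mode the caller must pass a valid prog or link
 * and only an exact match is returned, otherwise an ERR_PTR is returned.
 */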
static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	if (!allow_multi) {
		if (hlist_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return ERR_PTR(-ENOENT);

		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL) in legacy mode
		 */
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	if (!prog && !link)
		/* to detach MULTI prog the user has to specify valid FD
		 * of the program or link to be detached
		 */
		return ERR_PTR(-EINVAL);

	/* find the prog or link and detach it */
	hlist_for_each_entry(pl, progs, node) {
		if (pl->prog == prog && pl->link == link)
			return pl;
	}
	return ERR_PTR(-ENOENT);
}

/**
 * purge_effective_progs() - After compute_effective_progs fails to alloc new
 *                           cgrp->bpf.inactive table we can recover by
 *                           recomputing the array in place.
 *
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	/* recompute effective prog array in place */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link or prog in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->prog == prog && pl->link == link)
					goto found;
				pos++;
			}
		}

		/* no link or prog match, skip the cgroup of this layer */
		continue;
found:
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));

		/* Remove the program from the array */
		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
			  "Failed to purge a prog from array at index %d", pos);
	}
}

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 * propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	u32 attach_btf_id = 0;
	u32 flags;

	if (prog)
		attach_btf_id = prog->aux->attach_btf_id;
	if (link)
		attach_btf_id = link->link.prog->aux->attach_btf_id;

	atype = bpf_cgroup_atype_find(type, attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];
	flags = cgrp->bpf.flags[atype];

	if (prog && link)
		/* only one of prog or link can be specified */
		return -EINVAL;

	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	/* mark it deleted, so it's ignored while recomputing effective */
	old_prog = pl->prog;
	pl->prog = NULL;
	pl->link = NULL;

	if (update_effective_progs(cgrp, atype)) {
		/* if update_effective_progs() failed, restore the prog/link so
		 * that purge_effective_progs() can find the entry and drop it
		 * from the effective arrays in place
		 */
		pl->prog = old_prog;
		pl->link = link;
		purge_effective_progs(cgrp, old_prog, link, atype);
	}

	/* now can actually delete it from this cgroup list */
	hlist_del(&pl->node);

	kfree(pl);
	if (hlist_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[atype] = 0;
	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	}
	static_branch_dec(&cgroup_bpf_enabled_key[atype]);
	return 0;
}

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			     enum bpf_attach_type type)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
	cgroup_unlock();
	return ret;
}

/* Must be called with cgroup_mutex held to avoid races. */
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			      union bpf_attr __user *uattr)
{
	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	enum cgroup_bpf_attach_type from_atype, to_atype;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_array *effective;
	int cnt, ret = 0, i;
	int total_cnt = 0;
	u32 flags;

	if (effective_query && prog_attach_flags)
		return -EINVAL;

	if (type == BPF_LSM_CGROUP) {
		if (!effective_query && attr->query.prog_cnt &&
		    prog_ids && !prog_attach_flags)
			return -EINVAL;

		from_atype = CGROUP_LSM_START;
		to_atype = CGROUP_LSM_END;
		flags = 0;
	} else {
		from_atype = to_cgroup_bpf_attach_type(type);
		if (from_atype < 0)
			return -EINVAL;
		to_atype = from_atype;
		flags = cgrp->bpf.flags[from_atype];
	}

	for (atype = from_atype; atype <= to_atype; atype++) {
		if (effective_query) {
			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
							      lockdep_is_held(&cgroup_mutex));
			total_cnt += bpf_prog_array_length(effective);
		} else {
			total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
1077b79c9fc9SStanislav Fomichev }
1078b79c9fc9SStanislav Fomichev }
1079468e2f64SAlexei Starovoitov
10800e426a3aSPu Lehui /* always output uattr->query.attach_flags as 0 during effective query */
10810e426a3aSPu Lehui flags = effective_query ? 0 : flags;
1082468e2f64SAlexei Starovoitov if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
1083468e2f64SAlexei Starovoitov return -EFAULT;
1084b79c9fc9SStanislav Fomichev if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
1085468e2f64SAlexei Starovoitov return -EFAULT;
1086b79c9fc9SStanislav Fomichev if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
1087468e2f64SAlexei Starovoitov /* return early if user requested only program count + flags */
1088468e2f64SAlexei Starovoitov return 0;
1089b79c9fc9SStanislav Fomichev
1090b79c9fc9SStanislav Fomichev if (attr->query.prog_cnt < total_cnt) {
1091b79c9fc9SStanislav Fomichev total_cnt = attr->query.prog_cnt;
1092468e2f64SAlexei Starovoitov ret = -ENOSPC;
1093468e2f64SAlexei Starovoitov }
1094468e2f64SAlexei Starovoitov
1095b79c9fc9SStanislav Fomichev for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
10960e426a3aSPu Lehui if (effective_query) {
1097b79c9fc9SStanislav Fomichev effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
1098b79c9fc9SStanislav Fomichev lockdep_is_held(&cgroup_mutex));
1099b79c9fc9SStanislav Fomichev cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
1100b79c9fc9SStanislav Fomichev ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
1101468e2f64SAlexei Starovoitov } else {
1102b79c9fc9SStanislav Fomichev struct hlist_head *progs;
1103468e2f64SAlexei Starovoitov struct bpf_prog_list *pl;
1104b79c9fc9SStanislav Fomichev struct bpf_prog *prog;
1105468e2f64SAlexei Starovoitov u32 id;
1106468e2f64SAlexei Starovoitov
1107b79c9fc9SStanislav Fomichev progs = &cgrp->bpf.progs[atype];
1108b79c9fc9SStanislav Fomichev cnt = min_t(int, prog_list_length(progs), total_cnt);
1109468e2f64SAlexei Starovoitov i = 0;
111000442143SStanislav Fomichev hlist_for_each_entry(pl, progs, node) {
1111af6eea57SAndrii Nakryiko prog = prog_list_prog(pl);
1112af6eea57SAndrii Nakryiko id = prog->aux->id;
1113468e2f64SAlexei Starovoitov if (copy_to_user(prog_ids + i, &id, sizeof(id)))
1114468e2f64SAlexei Starovoitov return -EFAULT;
1115468e2f64SAlexei Starovoitov if (++i == cnt)
1116468e2f64SAlexei Starovoitov break;
1117468e2f64SAlexei Starovoitov }
1118b79c9fc9SStanislav Fomichev
1119b79c9fc9SStanislav Fomichev if (prog_attach_flags) {
1120b79c9fc9SStanislav Fomichev flags = cgrp->bpf.flags[atype];
1121b79c9fc9SStanislav Fomichev
1122b79c9fc9SStanislav Fomichev for (i = 0; i < cnt; i++)
11230e426a3aSPu Lehui if (copy_to_user(prog_attach_flags + i,
11240e426a3aSPu Lehui &flags, sizeof(flags)))
1125b79c9fc9SStanislav Fomichev return -EFAULT;
1126b79c9fc9SStanislav Fomichev prog_attach_flags += cnt;
1127b79c9fc9SStanislav Fomichev }
11280e426a3aSPu Lehui }
1129b79c9fc9SStanislav Fomichev
1130b79c9fc9SStanislav Fomichev prog_ids += cnt;
1131b79c9fc9SStanislav Fomichev total_cnt -= cnt;
1132b79c9fc9SStanislav Fomichev }
1133468e2f64SAlexei Starovoitov return ret;
1134468e2f64SAlexei Starovoitov }
1135468e2f64SAlexei Starovoitov
cgroup_bpf_query(struct cgroup * cgrp,const union bpf_attr * attr,union bpf_attr __user * uattr)1136588e5d87SHe Fengqing static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
1137588e5d87SHe Fengqing union bpf_attr __user *uattr)
1138588e5d87SHe Fengqing {
1139588e5d87SHe Fengqing int ret;
1140588e5d87SHe Fengqing
11414cdb91b0SKamalesh Babulal cgroup_lock();
1142588e5d87SHe Fengqing ret = __cgroup_bpf_query(cgrp, attr, uattr);
11434cdb91b0SKamalesh Babulal cgroup_unlock();
1144588e5d87SHe Fengqing return ret;
1145588e5d87SHe Fengqing }
1146588e5d87SHe Fengqing
cgroup_bpf_prog_attach(const union bpf_attr * attr,enum bpf_prog_type ptype,struct bpf_prog * prog)1147fdb5c453SSean Young int cgroup_bpf_prog_attach(const union bpf_attr *attr,
1148fdb5c453SSean Young enum bpf_prog_type ptype, struct bpf_prog *prog)
1149fdb5c453SSean Young {
11507dd68b32SAndrey Ignatov struct bpf_prog *replace_prog = NULL;
1151fdb5c453SSean Young struct cgroup *cgrp;
1152fdb5c453SSean Young int ret;
1153fdb5c453SSean Young
1154fdb5c453SSean Young cgrp = cgroup_get_from_fd(attr->target_fd);
1155fdb5c453SSean Young if (IS_ERR(cgrp))
1156fdb5c453SSean Young return PTR_ERR(cgrp);
1157fdb5c453SSean Young
11587dd68b32SAndrey Ignatov if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
11597dd68b32SAndrey Ignatov (attr->attach_flags & BPF_F_REPLACE)) {
11607dd68b32SAndrey Ignatov replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
11617dd68b32SAndrey Ignatov if (IS_ERR(replace_prog)) {
11627dd68b32SAndrey Ignatov cgroup_put(cgrp);
11637dd68b32SAndrey Ignatov return PTR_ERR(replace_prog);
11647dd68b32SAndrey Ignatov }
11657dd68b32SAndrey Ignatov }
11667dd68b32SAndrey Ignatov
1167af6eea57SAndrii Nakryiko ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
1168af6eea57SAndrii Nakryiko attr->attach_type, attr->attach_flags);
11697dd68b32SAndrey Ignatov
11707dd68b32SAndrey Ignatov if (replace_prog)
11717dd68b32SAndrey Ignatov bpf_prog_put(replace_prog);
1172fdb5c453SSean Young cgroup_put(cgrp);
1173fdb5c453SSean Young return ret;
1174fdb5c453SSean Young }
1175fdb5c453SSean Young
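/* Illustrative userspace sketch (not part of this file; fd values are
 * hypothetical): the legacy BPF_PROG_ATTACH command lands in
 * cgroup_bpf_prog_attach() above via the bpf(2) syscall:
 *
 *	union bpf_attr attr = {};
 *
 *	attr.target_fd     = cgroup_fd;		// open fd of the cgroup directory
 *	attr.attach_bpf_fd = prog_fd;		// loaded cgroup program
 *	attr.attach_type   = BPF_CGROUP_INET_INGRESS;
 *	attr.attach_flags  = BPF_F_ALLOW_MULTI;
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */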
cgroup_bpf_prog_detach(const union bpf_attr * attr,enum bpf_prog_type ptype)1176fdb5c453SSean Young int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
1177fdb5c453SSean Young {
1178fdb5c453SSean Young struct bpf_prog *prog;
1179fdb5c453SSean Young struct cgroup *cgrp;
1180fdb5c453SSean Young int ret;
1181fdb5c453SSean Young
1182fdb5c453SSean Young cgrp = cgroup_get_from_fd(attr->target_fd);
1183fdb5c453SSean Young if (IS_ERR(cgrp))
1184fdb5c453SSean Young return PTR_ERR(cgrp);
1185fdb5c453SSean Young
1186fdb5c453SSean Young prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1187fdb5c453SSean Young if (IS_ERR(prog))
1188fdb5c453SSean Young prog = NULL;
1189fdb5c453SSean Young
1190af6eea57SAndrii Nakryiko ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
1191fdb5c453SSean Young if (prog)
1192fdb5c453SSean Young bpf_prog_put(prog);
1193fdb5c453SSean Young
1194fdb5c453SSean Young cgroup_put(cgrp);
1195fdb5c453SSean Young return ret;
1196fdb5c453SSean Young }
1197fdb5c453SSean Young
bpf_cgroup_link_release(struct bpf_link * link)1198af6eea57SAndrii Nakryiko static void bpf_cgroup_link_release(struct bpf_link *link)
1199af6eea57SAndrii Nakryiko {
1200af6eea57SAndrii Nakryiko struct bpf_cgroup_link *cg_link =
1201af6eea57SAndrii Nakryiko container_of(link, struct bpf_cgroup_link, link);
120273b11c2aSAndrii Nakryiko struct cgroup *cg;
1203af6eea57SAndrii Nakryiko
1204af6eea57SAndrii Nakryiko /* link might have been auto-detached by dying cgroup already,
1205af6eea57SAndrii Nakryiko * in that case our work is done here
1206af6eea57SAndrii Nakryiko */
1207af6eea57SAndrii Nakryiko if (!cg_link->cgroup)
1208af6eea57SAndrii Nakryiko return;
1209af6eea57SAndrii Nakryiko
12104cdb91b0SKamalesh Babulal cgroup_lock();
1211af6eea57SAndrii Nakryiko
1212af6eea57SAndrii Nakryiko /* re-check cgroup under lock again */
1213af6eea57SAndrii Nakryiko if (!cg_link->cgroup) {
12144cdb91b0SKamalesh Babulal cgroup_unlock();
1215af6eea57SAndrii Nakryiko return;
1216af6eea57SAndrii Nakryiko }
1217af6eea57SAndrii Nakryiko
1218af6eea57SAndrii Nakryiko WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
1219af6eea57SAndrii Nakryiko cg_link->type));
122069fd337aSStanislav Fomichev if (cg_link->type == BPF_LSM_CGROUP)
122169fd337aSStanislav Fomichev bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
1222af6eea57SAndrii Nakryiko
122373b11c2aSAndrii Nakryiko cg = cg_link->cgroup;
122473b11c2aSAndrii Nakryiko cg_link->cgroup = NULL;
122573b11c2aSAndrii Nakryiko
12264cdb91b0SKamalesh Babulal cgroup_unlock();
122773b11c2aSAndrii Nakryiko
122873b11c2aSAndrii Nakryiko cgroup_put(cg);
1229af6eea57SAndrii Nakryiko }
1230af6eea57SAndrii Nakryiko
bpf_cgroup_link_dealloc(struct bpf_link * link)1231af6eea57SAndrii Nakryiko static void bpf_cgroup_link_dealloc(struct bpf_link *link)
1232af6eea57SAndrii Nakryiko {
1233af6eea57SAndrii Nakryiko struct bpf_cgroup_link *cg_link =
1234af6eea57SAndrii Nakryiko container_of(link, struct bpf_cgroup_link, link);
1235af6eea57SAndrii Nakryiko
1236af6eea57SAndrii Nakryiko kfree(cg_link);
1237af6eea57SAndrii Nakryiko }
1238af6eea57SAndrii Nakryiko
bpf_cgroup_link_detach(struct bpf_link * link)123973b11c2aSAndrii Nakryiko static int bpf_cgroup_link_detach(struct bpf_link *link)
124073b11c2aSAndrii Nakryiko {
124173b11c2aSAndrii Nakryiko bpf_cgroup_link_release(link);
124273b11c2aSAndrii Nakryiko
124373b11c2aSAndrii Nakryiko return 0;
124473b11c2aSAndrii Nakryiko }
124573b11c2aSAndrii Nakryiko
bpf_cgroup_link_show_fdinfo(const struct bpf_link * link,struct seq_file * seq)1246f2e10bffSAndrii Nakryiko static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
1247f2e10bffSAndrii Nakryiko struct seq_file *seq)
1248f2e10bffSAndrii Nakryiko {
1249f2e10bffSAndrii Nakryiko struct bpf_cgroup_link *cg_link =
1250f2e10bffSAndrii Nakryiko container_of(link, struct bpf_cgroup_link, link);
1251f2e10bffSAndrii Nakryiko u64 cg_id = 0;
1252f2e10bffSAndrii Nakryiko
12534cdb91b0SKamalesh Babulal cgroup_lock();
1254f2e10bffSAndrii Nakryiko if (cg_link->cgroup)
1255f2e10bffSAndrii Nakryiko cg_id = cgroup_id(cg_link->cgroup);
12564cdb91b0SKamalesh Babulal cgroup_unlock();
1257f2e10bffSAndrii Nakryiko
1258f2e10bffSAndrii Nakryiko seq_printf(seq,
1259f2e10bffSAndrii Nakryiko "cgroup_id:\t%llu\n"
1260f2e10bffSAndrii Nakryiko "attach_type:\t%d\n",
1261f2e10bffSAndrii Nakryiko cg_id,
1262f2e10bffSAndrii Nakryiko cg_link->type);
1263f2e10bffSAndrii Nakryiko }
1264f2e10bffSAndrii Nakryiko
bpf_cgroup_link_fill_link_info(const struct bpf_link * link,struct bpf_link_info * info)1265f2e10bffSAndrii Nakryiko static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
1266f2e10bffSAndrii Nakryiko struct bpf_link_info *info)
1267f2e10bffSAndrii Nakryiko {
1268f2e10bffSAndrii Nakryiko struct bpf_cgroup_link *cg_link =
1269f2e10bffSAndrii Nakryiko container_of(link, struct bpf_cgroup_link, link);
1270f2e10bffSAndrii Nakryiko u64 cg_id = 0;
1271f2e10bffSAndrii Nakryiko
12724cdb91b0SKamalesh Babulal cgroup_lock();
1273f2e10bffSAndrii Nakryiko if (cg_link->cgroup)
1274f2e10bffSAndrii Nakryiko cg_id = cgroup_id(cg_link->cgroup);
12754cdb91b0SKamalesh Babulal cgroup_unlock();
1276f2e10bffSAndrii Nakryiko
1277f2e10bffSAndrii Nakryiko info->cgroup.cgroup_id = cg_id;
1278f2e10bffSAndrii Nakryiko info->cgroup.attach_type = cg_link->type;
1279f2e10bffSAndrii Nakryiko return 0;
1280f2e10bffSAndrii Nakryiko }
1281f2e10bffSAndrii Nakryiko
1282f2e10bffSAndrii Nakryiko static const struct bpf_link_ops bpf_cgroup_link_lops = {
1283af6eea57SAndrii Nakryiko .release = bpf_cgroup_link_release,
1284af6eea57SAndrii Nakryiko .dealloc = bpf_cgroup_link_dealloc,
128573b11c2aSAndrii Nakryiko .detach = bpf_cgroup_link_detach,
1286f9d04127SAndrii Nakryiko .update_prog = cgroup_bpf_replace,
1287f2e10bffSAndrii Nakryiko .show_fdinfo = bpf_cgroup_link_show_fdinfo,
1288f2e10bffSAndrii Nakryiko .fill_link_info = bpf_cgroup_link_fill_link_info,
1289af6eea57SAndrii Nakryiko };
1290af6eea57SAndrii Nakryiko
cgroup_bpf_link_attach(const union bpf_attr * attr,struct bpf_prog * prog)1291af6eea57SAndrii Nakryiko int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
1292af6eea57SAndrii Nakryiko {
1293a3b80e10SAndrii Nakryiko struct bpf_link_primer link_primer;
1294af6eea57SAndrii Nakryiko struct bpf_cgroup_link *link;
1295af6eea57SAndrii Nakryiko struct cgroup *cgrp;
1296a3b80e10SAndrii Nakryiko int err;
1297af6eea57SAndrii Nakryiko
1298af6eea57SAndrii Nakryiko if (attr->link_create.flags)
1299af6eea57SAndrii Nakryiko return -EINVAL;
1300af6eea57SAndrii Nakryiko
1301af6eea57SAndrii Nakryiko cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
1302af6eea57SAndrii Nakryiko if (IS_ERR(cgrp))
1303af6eea57SAndrii Nakryiko return PTR_ERR(cgrp);
1304af6eea57SAndrii Nakryiko
1305af6eea57SAndrii Nakryiko link = kzalloc(sizeof(*link), GFP_USER);
1306af6eea57SAndrii Nakryiko if (!link) {
1307af6eea57SAndrii Nakryiko err = -ENOMEM;
1308af6eea57SAndrii Nakryiko goto out_put_cgroup;
1309af6eea57SAndrii Nakryiko }
1310f2e10bffSAndrii Nakryiko bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
1311f2e10bffSAndrii Nakryiko prog);
1312af6eea57SAndrii Nakryiko link->cgroup = cgrp;
1313af6eea57SAndrii Nakryiko link->type = attr->link_create.attach_type;
1314af6eea57SAndrii Nakryiko
1315a3b80e10SAndrii Nakryiko err = bpf_link_prime(&link->link, &link_primer);
1316a3b80e10SAndrii Nakryiko if (err) {
1317af6eea57SAndrii Nakryiko kfree(link);
1318af6eea57SAndrii Nakryiko goto out_put_cgroup;
1319af6eea57SAndrii Nakryiko }
1320af6eea57SAndrii Nakryiko
13216fc88c35SDave Marchevsky err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
13226fc88c35SDave Marchevsky link->type, BPF_F_ALLOW_MULTI);
1323af6eea57SAndrii Nakryiko if (err) {
1324a3b80e10SAndrii Nakryiko bpf_link_cleanup(&link_primer);
1325af6eea57SAndrii Nakryiko goto out_put_cgroup;
1326af6eea57SAndrii Nakryiko }
1327af6eea57SAndrii Nakryiko
1328a3b80e10SAndrii Nakryiko return bpf_link_settle(&link_primer);
1329af6eea57SAndrii Nakryiko
1330af6eea57SAndrii Nakryiko out_put_cgroup:
1331af6eea57SAndrii Nakryiko cgroup_put(cgrp);
1332af6eea57SAndrii Nakryiko return err;
1333af6eea57SAndrii Nakryiko }
1334af6eea57SAndrii Nakryiko
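/* Illustrative userspace sketch (not part of this file; fd values are
 * hypothetical): the link-based attach path into cgroup_bpf_link_attach()
 * above. Unlike BPF_PROG_ATTACH, the attachment lives as long as the
 * returned link fd (or until the cgroup dies and auto-detaches it):
 *
 *	union bpf_attr attr = {};
 *	int link_fd;
 *
 *	attr.link_create.prog_fd     = prog_fd;
 *	attr.link_create.target_fd   = cgroup_fd;
 *	attr.link_create.attach_type = BPF_CGROUP_INET_SOCK_CREATE;
 *	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
 *	// close(link_fd) drops the last reference and detaches the program
 */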
cgroup_bpf_prog_query(const union bpf_attr * attr,union bpf_attr __user * uattr)1335fdb5c453SSean Young int cgroup_bpf_prog_query(const union bpf_attr *attr,
1336fdb5c453SSean Young union bpf_attr __user *uattr)
1337fdb5c453SSean Young {
1338fdb5c453SSean Young struct cgroup *cgrp;
1339fdb5c453SSean Young int ret;
1340fdb5c453SSean Young
1341fdb5c453SSean Young cgrp = cgroup_get_from_fd(attr->query.target_fd);
1342fdb5c453SSean Young if (IS_ERR(cgrp))
1343fdb5c453SSean Young return PTR_ERR(cgrp);
1344fdb5c453SSean Young
1345fdb5c453SSean Young ret = cgroup_bpf_query(cgrp, attr, uattr);
1346fdb5c453SSean Young
1347fdb5c453SSean Young cgroup_put(cgrp);
1348fdb5c453SSean Young return ret;
1349fdb5c453SSean Young }
1350fdb5c453SSean Young
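/* Illustrative userspace sketch (not part of this file; values are
 * hypothetical): BPF_PROG_QUERY reaches cgroup_bpf_prog_query() above and
 * reports the programs attached to a cgroup. With BPF_F_QUERY_EFFECTIVE the
 * effective array (including programs inherited from ancestors) is reported
 * instead of the locally attached list:
 *
 *	__u32 ids[16];
 *	union bpf_attr attr = {};
 *
 *	attr.query.target_fd   = cgroup_fd;
 *	attr.query.attach_type = BPF_CGROUP_INET_INGRESS;
 *	attr.query.query_flags = BPF_F_QUERY_EFFECTIVE;
 *	attr.query.prog_cnt    = 16;
 *	attr.query.prog_ids    = (__u64)(unsigned long)ids;
 *	syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
 */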
135130070984SDaniel Mack /**
1352b2cd1257SDavid Ahern * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
13538f917bbaSWillem de Bruijn * @sk: The socket sending or receiving traffic
135430070984SDaniel Mack * @skb: The skb that is being sent or received
1355214bfd26SRandy Dunlap * @atype: The type of program to be executed
135630070984SDaniel Mack *
135730070984SDaniel Mack * If no socket is passed, or the socket is not of type INET or INET6,
135830070984SDaniel Mack * this function does nothing and returns 0.
135930070984SDaniel Mack *
136030070984SDaniel Mack * The program type passed in via @atype must be suitable for network
136130070984SDaniel Mack * filtering. No further check is performed to assert that.
136230070984SDaniel Mack *
1363e7a3160dSbrakmo * For egress packets, this function can return:
1364e7a3160dSbrakmo * NET_XMIT_SUCCESS (0) - continue with packet output
1365e7a3160dSbrakmo * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
1366e7a3160dSbrakmo * NET_XMIT_CN (2) - continue with packet output and notify TCP
1367e7a3160dSbrakmo * to call cwr
1368b44123b4SYiFei Zhu * -err - drop packet
1369e7a3160dSbrakmo *
1370e7a3160dSbrakmo * For ingress packets, this function will return -EPERM if any
1371e7a3160dSbrakmo * attached program was found and if it returned != 1 during execution.
1372e7a3160dSbrakmo * Otherwise 0 is returned.
137330070984SDaniel Mack */
__cgroup_bpf_run_filter_skb(struct sock * sk,struct sk_buff * skb,enum cgroup_bpf_attach_type atype)1374b2cd1257SDavid Ahern int __cgroup_bpf_run_filter_skb(struct sock *sk,
137530070984SDaniel Mack struct sk_buff *skb,
13766fc88c35SDave Marchevsky enum cgroup_bpf_attach_type atype)
137730070984SDaniel Mack {
1378324bda9eSAlexei Starovoitov unsigned int offset = skb->data - skb_network_header(skb);
1379324bda9eSAlexei Starovoitov struct sock *save_sk;
1380b39b5f41SSong Liu void *saved_data_end;
138130070984SDaniel Mack struct cgroup *cgrp;
1382324bda9eSAlexei Starovoitov int ret;
138330070984SDaniel Mack
138430070984SDaniel Mack if (!sk || !sk_fullsock(sk))
138530070984SDaniel Mack return 0;
138630070984SDaniel Mack
1387324bda9eSAlexei Starovoitov if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
138830070984SDaniel Mack return 0;
138930070984SDaniel Mack
139030070984SDaniel Mack cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1391324bda9eSAlexei Starovoitov save_sk = skb->sk;
13928f917bbaSWillem de Bruijn skb->sk = sk;
139330070984SDaniel Mack __skb_push(skb, offset);
1394b39b5f41SSong Liu
1395b39b5f41SSong Liu /* compute pointers for the bpf prog */
1396b39b5f41SSong Liu bpf_compute_and_save_data_end(skb, &saved_data_end);
1397b39b5f41SSong Liu
13986fc88c35SDave Marchevsky if (atype == CGROUP_INET_EGRESS) {
1399055eb955SStanislav Fomichev u32 flags = 0;
1400055eb955SStanislav Fomichev bool cn;
1401055eb955SStanislav Fomichev
1402d9d31cf8SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
1403d9d31cf8SStanislav Fomichev __bpf_prog_run_save_cb, 0, &flags);
1404055eb955SStanislav Fomichev
1405055eb955SStanislav Fomichev /* Return values of CGROUP EGRESS BPF programs are:
1406055eb955SStanislav Fomichev * 0: drop packet
1407055eb955SStanislav Fomichev * 1: keep packet
1408055eb955SStanislav Fomichev * 2: drop packet and cn
1409055eb955SStanislav Fomichev * 3: keep packet and cn
1410055eb955SStanislav Fomichev *
1411055eb955SStanislav Fomichev * The returned value is then converted to one of the NET_XMIT
1412055eb955SStanislav Fomichev * or an error code that is then interpreted as drop packet
1413055eb955SStanislav Fomichev * (and no cn):
1414055eb955SStanislav Fomichev * 0: NET_XMIT_SUCCESS skb should be transmitted
1415055eb955SStanislav Fomichev * 1: NET_XMIT_DROP skb should be dropped and cn
1416055eb955SStanislav Fomichev * 2: NET_XMIT_CN skb should be transmitted and cn
1417055eb955SStanislav Fomichev * 3: -err skb should be dropped
1418055eb955SStanislav Fomichev */
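		/* Worked example (illustrative): a program returning 3
		 * (keep + cn) leaves ret == 0 and sets BPF_RET_SET_CN in
		 * flags, so the skb is transmitted as NET_XMIT_CN; a program
		 * returning 0 makes ret == -EPERM with no cn bit, and that
		 * error is propagated as-is, dropping the skb.
		 */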
1419055eb955SStanislav Fomichev
1420055eb955SStanislav Fomichev cn = flags & BPF_RET_SET_CN;
1421055eb955SStanislav Fomichev if (ret && !IS_ERR_VALUE((long)ret))
1422055eb955SStanislav Fomichev ret = -EFAULT;
1423055eb955SStanislav Fomichev if (!ret)
1424055eb955SStanislav Fomichev ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
1425055eb955SStanislav Fomichev else
1426055eb955SStanislav Fomichev ret = (cn ? NET_XMIT_DROP : ret);
1427e7a3160dSbrakmo } else {
1428055eb955SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
1429d9d31cf8SStanislav Fomichev skb, __bpf_prog_run_save_cb, 0,
1430d9d31cf8SStanislav Fomichev NULL);
1431b44123b4SYiFei Zhu if (ret && !IS_ERR_VALUE((long)ret))
1432b44123b4SYiFei Zhu ret = -EFAULT;
1433e7a3160dSbrakmo }
1434b39b5f41SSong Liu bpf_restore_data_end(skb, saved_data_end);
143530070984SDaniel Mack __skb_pull(skb, offset);
14368f917bbaSWillem de Bruijn skb->sk = save_sk;
1437e7a3160dSbrakmo
1438e7a3160dSbrakmo return ret;
143930070984SDaniel Mack }
1440b2cd1257SDavid Ahern EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
144161023658SDavid Ahern
144261023658SDavid Ahern /**
144361023658SDavid Ahern * __cgroup_bpf_run_filter_sk() - Run a program on a sock
144461023658SDavid Ahern * @sk: sock structure to manipulate
1445214bfd26SRandy Dunlap * @atype: The type of program to be executed
144661023658SDavid Ahern *
144761023658SDavid Ahern * The socket passed is expected to be of type INET or INET6.
144861023658SDavid Ahern *
144961023658SDavid Ahern * The program type passed in via @atype must be suitable for sock
145061023658SDavid Ahern * filtering. No further check is performed to assert that.
145161023658SDavid Ahern *
145261023658SDavid Ahern * This function will return %-EPERM if an attached program was found
145361023658SDavid Ahern * and it returned != 1 during execution. In all other cases, 0 is returned.
145461023658SDavid Ahern */
__cgroup_bpf_run_filter_sk(struct sock * sk,enum cgroup_bpf_attach_type atype)145561023658SDavid Ahern int __cgroup_bpf_run_filter_sk(struct sock *sk,
14566fc88c35SDave Marchevsky enum cgroup_bpf_attach_type atype)
145761023658SDavid Ahern {
145861023658SDavid Ahern struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
145961023658SDavid Ahern
1460d9d31cf8SStanislav Fomichev return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
1461d9d31cf8SStanislav Fomichev NULL);
146261023658SDavid Ahern }
146361023658SDavid Ahern EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
146440304b2aSLawrence Brakmo
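/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a program attached at e.g. BPF_CGROUP_INET_SOCK_CREATE is
 * run by __cgroup_bpf_run_filter_sk() above; returning 0 rejects the socket:
 *
 *	SEC("cgroup/sock")
 *	int sock_create_guard(struct bpf_sock *ctx)
 *	{
 *		if (ctx->type == SOCK_RAW)
 *			return 0;	// no raw sockets in this cgroup
 *		return 1;
 *	}
 */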
146540304b2aSLawrence Brakmo /**
14664fbac77dSAndrey Ignatov * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and a
14674fbac77dSAndrey Ignatov * sockaddr provided by the user
14684fbac77dSAndrey Ignatov * @sk: sock struct that will use sockaddr
14694fbac77dSAndrey Ignatov * @uaddr: sockaddr struct provided by user
14706d71331eSDaan De Meyer * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
14716d71331eSDaan De Meyer * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
14726d71331eSDaan De Meyer * uaddr.
1473214bfd26SRandy Dunlap * @atype: The type of program to be executed
14741cedee13SAndrey Ignatov * @t_ctx: Pointer to attach type specific context
147577241217SStanislav Fomichev * @flags: Pointer to u32 which contains higher bits of BPF program
147677241217SStanislav Fomichev * return value (OR'ed together).
14774fbac77dSAndrey Ignatov *
14784fbac77dSAndrey Ignatov * socket is expected to be of type INET or INET6.
14794fbac77dSAndrey Ignatov *
14804fbac77dSAndrey Ignatov * This function will return %-EPERM if an attached program is found and
14814fbac77dSAndrey Ignatov * returned value != 1 during execution. In all other cases, 0 is returned.
14824fbac77dSAndrey Ignatov */
__cgroup_bpf_run_filter_sock_addr(struct sock * sk,struct sockaddr * uaddr,int * uaddrlen,enum cgroup_bpf_attach_type atype,void * t_ctx,u32 * flags)14834fbac77dSAndrey Ignatov int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
14844fbac77dSAndrey Ignatov struct sockaddr *uaddr,
14856d71331eSDaan De Meyer int *uaddrlen,
14866fc88c35SDave Marchevsky enum cgroup_bpf_attach_type atype,
148777241217SStanislav Fomichev void *t_ctx,
148877241217SStanislav Fomichev u32 *flags)
14894fbac77dSAndrey Ignatov {
14904fbac77dSAndrey Ignatov struct bpf_sock_addr_kern ctx = {
14914fbac77dSAndrey Ignatov .sk = sk,
14924fbac77dSAndrey Ignatov .uaddr = uaddr,
14931cedee13SAndrey Ignatov .t_ctx = t_ctx,
14944fbac77dSAndrey Ignatov };
14951cedee13SAndrey Ignatov struct sockaddr_storage unspec;
14964fbac77dSAndrey Ignatov struct cgroup *cgrp;
14976d71331eSDaan De Meyer int ret;
14984fbac77dSAndrey Ignatov
14994fbac77dSAndrey Ignatov /* Check socket family since not all sockets represent network
15004fbac77dSAndrey Ignatov * endpoints (e.g. AF_UNIX).
15014fbac77dSAndrey Ignatov */
15024fbac77dSAndrey Ignatov if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
15034fbac77dSAndrey Ignatov return 0;
15044fbac77dSAndrey Ignatov
15051cedee13SAndrey Ignatov if (!ctx.uaddr) {
15061cedee13SAndrey Ignatov memset(&unspec, 0, sizeof(unspec));
15071cedee13SAndrey Ignatov ctx.uaddr = (struct sockaddr *)&unspec;
15086d71331eSDaan De Meyer ctx.uaddrlen = 0;
15096d71331eSDaan De Meyer } else {
15106d71331eSDaan De Meyer ctx.uaddrlen = *uaddrlen;
15111cedee13SAndrey Ignatov }
15121cedee13SAndrey Ignatov
15134fbac77dSAndrey Ignatov cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
15146d71331eSDaan De Meyer ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
1515d9d31cf8SStanislav Fomichev 0, flags);
15166d71331eSDaan De Meyer
15176d71331eSDaan De Meyer if (!ret && uaddr)
15186d71331eSDaan De Meyer *uaddrlen = ctx.uaddrlen;
15196d71331eSDaan De Meyer
15206d71331eSDaan De Meyer return ret;
15214fbac77dSAndrey Ignatov }
15224fbac77dSAndrey Ignatov EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
15234fbac77dSAndrey Ignatov
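/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a SEC("cgroup/connect4") program run through
 * __cgroup_bpf_run_filter_sock_addr() above may rewrite the destination
 * before the kernel sees it, or return 0 to fail the call with -EPERM:
 *
 *	SEC("cgroup/connect4")
 *	int redirect_dns(struct bpf_sock_addr *ctx)
 *	{
 *		if (ctx->user_port == bpf_htons(53))
 *			ctx->user_ip4 = bpf_htonl(0x7f000001);	// 127.0.0.1
 *		return 1;
 *	}
 */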
15244fbac77dSAndrey Ignatov /**
152540304b2aSLawrence Brakmo * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
152640304b2aSLawrence Brakmo * @sk: socket to get cgroup from
152740304b2aSLawrence Brakmo * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
152840304b2aSLawrence Brakmo * sk with connection information (IP addresses, etc.) May not contain
152940304b2aSLawrence Brakmo * cgroup info if it is a req sock.
1530214bfd26SRandy Dunlap * @atype: The type of program to be executed
153140304b2aSLawrence Brakmo *
153240304b2aSLawrence Brakmo * socket passed is expected to be of type INET or INET6.
153340304b2aSLawrence Brakmo *
153440304b2aSLawrence Brakmo * The program type passed in via @atype must be suitable for sock_ops
153540304b2aSLawrence Brakmo * filtering. No further check is performed to assert that.
153640304b2aSLawrence Brakmo *
153740304b2aSLawrence Brakmo * This function will return %-EPERM if an attached program was found
153840304b2aSLawrence Brakmo * and it returned != 1 during execution. In all other cases, 0 is returned.
153940304b2aSLawrence Brakmo */
__cgroup_bpf_run_filter_sock_ops(struct sock * sk,struct bpf_sock_ops_kern * sock_ops,enum cgroup_bpf_attach_type atype)154040304b2aSLawrence Brakmo int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
154140304b2aSLawrence Brakmo struct bpf_sock_ops_kern *sock_ops,
15426fc88c35SDave Marchevsky enum cgroup_bpf_attach_type atype)
154340304b2aSLawrence Brakmo {
154440304b2aSLawrence Brakmo struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
154540304b2aSLawrence Brakmo
1546055eb955SStanislav Fomichev return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
1547d9d31cf8SStanislav Fomichev 0, NULL);
154840304b2aSLawrence Brakmo }
154940304b2aSLawrence Brakmo EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
1550ebc614f6SRoman Gushchin
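/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a SEC("sockops") program run by
 * __cgroup_bpf_run_filter_sock_ops() above dispatches on the operation and
 * answers through skops->reply:
 *
 *	SEC("sockops")
 *	int set_init_rwnd(struct bpf_sock_ops *skops)
 *	{
 *		if (skops->op == BPF_SOCK_OPS_RWND_INIT)
 *			skops->reply = 40;	// initial rwnd, in MSS units
 *		return 1;
 *	}
 */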
__cgroup_bpf_check_dev_permission(short dev_type,u32 major,u32 minor,short access,enum cgroup_bpf_attach_type atype)1551ebc614f6SRoman Gushchin int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
15526fc88c35SDave Marchevsky short access, enum cgroup_bpf_attach_type atype)
1553ebc614f6SRoman Gushchin {
1554ebc614f6SRoman Gushchin struct cgroup *cgrp;
1555ebc614f6SRoman Gushchin struct bpf_cgroup_dev_ctx ctx = {
1556ebc614f6SRoman Gushchin .access_type = (access << 16) | dev_type,
1557ebc614f6SRoman Gushchin .major = major,
1558ebc614f6SRoman Gushchin .minor = minor,
1559ebc614f6SRoman Gushchin };
1560f10d0596SYiFei Zhu int ret;
1561ebc614f6SRoman Gushchin
1562ebc614f6SRoman Gushchin rcu_read_lock();
1563ebc614f6SRoman Gushchin cgrp = task_dfl_cgroup(current);
1564d9d31cf8SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1565d9d31cf8SStanislav Fomichev NULL);
1566ebc614f6SRoman Gushchin rcu_read_unlock();
1567ebc614f6SRoman Gushchin
1568f10d0596SYiFei Zhu return ret;
1569ebc614f6SRoman Gushchin }
1570ebc614f6SRoman Gushchin
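/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a SEC("cgroup/dev") program invoked by
 * __cgroup_bpf_check_dev_permission() above sees the packed access_type
 * built there ((access << 16) | dev_type) and returns 0 to deny:
 *
 *	SEC("cgroup/dev")
 *	int deny_mknod(struct bpf_cgroup_dev_ctx *ctx)
 *	{
 *		if ((ctx->access_type >> 16) & BPF_DEVCG_ACC_MKNOD)
 *			return 0;	// no device node creation here
 *		return 1;
 *	}
 */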
BPF_CALL_2(bpf_get_local_storage,struct bpf_map *,map,u64,flags)1571dea6a4e1SStanislav Fomichev BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
1572dea6a4e1SStanislav Fomichev {
1573dea6a4e1SStanislav Fomichev /* flags argument is not used now,
1574dea6a4e1SStanislav Fomichev * but provides the ability to extend the API.
1575dea6a4e1SStanislav Fomichev * The verifier checks that its value is correct.
1576dea6a4e1SStanislav Fomichev */
1577dea6a4e1SStanislav Fomichev enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
1578dea6a4e1SStanislav Fomichev struct bpf_cgroup_storage *storage;
1579dea6a4e1SStanislav Fomichev struct bpf_cg_run_ctx *ctx;
1580dea6a4e1SStanislav Fomichev void *ptr;
1581dea6a4e1SStanislav Fomichev
1582dea6a4e1SStanislav Fomichev /* get current cgroup storage from BPF run context */
1583dea6a4e1SStanislav Fomichev ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1584dea6a4e1SStanislav Fomichev storage = ctx->prog_item->cgroup_storage[stype];
1585dea6a4e1SStanislav Fomichev
1586dea6a4e1SStanislav Fomichev if (stype == BPF_CGROUP_STORAGE_SHARED)
1587dea6a4e1SStanislav Fomichev ptr = &READ_ONCE(storage->buf)->data[0];
1588dea6a4e1SStanislav Fomichev else
1589dea6a4e1SStanislav Fomichev ptr = this_cpu_ptr(storage->percpu_buf);
1590dea6a4e1SStanislav Fomichev
1591dea6a4e1SStanislav Fomichev return (unsigned long)ptr;
1592dea6a4e1SStanislav Fomichev }
1593dea6a4e1SStanislav Fomichev
1594dea6a4e1SStanislav Fomichev const struct bpf_func_proto bpf_get_local_storage_proto = {
1595dea6a4e1SStanislav Fomichev .func = bpf_get_local_storage,
1596dea6a4e1SStanislav Fomichev .gpl_only = false,
1597dea6a4e1SStanislav Fomichev .ret_type = RET_PTR_TO_MAP_VALUE,
1598dea6a4e1SStanislav Fomichev .arg1_type = ARG_CONST_MAP_PTR,
1599dea6a4e1SStanislav Fomichev .arg2_type = ARG_ANYTHING,
1600dea6a4e1SStanislav Fomichev };
1601dea6a4e1SStanislav Fomichev
BPF_CALL_0(bpf_get_retval)1602b44123b4SYiFei Zhu BPF_CALL_0(bpf_get_retval)
1603b44123b4SYiFei Zhu {
1604b44123b4SYiFei Zhu struct bpf_cg_run_ctx *ctx =
1605b44123b4SYiFei Zhu container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1606b44123b4SYiFei Zhu
1607b44123b4SYiFei Zhu return ctx->retval;
1608b44123b4SYiFei Zhu }
1609b44123b4SYiFei Zhu
161069fd337aSStanislav Fomichev const struct bpf_func_proto bpf_get_retval_proto = {
1611b44123b4SYiFei Zhu .func = bpf_get_retval,
1612b44123b4SYiFei Zhu .gpl_only = false,
1613b44123b4SYiFei Zhu .ret_type = RET_INTEGER,
1614b44123b4SYiFei Zhu };
1615b44123b4SYiFei Zhu
BPF_CALL_1(bpf_set_retval,int,retval)1616b44123b4SYiFei Zhu BPF_CALL_1(bpf_set_retval, int, retval)
1617b44123b4SYiFei Zhu {
1618b44123b4SYiFei Zhu struct bpf_cg_run_ctx *ctx =
1619b44123b4SYiFei Zhu container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1620b44123b4SYiFei Zhu
1621b44123b4SYiFei Zhu ctx->retval = retval;
1622b44123b4SYiFei Zhu return 0;
1623b44123b4SYiFei Zhu }
1624b44123b4SYiFei Zhu
162569fd337aSStanislav Fomichev const struct bpf_func_proto bpf_set_retval_proto = {
1626b44123b4SYiFei Zhu .func = bpf_set_retval,
1627b44123b4SYiFei Zhu .gpl_only = false,
1628b44123b4SYiFei Zhu .ret_type = RET_INTEGER,
1629b44123b4SYiFei Zhu .arg1_type = ARG_ANYTHING,
1630b44123b4SYiFei Zhu };
1631b44123b4SYiFei Zhu
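/* Illustrative usage (BPF side, hypothetical program): bpf_set_retval()
 * lets a cgroup program replace the default -EPERM seen by userspace when
 * it rejects an operation:
 *
 *	if (!allowed) {
 *		bpf_set_retval(-EACCES);
 *		return 0;	// caller's syscall fails with -EACCES
 *	}
 *	return 1;
 */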
1632ebc614f6SRoman Gushchin static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)1633dea6a4e1SStanislav Fomichev cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1634ebc614f6SRoman Gushchin {
1635dea6a4e1SStanislav Fomichev const struct bpf_func_proto *func_proto;
1636dea6a4e1SStanislav Fomichev
1637dea6a4e1SStanislav Fomichev func_proto = cgroup_common_func_proto(func_id, prog);
1638dea6a4e1SStanislav Fomichev if (func_proto)
1639dea6a4e1SStanislav Fomichev return func_proto;
1640dea6a4e1SStanislav Fomichev
1641dea6a4e1SStanislav Fomichev func_proto = cgroup_current_func_proto(func_id, prog);
1642dea6a4e1SStanislav Fomichev if (func_proto)
1643dea6a4e1SStanislav Fomichev return func_proto;
1644dea6a4e1SStanislav Fomichev
1645ebc614f6SRoman Gushchin switch (func_id) {
16460456ea17SStanislav Fomichev case BPF_FUNC_perf_event_output:
16470456ea17SStanislav Fomichev return &bpf_event_output_data_proto;
1648ebc614f6SRoman Gushchin default:
16490456ea17SStanislav Fomichev return bpf_base_func_proto(func_id);
1650ebc614f6SRoman Gushchin }
1651ebc614f6SRoman Gushchin }
1652ebc614f6SRoman Gushchin
cgroup_dev_is_valid_access(int off,int size,enum bpf_access_type type,const struct bpf_prog * prog,struct bpf_insn_access_aux * info)1653ebc614f6SRoman Gushchin static bool cgroup_dev_is_valid_access(int off, int size,
1654ebc614f6SRoman Gushchin enum bpf_access_type type,
16555e43f899SAndrey Ignatov const struct bpf_prog *prog,
1656ebc614f6SRoman Gushchin struct bpf_insn_access_aux *info)
1657ebc614f6SRoman Gushchin {
165806ef0ccbSYonghong Song const int size_default = sizeof(__u32);
165906ef0ccbSYonghong Song
1660ebc614f6SRoman Gushchin if (type == BPF_WRITE)
1661ebc614f6SRoman Gushchin return false;
1662ebc614f6SRoman Gushchin
1663ebc614f6SRoman Gushchin if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
1664ebc614f6SRoman Gushchin return false;
1665ebc614f6SRoman Gushchin /* The verifier guarantees that size > 0. */
1666ebc614f6SRoman Gushchin if (off % size != 0)
1667ebc614f6SRoman Gushchin return false;
166806ef0ccbSYonghong Song
166906ef0ccbSYonghong Song switch (off) {
167006ef0ccbSYonghong Song case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
167106ef0ccbSYonghong Song bpf_ctx_record_field_size(info, size_default);
167206ef0ccbSYonghong Song if (!bpf_ctx_narrow_access_ok(off, size, size_default))
1673ebc614f6SRoman Gushchin return false;
167406ef0ccbSYonghong Song break;
167506ef0ccbSYonghong Song default:
167606ef0ccbSYonghong Song if (size != size_default)
167706ef0ccbSYonghong Song return false;
167806ef0ccbSYonghong Song }
1679ebc614f6SRoman Gushchin
1680ebc614f6SRoman Gushchin return true;
1681ebc614f6SRoman Gushchin }
1682ebc614f6SRoman Gushchin
1683ebc614f6SRoman Gushchin const struct bpf_prog_ops cg_dev_prog_ops = {
1684ebc614f6SRoman Gushchin };
1685ebc614f6SRoman Gushchin
1686ebc614f6SRoman Gushchin const struct bpf_verifier_ops cg_dev_verifier_ops = {
1687ebc614f6SRoman Gushchin .get_func_proto = cgroup_dev_func_proto,
1688ebc614f6SRoman Gushchin .is_valid_access = cgroup_dev_is_valid_access,
1689ebc614f6SRoman Gushchin };
16907b146cebSAndrey Ignatov
16917b146cebSAndrey Ignatov /**
16927b146cebSAndrey Ignatov * __cgroup_bpf_run_filter_sysctl() - Run a program on sysctl
16937b146cebSAndrey Ignatov *
16947b146cebSAndrey Ignatov * @head: sysctl table header
16957b146cebSAndrey Ignatov * @table: sysctl table
16967b146cebSAndrey Ignatov * @write: sysctl is being read (= 0) or written (= 1)
169732927393SChristoph Hellwig * @buf: pointer to buffer (in and out)
16984e63acdfSAndrey Ignatov * @pcount: value-result argument: value is size of buffer pointed to by @buf,
16994e63acdfSAndrey Ignatov * result is size of the new value if the program set one, initial value
17004e63acdfSAndrey Ignatov * otherwise
1701e1550bfeSAndrey Ignatov * @ppos: value-result argument: value is position at which read from or write
1702e1550bfeSAndrey Ignatov * to sysctl is happening, result is new position if program overrode it,
1703e1550bfeSAndrey Ignatov * initial value otherwise
1704214bfd26SRandy Dunlap * @atype: type of program to be executed
17057b146cebSAndrey Ignatov *
17067b146cebSAndrey Ignatov * Program is run when sysctl is being accessed, either read or written, and
17077b146cebSAndrey Ignatov * can allow or deny such access.
17087b146cebSAndrey Ignatov *
17097b146cebSAndrey Ignatov * This function will return %-EPERM if an attached program is found and
17107b146cebSAndrey Ignatov * returned value != 1 during execution. In all other cases 0 is returned.
17117b146cebSAndrey Ignatov */
__cgroup_bpf_run_filter_sysctl(struct ctl_table_header * head,struct ctl_table * table,int write,char ** buf,size_t * pcount,loff_t * ppos,enum cgroup_bpf_attach_type atype)17127b146cebSAndrey Ignatov int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
17137b146cebSAndrey Ignatov struct ctl_table *table, int write,
17144bd6a735SMatthew Wilcox (Oracle) char **buf, size_t *pcount, loff_t *ppos,
17156fc88c35SDave Marchevsky enum cgroup_bpf_attach_type atype)
17167b146cebSAndrey Ignatov {
17177b146cebSAndrey Ignatov struct bpf_sysctl_kern ctx = {
17187b146cebSAndrey Ignatov .head = head,
17197b146cebSAndrey Ignatov .table = table,
17207b146cebSAndrey Ignatov .write = write,
1721e1550bfeSAndrey Ignatov .ppos = ppos,
17221d11b301SAndrey Ignatov .cur_val = NULL,
17231d11b301SAndrey Ignatov .cur_len = PAGE_SIZE,
17244e63acdfSAndrey Ignatov .new_val = NULL,
17254e63acdfSAndrey Ignatov .new_len = 0,
17264e63acdfSAndrey Ignatov .new_updated = 0,
17277b146cebSAndrey Ignatov };
17287b146cebSAndrey Ignatov struct cgroup *cgrp;
172932927393SChristoph Hellwig loff_t pos = 0;
17307b146cebSAndrey Ignatov int ret;
17317b146cebSAndrey Ignatov
17321d11b301SAndrey Ignatov ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
173332927393SChristoph Hellwig if (!ctx.cur_val ||
173432927393SChristoph Hellwig table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
17351d11b301SAndrey Ignatov /* Let BPF program decide how to proceed. */
17361d11b301SAndrey Ignatov ctx.cur_len = 0;
17371d11b301SAndrey Ignatov }
17381d11b301SAndrey Ignatov
173932927393SChristoph Hellwig if (write && *buf && *pcount) {
17404e63acdfSAndrey Ignatov /* BPF program should be able to override new value with a
17414e63acdfSAndrey Ignatov * buffer bigger than provided by user.
17424e63acdfSAndrey Ignatov */
17434e63acdfSAndrey Ignatov ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
174451356ac8SAndrey Ignatov ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
174532927393SChristoph Hellwig if (ctx.new_val) {
174632927393SChristoph Hellwig memcpy(ctx.new_val, *buf, ctx.new_len);
174732927393SChristoph Hellwig } else {
17484e63acdfSAndrey Ignatov /* Let BPF program decide how to proceed. */
17494e63acdfSAndrey Ignatov ctx.new_len = 0;
17504e63acdfSAndrey Ignatov }
175132927393SChristoph Hellwig }
17524e63acdfSAndrey Ignatov
17537b146cebSAndrey Ignatov rcu_read_lock();
17547b146cebSAndrey Ignatov cgrp = task_dfl_cgroup(current);
1755d9d31cf8SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1756d9d31cf8SStanislav Fomichev NULL);
17577b146cebSAndrey Ignatov rcu_read_unlock();
17587b146cebSAndrey Ignatov
17591d11b301SAndrey Ignatov kfree(ctx.cur_val);
17601d11b301SAndrey Ignatov
17614e63acdfSAndrey Ignatov if (ret == 1 && ctx.new_updated) {
176232927393SChristoph Hellwig kfree(*buf);
176332927393SChristoph Hellwig *buf = ctx.new_val;
17644e63acdfSAndrey Ignatov *pcount = ctx.new_len;
17654e63acdfSAndrey Ignatov } else {
17664e63acdfSAndrey Ignatov kfree(ctx.new_val);
17674e63acdfSAndrey Ignatov }
17684e63acdfSAndrey Ignatov
1769f10d0596SYiFei Zhu return ret;
17707b146cebSAndrey Ignatov }
17717b146cebSAndrey Ignatov
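/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a SEC("cgroup/sysctl") program run by
 * __cgroup_bpf_run_filter_sysctl() above can inspect the sysctl name and
 * deny writes by returning 0:
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_guard(struct bpf_sysctl *ctx)
 *	{
 *		char name[64];
 *
 *		bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
 *		return ctx->write ? 0 : 1;	// read-only sysctls in this cgroup
 *	}
 */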
17726705fea0SYueHaibing #ifdef CONFIG_NET
sockopt_alloc_buf(struct bpf_sockopt_kern * ctx,int max_optlen,struct bpf_sockopt_buf * buf)177320f2505fSStanislav Fomichev static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
177420f2505fSStanislav Fomichev struct bpf_sockopt_buf *buf)
17750d01da6aSStanislav Fomichev {
1776d8fe449aSStanislav Fomichev if (unlikely(max_optlen < 0))
17770d01da6aSStanislav Fomichev return -EINVAL;
17780d01da6aSStanislav Fomichev
1779d8fe449aSStanislav Fomichev if (unlikely(max_optlen > PAGE_SIZE)) {
1780d8fe449aSStanislav Fomichev /* We don't expose optvals that are greater than PAGE_SIZE
1781d8fe449aSStanislav Fomichev * to the BPF program.
1782d8fe449aSStanislav Fomichev */
1783d8fe449aSStanislav Fomichev max_optlen = PAGE_SIZE;
1784d8fe449aSStanislav Fomichev }
1785d8fe449aSStanislav Fomichev
178620f2505fSStanislav Fomichev if (max_optlen <= sizeof(buf->data)) {
178720f2505fSStanislav Fomichev /* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
178820f2505fSStanislav Fomichev * bytes, avoid the cost of kzalloc.
178920f2505fSStanislav Fomichev */
179020f2505fSStanislav Fomichev ctx->optval = buf->data;
179120f2505fSStanislav Fomichev ctx->optval_end = ctx->optval + max_optlen;
179220f2505fSStanislav Fomichev return max_optlen;
179320f2505fSStanislav Fomichev }
179420f2505fSStanislav Fomichev
17950d01da6aSStanislav Fomichev ctx->optval = kzalloc(max_optlen, GFP_USER);
17960d01da6aSStanislav Fomichev if (!ctx->optval)
17970d01da6aSStanislav Fomichev return -ENOMEM;
17980d01da6aSStanislav Fomichev
17990d01da6aSStanislav Fomichev ctx->optval_end = ctx->optval + max_optlen;
18000d01da6aSStanislav Fomichev
1801d8fe449aSStanislav Fomichev return max_optlen;
18020d01da6aSStanislav Fomichev }
18030d01da6aSStanislav Fomichev
sockopt_free_buf(struct bpf_sockopt_kern * ctx,struct bpf_sockopt_buf * buf)180420f2505fSStanislav Fomichev static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
180520f2505fSStanislav Fomichev struct bpf_sockopt_buf *buf)
18060d01da6aSStanislav Fomichev {
180720f2505fSStanislav Fomichev if (ctx->optval == buf->data)
180820f2505fSStanislav Fomichev return;
18090d01da6aSStanislav Fomichev kfree(ctx->optval);
18100d01da6aSStanislav Fomichev }
18110d01da6aSStanislav Fomichev
sockopt_buf_allocated(struct bpf_sockopt_kern * ctx,struct bpf_sockopt_buf * buf)181220f2505fSStanislav Fomichev static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
181320f2505fSStanislav Fomichev struct bpf_sockopt_buf *buf)
181420f2505fSStanislav Fomichev {
181520f2505fSStanislav Fomichev return ctx->optval != buf->data;
181620f2505fSStanislav Fomichev }
181720f2505fSStanislav Fomichev
__cgroup_bpf_run_filter_setsockopt(struct sock * sk,int * level,int * optname,sockptr_t optval,int * optlen,char ** kernel_optval)18180d01da6aSStanislav Fomichev int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
181909fba016SBreno Leitao int *optname, sockptr_t optval,
18200d01da6aSStanislav Fomichev int *optlen, char **kernel_optval)
18210d01da6aSStanislav Fomichev {
18220d01da6aSStanislav Fomichev struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
182320f2505fSStanislav Fomichev struct bpf_sockopt_buf buf = {};
18240d01da6aSStanislav Fomichev struct bpf_sockopt_kern ctx = {
18250d01da6aSStanislav Fomichev .sk = sk,
18260d01da6aSStanislav Fomichev .level = *level,
18270d01da6aSStanislav Fomichev .optname = *optname,
18280d01da6aSStanislav Fomichev };
18299babe825SStanislav Fomichev int ret, max_optlen;
18300d01da6aSStanislav Fomichev
18319babe825SStanislav Fomichev /* Allocate a bit more than the initial user buffer for
18329babe825SStanislav Fomichev * BPF program. The canonical use case is overriding
18339babe825SStanislav Fomichev * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
18349babe825SStanislav Fomichev */
18359babe825SStanislav Fomichev max_optlen = max_t(int, 16, *optlen);
183620f2505fSStanislav Fomichev max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1837d8fe449aSStanislav Fomichev if (max_optlen < 0)
1838d8fe449aSStanislav Fomichev return max_optlen;
18390d01da6aSStanislav Fomichev
18409babe825SStanislav Fomichev ctx.optlen = *optlen;
18419babe825SStanislav Fomichev
184209fba016SBreno Leitao if (copy_from_sockptr(ctx.optval, optval,
184309fba016SBreno Leitao min(*optlen, max_optlen))) {
18440d01da6aSStanislav Fomichev ret = -EFAULT;
18450d01da6aSStanislav Fomichev goto out;
18460d01da6aSStanislav Fomichev }
18470d01da6aSStanislav Fomichev
18480d01da6aSStanislav Fomichev lock_sock(sk);
1849055eb955SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
1850d9d31cf8SStanislav Fomichev &ctx, bpf_prog_run, 0, NULL);
18510d01da6aSStanislav Fomichev release_sock(sk);
18520d01da6aSStanislav Fomichev
1853f10d0596SYiFei Zhu if (ret)
18540d01da6aSStanislav Fomichev goto out;
18550d01da6aSStanislav Fomichev
18560d01da6aSStanislav Fomichev if (ctx.optlen == -1) {
18570d01da6aSStanislav Fomichev /* optlen set to -1, bypass kernel */
18580d01da6aSStanislav Fomichev ret = 1;
18599babe825SStanislav Fomichev } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
18600d01da6aSStanislav Fomichev /* optlen is out of bounds */
186129ebbba7SStanislav Fomichev if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
186229ebbba7SStanislav Fomichev pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
186329ebbba7SStanislav Fomichev ctx.optlen, max_optlen);
186429ebbba7SStanislav Fomichev ret = 0;
186529ebbba7SStanislav Fomichev goto out;
186629ebbba7SStanislav Fomichev }
18670d01da6aSStanislav Fomichev ret = -EFAULT;
18680d01da6aSStanislav Fomichev } else {
18690d01da6aSStanislav Fomichev /* optlen within bounds, run kernel handler */
18700d01da6aSStanislav Fomichev ret = 0;
18710d01da6aSStanislav Fomichev
18720d01da6aSStanislav Fomichev /* export any potential modifications */
18730d01da6aSStanislav Fomichev *level = ctx.level;
18740d01da6aSStanislav Fomichev *optname = ctx.optname;
1875d8fe449aSStanislav Fomichev
1876d8fe449aSStanislav Fomichev /* optlen == 0 from BPF indicates that we should
1877d8fe449aSStanislav Fomichev * use original userspace data.
1878d8fe449aSStanislav Fomichev */
1879d8fe449aSStanislav Fomichev if (ctx.optlen != 0) {
18800d01da6aSStanislav Fomichev *optlen = ctx.optlen;
188120f2505fSStanislav Fomichev /* We've used bpf_sockopt_kern->buf as an intermediary
188220f2505fSStanislav Fomichev * storage, but the BPF program indicates that we need
188320f2505fSStanislav Fomichev * to pass this data to the kernel setsockopt handler.
188420f2505fSStanislav Fomichev * No way to export on-stack buf, have to allocate a
188520f2505fSStanislav Fomichev * new buffer.
188620f2505fSStanislav Fomichev */
188720f2505fSStanislav Fomichev if (!sockopt_buf_allocated(&ctx, &buf)) {
188820f2505fSStanislav Fomichev void *p = kmalloc(ctx.optlen, GFP_USER);
188920f2505fSStanislav Fomichev
189020f2505fSStanislav Fomichev if (!p) {
189120f2505fSStanislav Fomichev ret = -ENOMEM;
189220f2505fSStanislav Fomichev goto out;
189320f2505fSStanislav Fomichev }
189420f2505fSStanislav Fomichev memcpy(p, ctx.optval, ctx.optlen);
189520f2505fSStanislav Fomichev *kernel_optval = p;
189620f2505fSStanislav Fomichev } else {
18970d01da6aSStanislav Fomichev *kernel_optval = ctx.optval;
189820f2505fSStanislav Fomichev }
18994be34f3dSStanislav Fomichev /* export and don't free sockopt buf */
19004be34f3dSStanislav Fomichev return 0;
19010d01da6aSStanislav Fomichev }
1902d8fe449aSStanislav Fomichev }
19030d01da6aSStanislav Fomichev
19040d01da6aSStanislav Fomichev out:
190520f2505fSStanislav Fomichev sockopt_free_buf(&ctx, &buf);
19060d01da6aSStanislav Fomichev return ret;
19070d01da6aSStanislav Fomichev }
19080d01da6aSStanislav Fomichev
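/* Illustrative BPF-side sketch (not part of this file; names are
 * hypothetical): a SEC("cgroup/setsockopt") program run by
 * __cgroup_bpf_run_filter_setsockopt() above can reject an option (return 0,
 * caller sees -EPERM), let it through unchanged (return 1), or set
 * ctx->optlen = -1 to skip the kernel handler entirely:
 *
 *	SEC("cgroup/setsockopt")
 *	int block_ip_options(struct bpf_sockopt *ctx)
 *	{
 *		if (ctx->level == SOL_IP && ctx->optname == IP_OPTIONS)
 *			return 0;
 *		return 1;
 *	}
 */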
__cgroup_bpf_run_filter_getsockopt(struct sock * sk,int level,int optname,sockptr_t optval,sockptr_t optlen,int max_optlen,int retval)19090d01da6aSStanislav Fomichev int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
19104a746fb2SBreno Leitao int optname, sockptr_t optval,
19114a746fb2SBreno Leitao sockptr_t optlen, int max_optlen,
19120d01da6aSStanislav Fomichev int retval)
19130d01da6aSStanislav Fomichev {
19140d01da6aSStanislav Fomichev struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
191520f2505fSStanislav Fomichev struct bpf_sockopt_buf buf = {};
19160d01da6aSStanislav Fomichev struct bpf_sockopt_kern ctx = {
19170d01da6aSStanislav Fomichev .sk = sk,
19180d01da6aSStanislav Fomichev .level = level,
19190d01da6aSStanislav Fomichev .optname = optname,
1920c4dcfdd4SYiFei Zhu .current_task = current,
19210d01da6aSStanislav Fomichev };
192229ebbba7SStanislav Fomichev int orig_optlen;
19230d01da6aSStanislav Fomichev int ret;
19240d01da6aSStanislav Fomichev
192529ebbba7SStanislav Fomichev orig_optlen = max_optlen;
19269babe825SStanislav Fomichev ctx.optlen = max_optlen;
192720f2505fSStanislav Fomichev max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1928d8fe449aSStanislav Fomichev if (max_optlen < 0)
1929d8fe449aSStanislav Fomichev return max_optlen;
1930d8fe449aSStanislav Fomichev
19310d01da6aSStanislav Fomichev if (!retval) {
19320d01da6aSStanislav Fomichev /* If kernel getsockopt finished successfully,
19330d01da6aSStanislav Fomichev * copy whatever was returned to the user back
19340d01da6aSStanislav Fomichev * into our temporary buffer. Set optlen to the
19350d01da6aSStanislav Fomichev * one that kernel returned as well to let
19360d01da6aSStanislav Fomichev * BPF programs inspect the value.
19370d01da6aSStanislav Fomichev */
19384a746fb2SBreno Leitao if (copy_from_sockptr(&ctx.optlen, optlen,
19394a746fb2SBreno Leitao sizeof(ctx.optlen))) {
19400d01da6aSStanislav Fomichev ret = -EFAULT;
19410d01da6aSStanislav Fomichev goto out;
19420d01da6aSStanislav Fomichev }
19430d01da6aSStanislav Fomichev
1944bb8b81e3SLoris Reiff if (ctx.optlen < 0) {
1945bb8b81e3SLoris Reiff ret = -EFAULT;
1946bb8b81e3SLoris Reiff goto out;
1947bb8b81e3SLoris Reiff }
194829ebbba7SStanislav Fomichev orig_optlen = ctx.optlen;
1949bb8b81e3SLoris Reiff
19504a746fb2SBreno Leitao if (copy_from_sockptr(ctx.optval, optval,
19514a746fb2SBreno Leitao min(ctx.optlen, max_optlen))) {
19520d01da6aSStanislav Fomichev ret = -EFAULT;
19530d01da6aSStanislav Fomichev goto out;
19540d01da6aSStanislav Fomichev }
19550d01da6aSStanislav Fomichev }
19560d01da6aSStanislav Fomichev
19570d01da6aSStanislav Fomichev lock_sock(sk);
1958055eb955SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
1959d9d31cf8SStanislav Fomichev &ctx, bpf_prog_run, retval, NULL);
19600d01da6aSStanislav Fomichev release_sock(sk);
19610d01da6aSStanislav Fomichev
1962c4dcfdd4SYiFei Zhu if (ret < 0)
19630d01da6aSStanislav Fomichev goto out;
19640d01da6aSStanislav Fomichev
19654a746fb2SBreno Leitao if (!sockptr_is_null(optval) &&
19664a746fb2SBreno Leitao (ctx.optlen > max_optlen || ctx.optlen < 0)) {
196729ebbba7SStanislav Fomichev if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
196829ebbba7SStanislav Fomichev pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
196929ebbba7SStanislav Fomichev ctx.optlen, max_optlen);
197029ebbba7SStanislav Fomichev ret = retval;
197129ebbba7SStanislav Fomichev goto out;
197229ebbba7SStanislav Fomichev }
19730d01da6aSStanislav Fomichev ret = -EFAULT;
19740d01da6aSStanislav Fomichev goto out;
19750d01da6aSStanislav Fomichev }
19760d01da6aSStanislav Fomichev
1977d8fe449aSStanislav Fomichev if (ctx.optlen != 0) {
19784a746fb2SBreno Leitao if (!sockptr_is_null(optval) &&
19794a746fb2SBreno Leitao copy_to_sockptr(optval, ctx.optval, ctx.optlen)) {
198000e74ae0SStanislav Fomichev ret = -EFAULT;
198100e74ae0SStanislav Fomichev goto out;
198200e74ae0SStanislav Fomichev }
19834a746fb2SBreno Leitao if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) {
19840d01da6aSStanislav Fomichev ret = -EFAULT;
19850d01da6aSStanislav Fomichev goto out;
19860d01da6aSStanislav Fomichev }
1987d8fe449aSStanislav Fomichev }
19880d01da6aSStanislav Fomichev
19890d01da6aSStanislav Fomichev out:
199020f2505fSStanislav Fomichev sockopt_free_buf(&ctx, &buf);
19910d01da6aSStanislav Fomichev return ret;
19920d01da6aSStanislav Fomichev }
19939cacf81fSStanislav Fomichev
__cgroup_bpf_run_filter_getsockopt_kern(struct sock * sk,int level,int optname,void * optval,int * optlen,int retval)19949cacf81fSStanislav Fomichev int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
19959cacf81fSStanislav Fomichev int optname, void *optval,
19969cacf81fSStanislav Fomichev int *optlen, int retval)
19979cacf81fSStanislav Fomichev {
19989cacf81fSStanislav Fomichev struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
19999cacf81fSStanislav Fomichev struct bpf_sockopt_kern ctx = {
20009cacf81fSStanislav Fomichev .sk = sk,
20019cacf81fSStanislav Fomichev .level = level,
20029cacf81fSStanislav Fomichev .optname = optname,
20039cacf81fSStanislav Fomichev .optlen = *optlen,
20049cacf81fSStanislav Fomichev .optval = optval,
20059cacf81fSStanislav Fomichev .optval_end = optval + *optlen,
2006c4dcfdd4SYiFei Zhu .current_task = current,
20079cacf81fSStanislav Fomichev };
20089cacf81fSStanislav Fomichev int ret;
20099cacf81fSStanislav Fomichev
20109cacf81fSStanislav Fomichev /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
20119cacf81fSStanislav Fomichev * user data back into the BPF buffer when retval != 0. This is
20129cacf81fSStanislav Fomichev * done as an optimization to avoid extra copy, assuming
20139cacf81fSStanislav Fomichev * kernel won't populate the data in case of an error.
20149cacf81fSStanislav Fomichev * Here we always pass the data and memset() should
20159cacf81fSStanislav Fomichev * be called if that data shouldn't be "exported".
20169cacf81fSStanislav Fomichev */
20179cacf81fSStanislav Fomichev
2018055eb955SStanislav Fomichev ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
2019d9d31cf8SStanislav Fomichev &ctx, bpf_prog_run, retval, NULL);
2020c4dcfdd4SYiFei Zhu if (ret < 0)
2021f10d0596SYiFei Zhu return ret;
20229cacf81fSStanislav Fomichev
20239cacf81fSStanislav Fomichev if (ctx.optlen > *optlen)
20249cacf81fSStanislav Fomichev return -EFAULT;
20259cacf81fSStanislav Fomichev
20269cacf81fSStanislav Fomichev /* BPF programs can shrink the buffer, export the modifications.
20279cacf81fSStanislav Fomichev */
20289cacf81fSStanislav Fomichev if (ctx.optlen != 0)
20299cacf81fSStanislav Fomichev *optlen = ctx.optlen;
20309cacf81fSStanislav Fomichev
2031c4dcfdd4SYiFei Zhu return ret;
20329cacf81fSStanislav Fomichev }
20336705fea0SYueHaibing #endif
20340d01da6aSStanislav Fomichev
sysctl_cpy_dir(const struct ctl_dir * dir,char ** bufp,size_t * lenp)2035808649fbSAndrey Ignatov static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
2036808649fbSAndrey Ignatov size_t *lenp)
2037808649fbSAndrey Ignatov {
2038808649fbSAndrey Ignatov ssize_t tmp_ret = 0, ret;
2039808649fbSAndrey Ignatov
2040808649fbSAndrey Ignatov if (dir->header.parent) {
2041808649fbSAndrey Ignatov tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
2042808649fbSAndrey Ignatov if (tmp_ret < 0)
2043808649fbSAndrey Ignatov return tmp_ret;
2044808649fbSAndrey Ignatov }
2045808649fbSAndrey Ignatov
2046808649fbSAndrey Ignatov ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
2047808649fbSAndrey Ignatov if (ret < 0)
2048808649fbSAndrey Ignatov return ret;
2049808649fbSAndrey Ignatov *bufp += ret;
2050808649fbSAndrey Ignatov *lenp -= ret;
2051808649fbSAndrey Ignatov ret += tmp_ret;
2052808649fbSAndrey Ignatov
2053808649fbSAndrey Ignatov /* Avoid leading slash. */
2054808649fbSAndrey Ignatov if (!ret)
2055808649fbSAndrey Ignatov return ret;
2056808649fbSAndrey Ignatov
2057808649fbSAndrey Ignatov tmp_ret = strscpy(*bufp, "/", *lenp);
2058808649fbSAndrey Ignatov if (tmp_ret < 0)
2059808649fbSAndrey Ignatov return tmp_ret;
2060808649fbSAndrey Ignatov *bufp += tmp_ret;
2061808649fbSAndrey Ignatov *lenp -= tmp_ret;
2062808649fbSAndrey Ignatov
2063808649fbSAndrey Ignatov return ret + tmp_ret;
2064808649fbSAndrey Ignatov }
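
/* Editorial worked example (assumed values, not from the original file):
 * for the table backing /proc/sys/net/ipv4/tcp_mem, walking the parent
 * directories above yields "net/" and then "net/ipv4/", and the caller
 * (bpf_sysctl_get_name() below) appends the table's procname, producing
 * "net/ipv4/tcp_mem". Each successful step advances *bufp and shrinks
 * *lenp by the number of bytes written.
 */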
2065808649fbSAndrey Ignatov
2066808649fbSAndrey Ignatov BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
2067808649fbSAndrey Ignatov size_t, buf_len, u64, flags)
2068808649fbSAndrey Ignatov {
2069808649fbSAndrey Ignatov ssize_t tmp_ret = 0, ret;
2070808649fbSAndrey Ignatov
2071808649fbSAndrey Ignatov if (!buf)
2072808649fbSAndrey Ignatov return -EINVAL;
2073808649fbSAndrey Ignatov
2074808649fbSAndrey Ignatov if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
2075808649fbSAndrey Ignatov if (!ctx->head)
2076808649fbSAndrey Ignatov return -EINVAL;
2077808649fbSAndrey Ignatov tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
2078808649fbSAndrey Ignatov if (tmp_ret < 0)
2079808649fbSAndrey Ignatov return tmp_ret;
2080808649fbSAndrey Ignatov }
2081808649fbSAndrey Ignatov
2082808649fbSAndrey Ignatov ret = strscpy(buf, ctx->table->procname, buf_len);
2083808649fbSAndrey Ignatov
2084808649fbSAndrey Ignatov return ret < 0 ? ret : tmp_ret + ret;
2085808649fbSAndrey Ignatov }
2086808649fbSAndrey Ignatov
2087808649fbSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
2088808649fbSAndrey Ignatov .func = bpf_sysctl_get_name,
2089808649fbSAndrey Ignatov .gpl_only = false,
2090808649fbSAndrey Ignatov .ret_type = RET_INTEGER,
2091808649fbSAndrey Ignatov .arg1_type = ARG_PTR_TO_CTX,
2092808649fbSAndrey Ignatov .arg2_type = ARG_PTR_TO_MEM,
2093808649fbSAndrey Ignatov .arg3_type = ARG_CONST_SIZE,
2094808649fbSAndrey Ignatov .arg4_type = ARG_ANYTHING,
2095808649fbSAndrey Ignatov };
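
/* Editorial example (not part of the original file): a sketch of a
 * BPF_CGROUP_SYSCTL program using bpf_sysctl_get_name(). The section name,
 * program name and buffer size are illustrative assumptions, and
 * <bpf/bpf_helpers.h> is assumed to be included.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_name_sketch(struct bpf_sysctl *ctx)
 *	{
 *		char name[64];
 *
 *		// Without BPF_F_SYSCTL_BASE_NAME the full path relative to
 *		// /proc/sys is returned, e.g. "net/ipv4/tcp_mem".
 *		if (bpf_sysctl_get_name(ctx, name, sizeof(name), 0) < 0)
 *			return 0;	// name didn't fit: reject the access
 *
 *		// With BPF_F_SYSCTL_BASE_NAME only "tcp_mem" would be copied.
 *		return 1;		// allow the access
 *	}
 */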
2096808649fbSAndrey Ignatov
20971d11b301SAndrey Ignatov static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
20981d11b301SAndrey Ignatov size_t src_len)
20991d11b301SAndrey Ignatov {
21001d11b301SAndrey Ignatov if (!dst)
21011d11b301SAndrey Ignatov return -EINVAL;
21021d11b301SAndrey Ignatov
21031d11b301SAndrey Ignatov if (!dst_len)
21041d11b301SAndrey Ignatov return -E2BIG;
21051d11b301SAndrey Ignatov
21061d11b301SAndrey Ignatov if (!src || !src_len) {
21071d11b301SAndrey Ignatov memset(dst, 0, dst_len);
21081d11b301SAndrey Ignatov return -EINVAL;
21091d11b301SAndrey Ignatov }
21101d11b301SAndrey Ignatov
21111d11b301SAndrey Ignatov memcpy(dst, src, min(dst_len, src_len));
21121d11b301SAndrey Ignatov
21131d11b301SAndrey Ignatov if (dst_len > src_len) {
21141d11b301SAndrey Ignatov memset(dst + src_len, '\0', dst_len - src_len);
21151d11b301SAndrey Ignatov return src_len;
21161d11b301SAndrey Ignatov }
21171d11b301SAndrey Ignatov
21181d11b301SAndrey Ignatov dst[dst_len - 1] = '\0';
21191d11b301SAndrey Ignatov
21201d11b301SAndrey Ignatov return -E2BIG;
21211d11b301SAndrey Ignatov }
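
/* Editorial worked example (assumed values): copy_sysctl_value() above,
 * used by the two bpf_sysctl_get_*_value helpers below, behaves as follows.
 * With dst_len == 8 and a current value "512\n" (src_len == 4), it copies
 * "512\n", zero-fills the remaining 4 bytes and returns 4. With dst_len == 8
 * and a 16-byte value, it copies the first 8 bytes, forces dst[7] to '\0'
 * and returns -E2BIG to signal truncation.
 */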
21221d11b301SAndrey Ignatov
21231d11b301SAndrey Ignatov BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
21241d11b301SAndrey Ignatov char *, buf, size_t, buf_len)
21251d11b301SAndrey Ignatov {
21261d11b301SAndrey Ignatov return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
21271d11b301SAndrey Ignatov }
21281d11b301SAndrey Ignatov
21291d11b301SAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
21301d11b301SAndrey Ignatov .func = bpf_sysctl_get_current_value,
21311d11b301SAndrey Ignatov .gpl_only = false,
21321d11b301SAndrey Ignatov .ret_type = RET_INTEGER,
21331d11b301SAndrey Ignatov .arg1_type = ARG_PTR_TO_CTX,
21341d11b301SAndrey Ignatov .arg2_type = ARG_PTR_TO_UNINIT_MEM,
21351d11b301SAndrey Ignatov .arg3_type = ARG_CONST_SIZE,
21361d11b301SAndrey Ignatov };
21371d11b301SAndrey Ignatov
21384e63acdfSAndrey Ignatov BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
21394e63acdfSAndrey Ignatov size_t, buf_len)
21404e63acdfSAndrey Ignatov {
21414e63acdfSAndrey Ignatov if (!ctx->write) {
21424e63acdfSAndrey Ignatov if (buf && buf_len)
21434e63acdfSAndrey Ignatov memset(buf, '\0', buf_len);
21444e63acdfSAndrey Ignatov return -EINVAL;
21454e63acdfSAndrey Ignatov }
21464e63acdfSAndrey Ignatov return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
21474e63acdfSAndrey Ignatov }
21484e63acdfSAndrey Ignatov
21494e63acdfSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
21504e63acdfSAndrey Ignatov .func = bpf_sysctl_get_new_value,
21514e63acdfSAndrey Ignatov .gpl_only = false,
21524e63acdfSAndrey Ignatov .ret_type = RET_INTEGER,
21534e63acdfSAndrey Ignatov .arg1_type = ARG_PTR_TO_CTX,
21544e63acdfSAndrey Ignatov .arg2_type = ARG_PTR_TO_UNINIT_MEM,
21554e63acdfSAndrey Ignatov .arg3_type = ARG_CONST_SIZE,
21564e63acdfSAndrey Ignatov };
21574e63acdfSAndrey Ignatov
21584e63acdfSAndrey Ignatov BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
21594e63acdfSAndrey Ignatov const char *, buf, size_t, buf_len)
21604e63acdfSAndrey Ignatov {
21614e63acdfSAndrey Ignatov if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
21624e63acdfSAndrey Ignatov return -EINVAL;
21634e63acdfSAndrey Ignatov
21644e63acdfSAndrey Ignatov if (buf_len > PAGE_SIZE - 1)
21654e63acdfSAndrey Ignatov return -E2BIG;
21664e63acdfSAndrey Ignatov
21674e63acdfSAndrey Ignatov memcpy(ctx->new_val, buf, buf_len);
21684e63acdfSAndrey Ignatov ctx->new_len = buf_len;
21694e63acdfSAndrey Ignatov ctx->new_updated = 1;
21704e63acdfSAndrey Ignatov
21714e63acdfSAndrey Ignatov return 0;
21724e63acdfSAndrey Ignatov }
21734e63acdfSAndrey Ignatov
21744e63acdfSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
21754e63acdfSAndrey Ignatov .func = bpf_sysctl_set_new_value,
21764e63acdfSAndrey Ignatov .gpl_only = false,
21774e63acdfSAndrey Ignatov .ret_type = RET_INTEGER,
21784e63acdfSAndrey Ignatov .arg1_type = ARG_PTR_TO_CTX,
2179216e3cd2SHao Luo .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
21804e63acdfSAndrey Ignatov .arg3_type = ARG_CONST_SIZE,
21814e63acdfSAndrey Ignatov };
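
/* Editorial example (not part of the original file): a sketch of a
 * BPF_CGROUP_SYSCTL program that rewrites the value of a sysctl write using
 * bpf_sysctl_get_new_value()/bpf_sysctl_set_new_value(). The policy and all
 * names are illustrative assumptions; <bpf/bpf_helpers.h> is assumed.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_write_sketch(struct bpf_sysctl *ctx)
 *	{
 *		char val[16];
 *		long len;
 *
 *		if (!ctx->write)
 *			return 1;	// reads pass through unchanged
 *
 *		len = bpf_sysctl_get_new_value(ctx, val, sizeof(val));
 *		if (len <= 0 || len >= sizeof(val))
 *			return 0;	// missing, empty or oversized value: reject
 *
 *		// A policy could parse or rewrite 'val' here, then install the
 *		// buffer the sysctl handler will actually see:
 *		if (bpf_sysctl_set_new_value(ctx, val, len) < 0)
 *			return 0;
 *
 *		return 1;
 *	}
 */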
21824e63acdfSAndrey Ignatov
21837b146cebSAndrey Ignatov static const struct bpf_func_proto *
21847b146cebSAndrey Ignatov sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
21857b146cebSAndrey Ignatov {
2186dea6a4e1SStanislav Fomichev const struct bpf_func_proto *func_proto;
2187dea6a4e1SStanislav Fomichev
2188dea6a4e1SStanislav Fomichev func_proto = cgroup_common_func_proto(func_id, prog);
2189dea6a4e1SStanislav Fomichev if (func_proto)
2190dea6a4e1SStanislav Fomichev return func_proto;
2191dea6a4e1SStanislav Fomichev
2192dea6a4e1SStanislav Fomichev func_proto = cgroup_current_func_proto(func_id, prog);
2193dea6a4e1SStanislav Fomichev if (func_proto)
2194dea6a4e1SStanislav Fomichev return func_proto;
2195dea6a4e1SStanislav Fomichev
2196808649fbSAndrey Ignatov switch (func_id) {
2197808649fbSAndrey Ignatov case BPF_FUNC_sysctl_get_name:
2198808649fbSAndrey Ignatov return &bpf_sysctl_get_name_proto;
21991d11b301SAndrey Ignatov case BPF_FUNC_sysctl_get_current_value:
22001d11b301SAndrey Ignatov return &bpf_sysctl_get_current_value_proto;
22014e63acdfSAndrey Ignatov case BPF_FUNC_sysctl_get_new_value:
22024e63acdfSAndrey Ignatov return &bpf_sysctl_get_new_value_proto;
22034e63acdfSAndrey Ignatov case BPF_FUNC_sysctl_set_new_value:
22044e63acdfSAndrey Ignatov return &bpf_sysctl_set_new_value_proto;
22055e0bc308SDmitrii Banshchikov case BPF_FUNC_ktime_get_coarse_ns:
22065e0bc308SDmitrii Banshchikov return &bpf_ktime_get_coarse_ns_proto;
2207dea6a4e1SStanislav Fomichev case BPF_FUNC_perf_event_output:
2208dea6a4e1SStanislav Fomichev return &bpf_event_output_data_proto;
2209808649fbSAndrey Ignatov default:
2210dea6a4e1SStanislav Fomichev return bpf_base_func_proto(func_id);
22117b146cebSAndrey Ignatov }
2212808649fbSAndrey Ignatov }
22137b146cebSAndrey Ignatov
22147b146cebSAndrey Ignatov static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
22157b146cebSAndrey Ignatov const struct bpf_prog *prog,
22167b146cebSAndrey Ignatov struct bpf_insn_access_aux *info)
22177b146cebSAndrey Ignatov {
22187b146cebSAndrey Ignatov const int size_default = sizeof(__u32);
22197b146cebSAndrey Ignatov
2220e1550bfeSAndrey Ignatov if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
22217b146cebSAndrey Ignatov return false;
22227b146cebSAndrey Ignatov
22237b146cebSAndrey Ignatov switch (off) {
22247541c87cSIlya Leoshkevich case bpf_ctx_range(struct bpf_sysctl, write):
2225e1550bfeSAndrey Ignatov if (type != BPF_READ)
2226e1550bfeSAndrey Ignatov return false;
22277b146cebSAndrey Ignatov bpf_ctx_record_field_size(info, size_default);
22287b146cebSAndrey Ignatov return bpf_ctx_narrow_access_ok(off, size, size_default);
22297541c87cSIlya Leoshkevich case bpf_ctx_range(struct bpf_sysctl, file_pos):
2230e1550bfeSAndrey Ignatov if (type == BPF_READ) {
2231e1550bfeSAndrey Ignatov bpf_ctx_record_field_size(info, size_default);
2232e1550bfeSAndrey Ignatov return bpf_ctx_narrow_access_ok(off, size, size_default);
2233e1550bfeSAndrey Ignatov } else {
2234e1550bfeSAndrey Ignatov return size == size_default;
2235e1550bfeSAndrey Ignatov }
22367b146cebSAndrey Ignatov default:
22377b146cebSAndrey Ignatov return false;
22387b146cebSAndrey Ignatov }
22397b146cebSAndrey Ignatov }
22407b146cebSAndrey Ignatov
22417b146cebSAndrey Ignatov static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
22427b146cebSAndrey Ignatov const struct bpf_insn *si,
22437b146cebSAndrey Ignatov struct bpf_insn *insn_buf,
22447b146cebSAndrey Ignatov struct bpf_prog *prog, u32 *target_size)
22457b146cebSAndrey Ignatov {
22467b146cebSAndrey Ignatov struct bpf_insn *insn = insn_buf;
2247d895a0f1SIlya Leoshkevich u32 read_size;
22487b146cebSAndrey Ignatov
22497b146cebSAndrey Ignatov switch (si->off) {
22507b146cebSAndrey Ignatov case offsetof(struct bpf_sysctl, write):
22517b146cebSAndrey Ignatov *insn++ = BPF_LDX_MEM(
22527b146cebSAndrey Ignatov BPF_SIZE(si->code), si->dst_reg, si->src_reg,
22537b146cebSAndrey Ignatov bpf_target_off(struct bpf_sysctl_kern, write,
2254c593642cSPankaj Bharadiya sizeof_field(struct bpf_sysctl_kern,
22557b146cebSAndrey Ignatov write),
22567b146cebSAndrey Ignatov target_size));
22577b146cebSAndrey Ignatov break;
2258e1550bfeSAndrey Ignatov case offsetof(struct bpf_sysctl, file_pos):
2259e1550bfeSAndrey Ignatov /* ppos is a pointer so it should be accessed via indirect
2260e1550bfeSAndrey Ignatov * loads and stores. For stores, an additional temporary
2261e1550bfeSAndrey Ignatov * register is also used since neither src_reg nor dst_reg can
2262e1550bfeSAndrey Ignatov * be overridden.
2263e1550bfeSAndrey Ignatov */
2264e1550bfeSAndrey Ignatov if (type == BPF_WRITE) {
2265e1550bfeSAndrey Ignatov int treg = BPF_REG_9;
2266e1550bfeSAndrey Ignatov
2267e1550bfeSAndrey Ignatov if (si->src_reg == treg || si->dst_reg == treg)
2268e1550bfeSAndrey Ignatov --treg;
2269e1550bfeSAndrey Ignatov if (si->src_reg == treg || si->dst_reg == treg)
2270e1550bfeSAndrey Ignatov --treg;
2271e1550bfeSAndrey Ignatov *insn++ = BPF_STX_MEM(
2272e1550bfeSAndrey Ignatov BPF_DW, si->dst_reg, treg,
2273e1550bfeSAndrey Ignatov offsetof(struct bpf_sysctl_kern, tmp_reg));
2274e1550bfeSAndrey Ignatov *insn++ = BPF_LDX_MEM(
2275e1550bfeSAndrey Ignatov BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2276e1550bfeSAndrey Ignatov treg, si->dst_reg,
2277e1550bfeSAndrey Ignatov offsetof(struct bpf_sysctl_kern, ppos));
22780d80a619SEduard Zingerman *insn++ = BPF_RAW_INSN(
22790d80a619SEduard Zingerman BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
22800d80a619SEduard Zingerman treg, si->src_reg,
2281d895a0f1SIlya Leoshkevich bpf_ctx_narrow_access_offset(
22820d80a619SEduard Zingerman 0, sizeof(u32), sizeof(loff_t)),
22830d80a619SEduard Zingerman si->imm);
2284e1550bfeSAndrey Ignatov *insn++ = BPF_LDX_MEM(
2285e1550bfeSAndrey Ignatov BPF_DW, treg, si->dst_reg,
2286e1550bfeSAndrey Ignatov offsetof(struct bpf_sysctl_kern, tmp_reg));
2287e1550bfeSAndrey Ignatov } else {
2288e1550bfeSAndrey Ignatov *insn++ = BPF_LDX_MEM(
2289e1550bfeSAndrey Ignatov BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2290e1550bfeSAndrey Ignatov si->dst_reg, si->src_reg,
2291e1550bfeSAndrey Ignatov offsetof(struct bpf_sysctl_kern, ppos));
2292d895a0f1SIlya Leoshkevich read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
2293e1550bfeSAndrey Ignatov *insn++ = BPF_LDX_MEM(
2294d895a0f1SIlya Leoshkevich BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
2295d895a0f1SIlya Leoshkevich bpf_ctx_narrow_access_offset(
2296d895a0f1SIlya Leoshkevich 0, read_size, sizeof(loff_t)));
2297e1550bfeSAndrey Ignatov }
2298e1550bfeSAndrey Ignatov *target_size = sizeof(u32);
2299e1550bfeSAndrey Ignatov break;
23007b146cebSAndrey Ignatov }
23017b146cebSAndrey Ignatov
23027b146cebSAndrey Ignatov return insn - insn_buf;
23037b146cebSAndrey Ignatov }
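
/* Editorial example (not part of the original file): the file_pos conversion
 * above is what backs plain field accesses from BPF C, as in the sketch
 * below. All names are illustrative assumptions; <bpf/bpf_helpers.h> is
 * assumed.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_filepos_sketch(struct bpf_sysctl *ctx)
 *	{
 *		// Only inspect the first chunk of a (possibly chunked) read;
 *		// later chunks arrive with a non-zero position.
 *		if (!ctx->write && ctx->file_pos > 0)
 *			return 1;
 *
 *		// A write to ctx->file_pos stores through the ppos pointer,
 *		// using the temporary-register sequence emitted above.
 *		ctx->file_pos = 0;
 *
 *		return 1;
 *	}
 */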
23047b146cebSAndrey Ignatov
23057b146cebSAndrey Ignatov const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
23067b146cebSAndrey Ignatov .get_func_proto = sysctl_func_proto,
23077b146cebSAndrey Ignatov .is_valid_access = sysctl_is_valid_access,
23087b146cebSAndrey Ignatov .convert_ctx_access = sysctl_convert_ctx_access,
23097b146cebSAndrey Ignatov };
23107b146cebSAndrey Ignatov
23117b146cebSAndrey Ignatov const struct bpf_prog_ops cg_sysctl_prog_ops = {
23127b146cebSAndrey Ignatov };
23130d01da6aSStanislav Fomichev
2314f1248deeSStanislav Fomichev #ifdef CONFIG_NET
2315f1248deeSStanislav Fomichev BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
2316f1248deeSStanislav Fomichev {
2317f1248deeSStanislav Fomichev const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
2318f1248deeSStanislav Fomichev
2319f1248deeSStanislav Fomichev return net->net_cookie;
2320f1248deeSStanislav Fomichev }
2321f1248deeSStanislav Fomichev
2322f1248deeSStanislav Fomichev static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
2323f1248deeSStanislav Fomichev .func = bpf_get_netns_cookie_sockopt,
2324f1248deeSStanislav Fomichev .gpl_only = false,
2325f1248deeSStanislav Fomichev .ret_type = RET_INTEGER,
2326f1248deeSStanislav Fomichev .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
2327f1248deeSStanislav Fomichev };
2328f1248deeSStanislav Fomichev #endif
2329f1248deeSStanislav Fomichev
23300d01da6aSStanislav Fomichev static const struct bpf_func_proto *
23310d01da6aSStanislav Fomichev cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
23320d01da6aSStanislav Fomichev {
2333dea6a4e1SStanislav Fomichev const struct bpf_func_proto *func_proto;
2334dea6a4e1SStanislav Fomichev
2335dea6a4e1SStanislav Fomichev func_proto = cgroup_common_func_proto(func_id, prog);
2336dea6a4e1SStanislav Fomichev if (func_proto)
2337dea6a4e1SStanislav Fomichev return func_proto;
2338dea6a4e1SStanislav Fomichev
2339dea6a4e1SStanislav Fomichev func_proto = cgroup_current_func_proto(func_id, prog);
2340dea6a4e1SStanislav Fomichev if (func_proto)
2341dea6a4e1SStanislav Fomichev return func_proto;
2342dea6a4e1SStanislav Fomichev
23430d01da6aSStanislav Fomichev switch (func_id) {
23446705fea0SYueHaibing #ifdef CONFIG_NET
2345f1248deeSStanislav Fomichev case BPF_FUNC_get_netns_cookie:
2346f1248deeSStanislav Fomichev return &bpf_get_netns_cookie_sockopt_proto;
23470d01da6aSStanislav Fomichev case BPF_FUNC_sk_storage_get:
23480d01da6aSStanislav Fomichev return &bpf_sk_storage_get_proto;
23490d01da6aSStanislav Fomichev case BPF_FUNC_sk_storage_delete:
23500d01da6aSStanislav Fomichev return &bpf_sk_storage_delete_proto;
23512c531639SPrankur Gupta case BPF_FUNC_setsockopt:
23522c531639SPrankur Gupta if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
23532c531639SPrankur Gupta return &bpf_sk_setsockopt_proto;
23542c531639SPrankur Gupta return NULL;
23552c531639SPrankur Gupta case BPF_FUNC_getsockopt:
23562c531639SPrankur Gupta if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
23572c531639SPrankur Gupta return &bpf_sk_getsockopt_proto;
23582c531639SPrankur Gupta return NULL;
23596705fea0SYueHaibing #endif
23600d01da6aSStanislav Fomichev #ifdef CONFIG_INET
23610d01da6aSStanislav Fomichev case BPF_FUNC_tcp_sock:
23620d01da6aSStanislav Fomichev return &bpf_tcp_sock_proto;
23630d01da6aSStanislav Fomichev #endif
2364dea6a4e1SStanislav Fomichev case BPF_FUNC_perf_event_output:
2365dea6a4e1SStanislav Fomichev return &bpf_event_output_data_proto;
23660d01da6aSStanislav Fomichev default:
2367dea6a4e1SStanislav Fomichev return bpf_base_func_proto(func_id);
23680d01da6aSStanislav Fomichev }
23690d01da6aSStanislav Fomichev }
23700d01da6aSStanislav Fomichev
23710d01da6aSStanislav Fomichev static bool cg_sockopt_is_valid_access(int off, int size,
23720d01da6aSStanislav Fomichev enum bpf_access_type type,
23730d01da6aSStanislav Fomichev const struct bpf_prog *prog,
23740d01da6aSStanislav Fomichev struct bpf_insn_access_aux *info)
23750d01da6aSStanislav Fomichev {
23760d01da6aSStanislav Fomichev const int size_default = sizeof(__u32);
23770d01da6aSStanislav Fomichev
23780d01da6aSStanislav Fomichev if (off < 0 || off >= sizeof(struct bpf_sockopt))
23790d01da6aSStanislav Fomichev return false;
23800d01da6aSStanislav Fomichev
23810d01da6aSStanislav Fomichev if (off % size != 0)
23820d01da6aSStanislav Fomichev return false;
23830d01da6aSStanislav Fomichev
23840d01da6aSStanislav Fomichev if (type == BPF_WRITE) {
23850d01da6aSStanislav Fomichev switch (off) {
23860d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, retval):
23870d01da6aSStanislav Fomichev if (size != size_default)
23880d01da6aSStanislav Fomichev return false;
23890d01da6aSStanislav Fomichev return prog->expected_attach_type ==
23900d01da6aSStanislav Fomichev BPF_CGROUP_GETSOCKOPT;
23910d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optname):
2392df561f66SGustavo A. R. Silva fallthrough;
23930d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, level):
23940d01da6aSStanislav Fomichev if (size != size_default)
23950d01da6aSStanislav Fomichev return false;
23960d01da6aSStanislav Fomichev return prog->expected_attach_type ==
23970d01da6aSStanislav Fomichev BPF_CGROUP_SETSOCKOPT;
23980d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optlen):
23990d01da6aSStanislav Fomichev return size == size_default;
24000d01da6aSStanislav Fomichev default:
24010d01da6aSStanislav Fomichev return false;
24020d01da6aSStanislav Fomichev }
24030d01da6aSStanislav Fomichev }
24040d01da6aSStanislav Fomichev
24050d01da6aSStanislav Fomichev switch (off) {
24060d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, sk):
24070d01da6aSStanislav Fomichev if (size != sizeof(__u64))
24080d01da6aSStanislav Fomichev return false;
24090d01da6aSStanislav Fomichev info->reg_type = PTR_TO_SOCKET;
24100d01da6aSStanislav Fomichev break;
24110d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optval):
24120d01da6aSStanislav Fomichev if (size != sizeof(__u64))
24130d01da6aSStanislav Fomichev return false;
24140d01da6aSStanislav Fomichev info->reg_type = PTR_TO_PACKET;
24150d01da6aSStanislav Fomichev break;
24160d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optval_end):
24170d01da6aSStanislav Fomichev if (size != sizeof(__u64))
24180d01da6aSStanislav Fomichev return false;
24190d01da6aSStanislav Fomichev info->reg_type = PTR_TO_PACKET_END;
24200d01da6aSStanislav Fomichev break;
24210d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, retval):
24220d01da6aSStanislav Fomichev if (size != size_default)
24230d01da6aSStanislav Fomichev return false;
24240d01da6aSStanislav Fomichev return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
24250d01da6aSStanislav Fomichev default:
24260d01da6aSStanislav Fomichev if (size != size_default)
24270d01da6aSStanislav Fomichev return false;
24280d01da6aSStanislav Fomichev break;
24290d01da6aSStanislav Fomichev }
24300d01da6aSStanislav Fomichev return true;
24310d01da6aSStanislav Fomichev }
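
/* Editorial example (not part of the original file): a sketch of a
 * BPF_CGROUP_SETSOCKOPT program that only reads context fields validated
 * above (level/optname are writable only for setsockopt programs, retval
 * only for getsockopt ones). SOL_IP and IP_TOS come from the usual uapi
 * headers; all other names are illustrative assumptions.
 *
 *	SEC("cgroup/setsockopt")
 *	int setsockopt_sketch(struct bpf_sockopt *ctx)
 *	{
 *		// Forbid IP_TOS changes from this cgroup, allow the rest.
 *		if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
 *			return 0;
 *		return 1;
 *	}
 */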
24320d01da6aSStanislav Fomichev
24330d80a619SEduard Zingerman #define CG_SOCKOPT_READ_FIELD(F) \
24340d80a619SEduard Zingerman BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
24350d01da6aSStanislav Fomichev si->dst_reg, si->src_reg, \
24360d01da6aSStanislav Fomichev offsetof(struct bpf_sockopt_kern, F))
24370d01da6aSStanislav Fomichev
24380d80a619SEduard Zingerman #define CG_SOCKOPT_WRITE_FIELD(F) \
24390d80a619SEduard Zingerman BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) | \
24400d80a619SEduard Zingerman BPF_MEM | BPF_CLASS(si->code)), \
24410d80a619SEduard Zingerman si->dst_reg, si->src_reg, \
24420d80a619SEduard Zingerman offsetof(struct bpf_sockopt_kern, F), \
24430d80a619SEduard Zingerman si->imm)
24440d80a619SEduard Zingerman
24450d01da6aSStanislav Fomichev static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
24460d01da6aSStanislav Fomichev const struct bpf_insn *si,
24470d01da6aSStanislav Fomichev struct bpf_insn *insn_buf,
24480d01da6aSStanislav Fomichev struct bpf_prog *prog,
24490d01da6aSStanislav Fomichev u32 *target_size)
24500d01da6aSStanislav Fomichev {
24510d01da6aSStanislav Fomichev struct bpf_insn *insn = insn_buf;
24520d01da6aSStanislav Fomichev
24530d01da6aSStanislav Fomichev switch (si->off) {
24540d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, sk):
24550d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(sk);
24560d01da6aSStanislav Fomichev break;
24570d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, level):
24580d01da6aSStanislav Fomichev if (type == BPF_WRITE)
24590d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_WRITE_FIELD(level);
24600d01da6aSStanislav Fomichev else
24610d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(level);
24620d01da6aSStanislav Fomichev break;
24630d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optname):
24640d01da6aSStanislav Fomichev if (type == BPF_WRITE)
24650d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
24660d01da6aSStanislav Fomichev else
24670d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(optname);
24680d01da6aSStanislav Fomichev break;
24690d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optlen):
24700d01da6aSStanislav Fomichev if (type == BPF_WRITE)
24710d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
24720d01da6aSStanislav Fomichev else
24730d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(optlen);
24740d01da6aSStanislav Fomichev break;
24750d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, retval):
2476c4dcfdd4SYiFei Zhu BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
2477c4dcfdd4SYiFei Zhu
2478c4dcfdd4SYiFei Zhu if (type == BPF_WRITE) {
2479c4dcfdd4SYiFei Zhu int treg = BPF_REG_9;
2480c4dcfdd4SYiFei Zhu
2481c4dcfdd4SYiFei Zhu if (si->src_reg == treg || si->dst_reg == treg)
2482c4dcfdd4SYiFei Zhu --treg;
2483c4dcfdd4SYiFei Zhu if (si->src_reg == treg || si->dst_reg == treg)
2484c4dcfdd4SYiFei Zhu --treg;
2485c4dcfdd4SYiFei Zhu *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
2486c4dcfdd4SYiFei Zhu offsetof(struct bpf_sockopt_kern, tmp_reg));
2487c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2488c4dcfdd4SYiFei Zhu treg, si->dst_reg,
2489c4dcfdd4SYiFei Zhu offsetof(struct bpf_sockopt_kern, current_task));
2490c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2491c4dcfdd4SYiFei Zhu treg, treg,
2492c4dcfdd4SYiFei Zhu offsetof(struct task_struct, bpf_ctx));
24930d80a619SEduard Zingerman *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
24940d80a619SEduard Zingerman BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2495c4dcfdd4SYiFei Zhu treg, si->src_reg,
24960d80a619SEduard Zingerman offsetof(struct bpf_cg_run_ctx, retval),
24970d80a619SEduard Zingerman si->imm);
2498c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
2499c4dcfdd4SYiFei Zhu offsetof(struct bpf_sockopt_kern, tmp_reg));
2500c4dcfdd4SYiFei Zhu } else {
2501c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2502c4dcfdd4SYiFei Zhu si->dst_reg, si->src_reg,
2503c4dcfdd4SYiFei Zhu offsetof(struct bpf_sockopt_kern, current_task));
2504c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2505c4dcfdd4SYiFei Zhu si->dst_reg, si->dst_reg,
2506c4dcfdd4SYiFei Zhu offsetof(struct task_struct, bpf_ctx));
2507c4dcfdd4SYiFei Zhu *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2508c4dcfdd4SYiFei Zhu si->dst_reg, si->dst_reg,
2509c4dcfdd4SYiFei Zhu offsetof(struct bpf_cg_run_ctx, retval));
2510c4dcfdd4SYiFei Zhu }
25110d01da6aSStanislav Fomichev break;
25120d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optval):
25130d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(optval);
25140d01da6aSStanislav Fomichev break;
25150d01da6aSStanislav Fomichev case offsetof(struct bpf_sockopt, optval_end):
25160d80a619SEduard Zingerman *insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
25170d01da6aSStanislav Fomichev break;
25180d01da6aSStanislav Fomichev }
25190d01da6aSStanislav Fomichev
25200d01da6aSStanislav Fomichev return insn - insn_buf;
25210d01da6aSStanislav Fomichev }
25220d01da6aSStanislav Fomichev
25230d01da6aSStanislav Fomichev static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
25240d01da6aSStanislav Fomichev bool direct_write,
25250d01da6aSStanislav Fomichev const struct bpf_prog *prog)
25260d01da6aSStanislav Fomichev {
25270d01da6aSStanislav Fomichev /* Nothing to do for the sockopt argument. The data is kzalloc'ed.
25280d01da6aSStanislav Fomichev */
25290d01da6aSStanislav Fomichev return 0;
25300d01da6aSStanislav Fomichev }
25310d01da6aSStanislav Fomichev
25320d01da6aSStanislav Fomichev const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
25330d01da6aSStanislav Fomichev .get_func_proto = cg_sockopt_func_proto,
25340d01da6aSStanislav Fomichev .is_valid_access = cg_sockopt_is_valid_access,
25350d01da6aSStanislav Fomichev .convert_ctx_access = cg_sockopt_convert_ctx_access,
25360d01da6aSStanislav Fomichev .gen_prologue = cg_sockopt_get_prologue,
25370d01da6aSStanislav Fomichev };
25380d01da6aSStanislav Fomichev
25390d01da6aSStanislav Fomichev const struct bpf_prog_ops cg_sockopt_prog_ops = {
25400d01da6aSStanislav Fomichev };
2541dea6a4e1SStanislav Fomichev
2542dea6a4e1SStanislav Fomichev /* Common helpers for cgroup hooks. */
2543dea6a4e1SStanislav Fomichev const struct bpf_func_proto *
2544dea6a4e1SStanislav Fomichev cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2545dea6a4e1SStanislav Fomichev {
2546dea6a4e1SStanislav Fomichev switch (func_id) {
2547dea6a4e1SStanislav Fomichev case BPF_FUNC_get_local_storage:
2548dea6a4e1SStanislav Fomichev return &bpf_get_local_storage_proto;
2549dea6a4e1SStanislav Fomichev case BPF_FUNC_get_retval:
2550bed89185SStanislav Fomichev switch (prog->expected_attach_type) {
2551bed89185SStanislav Fomichev case BPF_CGROUP_INET_INGRESS:
2552bed89185SStanislav Fomichev case BPF_CGROUP_INET_EGRESS:
2553bed89185SStanislav Fomichev case BPF_CGROUP_SOCK_OPS:
2554bed89185SStanislav Fomichev case BPF_CGROUP_UDP4_RECVMSG:
2555bed89185SStanislav Fomichev case BPF_CGROUP_UDP6_RECVMSG:
2556bed89185SStanislav Fomichev case BPF_CGROUP_INET4_GETPEERNAME:
2557bed89185SStanislav Fomichev case BPF_CGROUP_INET6_GETPEERNAME:
2558bed89185SStanislav Fomichev case BPF_CGROUP_INET4_GETSOCKNAME:
2559bed89185SStanislav Fomichev case BPF_CGROUP_INET6_GETSOCKNAME:
2560bed89185SStanislav Fomichev return NULL;
2561bed89185SStanislav Fomichev default:
2562dea6a4e1SStanislav Fomichev return &bpf_get_retval_proto;
2563bed89185SStanislav Fomichev }
2564dea6a4e1SStanislav Fomichev case BPF_FUNC_set_retval:
2565bed89185SStanislav Fomichev switch (prog->expected_attach_type) {
2566bed89185SStanislav Fomichev case BPF_CGROUP_INET_INGRESS:
2567bed89185SStanislav Fomichev case BPF_CGROUP_INET_EGRESS:
2568bed89185SStanislav Fomichev case BPF_CGROUP_SOCK_OPS:
2569bed89185SStanislav Fomichev case BPF_CGROUP_UDP4_RECVMSG:
2570bed89185SStanislav Fomichev case BPF_CGROUP_UDP6_RECVMSG:
2571bed89185SStanislav Fomichev case BPF_CGROUP_INET4_GETPEERNAME:
2572bed89185SStanislav Fomichev case BPF_CGROUP_INET6_GETPEERNAME:
2573bed89185SStanislav Fomichev case BPF_CGROUP_INET4_GETSOCKNAME:
2574bed89185SStanislav Fomichev case BPF_CGROUP_INET6_GETSOCKNAME:
2575bed89185SStanislav Fomichev return NULL;
2576bed89185SStanislav Fomichev default:
2577dea6a4e1SStanislav Fomichev return &bpf_set_retval_proto;
2578bed89185SStanislav Fomichev }
2579dea6a4e1SStanislav Fomichev default:
2580dea6a4e1SStanislav Fomichev return NULL;
2581dea6a4e1SStanislav Fomichev }
2582dea6a4e1SStanislav Fomichev }
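
/* Editorial example (not part of the original file): a sketch showing how a
 * hook that is allowed to use bpf_set_retval() (see the switch above) can
 * report a specific errno instead of the default -EPERM when it rejects an
 * operation. Whether the custom errno is what userspace finally observes
 * depends on the attach point's run-time convention; treat this as a hedged
 * illustration. <errno.h> and <bpf/bpf_helpers.h> are assumed.
 *
 *	SEC("cgroup/setsockopt")
 *	int reject_with_errno(struct bpf_sockopt *ctx)
 *	{
 *		bpf_set_retval(-EINVAL);	// assumed to surface as errno
 *		return 0;			// reject the setsockopt() call
 *	}
 */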
2583dea6a4e1SStanislav Fomichev
2584dea6a4e1SStanislav Fomichev /* Common helpers for cgroup hooks with valid process context. */
2585dea6a4e1SStanislav Fomichev const struct bpf_func_proto *
2586dea6a4e1SStanislav Fomichev cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2587dea6a4e1SStanislav Fomichev {
2588dea6a4e1SStanislav Fomichev switch (func_id) {
2589dea6a4e1SStanislav Fomichev case BPF_FUNC_get_current_uid_gid:
2590dea6a4e1SStanislav Fomichev return &bpf_get_current_uid_gid_proto;
2591bed89185SStanislav Fomichev case BPF_FUNC_get_current_pid_tgid:
2592bed89185SStanislav Fomichev return &bpf_get_current_pid_tgid_proto;
2593bed89185SStanislav Fomichev case BPF_FUNC_get_current_comm:
2594bed89185SStanislav Fomichev return &bpf_get_current_comm_proto;
2595bed89185SStanislav Fomichev #ifdef CONFIG_CGROUP_NET_CLASSID
2596bed89185SStanislav Fomichev case BPF_FUNC_get_cgroup_classid:
2597bed89185SStanislav Fomichev return &bpf_get_cgroup_classid_curr_proto;
2598bed89185SStanislav Fomichev #endif
2599dea6a4e1SStanislav Fomichev default:
2600dea6a4e1SStanislav Fomichev return NULL;
2601dea6a4e1SStanislav Fomichev }
2602dea6a4e1SStanislav Fomichev }
2603