xref: /openbmc/linux/kernel/bpf/cgroup.c (revision 5f8b7d4b2e9604d03ae06f1a2dd5a1f34c33e533)
1f85d2086SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
230070984SDaniel Mack /*
330070984SDaniel Mack  * Functions to manage eBPF programs attached to cgroups
430070984SDaniel Mack  *
530070984SDaniel Mack  * Copyright (c) 2016 Daniel Mack
630070984SDaniel Mack  */
730070984SDaniel Mack 
830070984SDaniel Mack #include <linux/kernel.h>
930070984SDaniel Mack #include <linux/atomic.h>
1030070984SDaniel Mack #include <linux/cgroup.h>
117b146cebSAndrey Ignatov #include <linux/filter.h>
1230070984SDaniel Mack #include <linux/slab.h>
137b146cebSAndrey Ignatov #include <linux/sysctl.h>
14808649fbSAndrey Ignatov #include <linux/string.h>
1530070984SDaniel Mack #include <linux/bpf.h>
1630070984SDaniel Mack #include <linux/bpf-cgroup.h>
1769fd337aSStanislav Fomichev #include <linux/bpf_lsm.h>
1869fd337aSStanislav Fomichev #include <linux/bpf_verifier.h>
1930070984SDaniel Mack #include <net/sock.h>
200d01da6aSStanislav Fomichev #include <net/bpf_sk_storage.h>
2130070984SDaniel Mack 
22e5c891a3SRoman Gushchin #include "../cgroup/cgroup-internal.h"
23e5c891a3SRoman Gushchin 
/*
 * One static key per cgroup BPF attach type, all starting out false.
 * cgroup_bpf_release() decrements the key of an attach type once for every
 * program list entry it frees.  Exported for use outside this file.
 */
DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
2630070984SDaniel Mack 
27*0d86cd70SChen Ridong /*
28*0d86cd70SChen Ridong  * cgroup bpf destruction makes heavy use of work items and there can be a lot
29*0d86cd70SChen Ridong  * of concurrent destructions.  Use a separate workqueue so that cgroup bpf
30*0d86cd70SChen Ridong  * destruction work items don't end up filling up max_active of system_wq
31*0d86cd70SChen Ridong  * which may lead to deadlock.
32*0d86cd70SChen Ridong  */
33*0d86cd70SChen Ridong static struct workqueue_struct *cgroup_bpf_destroy_wq;
34*0d86cd70SChen Ridong 
cgroup_bpf_wq_init(void)35*0d86cd70SChen Ridong static int __init cgroup_bpf_wq_init(void)
36*0d86cd70SChen Ridong {
37*0d86cd70SChen Ridong 	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
38*0d86cd70SChen Ridong 	if (!cgroup_bpf_destroy_wq)
39*0d86cd70SChen Ridong 		panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
40*0d86cd70SChen Ridong 	return 0;
41*0d86cd70SChen Ridong }
42*0d86cd70SChen Ridong core_initcall(cgroup_bpf_wq_init);
43*0d86cd70SChen Ridong 
44055eb955SStanislav Fomichev /* __always_inline is necessary to prevent indirect call through run_prog
45055eb955SStanislav Fomichev  * function pointer.
46055eb955SStanislav Fomichev  */
47055eb955SStanislav Fomichev static __always_inline int
bpf_prog_run_array_cg(const struct cgroup_bpf * cgrp,enum cgroup_bpf_attach_type atype,const void * ctx,bpf_prog_run_fn run_prog,int retval,u32 * ret_flags)48d9d31cf8SStanislav Fomichev bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
49055eb955SStanislav Fomichev 		      enum cgroup_bpf_attach_type atype,
50055eb955SStanislav Fomichev 		      const void *ctx, bpf_prog_run_fn run_prog,
51055eb955SStanislav Fomichev 		      int retval, u32 *ret_flags)
52055eb955SStanislav Fomichev {
53055eb955SStanislav Fomichev 	const struct bpf_prog_array_item *item;
54055eb955SStanislav Fomichev 	const struct bpf_prog *prog;
55055eb955SStanislav Fomichev 	const struct bpf_prog_array *array;
56055eb955SStanislav Fomichev 	struct bpf_run_ctx *old_run_ctx;
57055eb955SStanislav Fomichev 	struct bpf_cg_run_ctx run_ctx;
58055eb955SStanislav Fomichev 	u32 func_ret;
59055eb955SStanislav Fomichev 
60055eb955SStanislav Fomichev 	run_ctx.retval = retval;
61055eb955SStanislav Fomichev 	migrate_disable();
62055eb955SStanislav Fomichev 	rcu_read_lock();
63055eb955SStanislav Fomichev 	array = rcu_dereference(cgrp->effective[atype]);
64055eb955SStanislav Fomichev 	item = &array->items[0];
65055eb955SStanislav Fomichev 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
66055eb955SStanislav Fomichev 	while ((prog = READ_ONCE(item->prog))) {
67055eb955SStanislav Fomichev 		run_ctx.prog_item = item;
68055eb955SStanislav Fomichev 		func_ret = run_prog(prog, ctx);
69d9d31cf8SStanislav Fomichev 		if (ret_flags) {
70055eb955SStanislav Fomichev 			*(ret_flags) |= (func_ret >> 1);
71d9d31cf8SStanislav Fomichev 			func_ret &= 1;
72055eb955SStanislav Fomichev 		}
73d9d31cf8SStanislav Fomichev 		if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
74055eb955SStanislav Fomichev 			run_ctx.retval = -EPERM;
75055eb955SStanislav Fomichev 		item++;
76055eb955SStanislav Fomichev 	}
77055eb955SStanislav Fomichev 	bpf_reset_run_ctx(old_run_ctx);
78055eb955SStanislav Fomichev 	rcu_read_unlock();
79055eb955SStanislav Fomichev 	migrate_enable();
80055eb955SStanislav Fomichev 	return run_ctx.retval;
81055eb955SStanislav Fomichev }
82055eb955SStanislav Fomichev 
__cgroup_bpf_run_lsm_sock(const void * ctx,const struct bpf_insn * insn)8369fd337aSStanislav Fomichev unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
8469fd337aSStanislav Fomichev 				       const struct bpf_insn *insn)
8569fd337aSStanislav Fomichev {
8669fd337aSStanislav Fomichev 	const struct bpf_prog *shim_prog;
8769fd337aSStanislav Fomichev 	struct sock *sk;
8869fd337aSStanislav Fomichev 	struct cgroup *cgrp;
8969fd337aSStanislav Fomichev 	int ret = 0;
9069fd337aSStanislav Fomichev 	u64 *args;
9169fd337aSStanislav Fomichev 
9269fd337aSStanislav Fomichev 	args = (u64 *)ctx;
9369fd337aSStanislav Fomichev 	sk = (void *)(unsigned long)args[0];
9469fd337aSStanislav Fomichev 	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
9569fd337aSStanislav Fomichev 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
9669fd337aSStanislav Fomichev 
9769fd337aSStanislav Fomichev 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
9869fd337aSStanislav Fomichev 	if (likely(cgrp))
9969fd337aSStanislav Fomichev 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
10069fd337aSStanislav Fomichev 					    shim_prog->aux->cgroup_atype,
10169fd337aSStanislav Fomichev 					    ctx, bpf_prog_run, 0, NULL);
10269fd337aSStanislav Fomichev 	return ret;
10369fd337aSStanislav Fomichev }
10469fd337aSStanislav Fomichev 
__cgroup_bpf_run_lsm_socket(const void * ctx,const struct bpf_insn * insn)10569fd337aSStanislav Fomichev unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
10669fd337aSStanislav Fomichev 					 const struct bpf_insn *insn)
10769fd337aSStanislav Fomichev {
10869fd337aSStanislav Fomichev 	const struct bpf_prog *shim_prog;
10969fd337aSStanislav Fomichev 	struct socket *sock;
11069fd337aSStanislav Fomichev 	struct cgroup *cgrp;
11169fd337aSStanislav Fomichev 	int ret = 0;
11269fd337aSStanislav Fomichev 	u64 *args;
11369fd337aSStanislav Fomichev 
11469fd337aSStanislav Fomichev 	args = (u64 *)ctx;
11569fd337aSStanislav Fomichev 	sock = (void *)(unsigned long)args[0];
11669fd337aSStanislav Fomichev 	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
11769fd337aSStanislav Fomichev 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
11869fd337aSStanislav Fomichev 
11969fd337aSStanislav Fomichev 	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
12069fd337aSStanislav Fomichev 	if (likely(cgrp))
12169fd337aSStanislav Fomichev 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
12269fd337aSStanislav Fomichev 					    shim_prog->aux->cgroup_atype,
12369fd337aSStanislav Fomichev 					    ctx, bpf_prog_run, 0, NULL);
12469fd337aSStanislav Fomichev 	return ret;
12569fd337aSStanislav Fomichev }
12669fd337aSStanislav Fomichev 
__cgroup_bpf_run_lsm_current(const void * ctx,const struct bpf_insn * insn)12769fd337aSStanislav Fomichev unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
12869fd337aSStanislav Fomichev 					  const struct bpf_insn *insn)
12969fd337aSStanislav Fomichev {
13069fd337aSStanislav Fomichev 	const struct bpf_prog *shim_prog;
13169fd337aSStanislav Fomichev 	struct cgroup *cgrp;
13269fd337aSStanislav Fomichev 	int ret = 0;
13369fd337aSStanislav Fomichev 
13469fd337aSStanislav Fomichev 	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
13569fd337aSStanislav Fomichev 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
13669fd337aSStanislav Fomichev 
13769fd337aSStanislav Fomichev 	/* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
13869fd337aSStanislav Fomichev 	cgrp = task_dfl_cgroup(current);
13969fd337aSStanislav Fomichev 	if (likely(cgrp))
14069fd337aSStanislav Fomichev 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
14169fd337aSStanislav Fomichev 					    shim_prog->aux->cgroup_atype,
14269fd337aSStanislav Fomichev 					    ctx, bpf_prog_run, 0, NULL);
14369fd337aSStanislav Fomichev 	return ret;
14469fd337aSStanislav Fomichev }
14569fd337aSStanislav Fomichev 
14669fd337aSStanislav Fomichev #ifdef CONFIG_BPF_LSM
147c0e19f2cSStanislav Fomichev struct cgroup_lsm_atype {
148c0e19f2cSStanislav Fomichev 	u32 attach_btf_id;
149c0e19f2cSStanislav Fomichev 	int refcnt;
150c0e19f2cSStanislav Fomichev };
151c0e19f2cSStanislav Fomichev 
152c0e19f2cSStanislav Fomichev static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];
153c0e19f2cSStanislav Fomichev 
15469fd337aSStanislav Fomichev static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type,u32 attach_btf_id)15569fd337aSStanislav Fomichev bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
15669fd337aSStanislav Fomichev {
157c0e19f2cSStanislav Fomichev 	int i;
158c0e19f2cSStanislav Fomichev 
159c0e19f2cSStanislav Fomichev 	lockdep_assert_held(&cgroup_mutex);
160c0e19f2cSStanislav Fomichev 
16169fd337aSStanislav Fomichev 	if (attach_type != BPF_LSM_CGROUP)
16269fd337aSStanislav Fomichev 		return to_cgroup_bpf_attach_type(attach_type);
163c0e19f2cSStanislav Fomichev 
164c0e19f2cSStanislav Fomichev 	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
165c0e19f2cSStanislav Fomichev 		if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
166c0e19f2cSStanislav Fomichev 			return CGROUP_LSM_START + i;
167c0e19f2cSStanislav Fomichev 
168c0e19f2cSStanislav Fomichev 	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
169c0e19f2cSStanislav Fomichev 		if (cgroup_lsm_atype[i].attach_btf_id == 0)
170c0e19f2cSStanislav Fomichev 			return CGROUP_LSM_START + i;
171c0e19f2cSStanislav Fomichev 
172c0e19f2cSStanislav Fomichev 	return -E2BIG;
173c0e19f2cSStanislav Fomichev 
174c0e19f2cSStanislav Fomichev }
175c0e19f2cSStanislav Fomichev 
/*
 * Bind @attach_btf_id to the slot behind @cgroup_atype and take a reference
 * on it.  Must be called with cgroup_mutex held.
 *
 * Warns once if the slot is already bound to a different BTF id; the id is
 * overwritten and the reference taken regardless.
 */
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
{
	int i;

	i = cgroup_atype - CGROUP_LSM_START;

	lockdep_assert_held(&cgroup_mutex);

	WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
		     cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);

	cgroup_lsm_atype[i].refcnt++;
	cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
}
188c0e19f2cSStanislav Fomichev 
bpf_cgroup_atype_put(int cgroup_atype)189c0e19f2cSStanislav Fomichev void bpf_cgroup_atype_put(int cgroup_atype)
190c0e19f2cSStanislav Fomichev {
191c0e19f2cSStanislav Fomichev 	int i = cgroup_atype - CGROUP_LSM_START;
192c0e19f2cSStanislav Fomichev 
1934cdb91b0SKamalesh Babulal 	cgroup_lock();
194c0e19f2cSStanislav Fomichev 	if (--cgroup_lsm_atype[i].refcnt <= 0)
195c0e19f2cSStanislav Fomichev 		cgroup_lsm_atype[i].attach_btf_id = 0;
196c0e19f2cSStanislav Fomichev 	WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
1974cdb91b0SKamalesh Babulal 	cgroup_unlock();
19869fd337aSStanislav Fomichev }
19969fd337aSStanislav Fomichev #else
20069fd337aSStanislav Fomichev static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type,u32 attach_btf_id)20169fd337aSStanislav Fomichev bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
20269fd337aSStanislav Fomichev {
20369fd337aSStanislav Fomichev 	if (attach_type != BPF_LSM_CGROUP)
20469fd337aSStanislav Fomichev 		return to_cgroup_bpf_attach_type(attach_type);
20569fd337aSStanislav Fomichev 	return -EOPNOTSUPP;
20669fd337aSStanislav Fomichev }
20769fd337aSStanislav Fomichev #endif /* CONFIG_BPF_LSM */
20869fd337aSStanislav Fomichev 
cgroup_bpf_offline(struct cgroup * cgrp)2094bfc0bb2SRoman Gushchin void cgroup_bpf_offline(struct cgroup *cgrp)
21030070984SDaniel Mack {
2114bfc0bb2SRoman Gushchin 	cgroup_get(cgrp);
2124bfc0bb2SRoman Gushchin 	percpu_ref_kill(&cgrp->bpf.refcnt);
2134bfc0bb2SRoman Gushchin }
2144bfc0bb2SRoman Gushchin 
bpf_cgroup_storages_free(struct bpf_cgroup_storage * storages[])21500c4eddfSAndrii Nakryiko static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
21600c4eddfSAndrii Nakryiko {
21700c4eddfSAndrii Nakryiko 	enum bpf_cgroup_storage_type stype;
21800c4eddfSAndrii Nakryiko 
21900c4eddfSAndrii Nakryiko 	for_each_cgroup_storage_type(stype)
22000c4eddfSAndrii Nakryiko 		bpf_cgroup_storage_free(storages[stype]);
22100c4eddfSAndrii Nakryiko }
22200c4eddfSAndrii Nakryiko 
bpf_cgroup_storages_alloc(struct bpf_cgroup_storage * storages[],struct bpf_cgroup_storage * new_storages[],enum bpf_attach_type type,struct bpf_prog * prog,struct cgroup * cgrp)22300c4eddfSAndrii Nakryiko static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
2247d9c3427SYiFei Zhu 				     struct bpf_cgroup_storage *new_storages[],
2257d9c3427SYiFei Zhu 				     enum bpf_attach_type type,
2267d9c3427SYiFei Zhu 				     struct bpf_prog *prog,
2277d9c3427SYiFei Zhu 				     struct cgroup *cgrp)
22800c4eddfSAndrii Nakryiko {
22900c4eddfSAndrii Nakryiko 	enum bpf_cgroup_storage_type stype;
2307d9c3427SYiFei Zhu 	struct bpf_cgroup_storage_key key;
2317d9c3427SYiFei Zhu 	struct bpf_map *map;
2327d9c3427SYiFei Zhu 
2337d9c3427SYiFei Zhu 	key.cgroup_inode_id = cgroup_id(cgrp);
2347d9c3427SYiFei Zhu 	key.attach_type = type;
23500c4eddfSAndrii Nakryiko 
23600c4eddfSAndrii Nakryiko 	for_each_cgroup_storage_type(stype) {
2377d9c3427SYiFei Zhu 		map = prog->aux->cgroup_storage[stype];
2387d9c3427SYiFei Zhu 		if (!map)
2397d9c3427SYiFei Zhu 			continue;
2407d9c3427SYiFei Zhu 
2417d9c3427SYiFei Zhu 		storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
2427d9c3427SYiFei Zhu 		if (storages[stype])
2437d9c3427SYiFei Zhu 			continue;
2447d9c3427SYiFei Zhu 
24500c4eddfSAndrii Nakryiko 		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
24600c4eddfSAndrii Nakryiko 		if (IS_ERR(storages[stype])) {
2477d9c3427SYiFei Zhu 			bpf_cgroup_storages_free(new_storages);
24800c4eddfSAndrii Nakryiko 			return -ENOMEM;
24900c4eddfSAndrii Nakryiko 		}
2507d9c3427SYiFei Zhu 
2517d9c3427SYiFei Zhu 		new_storages[stype] = storages[stype];
25200c4eddfSAndrii Nakryiko 	}
25300c4eddfSAndrii Nakryiko 
25400c4eddfSAndrii Nakryiko 	return 0;
25500c4eddfSAndrii Nakryiko }
25600c4eddfSAndrii Nakryiko 
bpf_cgroup_storages_assign(struct bpf_cgroup_storage * dst[],struct bpf_cgroup_storage * src[])25700c4eddfSAndrii Nakryiko static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
25800c4eddfSAndrii Nakryiko 				       struct bpf_cgroup_storage *src[])
25900c4eddfSAndrii Nakryiko {
26000c4eddfSAndrii Nakryiko 	enum bpf_cgroup_storage_type stype;
26100c4eddfSAndrii Nakryiko 
26200c4eddfSAndrii Nakryiko 	for_each_cgroup_storage_type(stype)
26300c4eddfSAndrii Nakryiko 		dst[stype] = src[stype];
26400c4eddfSAndrii Nakryiko }
26500c4eddfSAndrii Nakryiko 
bpf_cgroup_storages_link(struct bpf_cgroup_storage * storages[],struct cgroup * cgrp,enum bpf_attach_type attach_type)26600c4eddfSAndrii Nakryiko static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
26700c4eddfSAndrii Nakryiko 				     struct cgroup *cgrp,
26800c4eddfSAndrii Nakryiko 				     enum bpf_attach_type attach_type)
26900c4eddfSAndrii Nakryiko {
27000c4eddfSAndrii Nakryiko 	enum bpf_cgroup_storage_type stype;
27100c4eddfSAndrii Nakryiko 
27200c4eddfSAndrii Nakryiko 	for_each_cgroup_storage_type(stype)
27300c4eddfSAndrii Nakryiko 		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
27400c4eddfSAndrii Nakryiko }
27500c4eddfSAndrii Nakryiko 
276af6eea57SAndrii Nakryiko /* Called when bpf_cgroup_link is auto-detached from dying cgroup.
277af6eea57SAndrii Nakryiko  * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
278af6eea57SAndrii Nakryiko  * doesn't free link memory, which will eventually be done by bpf_link's
279af6eea57SAndrii Nakryiko  * release() callback, when its last FD is closed.
280af6eea57SAndrii Nakryiko  */
bpf_cgroup_link_auto_detach(struct bpf_cgroup_link * link)281af6eea57SAndrii Nakryiko static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
282af6eea57SAndrii Nakryiko {
283af6eea57SAndrii Nakryiko 	cgroup_put(link->cgroup);
284af6eea57SAndrii Nakryiko 	link->cgroup = NULL;
285af6eea57SAndrii Nakryiko }
286af6eea57SAndrii Nakryiko 
2874bfc0bb2SRoman Gushchin /**
2884bfc0bb2SRoman Gushchin  * cgroup_bpf_release() - put references of all bpf programs and
2894bfc0bb2SRoman Gushchin  *                        release all cgroup bpf data
2904bfc0bb2SRoman Gushchin  * @work: work structure embedded into the cgroup to modify
2914bfc0bb2SRoman Gushchin  */
cgroup_bpf_release(struct work_struct * work)2924bfc0bb2SRoman Gushchin static void cgroup_bpf_release(struct work_struct *work)
2934bfc0bb2SRoman Gushchin {
294e10360f8SRoman Gushchin 	struct cgroup *p, *cgrp = container_of(work, struct cgroup,
2954bfc0bb2SRoman Gushchin 					       bpf.release_work);
296dbcc1ba2SStanislav Fomichev 	struct bpf_prog_array *old_array;
2977d9c3427SYiFei Zhu 	struct list_head *storages = &cgrp->bpf.storages;
2987d9c3427SYiFei Zhu 	struct bpf_cgroup_storage *storage, *stmp;
2997d9c3427SYiFei Zhu 
3006fc88c35SDave Marchevsky 	unsigned int atype;
30130070984SDaniel Mack 
3024cdb91b0SKamalesh Babulal 	cgroup_lock();
303e5c891a3SRoman Gushchin 
3046fc88c35SDave Marchevsky 	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
30500442143SStanislav Fomichev 		struct hlist_head *progs = &cgrp->bpf.progs[atype];
30600442143SStanislav Fomichev 		struct bpf_prog_list *pl;
30700442143SStanislav Fomichev 		struct hlist_node *pltmp;
30830070984SDaniel Mack 
30900442143SStanislav Fomichev 		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
31000442143SStanislav Fomichev 			hlist_del(&pl->node);
31169fd337aSStanislav Fomichev 			if (pl->prog) {
31269fd337aSStanislav Fomichev 				if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
31369fd337aSStanislav Fomichev 					bpf_trampoline_unlink_cgroup_shim(pl->prog);
314324bda9eSAlexei Starovoitov 				bpf_prog_put(pl->prog);
31569fd337aSStanislav Fomichev 			}
31669fd337aSStanislav Fomichev 			if (pl->link) {
31769fd337aSStanislav Fomichev 				if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
31869fd337aSStanislav Fomichev 					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
319af6eea57SAndrii Nakryiko 				bpf_cgroup_link_auto_detach(pl->link);
32069fd337aSStanislav Fomichev 			}
321324bda9eSAlexei Starovoitov 			kfree(pl);
3226fc88c35SDave Marchevsky 			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
32330070984SDaniel Mack 		}
324dbcc1ba2SStanislav Fomichev 		old_array = rcu_dereference_protected(
3256fc88c35SDave Marchevsky 				cgrp->bpf.effective[atype],
326e5c891a3SRoman Gushchin 				lockdep_is_held(&cgroup_mutex));
327dbcc1ba2SStanislav Fomichev 		bpf_prog_array_free(old_array);
32830070984SDaniel Mack 	}
3294bfc0bb2SRoman Gushchin 
3307d9c3427SYiFei Zhu 	list_for_each_entry_safe(storage, stmp, storages, list_cg) {
3317d9c3427SYiFei Zhu 		bpf_cgroup_storage_unlink(storage);
3327d9c3427SYiFei Zhu 		bpf_cgroup_storage_free(storage);
3337d9c3427SYiFei Zhu 	}
3347d9c3427SYiFei Zhu 
3354cdb91b0SKamalesh Babulal 	cgroup_unlock();
336e5c891a3SRoman Gushchin 
337e10360f8SRoman Gushchin 	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
338e10360f8SRoman Gushchin 		cgroup_bpf_put(p);
339e10360f8SRoman Gushchin 
3404bfc0bb2SRoman Gushchin 	percpu_ref_exit(&cgrp->bpf.refcnt);
3414bfc0bb2SRoman Gushchin 	cgroup_put(cgrp);
3424bfc0bb2SRoman Gushchin }
3434bfc0bb2SRoman Gushchin 
3444bfc0bb2SRoman Gushchin /**
3454bfc0bb2SRoman Gushchin  * cgroup_bpf_release_fn() - callback used to schedule releasing
3464bfc0bb2SRoman Gushchin  *                           of bpf cgroup data
3474bfc0bb2SRoman Gushchin  * @ref: percpu ref counter structure
3484bfc0bb2SRoman Gushchin  */
cgroup_bpf_release_fn(struct percpu_ref * ref)3494bfc0bb2SRoman Gushchin static void cgroup_bpf_release_fn(struct percpu_ref *ref)
3504bfc0bb2SRoman Gushchin {
3514bfc0bb2SRoman Gushchin 	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
3524bfc0bb2SRoman Gushchin 
3534bfc0bb2SRoman Gushchin 	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
354*0d86cd70SChen Ridong 	queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
35530070984SDaniel Mack }
35630070984SDaniel Mack 
357af6eea57SAndrii Nakryiko /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
358af6eea57SAndrii Nakryiko  * link or direct prog.
359af6eea57SAndrii Nakryiko  */
prog_list_prog(struct bpf_prog_list * pl)360af6eea57SAndrii Nakryiko static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
361af6eea57SAndrii Nakryiko {
362af6eea57SAndrii Nakryiko 	if (pl->prog)
363af6eea57SAndrii Nakryiko 		return pl->prog;
364af6eea57SAndrii Nakryiko 	if (pl->link)
365af6eea57SAndrii Nakryiko 		return pl->link->link.prog;
366af6eea57SAndrii Nakryiko 	return NULL;
367af6eea57SAndrii Nakryiko }
368af6eea57SAndrii Nakryiko 
369324bda9eSAlexei Starovoitov /* count number of elements in the list.
370324bda9eSAlexei Starovoitov  * it's slow but the list cannot be long
371324bda9eSAlexei Starovoitov  */
prog_list_length(struct hlist_head * head)37200442143SStanislav Fomichev static u32 prog_list_length(struct hlist_head *head)
373324bda9eSAlexei Starovoitov {
374324bda9eSAlexei Starovoitov 	struct bpf_prog_list *pl;
375324bda9eSAlexei Starovoitov 	u32 cnt = 0;
376324bda9eSAlexei Starovoitov 
37700442143SStanislav Fomichev 	hlist_for_each_entry(pl, head, node) {
378af6eea57SAndrii Nakryiko 		if (!prog_list_prog(pl))
379324bda9eSAlexei Starovoitov 			continue;
380324bda9eSAlexei Starovoitov 		cnt++;
381324bda9eSAlexei Starovoitov 	}
382324bda9eSAlexei Starovoitov 	return cnt;
383324bda9eSAlexei Starovoitov }
384324bda9eSAlexei Starovoitov 
385324bda9eSAlexei Starovoitov /* if parent has non-overridable prog attached,
386324bda9eSAlexei Starovoitov  * disallow attaching new programs to the descendent cgroup.
387324bda9eSAlexei Starovoitov  * if parent has overridable or multi-prog, allow attaching
388324bda9eSAlexei Starovoitov  */
hierarchy_allows_attach(struct cgroup * cgrp,enum cgroup_bpf_attach_type atype)389324bda9eSAlexei Starovoitov static bool hierarchy_allows_attach(struct cgroup *cgrp,
3906fc88c35SDave Marchevsky 				    enum cgroup_bpf_attach_type atype)
391324bda9eSAlexei Starovoitov {
392324bda9eSAlexei Starovoitov 	struct cgroup *p;
393324bda9eSAlexei Starovoitov 
394324bda9eSAlexei Starovoitov 	p = cgroup_parent(cgrp);
395324bda9eSAlexei Starovoitov 	if (!p)
396324bda9eSAlexei Starovoitov 		return true;
397324bda9eSAlexei Starovoitov 	do {
3986fc88c35SDave Marchevsky 		u32 flags = p->bpf.flags[atype];
399324bda9eSAlexei Starovoitov 		u32 cnt;
400324bda9eSAlexei Starovoitov 
401324bda9eSAlexei Starovoitov 		if (flags & BPF_F_ALLOW_MULTI)
402324bda9eSAlexei Starovoitov 			return true;
4036fc88c35SDave Marchevsky 		cnt = prog_list_length(&p->bpf.progs[atype]);
404324bda9eSAlexei Starovoitov 		WARN_ON_ONCE(cnt > 1);
405324bda9eSAlexei Starovoitov 		if (cnt == 1)
406324bda9eSAlexei Starovoitov 			return !!(flags & BPF_F_ALLOW_OVERRIDE);
407324bda9eSAlexei Starovoitov 		p = cgroup_parent(p);
408324bda9eSAlexei Starovoitov 	} while (p);
409324bda9eSAlexei Starovoitov 	return true;
410324bda9eSAlexei Starovoitov }
411324bda9eSAlexei Starovoitov 
412324bda9eSAlexei Starovoitov /* compute a chain of effective programs for a given cgroup:
413324bda9eSAlexei Starovoitov  * start from the list of programs in this cgroup and add
414324bda9eSAlexei Starovoitov  * all parent programs.
415324bda9eSAlexei Starovoitov  * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
416324bda9eSAlexei Starovoitov  * to programs in this cgroup
417324bda9eSAlexei Starovoitov  */
compute_effective_progs(struct cgroup * cgrp,enum cgroup_bpf_attach_type atype,struct bpf_prog_array ** array)418324bda9eSAlexei Starovoitov static int compute_effective_progs(struct cgroup *cgrp,
4196fc88c35SDave Marchevsky 				   enum cgroup_bpf_attach_type atype,
420dbcc1ba2SStanislav Fomichev 				   struct bpf_prog_array **array)
421324bda9eSAlexei Starovoitov {
42200c4eddfSAndrii Nakryiko 	struct bpf_prog_array_item *item;
4233960f4fdSRoman Gushchin 	struct bpf_prog_array *progs;
424324bda9eSAlexei Starovoitov 	struct bpf_prog_list *pl;
425324bda9eSAlexei Starovoitov 	struct cgroup *p = cgrp;
426324bda9eSAlexei Starovoitov 	int cnt = 0;
427324bda9eSAlexei Starovoitov 
428324bda9eSAlexei Starovoitov 	/* count number of effective programs by walking parents */
429324bda9eSAlexei Starovoitov 	do {
4306fc88c35SDave Marchevsky 		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
4316fc88c35SDave Marchevsky 			cnt += prog_list_length(&p->bpf.progs[atype]);
432324bda9eSAlexei Starovoitov 		p = cgroup_parent(p);
433324bda9eSAlexei Starovoitov 	} while (p);
434324bda9eSAlexei Starovoitov 
435324bda9eSAlexei Starovoitov 	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
436324bda9eSAlexei Starovoitov 	if (!progs)
437324bda9eSAlexei Starovoitov 		return -ENOMEM;
438324bda9eSAlexei Starovoitov 
439324bda9eSAlexei Starovoitov 	/* populate the array with effective progs */
440324bda9eSAlexei Starovoitov 	cnt = 0;
441324bda9eSAlexei Starovoitov 	p = cgrp;
442324bda9eSAlexei Starovoitov 	do {
4436fc88c35SDave Marchevsky 		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
444394e40a2SRoman Gushchin 			continue;
445394e40a2SRoman Gushchin 
44600442143SStanislav Fomichev 		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
447af6eea57SAndrii Nakryiko 			if (!prog_list_prog(pl))
448324bda9eSAlexei Starovoitov 				continue;
449394e40a2SRoman Gushchin 
45000c4eddfSAndrii Nakryiko 			item = &progs->items[cnt];
451af6eea57SAndrii Nakryiko 			item->prog = prog_list_prog(pl);
45200c4eddfSAndrii Nakryiko 			bpf_cgroup_storages_assign(item->cgroup_storage,
45300c4eddfSAndrii Nakryiko 						   pl->storage);
454394e40a2SRoman Gushchin 			cnt++;
455324bda9eSAlexei Starovoitov 		}
456394e40a2SRoman Gushchin 	} while ((p = cgroup_parent(p)));
457324bda9eSAlexei Starovoitov 
458dbcc1ba2SStanislav Fomichev 	*array = progs;
459324bda9eSAlexei Starovoitov 	return 0;
460324bda9eSAlexei Starovoitov }
461324bda9eSAlexei Starovoitov 
activate_effective_progs(struct cgroup * cgrp,enum cgroup_bpf_attach_type atype,struct bpf_prog_array * old_array)462324bda9eSAlexei Starovoitov static void activate_effective_progs(struct cgroup *cgrp,
4636fc88c35SDave Marchevsky 				     enum cgroup_bpf_attach_type atype,
464dbcc1ba2SStanislav Fomichev 				     struct bpf_prog_array *old_array)
465324bda9eSAlexei Starovoitov {
4666fc88c35SDave Marchevsky 	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
467dbcc1ba2SStanislav Fomichev 					lockdep_is_held(&cgroup_mutex));
468324bda9eSAlexei Starovoitov 	/* free prog array after grace period, since __cgroup_bpf_run_*()
469324bda9eSAlexei Starovoitov 	 * might be still walking the array
470324bda9eSAlexei Starovoitov 	 */
471324bda9eSAlexei Starovoitov 	bpf_prog_array_free(old_array);
472324bda9eSAlexei Starovoitov }
473324bda9eSAlexei Starovoitov 
47430070984SDaniel Mack /**
47530070984SDaniel Mack  * cgroup_bpf_inherit() - inherit effective programs from parent
47630070984SDaniel Mack  * @cgrp: the cgroup to modify
47730070984SDaniel Mack  */
cgroup_bpf_inherit(struct cgroup * cgrp)478324bda9eSAlexei Starovoitov int cgroup_bpf_inherit(struct cgroup *cgrp)
47930070984SDaniel Mack {
480324bda9eSAlexei Starovoitov /* has to use marco instead of const int, since compiler thinks
481324bda9eSAlexei Starovoitov  * that array below is variable length
482324bda9eSAlexei Starovoitov  */
483324bda9eSAlexei Starovoitov #define	NR ARRAY_SIZE(cgrp->bpf.effective)
484dbcc1ba2SStanislav Fomichev 	struct bpf_prog_array *arrays[NR] = {};
485e10360f8SRoman Gushchin 	struct cgroup *p;
4864bfc0bb2SRoman Gushchin 	int ret, i;
4874bfc0bb2SRoman Gushchin 
4884bfc0bb2SRoman Gushchin 	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
4894bfc0bb2SRoman Gushchin 			      GFP_KERNEL);
4904bfc0bb2SRoman Gushchin 	if (ret)
4914bfc0bb2SRoman Gushchin 		return ret;
49230070984SDaniel Mack 
493e10360f8SRoman Gushchin 	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
494e10360f8SRoman Gushchin 		cgroup_bpf_get(p);
495e10360f8SRoman Gushchin 
496324bda9eSAlexei Starovoitov 	for (i = 0; i < NR; i++)
49700442143SStanislav Fomichev 		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
49830070984SDaniel Mack 
4997d9c3427SYiFei Zhu 	INIT_LIST_HEAD(&cgrp->bpf.storages);
5007d9c3427SYiFei Zhu 
501324bda9eSAlexei Starovoitov 	for (i = 0; i < NR; i++)
502324bda9eSAlexei Starovoitov 		if (compute_effective_progs(cgrp, i, &arrays[i]))
503324bda9eSAlexei Starovoitov 			goto cleanup;
504324bda9eSAlexei Starovoitov 
505324bda9eSAlexei Starovoitov 	for (i = 0; i < NR; i++)
506324bda9eSAlexei Starovoitov 		activate_effective_progs(cgrp, i, arrays[i]);
507324bda9eSAlexei Starovoitov 
508324bda9eSAlexei Starovoitov 	return 0;
509324bda9eSAlexei Starovoitov cleanup:
510324bda9eSAlexei Starovoitov 	for (i = 0; i < NR; i++)
511324bda9eSAlexei Starovoitov 		bpf_prog_array_free(arrays[i]);
5124bfc0bb2SRoman Gushchin 
5131d8006abSAndrii Nakryiko 	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
5141d8006abSAndrii Nakryiko 		cgroup_bpf_put(p);
5151d8006abSAndrii Nakryiko 
5164bfc0bb2SRoman Gushchin 	percpu_ref_exit(&cgrp->bpf.refcnt);
5174bfc0bb2SRoman Gushchin 
518324bda9eSAlexei Starovoitov 	return -ENOMEM;
51930070984SDaniel Mack }
520324bda9eSAlexei Starovoitov 
update_effective_progs(struct cgroup * cgrp,enum cgroup_bpf_attach_type atype)52185fc4b16SRoman Gushchin static int update_effective_progs(struct cgroup *cgrp,
5226fc88c35SDave Marchevsky 				  enum cgroup_bpf_attach_type atype)
52385fc4b16SRoman Gushchin {
52485fc4b16SRoman Gushchin 	struct cgroup_subsys_state *css;
52585fc4b16SRoman Gushchin 	int err;
52685fc4b16SRoman Gushchin 
52785fc4b16SRoman Gushchin 	/* allocate and recompute effective prog arrays */
52885fc4b16SRoman Gushchin 	css_for_each_descendant_pre(css, &cgrp->self) {
52985fc4b16SRoman Gushchin 		struct cgroup *desc = container_of(css, struct cgroup, self);
53085fc4b16SRoman Gushchin 
531e5c891a3SRoman Gushchin 		if (percpu_ref_is_zero(&desc->bpf.refcnt))
532e5c891a3SRoman Gushchin 			continue;
533e5c891a3SRoman Gushchin 
5346fc88c35SDave Marchevsky 		err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
53585fc4b16SRoman Gushchin 		if (err)
53685fc4b16SRoman Gushchin 			goto cleanup;
53785fc4b16SRoman Gushchin 	}
53885fc4b16SRoman Gushchin 
53985fc4b16SRoman Gushchin 	/* all allocations were successful. Activate all prog arrays */
54085fc4b16SRoman Gushchin 	css_for_each_descendant_pre(css, &cgrp->self) {
54185fc4b16SRoman Gushchin 		struct cgroup *desc = container_of(css, struct cgroup, self);
54285fc4b16SRoman Gushchin 
543e5c891a3SRoman Gushchin 		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
544e5c891a3SRoman Gushchin 			if (unlikely(desc->bpf.inactive)) {
545e5c891a3SRoman Gushchin 				bpf_prog_array_free(desc->bpf.inactive);
546e5c891a3SRoman Gushchin 				desc->bpf.inactive = NULL;
547e5c891a3SRoman Gushchin 			}
548e5c891a3SRoman Gushchin 			continue;
549e5c891a3SRoman Gushchin 		}
550e5c891a3SRoman Gushchin 
5516fc88c35SDave Marchevsky 		activate_effective_progs(desc, atype, desc->bpf.inactive);
55285fc4b16SRoman Gushchin 		desc->bpf.inactive = NULL;
55385fc4b16SRoman Gushchin 	}
55485fc4b16SRoman Gushchin 
55585fc4b16SRoman Gushchin 	return 0;
55685fc4b16SRoman Gushchin 
55785fc4b16SRoman Gushchin cleanup:
55885fc4b16SRoman Gushchin 	/* oom while computing effective. Free all computed effective arrays
55985fc4b16SRoman Gushchin 	 * since they were not activated
56085fc4b16SRoman Gushchin 	 */
56185fc4b16SRoman Gushchin 	css_for_each_descendant_pre(css, &cgrp->self) {
56285fc4b16SRoman Gushchin 		struct cgroup *desc = container_of(css, struct cgroup, self);
56385fc4b16SRoman Gushchin 
56485fc4b16SRoman Gushchin 		bpf_prog_array_free(desc->bpf.inactive);
56585fc4b16SRoman Gushchin 		desc->bpf.inactive = NULL;
56685fc4b16SRoman Gushchin 	}
56785fc4b16SRoman Gushchin 
56885fc4b16SRoman Gushchin 	return err;
56985fc4b16SRoman Gushchin }
57085fc4b16SRoman Gushchin 
/* Length limit of one per-attach-type program list; __cgroup_bpf_attach()
 * fails with -E2BIG once a list has reached it.
 */
#define BPF_CGROUP_MAX_PROGS 64
57230070984SDaniel Mack 
/*
 * Look up the list entry an attach operation should reuse.
 *
 * Returns:
 *   - the existing entry to overwrite (first entry in single-attach mode, or
 *     the entry holding @replace_prog in multi-attach mode),
 *   - NULL when a new entry has to be allocated,
 *   - ERR_PTR(-EINVAL) when @prog or @link is already on the list,
 *   - ERR_PTR(-ENOENT) when @replace_prog is not on the list.
 */
static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *entry;

	/* single-attach: whatever is first on the list gets overwritten */
	if (!allow_multi)
		return hlist_empty(progs) ? NULL :
		       hlist_entry(progs->first, typeof(*entry), node);

	hlist_for_each_entry(entry, progs, node) {
		/* the same prog may only reappear as its own replacement */
		bool dup_prog = prog && entry->prog == prog &&
				prog != replace_prog;
		bool dup_link = link && entry->link == link;

		if (dup_prog || dup_link)
			return ERR_PTR(-EINVAL);
	}

	/* plain multi-attach: caller appends a new entry */
	if (!replace_prog)
		return NULL;

	/* multi-attach with replacement: the target must be present */
	hlist_for_each_entry(entry, progs, node) {
		if (entry->prog == replace_prog)
			return entry;
	}

	return ERR_PTR(-ENOENT);
}
610af6eea57SAndrii Nakryiko 
61130070984SDaniel Mack /**
612af6eea57SAndrii Nakryiko  * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
61330070984SDaniel Mack  *                         propagate the change to descendants
61430070984SDaniel Mack  * @cgrp: The cgroup which descendants to traverse
615324bda9eSAlexei Starovoitov  * @prog: A program to attach
616af6eea57SAndrii Nakryiko  * @link: A link to attach
6177dd68b32SAndrey Ignatov  * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
618324bda9eSAlexei Starovoitov  * @type: Type of attach operation
6191832f4efSValdis Kletnieks  * @flags: Option flags
62030070984SDaniel Mack  *
621af6eea57SAndrii Nakryiko  * Exactly one of @prog or @link can be non-null.
62230070984SDaniel Mack  * Must be called with cgroup_mutex held.
62330070984SDaniel Mack  */
__cgroup_bpf_attach(struct cgroup * cgrp,struct bpf_prog * prog,struct bpf_prog * replace_prog,struct bpf_cgroup_link * link,enum bpf_attach_type type,u32 flags)624588e5d87SHe Fengqing static int __cgroup_bpf_attach(struct cgroup *cgrp,
625af6eea57SAndrii Nakryiko 			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
626af6eea57SAndrii Nakryiko 			       struct bpf_cgroup_link *link,
627324bda9eSAlexei Starovoitov 			       enum bpf_attach_type type, u32 flags)
62830070984SDaniel Mack {
6297dd68b32SAndrey Ignatov 	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
630324bda9eSAlexei Starovoitov 	struct bpf_prog *old_prog = NULL;
63162039c30SAndrii Nakryiko 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
6327d9c3427SYiFei Zhu 	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
63369fd337aSStanislav Fomichev 	struct bpf_prog *new_prog = prog ? : link->link.prog;
6346fc88c35SDave Marchevsky 	enum cgroup_bpf_attach_type atype;
635af6eea57SAndrii Nakryiko 	struct bpf_prog_list *pl;
63600442143SStanislav Fomichev 	struct hlist_head *progs;
637324bda9eSAlexei Starovoitov 	int err;
63830070984SDaniel Mack 
6397dd68b32SAndrey Ignatov 	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
6407dd68b32SAndrey Ignatov 	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
641324bda9eSAlexei Starovoitov 		/* invalid combination */
642324bda9eSAlexei Starovoitov 		return -EINVAL;
643af6eea57SAndrii Nakryiko 	if (link && (prog || replace_prog))
644af6eea57SAndrii Nakryiko 		/* only either link or prog/replace_prog can be specified */
645af6eea57SAndrii Nakryiko 		return -EINVAL;
646af6eea57SAndrii Nakryiko 	if (!!replace_prog != !!(flags & BPF_F_REPLACE))
647af6eea57SAndrii Nakryiko 		/* replace_prog implies BPF_F_REPLACE, and vice versa */
648af6eea57SAndrii Nakryiko 		return -EINVAL;
64930070984SDaniel Mack 
65069fd337aSStanislav Fomichev 	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
6516fc88c35SDave Marchevsky 	if (atype < 0)
6526fc88c35SDave Marchevsky 		return -EINVAL;
6536fc88c35SDave Marchevsky 
6546fc88c35SDave Marchevsky 	progs = &cgrp->bpf.progs[atype];
6556fc88c35SDave Marchevsky 
6566fc88c35SDave Marchevsky 	if (!hierarchy_allows_attach(cgrp, atype))
657324bda9eSAlexei Starovoitov 		return -EPERM;
658324bda9eSAlexei Starovoitov 
65900442143SStanislav Fomichev 	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
660324bda9eSAlexei Starovoitov 		/* Disallow attaching non-overridable on top
661324bda9eSAlexei Starovoitov 		 * of existing overridable in this cgroup.
662324bda9eSAlexei Starovoitov 		 * Disallow attaching multi-prog if overridable or none
6637f677633SAlexei Starovoitov 		 */
6647f677633SAlexei Starovoitov 		return -EPERM;
6657f677633SAlexei Starovoitov 
666324bda9eSAlexei Starovoitov 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
667324bda9eSAlexei Starovoitov 		return -E2BIG;
6687f677633SAlexei Starovoitov 
669af6eea57SAndrii Nakryiko 	pl = find_attach_entry(progs, prog, link, replace_prog,
670af6eea57SAndrii Nakryiko 			       flags & BPF_F_ALLOW_MULTI);
671af6eea57SAndrii Nakryiko 	if (IS_ERR(pl))
672af6eea57SAndrii Nakryiko 		return PTR_ERR(pl);
6731020c1f2SAndrey Ignatov 
6747d9c3427SYiFei Zhu 	if (bpf_cgroup_storages_alloc(storage, new_storage, type,
6757d9c3427SYiFei Zhu 				      prog ? : link->link.prog, cgrp))
676d7bf2c10SRoman Gushchin 		return -ENOMEM;
677d7bf2c10SRoman Gushchin 
678af6eea57SAndrii Nakryiko 	if (pl) {
679324bda9eSAlexei Starovoitov 		old_prog = pl->prog;
6801020c1f2SAndrey Ignatov 	} else {
68100442143SStanislav Fomichev 		struct hlist_node *last = NULL;
68200442143SStanislav Fomichev 
6831020c1f2SAndrey Ignatov 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
6841020c1f2SAndrey Ignatov 		if (!pl) {
6857d9c3427SYiFei Zhu 			bpf_cgroup_storages_free(new_storage);
6861020c1f2SAndrey Ignatov 			return -ENOMEM;
6877f677633SAlexei Starovoitov 		}
68800442143SStanislav Fomichev 		if (hlist_empty(progs))
68900442143SStanislav Fomichev 			hlist_add_head(&pl->node, progs);
69000442143SStanislav Fomichev 		else
69100442143SStanislav Fomichev 			hlist_for_each(last, progs) {
69200442143SStanislav Fomichev 				if (last->next)
69300442143SStanislav Fomichev 					continue;
69400442143SStanislav Fomichev 				hlist_add_behind(&pl->node, last);
69500442143SStanislav Fomichev 				break;
69600442143SStanislav Fomichev 			}
6971020c1f2SAndrey Ignatov 	}
6981020c1f2SAndrey Ignatov 
699324bda9eSAlexei Starovoitov 	pl->prog = prog;
700af6eea57SAndrii Nakryiko 	pl->link = link;
70100c4eddfSAndrii Nakryiko 	bpf_cgroup_storages_assign(pl->storage, storage);
7026fc88c35SDave Marchevsky 	cgrp->bpf.flags[atype] = saved_flags;
703324bda9eSAlexei Starovoitov 
70469fd337aSStanislav Fomichev 	if (type == BPF_LSM_CGROUP) {
70569fd337aSStanislav Fomichev 		err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
706324bda9eSAlexei Starovoitov 		if (err)
707324bda9eSAlexei Starovoitov 			goto cleanup;
70869fd337aSStanislav Fomichev 	}
709324bda9eSAlexei Starovoitov 
71069fd337aSStanislav Fomichev 	err = update_effective_progs(cgrp, atype);
71169fd337aSStanislav Fomichev 	if (err)
71269fd337aSStanislav Fomichev 		goto cleanup_trampoline;
71369fd337aSStanislav Fomichev 
71469fd337aSStanislav Fomichev 	if (old_prog) {
71569fd337aSStanislav Fomichev 		if (type == BPF_LSM_CGROUP)
71669fd337aSStanislav Fomichev 			bpf_trampoline_unlink_cgroup_shim(old_prog);
71730070984SDaniel Mack 		bpf_prog_put(old_prog);
71869fd337aSStanislav Fomichev 	} else {
7196fc88c35SDave Marchevsky 		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
72069fd337aSStanislav Fomichev 	}
7217d9c3427SYiFei Zhu 	bpf_cgroup_storages_link(new_storage, cgrp, type);
7227f677633SAlexei Starovoitov 	return 0;
723324bda9eSAlexei Starovoitov 
72469fd337aSStanislav Fomichev cleanup_trampoline:
72569fd337aSStanislav Fomichev 	if (type == BPF_LSM_CGROUP)
72669fd337aSStanislav Fomichev 		bpf_trampoline_unlink_cgroup_shim(new_prog);
72769fd337aSStanislav Fomichev 
728324bda9eSAlexei Starovoitov cleanup:
729af6eea57SAndrii Nakryiko 	if (old_prog) {
730324bda9eSAlexei Starovoitov 		pl->prog = old_prog;
731af6eea57SAndrii Nakryiko 		pl->link = NULL;
7328bad74f9SRoman Gushchin 	}
7337d9c3427SYiFei Zhu 	bpf_cgroup_storages_free(new_storage);
734af6eea57SAndrii Nakryiko 	if (!old_prog) {
73500442143SStanislav Fomichev 		hlist_del(&pl->node);
736324bda9eSAlexei Starovoitov 		kfree(pl);
737324bda9eSAlexei Starovoitov 	}
738324bda9eSAlexei Starovoitov 	return err;
739324bda9eSAlexei Starovoitov }
740324bda9eSAlexei Starovoitov 
/* Locked variant of __cgroup_bpf_attach(): runs it between cgroup_lock()
 * and cgroup_unlock() and passes its result through.
 */
static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{
	int err;

	cgroup_lock();
	err = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
	cgroup_unlock();

	return err;
}
754588e5d87SHe Fengqing 
7550c991ebcSAndrii Nakryiko /* Swap updated BPF program for given link in effective program arrays across
7560c991ebcSAndrii Nakryiko  * all descendant cgroups. This function is guaranteed to succeed.
7570c991ebcSAndrii Nakryiko  */
replace_effective_prog(struct cgroup * cgrp,enum cgroup_bpf_attach_type atype,struct bpf_cgroup_link * link)7580c991ebcSAndrii Nakryiko static void replace_effective_prog(struct cgroup *cgrp,
7596fc88c35SDave Marchevsky 				   enum cgroup_bpf_attach_type atype,
7600c991ebcSAndrii Nakryiko 				   struct bpf_cgroup_link *link)
7610c991ebcSAndrii Nakryiko {
7620c991ebcSAndrii Nakryiko 	struct bpf_prog_array_item *item;
7630c991ebcSAndrii Nakryiko 	struct cgroup_subsys_state *css;
7640c991ebcSAndrii Nakryiko 	struct bpf_prog_array *progs;
7650c991ebcSAndrii Nakryiko 	struct bpf_prog_list *pl;
76600442143SStanislav Fomichev 	struct hlist_head *head;
7670c991ebcSAndrii Nakryiko 	struct cgroup *cg;
7680c991ebcSAndrii Nakryiko 	int pos;
7690c991ebcSAndrii Nakryiko 
7700c991ebcSAndrii Nakryiko 	css_for_each_descendant_pre(css, &cgrp->self) {
7710c991ebcSAndrii Nakryiko 		struct cgroup *desc = container_of(css, struct cgroup, self);
7720c991ebcSAndrii Nakryiko 
7730c991ebcSAndrii Nakryiko 		if (percpu_ref_is_zero(&desc->bpf.refcnt))
7740c991ebcSAndrii Nakryiko 			continue;
7750c991ebcSAndrii Nakryiko 
7760c991ebcSAndrii Nakryiko 		/* find position of link in effective progs array */
7770c991ebcSAndrii Nakryiko 		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
7786fc88c35SDave Marchevsky 			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
7790c991ebcSAndrii Nakryiko 				continue;
7800c991ebcSAndrii Nakryiko 
7816fc88c35SDave Marchevsky 			head = &cg->bpf.progs[atype];
78200442143SStanislav Fomichev 			hlist_for_each_entry(pl, head, node) {
7830c991ebcSAndrii Nakryiko 				if (!prog_list_prog(pl))
7840c991ebcSAndrii Nakryiko 					continue;
7850c991ebcSAndrii Nakryiko 				if (pl->link == link)
7860c991ebcSAndrii Nakryiko 					goto found;
7870c991ebcSAndrii Nakryiko 				pos++;
7880c991ebcSAndrii Nakryiko 			}
7890c991ebcSAndrii Nakryiko 		}
7900c991ebcSAndrii Nakryiko found:
7910c991ebcSAndrii Nakryiko 		BUG_ON(!cg);
7920c991ebcSAndrii Nakryiko 		progs = rcu_dereference_protected(
7936fc88c35SDave Marchevsky 				desc->bpf.effective[atype],
7940c991ebcSAndrii Nakryiko 				lockdep_is_held(&cgroup_mutex));
7950c991ebcSAndrii Nakryiko 		item = &progs->items[pos];
7960c991ebcSAndrii Nakryiko 		WRITE_ONCE(item->prog, link->link.prog);
7970c991ebcSAndrii Nakryiko 	}
7980c991ebcSAndrii Nakryiko }
7990c991ebcSAndrii Nakryiko 
800324bda9eSAlexei Starovoitov /**
8010c991ebcSAndrii Nakryiko  * __cgroup_bpf_replace() - Replace link's program and propagate the change
8020c991ebcSAndrii Nakryiko  *                          to descendants
803324bda9eSAlexei Starovoitov  * @cgrp: The cgroup which descendants to traverse
8040c991ebcSAndrii Nakryiko  * @link: A link for which to replace BPF program
805214bfd26SRandy Dunlap  * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
806214bfd26SRandy Dunlap  *            incremented
807324bda9eSAlexei Starovoitov  *
808324bda9eSAlexei Starovoitov  * Must be called with cgroup_mutex held.
809324bda9eSAlexei Starovoitov  */
__cgroup_bpf_replace(struct cgroup * cgrp,struct bpf_cgroup_link * link,struct bpf_prog * new_prog)810f9d04127SAndrii Nakryiko static int __cgroup_bpf_replace(struct cgroup *cgrp,
811f9d04127SAndrii Nakryiko 				struct bpf_cgroup_link *link,
8120c991ebcSAndrii Nakryiko 				struct bpf_prog *new_prog)
813324bda9eSAlexei Starovoitov {
8146fc88c35SDave Marchevsky 	enum cgroup_bpf_attach_type atype;
8150c991ebcSAndrii Nakryiko 	struct bpf_prog *old_prog;
816324bda9eSAlexei Starovoitov 	struct bpf_prog_list *pl;
81700442143SStanislav Fomichev 	struct hlist_head *progs;
8180c991ebcSAndrii Nakryiko 	bool found = false;
819324bda9eSAlexei Starovoitov 
82069fd337aSStanislav Fomichev 	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
8216fc88c35SDave Marchevsky 	if (atype < 0)
8226fc88c35SDave Marchevsky 		return -EINVAL;
8236fc88c35SDave Marchevsky 
8246fc88c35SDave Marchevsky 	progs = &cgrp->bpf.progs[atype];
8256fc88c35SDave Marchevsky 
8260c991ebcSAndrii Nakryiko 	if (link->link.prog->type != new_prog->type)
827324bda9eSAlexei Starovoitov 		return -EINVAL;
828324bda9eSAlexei Starovoitov 
82900442143SStanislav Fomichev 	hlist_for_each_entry(pl, progs, node) {
8300c991ebcSAndrii Nakryiko 		if (pl->link == link) {
8310c991ebcSAndrii Nakryiko 			found = true;
832324bda9eSAlexei Starovoitov 			break;
833324bda9eSAlexei Starovoitov 		}
8340c991ebcSAndrii Nakryiko 	}
8350c991ebcSAndrii Nakryiko 	if (!found)
836324bda9eSAlexei Starovoitov 		return -ENOENT;
8370c991ebcSAndrii Nakryiko 
8380c991ebcSAndrii Nakryiko 	old_prog = xchg(&link->link.prog, new_prog);
8396fc88c35SDave Marchevsky 	replace_effective_prog(cgrp, atype, link);
8400c991ebcSAndrii Nakryiko 	bpf_prog_put(old_prog);
8410c991ebcSAndrii Nakryiko 	return 0;
8420c991ebcSAndrii Nakryiko }
8430c991ebcSAndrii Nakryiko 
cgroup_bpf_replace(struct bpf_link * link,struct bpf_prog * new_prog,struct bpf_prog * old_prog)844f9d04127SAndrii Nakryiko static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
845f9d04127SAndrii Nakryiko 			      struct bpf_prog *old_prog)
846f9d04127SAndrii Nakryiko {
847f9d04127SAndrii Nakryiko 	struct bpf_cgroup_link *cg_link;
848f9d04127SAndrii Nakryiko 	int ret;
849f9d04127SAndrii Nakryiko 
850f9d04127SAndrii Nakryiko 	cg_link = container_of(link, struct bpf_cgroup_link, link);
851f9d04127SAndrii Nakryiko 
8524cdb91b0SKamalesh Babulal 	cgroup_lock();
853f9d04127SAndrii Nakryiko 	/* link might have been auto-released by dying cgroup, so fail */
854f9d04127SAndrii Nakryiko 	if (!cg_link->cgroup) {
8550c047ecbSJakub Sitnicki 		ret = -ENOLINK;
856f9d04127SAndrii Nakryiko 		goto out_unlock;
857f9d04127SAndrii Nakryiko 	}
858f9d04127SAndrii Nakryiko 	if (old_prog && link->prog != old_prog) {
859f9d04127SAndrii Nakryiko 		ret = -EPERM;
860f9d04127SAndrii Nakryiko 		goto out_unlock;
861f9d04127SAndrii Nakryiko 	}
862f9d04127SAndrii Nakryiko 	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
863f9d04127SAndrii Nakryiko out_unlock:
8644cdb91b0SKamalesh Babulal 	cgroup_unlock();
865f9d04127SAndrii Nakryiko 	return ret;
866f9d04127SAndrii Nakryiko }
867f9d04127SAndrii Nakryiko 
/*
 * Locate the list entry a detach operation should remove.
 *
 * Returns the entry, ERR_PTR(-ENOENT) when nothing matching is attached, or
 * ERR_PTR(-EINVAL) when a multi-attach list is given neither @prog nor @link.
 */
static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *entry;

	if (allow_multi) {
		/* a MULTI detach must name the program or link to remove */
		if (!prog && !link)
			return ERR_PTR(-EINVAL);

		hlist_for_each_entry(entry, progs, node) {
			if (entry->prog == prog && entry->link == link)
				return entry;
		}
		return ERR_PTR(-ENOENT);
	}

	/* detaching from an empty list is reported as an error */
	if (hlist_empty(progs))
		return ERR_PTR(-ENOENT);

	/* For backward compatibility, NONE and OVERRIDE cgroups accept a
	 * detach with an invalid FD (prog == NULL) in legacy mode, so the
	 * first entry is returned without comparing it against @prog.
	 */
	return hlist_entry(progs->first, typeof(*entry), node);
}
899af6eea57SAndrii Nakryiko 
900324bda9eSAlexei Starovoitov /**
9014c46091eSTadeusz Struk  * purge_effective_progs() - After compute_effective_progs fails to alloc new
9024c46091eSTadeusz Struk  *                           cgrp->bpf.inactive table we can recover by
9034c46091eSTadeusz Struk  *                           recomputing the array in place.
9044c46091eSTadeusz Struk  *
9054c46091eSTadeusz Struk  * @cgrp: The cgroup which descendants to travers
9064c46091eSTadeusz Struk  * @prog: A program to detach or NULL
9074c46091eSTadeusz Struk  * @link: A link to detach or NULL
9084c46091eSTadeusz Struk  * @atype: Type of detach operation
9094c46091eSTadeusz Struk  */
purge_effective_progs(struct cgroup * cgrp,struct bpf_prog * prog,struct bpf_cgroup_link * link,enum cgroup_bpf_attach_type atype)9104c46091eSTadeusz Struk static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
9114c46091eSTadeusz Struk 				  struct bpf_cgroup_link *link,
9124c46091eSTadeusz Struk 				  enum cgroup_bpf_attach_type atype)
9134c46091eSTadeusz Struk {
9144c46091eSTadeusz Struk 	struct cgroup_subsys_state *css;
9154c46091eSTadeusz Struk 	struct bpf_prog_array *progs;
9164c46091eSTadeusz Struk 	struct bpf_prog_list *pl;
91700442143SStanislav Fomichev 	struct hlist_head *head;
9184c46091eSTadeusz Struk 	struct cgroup *cg;
9194c46091eSTadeusz Struk 	int pos;
9204c46091eSTadeusz Struk 
9214c46091eSTadeusz Struk 	/* recompute effective prog array in place */
9224c46091eSTadeusz Struk 	css_for_each_descendant_pre(css, &cgrp->self) {
9234c46091eSTadeusz Struk 		struct cgroup *desc = container_of(css, struct cgroup, self);
9244c46091eSTadeusz Struk 
9254c46091eSTadeusz Struk 		if (percpu_ref_is_zero(&desc->bpf.refcnt))
9264c46091eSTadeusz Struk 			continue;
9274c46091eSTadeusz Struk 
9284c46091eSTadeusz Struk 		/* find position of link or prog in effective progs array */
9294c46091eSTadeusz Struk 		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
9304c46091eSTadeusz Struk 			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
9314c46091eSTadeusz Struk 				continue;
9324c46091eSTadeusz Struk 
9334c46091eSTadeusz Struk 			head = &cg->bpf.progs[atype];
93400442143SStanislav Fomichev 			hlist_for_each_entry(pl, head, node) {
9354c46091eSTadeusz Struk 				if (!prog_list_prog(pl))
9364c46091eSTadeusz Struk 					continue;
9374c46091eSTadeusz Struk 				if (pl->prog == prog && pl->link == link)
9384c46091eSTadeusz Struk 					goto found;
9394c46091eSTadeusz Struk 				pos++;
9404c46091eSTadeusz Struk 			}
9414c46091eSTadeusz Struk 		}
9427d6620f1SPu Lehui 
9437d6620f1SPu Lehui 		/* no link or prog match, skip the cgroup of this layer */
9447d6620f1SPu Lehui 		continue;
9454c46091eSTadeusz Struk found:
9464c46091eSTadeusz Struk 		progs = rcu_dereference_protected(
9474c46091eSTadeusz Struk 				desc->bpf.effective[atype],
9484c46091eSTadeusz Struk 				lockdep_is_held(&cgroup_mutex));
9494c46091eSTadeusz Struk 
9504c46091eSTadeusz Struk 		/* Remove the program from the array */
9514c46091eSTadeusz Struk 		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
9524c46091eSTadeusz Struk 			  "Failed to purge a prog from array at index %d", pos);
9534c46091eSTadeusz Struk 	}
9544c46091eSTadeusz Struk }
9554c46091eSTadeusz Struk 
9564c46091eSTadeusz Struk /**
957af6eea57SAndrii Nakryiko  * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
958324bda9eSAlexei Starovoitov  *                         propagate the change to descendants
959324bda9eSAlexei Starovoitov  * @cgrp: The cgroup which descendants to traverse
960324bda9eSAlexei Starovoitov  * @prog: A program to detach or NULL
961588e5d87SHe Fengqing  * @link: A link to detach or NULL
962324bda9eSAlexei Starovoitov  * @type: Type of detach operation
963324bda9eSAlexei Starovoitov  *
964af6eea57SAndrii Nakryiko  * At most one of @prog or @link can be non-NULL.
965324bda9eSAlexei Starovoitov  * Must be called with cgroup_mutex held.
966324bda9eSAlexei Starovoitov  */
__cgroup_bpf_detach(struct cgroup * cgrp,struct bpf_prog * prog,struct bpf_cgroup_link * link,enum bpf_attach_type type)967588e5d87SHe Fengqing static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
968af6eea57SAndrii Nakryiko 			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
969324bda9eSAlexei Starovoitov {
9706fc88c35SDave Marchevsky 	enum cgroup_bpf_attach_type atype;
971af6eea57SAndrii Nakryiko 	struct bpf_prog *old_prog;
9726fc88c35SDave Marchevsky 	struct bpf_prog_list *pl;
97300442143SStanislav Fomichev 	struct hlist_head *progs;
97469fd337aSStanislav Fomichev 	u32 attach_btf_id = 0;
9756fc88c35SDave Marchevsky 	u32 flags;
976324bda9eSAlexei Starovoitov 
97769fd337aSStanislav Fomichev 	if (prog)
97869fd337aSStanislav Fomichev 		attach_btf_id = prog->aux->attach_btf_id;
97969fd337aSStanislav Fomichev 	if (link)
98069fd337aSStanislav Fomichev 		attach_btf_id = link->link.prog->aux->attach_btf_id;
98169fd337aSStanislav Fomichev 
98269fd337aSStanislav Fomichev 	atype = bpf_cgroup_atype_find(type, attach_btf_id);
9836fc88c35SDave Marchevsky 	if (atype < 0)
9846fc88c35SDave Marchevsky 		return -EINVAL;
9856fc88c35SDave Marchevsky 
9866fc88c35SDave Marchevsky 	progs = &cgrp->bpf.progs[atype];
9876fc88c35SDave Marchevsky 	flags = cgrp->bpf.flags[atype];
9886fc88c35SDave Marchevsky 
989af6eea57SAndrii Nakryiko 	if (prog && link)
990af6eea57SAndrii Nakryiko 		/* only one of prog or link can be specified */
991324bda9eSAlexei Starovoitov 		return -EINVAL;
992324bda9eSAlexei Starovoitov 
993af6eea57SAndrii Nakryiko 	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
994af6eea57SAndrii Nakryiko 	if (IS_ERR(pl))
995af6eea57SAndrii Nakryiko 		return PTR_ERR(pl);
996af6eea57SAndrii Nakryiko 
997af6eea57SAndrii Nakryiko 	/* mark it deleted, so it's ignored while recomputing effective */
998324bda9eSAlexei Starovoitov 	old_prog = pl->prog;
999324bda9eSAlexei Starovoitov 	pl->prog = NULL;
1000af6eea57SAndrii Nakryiko 	pl->link = NULL;
1001324bda9eSAlexei Starovoitov 
10024c46091eSTadeusz Struk 	if (update_effective_progs(cgrp, atype)) {
10034c46091eSTadeusz Struk 		/* if update effective array failed replace the prog with a dummy prog*/
10044c46091eSTadeusz Struk 		pl->prog = old_prog;
10054c46091eSTadeusz Struk 		pl->link = link;
10064c46091eSTadeusz Struk 		purge_effective_progs(cgrp, old_prog, link, atype);
10074c46091eSTadeusz Struk 	}
1008324bda9eSAlexei Starovoitov 
1009324bda9eSAlexei Starovoitov 	/* now can actually delete it from this cgroup list */
101000442143SStanislav Fomichev 	hlist_del(&pl->node);
101100442143SStanislav Fomichev 
1012324bda9eSAlexei Starovoitov 	kfree(pl);
101300442143SStanislav Fomichev 	if (hlist_empty(progs))
1014324bda9eSAlexei Starovoitov 		/* last program was detached, reset flags to zero */
10156fc88c35SDave Marchevsky 		cgrp->bpf.flags[atype] = 0;
101669fd337aSStanislav Fomichev 	if (old_prog) {
101769fd337aSStanislav Fomichev 		if (type == BPF_LSM_CGROUP)
101869fd337aSStanislav Fomichev 			bpf_trampoline_unlink_cgroup_shim(old_prog);
1019324bda9eSAlexei Starovoitov 		bpf_prog_put(old_prog);
102069fd337aSStanislav Fomichev 	}
10216fc88c35SDave Marchevsky 	static_branch_dec(&cgroup_bpf_enabled_key[atype]);
1022324bda9eSAlexei Starovoitov 	return 0;
102330070984SDaniel Mack }
102430070984SDaniel Mack 
cgroup_bpf_detach(struct cgroup * cgrp,struct bpf_prog * prog,enum bpf_attach_type type)1025588e5d87SHe Fengqing static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
1026588e5d87SHe Fengqing 			     enum bpf_attach_type type)
1027588e5d87SHe Fengqing {
1028588e5d87SHe Fengqing 	int ret;
1029588e5d87SHe Fengqing 
10304cdb91b0SKamalesh Babulal 	cgroup_lock();
1031588e5d87SHe Fengqing 	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
10324cdb91b0SKamalesh Babulal 	cgroup_unlock();
1033588e5d87SHe Fengqing 	return ret;
1034588e5d87SHe Fengqing }
1035588e5d87SHe Fengqing 
1036468e2f64SAlexei Starovoitov /* Must be called with cgroup_mutex held to avoid races. */
__cgroup_bpf_query(struct cgroup * cgrp,const union bpf_attr * attr,union bpf_attr __user * uattr)1037588e5d87SHe Fengqing static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
1038468e2f64SAlexei Starovoitov 			      union bpf_attr __user *uattr)
1039468e2f64SAlexei Starovoitov {
1040b79c9fc9SStanislav Fomichev 	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
10410e426a3aSPu Lehui 	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
1042468e2f64SAlexei Starovoitov 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
1043468e2f64SAlexei Starovoitov 	enum bpf_attach_type type = attr->query.attach_type;
1044b79c9fc9SStanislav Fomichev 	enum cgroup_bpf_attach_type from_atype, to_atype;
10456fc88c35SDave Marchevsky 	enum cgroup_bpf_attach_type atype;
1046dbcc1ba2SStanislav Fomichev 	struct bpf_prog_array *effective;
1047468e2f64SAlexei Starovoitov 	int cnt, ret = 0, i;
1048b79c9fc9SStanislav Fomichev 	int total_cnt = 0;
10496fc88c35SDave Marchevsky 	u32 flags;
1050468e2f64SAlexei Starovoitov 
10510e426a3aSPu Lehui 	if (effective_query && prog_attach_flags)
10520e426a3aSPu Lehui 		return -EINVAL;
10530e426a3aSPu Lehui 
1054b79c9fc9SStanislav Fomichev 	if (type == BPF_LSM_CGROUP) {
10550e426a3aSPu Lehui 		if (!effective_query && attr->query.prog_cnt &&
10560e426a3aSPu Lehui 		    prog_ids && !prog_attach_flags)
10576fc88c35SDave Marchevsky 			return -EINVAL;
10586fc88c35SDave Marchevsky 
1059b79c9fc9SStanislav Fomichev 		from_atype = CGROUP_LSM_START;
1060b79c9fc9SStanislav Fomichev 		to_atype = CGROUP_LSM_END;
1061b79c9fc9SStanislav Fomichev 		flags = 0;
1062b79c9fc9SStanislav Fomichev 	} else {
1063b79c9fc9SStanislav Fomichev 		from_atype = to_cgroup_bpf_attach_type(type);
1064b79c9fc9SStanislav Fomichev 		if (from_atype < 0)
1065b79c9fc9SStanislav Fomichev 			return -EINVAL;
1066b79c9fc9SStanislav Fomichev 		to_atype = from_atype;
1067b79c9fc9SStanislav Fomichev 		flags = cgrp->bpf.flags[from_atype];
1068b79c9fc9SStanislav Fomichev 	}
10696fc88c35SDave Marchevsky 
1070b79c9fc9SStanislav Fomichev 	for (atype = from_atype; atype <= to_atype; atype++) {
10710e426a3aSPu Lehui 		if (effective_query) {
10726fc88c35SDave Marchevsky 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
1073dbcc1ba2SStanislav Fomichev 							      lockdep_is_held(&cgroup_mutex));
1074b79c9fc9SStanislav Fomichev 			total_cnt += bpf_prog_array_length(effective);
1075b79c9fc9SStanislav Fomichev 		} else {
1076b79c9fc9SStanislav Fomichev 			total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
1077b79c9fc9SStanislav Fomichev 		}
1078b79c9fc9SStanislav Fomichev 	}
1079468e2f64SAlexei Starovoitov 
10800e426a3aSPu Lehui 	/* always output uattr->query.attach_flags as 0 during effective query */
10810e426a3aSPu Lehui 	flags = effective_query ? 0 : flags;
1082468e2f64SAlexei Starovoitov 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
1083468e2f64SAlexei Starovoitov 		return -EFAULT;
1084b79c9fc9SStanislav Fomichev 	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
1085468e2f64SAlexei Starovoitov 		return -EFAULT;
1086b79c9fc9SStanislav Fomichev 	if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
1087468e2f64SAlexei Starovoitov 		/* return early if user requested only program count + flags */
1088468e2f64SAlexei Starovoitov 		return 0;
1089b79c9fc9SStanislav Fomichev 
1090b79c9fc9SStanislav Fomichev 	if (attr->query.prog_cnt < total_cnt) {
1091b79c9fc9SStanislav Fomichev 		total_cnt = attr->query.prog_cnt;
1092468e2f64SAlexei Starovoitov 		ret = -ENOSPC;
1093468e2f64SAlexei Starovoitov 	}
1094468e2f64SAlexei Starovoitov 
1095b79c9fc9SStanislav Fomichev 	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
10960e426a3aSPu Lehui 		if (effective_query) {
1097b79c9fc9SStanislav Fomichev 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
1098b79c9fc9SStanislav Fomichev 							      lockdep_is_held(&cgroup_mutex));
1099b79c9fc9SStanislav Fomichev 			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
1100b79c9fc9SStanislav Fomichev 			ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
1101468e2f64SAlexei Starovoitov 		} else {
1102b79c9fc9SStanislav Fomichev 			struct hlist_head *progs;
1103468e2f64SAlexei Starovoitov 			struct bpf_prog_list *pl;
1104b79c9fc9SStanislav Fomichev 			struct bpf_prog *prog;
1105468e2f64SAlexei Starovoitov 			u32 id;
1106468e2f64SAlexei Starovoitov 
1107b79c9fc9SStanislav Fomichev 			progs = &cgrp->bpf.progs[atype];
1108b79c9fc9SStanislav Fomichev 			cnt = min_t(int, prog_list_length(progs), total_cnt);
1109468e2f64SAlexei Starovoitov 			i = 0;
111000442143SStanislav Fomichev 			hlist_for_each_entry(pl, progs, node) {
1111af6eea57SAndrii Nakryiko 				prog = prog_list_prog(pl);
1112af6eea57SAndrii Nakryiko 				id = prog->aux->id;
1113468e2f64SAlexei Starovoitov 				if (copy_to_user(prog_ids + i, &id, sizeof(id)))
1114468e2f64SAlexei Starovoitov 					return -EFAULT;
1115468e2f64SAlexei Starovoitov 				if (++i == cnt)
1116468e2f64SAlexei Starovoitov 					break;
1117468e2f64SAlexei Starovoitov 			}
1118b79c9fc9SStanislav Fomichev 
1119b79c9fc9SStanislav Fomichev 			if (prog_attach_flags) {
1120b79c9fc9SStanislav Fomichev 				flags = cgrp->bpf.flags[atype];
1121b79c9fc9SStanislav Fomichev 
1122b79c9fc9SStanislav Fomichev 				for (i = 0; i < cnt; i++)
11230e426a3aSPu Lehui 					if (copy_to_user(prog_attach_flags + i,
11240e426a3aSPu Lehui 							 &flags, sizeof(flags)))
1125b79c9fc9SStanislav Fomichev 						return -EFAULT;
1126b79c9fc9SStanislav Fomichev 				prog_attach_flags += cnt;
1127b79c9fc9SStanislav Fomichev 			}
11280e426a3aSPu Lehui 		}
1129b79c9fc9SStanislav Fomichev 
1130b79c9fc9SStanislav Fomichev 		prog_ids += cnt;
1131b79c9fc9SStanislav Fomichev 		total_cnt -= cnt;
1132b79c9fc9SStanislav Fomichev 	}
1133468e2f64SAlexei Starovoitov 	return ret;
1134468e2f64SAlexei Starovoitov }
1135468e2f64SAlexei Starovoitov 
/* Locked wrapper around __cgroup_bpf_query(): the cgroup lock is held for
 * the whole query and the inner helper's result is returned unchanged.
 */
static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			    union bpf_attr __user *uattr)
{
	int err;

	cgroup_lock();
	err = __cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_unlock();

	return err;
}
1146588e5d87SHe Fengqing 
cgroup_bpf_prog_attach(const union bpf_attr * attr,enum bpf_prog_type ptype,struct bpf_prog * prog)1147fdb5c453SSean Young int cgroup_bpf_prog_attach(const union bpf_attr *attr,
1148fdb5c453SSean Young 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
1149fdb5c453SSean Young {
11507dd68b32SAndrey Ignatov 	struct bpf_prog *replace_prog = NULL;
1151fdb5c453SSean Young 	struct cgroup *cgrp;
1152fdb5c453SSean Young 	int ret;
1153fdb5c453SSean Young 
1154fdb5c453SSean Young 	cgrp = cgroup_get_from_fd(attr->target_fd);
1155fdb5c453SSean Young 	if (IS_ERR(cgrp))
1156fdb5c453SSean Young 		return PTR_ERR(cgrp);
1157fdb5c453SSean Young 
11587dd68b32SAndrey Ignatov 	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
11597dd68b32SAndrey Ignatov 	    (attr->attach_flags & BPF_F_REPLACE)) {
11607dd68b32SAndrey Ignatov 		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
11617dd68b32SAndrey Ignatov 		if (IS_ERR(replace_prog)) {
11627dd68b32SAndrey Ignatov 			cgroup_put(cgrp);
11637dd68b32SAndrey Ignatov 			return PTR_ERR(replace_prog);
11647dd68b32SAndrey Ignatov 		}
11657dd68b32SAndrey Ignatov 	}
11667dd68b32SAndrey Ignatov 
1167af6eea57SAndrii Nakryiko 	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
1168af6eea57SAndrii Nakryiko 				attr->attach_type, attr->attach_flags);
11697dd68b32SAndrey Ignatov 
11707dd68b32SAndrey Ignatov 	if (replace_prog)
11717dd68b32SAndrey Ignatov 		bpf_prog_put(replace_prog);
1172fdb5c453SSean Young 	cgroup_put(cgrp);
1173fdb5c453SSean Young 	return ret;
1174fdb5c453SSean Young }
1175fdb5c453SSean Young 
cgroup_bpf_prog_detach(const union bpf_attr * attr,enum bpf_prog_type ptype)1176fdb5c453SSean Young int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
1177fdb5c453SSean Young {
1178fdb5c453SSean Young 	struct bpf_prog *prog;
1179fdb5c453SSean Young 	struct cgroup *cgrp;
1180fdb5c453SSean Young 	int ret;
1181fdb5c453SSean Young 
1182fdb5c453SSean Young 	cgrp = cgroup_get_from_fd(attr->target_fd);
1183fdb5c453SSean Young 	if (IS_ERR(cgrp))
1184fdb5c453SSean Young 		return PTR_ERR(cgrp);
1185fdb5c453SSean Young 
1186fdb5c453SSean Young 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1187fdb5c453SSean Young 	if (IS_ERR(prog))
1188fdb5c453SSean Young 		prog = NULL;
1189fdb5c453SSean Young 
1190af6eea57SAndrii Nakryiko 	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
1191fdb5c453SSean Young 	if (prog)
1192fdb5c453SSean Young 		bpf_prog_put(prog);
1193fdb5c453SSean Young 
1194fdb5c453SSean Young 	cgroup_put(cgrp);
1195fdb5c453SSean Young 	return ret;
1196fdb5c453SSean Young }
1197fdb5c453SSean Young 
bpf_cgroup_link_release(struct bpf_link * link)1198af6eea57SAndrii Nakryiko static void bpf_cgroup_link_release(struct bpf_link *link)
1199af6eea57SAndrii Nakryiko {
1200af6eea57SAndrii Nakryiko 	struct bpf_cgroup_link *cg_link =
1201af6eea57SAndrii Nakryiko 		container_of(link, struct bpf_cgroup_link, link);
120273b11c2aSAndrii Nakryiko 	struct cgroup *cg;
1203af6eea57SAndrii Nakryiko 
1204af6eea57SAndrii Nakryiko 	/* link might have been auto-detached by dying cgroup already,
1205af6eea57SAndrii Nakryiko 	 * in that case our work is done here
1206af6eea57SAndrii Nakryiko 	 */
1207af6eea57SAndrii Nakryiko 	if (!cg_link->cgroup)
1208af6eea57SAndrii Nakryiko 		return;
1209af6eea57SAndrii Nakryiko 
12104cdb91b0SKamalesh Babulal 	cgroup_lock();
1211af6eea57SAndrii Nakryiko 
1212af6eea57SAndrii Nakryiko 	/* re-check cgroup under lock again */
1213af6eea57SAndrii Nakryiko 	if (!cg_link->cgroup) {
12144cdb91b0SKamalesh Babulal 		cgroup_unlock();
1215af6eea57SAndrii Nakryiko 		return;
1216af6eea57SAndrii Nakryiko 	}
1217af6eea57SAndrii Nakryiko 
1218af6eea57SAndrii Nakryiko 	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
1219af6eea57SAndrii Nakryiko 				    cg_link->type));
122069fd337aSStanislav Fomichev 	if (cg_link->type == BPF_LSM_CGROUP)
122169fd337aSStanislav Fomichev 		bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
1222af6eea57SAndrii Nakryiko 
122373b11c2aSAndrii Nakryiko 	cg = cg_link->cgroup;
122473b11c2aSAndrii Nakryiko 	cg_link->cgroup = NULL;
122573b11c2aSAndrii Nakryiko 
12264cdb91b0SKamalesh Babulal 	cgroup_unlock();
122773b11c2aSAndrii Nakryiko 
122873b11c2aSAndrii Nakryiko 	cgroup_put(cg);
1229af6eea57SAndrii Nakryiko }
1230af6eea57SAndrii Nakryiko 
bpf_cgroup_link_dealloc(struct bpf_link * link)1231af6eea57SAndrii Nakryiko static void bpf_cgroup_link_dealloc(struct bpf_link *link)
1232af6eea57SAndrii Nakryiko {
1233af6eea57SAndrii Nakryiko 	struct bpf_cgroup_link *cg_link =
1234af6eea57SAndrii Nakryiko 		container_of(link, struct bpf_cgroup_link, link);
1235af6eea57SAndrii Nakryiko 
1236af6eea57SAndrii Nakryiko 	kfree(cg_link);
1237af6eea57SAndrii Nakryiko }
1238af6eea57SAndrii Nakryiko 
/* bpf_link_ops.detach: same work as release; always reports success. */
static int bpf_cgroup_link_detach(struct bpf_link *link)
{
	bpf_cgroup_link_release(link);
	return 0;
}
124573b11c2aSAndrii Nakryiko 
bpf_cgroup_link_show_fdinfo(const struct bpf_link * link,struct seq_file * seq)1246f2e10bffSAndrii Nakryiko static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
1247f2e10bffSAndrii Nakryiko 					struct seq_file *seq)
1248f2e10bffSAndrii Nakryiko {
1249f2e10bffSAndrii Nakryiko 	struct bpf_cgroup_link *cg_link =
1250f2e10bffSAndrii Nakryiko 		container_of(link, struct bpf_cgroup_link, link);
1251f2e10bffSAndrii Nakryiko 	u64 cg_id = 0;
1252f2e10bffSAndrii Nakryiko 
12534cdb91b0SKamalesh Babulal 	cgroup_lock();
1254f2e10bffSAndrii Nakryiko 	if (cg_link->cgroup)
1255f2e10bffSAndrii Nakryiko 		cg_id = cgroup_id(cg_link->cgroup);
12564cdb91b0SKamalesh Babulal 	cgroup_unlock();
1257f2e10bffSAndrii Nakryiko 
1258f2e10bffSAndrii Nakryiko 	seq_printf(seq,
1259f2e10bffSAndrii Nakryiko 		   "cgroup_id:\t%llu\n"
1260f2e10bffSAndrii Nakryiko 		   "attach_type:\t%d\n",
1261f2e10bffSAndrii Nakryiko 		   cg_id,
1262f2e10bffSAndrii Nakryiko 		   cg_link->type);
1263f2e10bffSAndrii Nakryiko }
1264f2e10bffSAndrii Nakryiko 
bpf_cgroup_link_fill_link_info(const struct bpf_link * link,struct bpf_link_info * info)1265f2e10bffSAndrii Nakryiko static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
1266f2e10bffSAndrii Nakryiko 					  struct bpf_link_info *info)
1267f2e10bffSAndrii Nakryiko {
1268f2e10bffSAndrii Nakryiko 	struct bpf_cgroup_link *cg_link =
1269f2e10bffSAndrii Nakryiko 		container_of(link, struct bpf_cgroup_link, link);
1270f2e10bffSAndrii Nakryiko 	u64 cg_id = 0;
1271f2e10bffSAndrii Nakryiko 
12724cdb91b0SKamalesh Babulal 	cgroup_lock();
1273f2e10bffSAndrii Nakryiko 	if (cg_link->cgroup)
1274f2e10bffSAndrii Nakryiko 		cg_id = cgroup_id(cg_link->cgroup);
12754cdb91b0SKamalesh Babulal 	cgroup_unlock();
1276f2e10bffSAndrii Nakryiko 
1277f2e10bffSAndrii Nakryiko 	info->cgroup.cgroup_id = cg_id;
1278f2e10bffSAndrii Nakryiko 	info->cgroup.attach_type = cg_link->type;
1279f2e10bffSAndrii Nakryiko 	return 0;
1280f2e10bffSAndrii Nakryiko }
1281f2e10bffSAndrii Nakryiko 
1282f2e10bffSAndrii Nakryiko static const struct bpf_link_ops bpf_cgroup_link_lops = {
1283af6eea57SAndrii Nakryiko 	.release = bpf_cgroup_link_release,
1284af6eea57SAndrii Nakryiko 	.dealloc = bpf_cgroup_link_dealloc,
128573b11c2aSAndrii Nakryiko 	.detach = bpf_cgroup_link_detach,
1286f9d04127SAndrii Nakryiko 	.update_prog = cgroup_bpf_replace,
1287f2e10bffSAndrii Nakryiko 	.show_fdinfo = bpf_cgroup_link_show_fdinfo,
1288f2e10bffSAndrii Nakryiko 	.fill_link_info = bpf_cgroup_link_fill_link_info,
1289af6eea57SAndrii Nakryiko };
1290af6eea57SAndrii Nakryiko 
cgroup_bpf_link_attach(const union bpf_attr * attr,struct bpf_prog * prog)1291af6eea57SAndrii Nakryiko int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
1292af6eea57SAndrii Nakryiko {
1293a3b80e10SAndrii Nakryiko 	struct bpf_link_primer link_primer;
1294af6eea57SAndrii Nakryiko 	struct bpf_cgroup_link *link;
1295af6eea57SAndrii Nakryiko 	struct cgroup *cgrp;
1296a3b80e10SAndrii Nakryiko 	int err;
1297af6eea57SAndrii Nakryiko 
1298af6eea57SAndrii Nakryiko 	if (attr->link_create.flags)
1299af6eea57SAndrii Nakryiko 		return -EINVAL;
1300af6eea57SAndrii Nakryiko 
1301af6eea57SAndrii Nakryiko 	cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
1302af6eea57SAndrii Nakryiko 	if (IS_ERR(cgrp))
1303af6eea57SAndrii Nakryiko 		return PTR_ERR(cgrp);
1304af6eea57SAndrii Nakryiko 
1305af6eea57SAndrii Nakryiko 	link = kzalloc(sizeof(*link), GFP_USER);
1306af6eea57SAndrii Nakryiko 	if (!link) {
1307af6eea57SAndrii Nakryiko 		err = -ENOMEM;
1308af6eea57SAndrii Nakryiko 		goto out_put_cgroup;
1309af6eea57SAndrii Nakryiko 	}
1310f2e10bffSAndrii Nakryiko 	bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
1311f2e10bffSAndrii Nakryiko 		      prog);
1312af6eea57SAndrii Nakryiko 	link->cgroup = cgrp;
1313af6eea57SAndrii Nakryiko 	link->type = attr->link_create.attach_type;
1314af6eea57SAndrii Nakryiko 
1315a3b80e10SAndrii Nakryiko 	err = bpf_link_prime(&link->link, &link_primer);
1316a3b80e10SAndrii Nakryiko 	if (err) {
1317af6eea57SAndrii Nakryiko 		kfree(link);
1318af6eea57SAndrii Nakryiko 		goto out_put_cgroup;
1319af6eea57SAndrii Nakryiko 	}
1320af6eea57SAndrii Nakryiko 
13216fc88c35SDave Marchevsky 	err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
13226fc88c35SDave Marchevsky 				link->type, BPF_F_ALLOW_MULTI);
1323af6eea57SAndrii Nakryiko 	if (err) {
1324a3b80e10SAndrii Nakryiko 		bpf_link_cleanup(&link_primer);
1325af6eea57SAndrii Nakryiko 		goto out_put_cgroup;
1326af6eea57SAndrii Nakryiko 	}
1327af6eea57SAndrii Nakryiko 
1328a3b80e10SAndrii Nakryiko 	return bpf_link_settle(&link_primer);
1329af6eea57SAndrii Nakryiko 
1330af6eea57SAndrii Nakryiko out_put_cgroup:
1331af6eea57SAndrii Nakryiko 	cgroup_put(cgrp);
1332af6eea57SAndrii Nakryiko 	return err;
1333af6eea57SAndrii Nakryiko }
1334af6eea57SAndrii Nakryiko 
/* Resolve attr->query.target_fd to a cgroup and run cgroup_bpf_query() on
 * it. Returns the query result, or the error from the cgroup lookup.
 */
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp = cgroup_get_from_fd(attr->query.target_fd);
	int err;

	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	err = cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_put(cgrp);

	return err;
}
1350fdb5c453SSean Young 
135130070984SDaniel Mack /**
1352b2cd1257SDavid Ahern  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
13538f917bbaSWillem de Bruijn  * @sk: The socket sending or receiving traffic
135430070984SDaniel Mack  * @skb: The skb that is being sent or received
1355214bfd26SRandy Dunlap  * @atype: The type of program to be executed
135630070984SDaniel Mack  *
135730070984SDaniel Mack  * If no socket is passed, or the socket is not of type INET or INET6,
135830070984SDaniel Mack  * this function does nothing and returns 0.
135930070984SDaniel Mack  *
136030070984SDaniel Mack  * The program type passed in via @type must be suitable for network
136130070984SDaniel Mack  * filtering. No further check is performed to assert that.
136230070984SDaniel Mack  *
1363e7a3160dSbrakmo  * For egress packets, this function can return:
1364e7a3160dSbrakmo  *   NET_XMIT_SUCCESS    (0)	- continue with packet output
1365e7a3160dSbrakmo  *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr
1366e7a3160dSbrakmo  *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP
1367e7a3160dSbrakmo  *				  to call cwr
1368b44123b4SYiFei Zhu  *   -err			- drop packet
1369e7a3160dSbrakmo  *
1370e7a3160dSbrakmo  * For ingress packets, this function will return -EPERM if any
1371e7a3160dSbrakmo  * attached program was found and if it returned != 1 during execution.
1372e7a3160dSbrakmo  * Otherwise 0 is returned.
137330070984SDaniel Mack  */
__cgroup_bpf_run_filter_skb(struct sock * sk,struct sk_buff * skb,enum cgroup_bpf_attach_type atype)1374b2cd1257SDavid Ahern int __cgroup_bpf_run_filter_skb(struct sock *sk,
137530070984SDaniel Mack 				struct sk_buff *skb,
13766fc88c35SDave Marchevsky 				enum cgroup_bpf_attach_type atype)
137730070984SDaniel Mack {
1378324bda9eSAlexei Starovoitov 	unsigned int offset = skb->data - skb_network_header(skb);
1379324bda9eSAlexei Starovoitov 	struct sock *save_sk;
1380b39b5f41SSong Liu 	void *saved_data_end;
138130070984SDaniel Mack 	struct cgroup *cgrp;
1382324bda9eSAlexei Starovoitov 	int ret;
138330070984SDaniel Mack 
138430070984SDaniel Mack 	if (!sk || !sk_fullsock(sk))
138530070984SDaniel Mack 		return 0;
138630070984SDaniel Mack 
1387324bda9eSAlexei Starovoitov 	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
138830070984SDaniel Mack 		return 0;
138930070984SDaniel Mack 
139030070984SDaniel Mack 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1391324bda9eSAlexei Starovoitov 	save_sk = skb->sk;
13928f917bbaSWillem de Bruijn 	skb->sk = sk;
139330070984SDaniel Mack 	__skb_push(skb, offset);
1394b39b5f41SSong Liu 
1395b39b5f41SSong Liu 	/* compute pointers for the bpf prog */
1396b39b5f41SSong Liu 	bpf_compute_and_save_data_end(skb, &saved_data_end);
1397b39b5f41SSong Liu 
13986fc88c35SDave Marchevsky 	if (atype == CGROUP_INET_EGRESS) {
1399055eb955SStanislav Fomichev 		u32 flags = 0;
1400055eb955SStanislav Fomichev 		bool cn;
1401055eb955SStanislav Fomichev 
1402d9d31cf8SStanislav Fomichev 		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
1403d9d31cf8SStanislav Fomichev 					    __bpf_prog_run_save_cb, 0, &flags);
1404055eb955SStanislav Fomichev 
1405055eb955SStanislav Fomichev 		/* Return values of CGROUP EGRESS BPF programs are:
1406055eb955SStanislav Fomichev 		 *   0: drop packet
1407055eb955SStanislav Fomichev 		 *   1: keep packet
1408055eb955SStanislav Fomichev 		 *   2: drop packet and cn
1409055eb955SStanislav Fomichev 		 *   3: keep packet and cn
1410055eb955SStanislav Fomichev 		 *
1411055eb955SStanislav Fomichev 		 * The returned value is then converted to one of the NET_XMIT
1412055eb955SStanislav Fomichev 		 * or an error code that is then interpreted as drop packet
1413055eb955SStanislav Fomichev 		 * (and no cn):
1414055eb955SStanislav Fomichev 		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
1415055eb955SStanislav Fomichev 		 *   1: NET_XMIT_DROP     skb should be dropped and cn
1416055eb955SStanislav Fomichev 		 *   2: NET_XMIT_CN       skb should be transmitted and cn
1417055eb955SStanislav Fomichev 		 *   3: -err              skb should be dropped
1418055eb955SStanislav Fomichev 		 */
1419055eb955SStanislav Fomichev 
1420055eb955SStanislav Fomichev 		cn = flags & BPF_RET_SET_CN;
1421055eb955SStanislav Fomichev 		if (ret && !IS_ERR_VALUE((long)ret))
1422055eb955SStanislav Fomichev 			ret = -EFAULT;
1423055eb955SStanislav Fomichev 		if (!ret)
1424055eb955SStanislav Fomichev 			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
1425055eb955SStanislav Fomichev 		else
1426055eb955SStanislav Fomichev 			ret = (cn ? NET_XMIT_DROP : ret);
1427e7a3160dSbrakmo 	} else {
1428055eb955SStanislav Fomichev 		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
1429d9d31cf8SStanislav Fomichev 					    skb, __bpf_prog_run_save_cb, 0,
1430d9d31cf8SStanislav Fomichev 					    NULL);
1431b44123b4SYiFei Zhu 		if (ret && !IS_ERR_VALUE((long)ret))
1432b44123b4SYiFei Zhu 			ret = -EFAULT;
1433e7a3160dSbrakmo 	}
1434b39b5f41SSong Liu 	bpf_restore_data_end(skb, saved_data_end);
143530070984SDaniel Mack 	__skb_pull(skb, offset);
14368f917bbaSWillem de Bruijn 	skb->sk = save_sk;
1437e7a3160dSbrakmo 
1438e7a3160dSbrakmo 	return ret;
143930070984SDaniel Mack }
1440b2cd1257SDavid Ahern EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
144161023658SDavid Ahern 
144261023658SDavid Ahern /**
144361023658SDavid Ahern  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
144461023658SDavid Ahern  * @sk: sock structure to manipulate
1445214bfd26SRandy Dunlap  * @atype: The type of program to be executed
144661023658SDavid Ahern  *
144761023658SDavid Ahern  * socket is passed is expected to be of type INET or INET6.
144861023658SDavid Ahern  *
144961023658SDavid Ahern  * The program type passed in via @type must be suitable for sock
145061023658SDavid Ahern  * filtering. No further check is performed to assert that.
145161023658SDavid Ahern  *
145261023658SDavid Ahern  * This function will return %-EPERM if any if an attached program was found
145361023658SDavid Ahern  * and if it returned != 1 during execution. In all other cases, 0 is returned.
145461023658SDavid Ahern  */
__cgroup_bpf_run_filter_sk(struct sock * sk,enum cgroup_bpf_attach_type atype)145561023658SDavid Ahern int __cgroup_bpf_run_filter_sk(struct sock *sk,
14566fc88c35SDave Marchevsky 			       enum cgroup_bpf_attach_type atype)
145761023658SDavid Ahern {
145861023658SDavid Ahern 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
145961023658SDavid Ahern 
1460d9d31cf8SStanislav Fomichev 	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
1461d9d31cf8SStanislav Fomichev 				     NULL);
146261023658SDavid Ahern }
146361023658SDavid Ahern EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
146440304b2aSLawrence Brakmo 
146540304b2aSLawrence Brakmo /**
14664fbac77dSAndrey Ignatov  * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
14674fbac77dSAndrey Ignatov  *                                       provided by user sockaddr
14684fbac77dSAndrey Ignatov  * @sk: sock struct that will use sockaddr
14694fbac77dSAndrey Ignatov  * @uaddr: sockaddr struct provided by user
14706d71331eSDaan De Meyer  * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
14716d71331eSDaan De Meyer  *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
14726d71331eSDaan De Meyer  *            uaddr.
1473214bfd26SRandy Dunlap  * @atype: The type of program to be executed
14741cedee13SAndrey Ignatov  * @t_ctx: Pointer to attach type specific context
147577241217SStanislav Fomichev  * @flags: Pointer to u32 which contains higher bits of BPF program
147677241217SStanislav Fomichev  *         return value (OR'ed together).
14774fbac77dSAndrey Ignatov  *
14784fbac77dSAndrey Ignatov  * socket is expected to be of type INET or INET6.
14794fbac77dSAndrey Ignatov  *
14804fbac77dSAndrey Ignatov  * This function will return %-EPERM if an attached program is found and
14814fbac77dSAndrey Ignatov  * returned value != 1 during execution. In all other cases, 0 is returned.
14824fbac77dSAndrey Ignatov  */
__cgroup_bpf_run_filter_sock_addr(struct sock * sk,struct sockaddr * uaddr,int * uaddrlen,enum cgroup_bpf_attach_type atype,void * t_ctx,u32 * flags)14834fbac77dSAndrey Ignatov int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
14844fbac77dSAndrey Ignatov 				      struct sockaddr *uaddr,
14856d71331eSDaan De Meyer 				      int *uaddrlen,
14866fc88c35SDave Marchevsky 				      enum cgroup_bpf_attach_type atype,
148777241217SStanislav Fomichev 				      void *t_ctx,
148877241217SStanislav Fomichev 				      u32 *flags)
14894fbac77dSAndrey Ignatov {
14904fbac77dSAndrey Ignatov 	struct bpf_sock_addr_kern ctx = {
14914fbac77dSAndrey Ignatov 		.sk = sk,
14924fbac77dSAndrey Ignatov 		.uaddr = uaddr,
14931cedee13SAndrey Ignatov 		.t_ctx = t_ctx,
14944fbac77dSAndrey Ignatov 	};
14951cedee13SAndrey Ignatov 	struct sockaddr_storage unspec;
14964fbac77dSAndrey Ignatov 	struct cgroup *cgrp;
14976d71331eSDaan De Meyer 	int ret;
14984fbac77dSAndrey Ignatov 
14994fbac77dSAndrey Ignatov 	/* Check socket family since not all sockets represent network
15004fbac77dSAndrey Ignatov 	 * endpoint (e.g. AF_UNIX).
15014fbac77dSAndrey Ignatov 	 */
15024fbac77dSAndrey Ignatov 	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
15034fbac77dSAndrey Ignatov 		return 0;
15044fbac77dSAndrey Ignatov 
15051cedee13SAndrey Ignatov 	if (!ctx.uaddr) {
15061cedee13SAndrey Ignatov 		memset(&unspec, 0, sizeof(unspec));
15071cedee13SAndrey Ignatov 		ctx.uaddr = (struct sockaddr *)&unspec;
15086d71331eSDaan De Meyer 		ctx.uaddrlen = 0;
15096d71331eSDaan De Meyer 	} else {
15106d71331eSDaan De Meyer 		ctx.uaddrlen = *uaddrlen;
15111cedee13SAndrey Ignatov 	}
15121cedee13SAndrey Ignatov 
15134fbac77dSAndrey Ignatov 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
15146d71331eSDaan De Meyer 	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
1515d9d31cf8SStanislav Fomichev 				    0, flags);
15166d71331eSDaan De Meyer 
15176d71331eSDaan De Meyer 	if (!ret && uaddr)
15186d71331eSDaan De Meyer 		*uaddrlen = ctx.uaddrlen;
15196d71331eSDaan De Meyer 
15206d71331eSDaan De Meyer 	return ret;
15214fbac77dSAndrey Ignatov }
15224fbac77dSAndrey Ignatov EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
15234fbac77dSAndrey Ignatov 
15244fbac77dSAndrey Ignatov /**
152540304b2aSLawrence Brakmo  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
152640304b2aSLawrence Brakmo  * @sk: socket to get cgroup from
152740304b2aSLawrence Brakmo  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
152840304b2aSLawrence Brakmo  * sk with connection information (IP addresses, etc.) May not contain
152940304b2aSLawrence Brakmo  * cgroup info if it is a req sock.
1530214bfd26SRandy Dunlap  * @atype: The type of program to be executed
153140304b2aSLawrence Brakmo  *
153240304b2aSLawrence Brakmo  * socket passed is expected to be of type INET or INET6.
153340304b2aSLawrence Brakmo  *
153440304b2aSLawrence Brakmo  * The program type passed in via @type must be suitable for sock_ops
153540304b2aSLawrence Brakmo  * filtering. No further check is performed to assert that.
153640304b2aSLawrence Brakmo  *
153740304b2aSLawrence Brakmo  * This function will return %-EPERM if any if an attached program was found
153840304b2aSLawrence Brakmo  * and if it returned != 1 during execution. In all other cases, 0 is returned.
153940304b2aSLawrence Brakmo  */
__cgroup_bpf_run_filter_sock_ops(struct sock * sk,struct bpf_sock_ops_kern * sock_ops,enum cgroup_bpf_attach_type atype)154040304b2aSLawrence Brakmo int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
154140304b2aSLawrence Brakmo 				     struct bpf_sock_ops_kern *sock_ops,
15426fc88c35SDave Marchevsky 				     enum cgroup_bpf_attach_type atype)
154340304b2aSLawrence Brakmo {
154440304b2aSLawrence Brakmo 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
154540304b2aSLawrence Brakmo 
1546055eb955SStanislav Fomichev 	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
1547d9d31cf8SStanislav Fomichev 				     0, NULL);
154840304b2aSLawrence Brakmo }
154940304b2aSLawrence Brakmo EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
1550ebc614f6SRoman Gushchin 
/* Run the device-cgroup programs of type @atype for the current task.
 *
 * The program context carries @major and @minor as given, and an
 * access_type word built as (@access << 16) | @dev_type.
 *
 * The cgroup is obtained with task_dfl_cgroup(current) and the programs are
 * run inside an RCU read-side critical section. Returns the result of
 * bpf_prog_run_array_cg().
 */
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype)
{
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	struct cgroup *cgrp;
	int verdict;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	verdict = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
					0, NULL);
	rcu_read_unlock();

	return verdict;
}
1570ebc614f6SRoman Gushchin 
/* BPF helper: return a pointer to the cgroup storage of the currently
 * running program item for the storage type of @map.
 *
 * The flags argument is not used now, but provides an ability to extend
 * the API; the verifier checks that its value is correct.
 */
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	struct bpf_cg_run_ctx *run_ctx;

	/* get current cgroup storage from BPF run context */
	run_ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
	storage = run_ctx->prog_item->cgroup_storage[stype];

	/* shared storage lives in a single buffer ... */
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		return (unsigned long)&READ_ONCE(storage->buf)->data[0];

	/* ... any other storage type uses this CPU's slot of percpu_buf */
	return (unsigned long)this_cpu_ptr(storage->percpu_buf);
}
1593dea6a4e1SStanislav Fomichev 
1594dea6a4e1SStanislav Fomichev const struct bpf_func_proto bpf_get_local_storage_proto = {
1595dea6a4e1SStanislav Fomichev 	.func		= bpf_get_local_storage,
1596dea6a4e1SStanislav Fomichev 	.gpl_only	= false,
1597dea6a4e1SStanislav Fomichev 	.ret_type	= RET_PTR_TO_MAP_VALUE,
1598dea6a4e1SStanislav Fomichev 	.arg1_type	= ARG_CONST_MAP_PTR,
1599dea6a4e1SStanislav Fomichev 	.arg2_type	= ARG_ANYTHING,
1600dea6a4e1SStanislav Fomichev };
1601dea6a4e1SStanislav Fomichev 
BPF_CALL_0(bpf_get_retval)1602b44123b4SYiFei Zhu BPF_CALL_0(bpf_get_retval)
1603b44123b4SYiFei Zhu {
1604b44123b4SYiFei Zhu 	struct bpf_cg_run_ctx *ctx =
1605b44123b4SYiFei Zhu 		container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1606b44123b4SYiFei Zhu 
1607b44123b4SYiFei Zhu 	return ctx->retval;
1608b44123b4SYiFei Zhu }
1609b44123b4SYiFei Zhu 
161069fd337aSStanislav Fomichev const struct bpf_func_proto bpf_get_retval_proto = {
1611b44123b4SYiFei Zhu 	.func		= bpf_get_retval,
1612b44123b4SYiFei Zhu 	.gpl_only	= false,
1613b44123b4SYiFei Zhu 	.ret_type	= RET_INTEGER,
1614b44123b4SYiFei Zhu };
1615b44123b4SYiFei Zhu 
/* BPF helper: store @retval in the retval field of the current cgroup run
 * context. Always returns 0.
 */
BPF_CALL_1(bpf_set_retval, int, retval)
{
	struct bpf_cg_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
	run_ctx->retval = retval;

	return 0;
}
1624b44123b4SYiFei Zhu 
162569fd337aSStanislav Fomichev const struct bpf_func_proto bpf_set_retval_proto = {
1626b44123b4SYiFei Zhu 	.func		= bpf_set_retval,
1627b44123b4SYiFei Zhu 	.gpl_only	= false,
1628b44123b4SYiFei Zhu 	.ret_type	= RET_INTEGER,
1629b44123b4SYiFei Zhu 	.arg1_type	= ARG_ANYTHING,
1630b44123b4SYiFei Zhu };
1631b44123b4SYiFei Zhu 
1632ebc614f6SRoman Gushchin static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)1633dea6a4e1SStanislav Fomichev cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1634ebc614f6SRoman Gushchin {
1635dea6a4e1SStanislav Fomichev 	const struct bpf_func_proto *func_proto;
1636dea6a4e1SStanislav Fomichev 
1637dea6a4e1SStanislav Fomichev 	func_proto = cgroup_common_func_proto(func_id, prog);
1638dea6a4e1SStanislav Fomichev 	if (func_proto)
1639dea6a4e1SStanislav Fomichev 		return func_proto;
1640dea6a4e1SStanislav Fomichev 
1641dea6a4e1SStanislav Fomichev 	func_proto = cgroup_current_func_proto(func_id, prog);
1642dea6a4e1SStanislav Fomichev 	if (func_proto)
1643dea6a4e1SStanislav Fomichev 		return func_proto;
1644dea6a4e1SStanislav Fomichev 
1645ebc614f6SRoman Gushchin 	switch (func_id) {
16460456ea17SStanislav Fomichev 	case BPF_FUNC_perf_event_output:
16470456ea17SStanislav Fomichev 		return &bpf_event_output_data_proto;
1648ebc614f6SRoman Gushchin 	default:
16490456ea17SStanislav Fomichev 		return bpf_base_func_proto(func_id);
1650ebc614f6SRoman Gushchin 	}
1651ebc614f6SRoman Gushchin }
1652ebc614f6SRoman Gushchin 
cgroup_dev_is_valid_access(int off,int size,enum bpf_access_type type,const struct bpf_prog * prog,struct bpf_insn_access_aux * info)1653ebc614f6SRoman Gushchin static bool cgroup_dev_is_valid_access(int off, int size,
1654ebc614f6SRoman Gushchin 				       enum bpf_access_type type,
16555e43f899SAndrey Ignatov 				       const struct bpf_prog *prog,
1656ebc614f6SRoman Gushchin 				       struct bpf_insn_access_aux *info)
1657ebc614f6SRoman Gushchin {
165806ef0ccbSYonghong Song 	const int size_default = sizeof(__u32);
165906ef0ccbSYonghong Song 
1660ebc614f6SRoman Gushchin 	if (type == BPF_WRITE)
1661ebc614f6SRoman Gushchin 		return false;
1662ebc614f6SRoman Gushchin 
1663ebc614f6SRoman Gushchin 	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
1664ebc614f6SRoman Gushchin 		return false;
1665ebc614f6SRoman Gushchin 	/* The verifier guarantees that size > 0. */
1666ebc614f6SRoman Gushchin 	if (off % size != 0)
1667ebc614f6SRoman Gushchin 		return false;
166806ef0ccbSYonghong Song 
166906ef0ccbSYonghong Song 	switch (off) {
167006ef0ccbSYonghong Song 	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
167106ef0ccbSYonghong Song 		bpf_ctx_record_field_size(info, size_default);
167206ef0ccbSYonghong Song 		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
1673ebc614f6SRoman Gushchin 			return false;
167406ef0ccbSYonghong Song 		break;
167506ef0ccbSYonghong Song 	default:
167606ef0ccbSYonghong Song 		if (size != size_default)
167706ef0ccbSYonghong Song 			return false;
167806ef0ccbSYonghong Song 	}
1679ebc614f6SRoman Gushchin 
1680ebc614f6SRoman Gushchin 	return true;
1681ebc614f6SRoman Gushchin }
1682ebc614f6SRoman Gushchin 
1683ebc614f6SRoman Gushchin const struct bpf_prog_ops cg_dev_prog_ops = {
1684ebc614f6SRoman Gushchin };
1685ebc614f6SRoman Gushchin 
1686ebc614f6SRoman Gushchin const struct bpf_verifier_ops cg_dev_verifier_ops = {
1687ebc614f6SRoman Gushchin 	.get_func_proto		= cgroup_dev_func_proto,
1688ebc614f6SRoman Gushchin 	.is_valid_access	= cgroup_dev_is_valid_access,
1689ebc614f6SRoman Gushchin };
16907b146cebSAndrey Ignatov 
16917b146cebSAndrey Ignatov /**
16927b146cebSAndrey Ignatov  * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
16937b146cebSAndrey Ignatov  *
16947b146cebSAndrey Ignatov  * @head: sysctl table header
16957b146cebSAndrey Ignatov  * @table: sysctl table
16967b146cebSAndrey Ignatov  * @write: sysctl is being read (= 0) or written (= 1)
169732927393SChristoph Hellwig  * @buf: pointer to buffer (in and out)
16984e63acdfSAndrey Ignatov  * @pcount: value-result argument: value is size of buffer pointed to by @buf,
16994e63acdfSAndrey Ignatov  *	result is size of @new_buf if program set new value, initial value
17004e63acdfSAndrey Ignatov  *	otherwise
1701e1550bfeSAndrey Ignatov  * @ppos: value-result argument: value is position at which read from or write
1702e1550bfeSAndrey Ignatov  *	to sysctl is happening, result is new position if program overrode it,
1703e1550bfeSAndrey Ignatov  *	initial value otherwise
1704214bfd26SRandy Dunlap  * @atype: type of program to be executed
17057b146cebSAndrey Ignatov  *
17067b146cebSAndrey Ignatov  * Program is run when sysctl is being accessed, either read or written, and
17077b146cebSAndrey Ignatov  * can allow or deny such access.
17087b146cebSAndrey Ignatov  *
17097b146cebSAndrey Ignatov  * This function will return %-EPERM if an attached program is found and
17107b146cebSAndrey Ignatov  * returned value != 1 during execution. In all other cases 0 is returned.
17117b146cebSAndrey Ignatov  */
__cgroup_bpf_run_filter_sysctl(struct ctl_table_header * head,struct ctl_table * table,int write,char ** buf,size_t * pcount,loff_t * ppos,enum cgroup_bpf_attach_type atype)17127b146cebSAndrey Ignatov int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
17137b146cebSAndrey Ignatov 				   struct ctl_table *table, int write,
17144bd6a735SMatthew Wilcox (Oracle) 				   char **buf, size_t *pcount, loff_t *ppos,
17156fc88c35SDave Marchevsky 				   enum cgroup_bpf_attach_type atype)
17167b146cebSAndrey Ignatov {
17177b146cebSAndrey Ignatov 	struct bpf_sysctl_kern ctx = {
17187b146cebSAndrey Ignatov 		.head = head,
17197b146cebSAndrey Ignatov 		.table = table,
17207b146cebSAndrey Ignatov 		.write = write,
1721e1550bfeSAndrey Ignatov 		.ppos = ppos,
17221d11b301SAndrey Ignatov 		.cur_val = NULL,
17231d11b301SAndrey Ignatov 		.cur_len = PAGE_SIZE,
17244e63acdfSAndrey Ignatov 		.new_val = NULL,
17254e63acdfSAndrey Ignatov 		.new_len = 0,
17264e63acdfSAndrey Ignatov 		.new_updated = 0,
17277b146cebSAndrey Ignatov 	};
17287b146cebSAndrey Ignatov 	struct cgroup *cgrp;
172932927393SChristoph Hellwig 	loff_t pos = 0;
17307b146cebSAndrey Ignatov 	int ret;
17317b146cebSAndrey Ignatov 
17321d11b301SAndrey Ignatov 	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
173332927393SChristoph Hellwig 	if (!ctx.cur_val ||
173432927393SChristoph Hellwig 	    table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
17351d11b301SAndrey Ignatov 		/* Let BPF program decide how to proceed. */
17361d11b301SAndrey Ignatov 		ctx.cur_len = 0;
17371d11b301SAndrey Ignatov 	}
17381d11b301SAndrey Ignatov 
173932927393SChristoph Hellwig 	if (write && *buf && *pcount) {
17404e63acdfSAndrey Ignatov 		/* BPF program should be able to override new value with a
17414e63acdfSAndrey Ignatov 		 * buffer bigger than provided by user.
17424e63acdfSAndrey Ignatov 		 */
17434e63acdfSAndrey Ignatov 		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
174451356ac8SAndrey Ignatov 		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
174532927393SChristoph Hellwig 		if (ctx.new_val) {
174632927393SChristoph Hellwig 			memcpy(ctx.new_val, *buf, ctx.new_len);
174732927393SChristoph Hellwig 		} else {
17484e63acdfSAndrey Ignatov 			/* Let BPF program decide how to proceed. */
17494e63acdfSAndrey Ignatov 			ctx.new_len = 0;
17504e63acdfSAndrey Ignatov 		}
175132927393SChristoph Hellwig 	}
17524e63acdfSAndrey Ignatov 
17537b146cebSAndrey Ignatov 	rcu_read_lock();
17547b146cebSAndrey Ignatov 	cgrp = task_dfl_cgroup(current);
1755d9d31cf8SStanislav Fomichev 	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1756d9d31cf8SStanislav Fomichev 				    NULL);
17577b146cebSAndrey Ignatov 	rcu_read_unlock();
17587b146cebSAndrey Ignatov 
17591d11b301SAndrey Ignatov 	kfree(ctx.cur_val);
17601d11b301SAndrey Ignatov 
17614e63acdfSAndrey Ignatov 	if (ret == 1 && ctx.new_updated) {
176232927393SChristoph Hellwig 		kfree(*buf);
176332927393SChristoph Hellwig 		*buf = ctx.new_val;
17644e63acdfSAndrey Ignatov 		*pcount = ctx.new_len;
17654e63acdfSAndrey Ignatov 	} else {
17664e63acdfSAndrey Ignatov 		kfree(ctx.new_val);
17674e63acdfSAndrey Ignatov 	}
17684e63acdfSAndrey Ignatov 
1769f10d0596SYiFei Zhu 	return ret;
17707b146cebSAndrey Ignatov }
17717b146cebSAndrey Ignatov 
17726705fea0SYueHaibing #ifdef CONFIG_NET
sockopt_alloc_buf(struct bpf_sockopt_kern * ctx,int max_optlen,struct bpf_sockopt_buf * buf)177320f2505fSStanislav Fomichev static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
177420f2505fSStanislav Fomichev 			     struct bpf_sockopt_buf *buf)
17750d01da6aSStanislav Fomichev {
1776d8fe449aSStanislav Fomichev 	if (unlikely(max_optlen < 0))
17770d01da6aSStanislav Fomichev 		return -EINVAL;
17780d01da6aSStanislav Fomichev 
1779d8fe449aSStanislav Fomichev 	if (unlikely(max_optlen > PAGE_SIZE)) {
1780d8fe449aSStanislav Fomichev 		/* We don't expose optvals that are greater than PAGE_SIZE
1781d8fe449aSStanislav Fomichev 		 * to the BPF program.
1782d8fe449aSStanislav Fomichev 		 */
1783d8fe449aSStanislav Fomichev 		max_optlen = PAGE_SIZE;
1784d8fe449aSStanislav Fomichev 	}
1785d8fe449aSStanislav Fomichev 
178620f2505fSStanislav Fomichev 	if (max_optlen <= sizeof(buf->data)) {
178720f2505fSStanislav Fomichev 		/* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
178820f2505fSStanislav Fomichev 		 * bytes avoid the cost of kzalloc.
178920f2505fSStanislav Fomichev 		 */
179020f2505fSStanislav Fomichev 		ctx->optval = buf->data;
179120f2505fSStanislav Fomichev 		ctx->optval_end = ctx->optval + max_optlen;
179220f2505fSStanislav Fomichev 		return max_optlen;
179320f2505fSStanislav Fomichev 	}
179420f2505fSStanislav Fomichev 
17950d01da6aSStanislav Fomichev 	ctx->optval = kzalloc(max_optlen, GFP_USER);
17960d01da6aSStanislav Fomichev 	if (!ctx->optval)
17970d01da6aSStanislav Fomichev 		return -ENOMEM;
17980d01da6aSStanislav Fomichev 
17990d01da6aSStanislav Fomichev 	ctx->optval_end = ctx->optval + max_optlen;
18000d01da6aSStanislav Fomichev 
1801d8fe449aSStanislav Fomichev 	return max_optlen;
18020d01da6aSStanislav Fomichev }
18030d01da6aSStanislav Fomichev 
sockopt_free_buf(struct bpf_sockopt_kern * ctx,struct bpf_sockopt_buf * buf)180420f2505fSStanislav Fomichev static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
180520f2505fSStanislav Fomichev 			     struct bpf_sockopt_buf *buf)
18060d01da6aSStanislav Fomichev {
180720f2505fSStanislav Fomichev 	if (ctx->optval == buf->data)
180820f2505fSStanislav Fomichev 		return;
18090d01da6aSStanislav Fomichev 	kfree(ctx->optval);
18100d01da6aSStanislav Fomichev }
18110d01da6aSStanislav Fomichev 
sockopt_buf_allocated(struct bpf_sockopt_kern * ctx,struct bpf_sockopt_buf * buf)181220f2505fSStanislav Fomichev static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
181320f2505fSStanislav Fomichev 				  struct bpf_sockopt_buf *buf)
181420f2505fSStanislav Fomichev {
181520f2505fSStanislav Fomichev 	return ctx->optval != buf->data;
181620f2505fSStanislav Fomichev }
181720f2505fSStanislav Fomichev 
/*
 * Run the CGROUP_SETSOCKOPT programs attached to @sk's cgroup.
 *
 * The user-supplied option value is copied into a temporary buffer that the
 * programs may inspect and rewrite. On return:
 *   - a nonzero result from the programs is passed through unchanged;
 *   - 1 is returned when a program set optlen to -1 (bypass the kernel
 *     handler);
 *   - 0 is returned otherwise, with *@level and *@optname updated and, if a
 *     program left a nonzero optlen, *@optlen and *@kernel_optval pointing
 *     at a kmalloc'ed/kzalloc'ed buffer holding the value to use.
 * -EFAULT is returned for a failed user copy or an out-of-bounds optlen,
 * -ENOMEM when the export buffer cannot be allocated.
 */
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, sockptr_t optval,
				       int *optlen, char **kernel_optval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = *level,
		.optname = *optname,
	};
	int ret, max_optlen;
	void *p;

	/* Allocate a bit more than the initial user buffer for
	 * BPF program. The canonical use case is overriding
	 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
	 */
	max_optlen = sockopt_alloc_buf(&ctx, max_t(int, 16, *optlen), &buf);
	if (max_optlen < 0)
		return max_optlen;

	ctx.optlen = *optlen;

	if (copy_from_sockptr(ctx.optval, optval,
			      min(*optlen, max_optlen))) {
		ret = -EFAULT;
		goto out;
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
				    &ctx, bpf_prog_run, 0, NULL);
	release_sock(sk);

	if (ret)
		goto out;

	if (ctx.optlen == -1) {
		/* optlen set to -1, bypass kernel */
		ret = 1;
		goto out;
	}

	if (ctx.optlen > max_optlen || ctx.optlen < -1) {
		/* optlen is out of bounds */
		if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
			pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
				     ctx.optlen, max_optlen);
			ret = 0;
		} else {
			ret = -EFAULT;
		}
		goto out;
	}

	/* optlen within bounds, run kernel handler (ret is 0 here);
	 * export any potential modifications
	 */
	*level = ctx.level;
	*optname = ctx.optname;

	/* optlen == 0 from BPF indicates that we should
	 * use original userspace data.
	 */
	if (ctx.optlen == 0)
		goto out;

	*optlen = ctx.optlen;

	if (sockopt_buf_allocated(&ctx, &buf)) {
		/* export and don't free sockopt buf */
		*kernel_optval = ctx.optval;
		return 0;
	}

	/* We've used bpf_sockopt_kern->buf as an intermediary
	 * storage, but the BPF program indicates that we need
	 * to pass this data to the kernel setsockopt handler.
	 * No way to export on-stack buf, have to allocate a
	 * new buffer.
	 */
	p = kmalloc(ctx.optlen, GFP_USER);
	if (!p) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(p, ctx.optval, ctx.optlen);
	*kernel_optval = p;
	return 0;

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}
19080d01da6aSStanislav Fomichev 
/*
 * Run the CGROUP_GETSOCKOPT programs attached to @sk's cgroup after the
 * kernel getsockopt handler has produced @retval.
 *
 * When @retval is 0 the value and length the kernel wrote to userspace are
 * copied back into a temporary buffer so the programs can inspect and
 * modify them. If the programs leave a nonzero optlen, the (possibly
 * modified) value and length are copied out to @optval / @optlen again.
 *
 * Returns the result of running the programs, @retval when an oversized
 * program buffer is ignored, or -EFAULT on a failed user copy or an
 * out-of-bounds optlen. Errors from sockopt_alloc_buf() are passed through.
 */
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, sockptr_t optval,
				       sockptr_t optlen, int max_optlen,
				       int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.current_task = current,
	};
	int orig_optlen = max_optlen;
	int ret;

	ctx.optlen = max_optlen;
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
	if (max_optlen < 0)
		return max_optlen;

	if (retval == 0) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
		 * into our temporary buffer. Set optlen to the
		 * one that kernel returned as well to let
		 * BPF programs inspect the value.
		 */
		if (copy_from_sockptr(&ctx.optlen, optlen,
				      sizeof(ctx.optlen)))
			goto efault;

		if (ctx.optlen < 0)
			goto efault;

		orig_optlen = ctx.optlen;

		if (copy_from_sockptr(ctx.optval, optval,
				      min(ctx.optlen, max_optlen)))
			goto efault;
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	release_sock(sk);

	if (ret < 0)
		goto out;

	if (!sockptr_is_null(optval) &&
	    (ctx.optlen > max_optlen || ctx.optlen < 0)) {
		if (!(orig_optlen > PAGE_SIZE && ctx.optlen >= 0))
			goto efault;

		pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
			     ctx.optlen, max_optlen);
		ret = retval;
		goto out;
	}

	/* optlen == 0 from BPF: leave what the kernel wrote untouched. */
	if (ctx.optlen != 0) {
		if (!sockptr_is_null(optval) &&
		    copy_to_sockptr(optval, ctx.optval, ctx.optlen))
			goto efault;

		if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen)))
			goto efault;
	}
	goto out;

efault:
	ret = -EFAULT;
out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}
19939cacf81fSStanislav Fomichev 
__cgroup_bpf_run_filter_getsockopt_kern(struct sock * sk,int level,int optname,void * optval,int * optlen,int retval)19949cacf81fSStanislav Fomichev int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
19959cacf81fSStanislav Fomichev 					    int optname, void *optval,
19969cacf81fSStanislav Fomichev 					    int *optlen, int retval)
19979cacf81fSStanislav Fomichev {
19989cacf81fSStanislav Fomichev 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
19999cacf81fSStanislav Fomichev 	struct bpf_sockopt_kern ctx = {
20009cacf81fSStanislav Fomichev 		.sk = sk,
20019cacf81fSStanislav Fomichev 		.level = level,
20029cacf81fSStanislav Fomichev 		.optname = optname,
20039cacf81fSStanislav Fomichev 		.optlen = *optlen,
20049cacf81fSStanislav Fomichev 		.optval = optval,
20059cacf81fSStanislav Fomichev 		.optval_end = optval + *optlen,
2006c4dcfdd4SYiFei Zhu 		.current_task = current,
20079cacf81fSStanislav Fomichev 	};
20089cacf81fSStanislav Fomichev 	int ret;
20099cacf81fSStanislav Fomichev 
20109cacf81fSStanislav Fomichev 	/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
20119cacf81fSStanislav Fomichev 	 * user data back into BPF buffer when reval != 0. This is
20129cacf81fSStanislav Fomichev 	 * done as an optimization to avoid extra copy, assuming
20139cacf81fSStanislav Fomichev 	 * kernel won't populate the data in case of an error.
20149cacf81fSStanislav Fomichev 	 * Here we always pass the data and memset() should
20159cacf81fSStanislav Fomichev 	 * be called if that data shouldn't be "exported".
20169cacf81fSStanislav Fomichev 	 */
20179cacf81fSStanislav Fomichev 
2018055eb955SStanislav Fomichev 	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
2019d9d31cf8SStanislav Fomichev 				    &ctx, bpf_prog_run, retval, NULL);
2020c4dcfdd4SYiFei Zhu 	if (ret < 0)
2021f10d0596SYiFei Zhu 		return ret;
20229cacf81fSStanislav Fomichev 
20239cacf81fSStanislav Fomichev 	if (ctx.optlen > *optlen)
20249cacf81fSStanislav Fomichev 		return -EFAULT;
20259cacf81fSStanislav Fomichev 
20269cacf81fSStanislav Fomichev 	/* BPF programs can shrink the buffer, export the modifications.
20279cacf81fSStanislav Fomichev 	 */
20289cacf81fSStanislav Fomichev 	if (ctx.optlen != 0)
20299cacf81fSStanislav Fomichev 		*optlen = ctx.optlen;
20309cacf81fSStanislav Fomichev 
2031c4dcfdd4SYiFei Zhu 	return ret;
20329cacf81fSStanislav Fomichev }
20336705fea0SYueHaibing #endif
20340d01da6aSStanislav Fomichev 
sysctl_cpy_dir(const struct ctl_dir * dir,char ** bufp,size_t * lenp)2035808649fbSAndrey Ignatov static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
2036808649fbSAndrey Ignatov 			      size_t *lenp)
2037808649fbSAndrey Ignatov {
2038808649fbSAndrey Ignatov 	ssize_t tmp_ret = 0, ret;
2039808649fbSAndrey Ignatov 
2040808649fbSAndrey Ignatov 	if (dir->header.parent) {
2041808649fbSAndrey Ignatov 		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
2042808649fbSAndrey Ignatov 		if (tmp_ret < 0)
2043808649fbSAndrey Ignatov 			return tmp_ret;
2044808649fbSAndrey Ignatov 	}
2045808649fbSAndrey Ignatov 
2046808649fbSAndrey Ignatov 	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
2047808649fbSAndrey Ignatov 	if (ret < 0)
2048808649fbSAndrey Ignatov 		return ret;
2049808649fbSAndrey Ignatov 	*bufp += ret;
2050808649fbSAndrey Ignatov 	*lenp -= ret;
2051808649fbSAndrey Ignatov 	ret += tmp_ret;
2052808649fbSAndrey Ignatov 
2053808649fbSAndrey Ignatov 	/* Avoid leading slash. */
2054808649fbSAndrey Ignatov 	if (!ret)
2055808649fbSAndrey Ignatov 		return ret;
2056808649fbSAndrey Ignatov 
2057808649fbSAndrey Ignatov 	tmp_ret = strscpy(*bufp, "/", *lenp);
2058808649fbSAndrey Ignatov 	if (tmp_ret < 0)
2059808649fbSAndrey Ignatov 		return tmp_ret;
2060808649fbSAndrey Ignatov 	*bufp += tmp_ret;
2061808649fbSAndrey Ignatov 	*lenp -= tmp_ret;
2062808649fbSAndrey Ignatov 
2063808649fbSAndrey Ignatov 	return ret + tmp_ret;
2064808649fbSAndrey Ignatov }
2065808649fbSAndrey Ignatov 
/*
 * BPF helper: copy the name of the sysctl being accessed into @buf.
 * With BPF_F_SYSCTL_BASE_NAME in @flags only the table's procname is
 * copied; otherwise it is preceded by the directory path built by
 * sysctl_cpy_dir(). Returns the number of characters copied or a negative
 * error.
 */
BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t dir_len = 0, name_len;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;

		dir_len = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (dir_len < 0)
			return dir_len;
	}

	name_len = strscpy(buf, ctx->table->procname, buf_len);
	if (name_len < 0)
		return name_len;

	return dir_len + name_len;
}
2086808649fbSAndrey Ignatov 
2087808649fbSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
2088808649fbSAndrey Ignatov 	.func		= bpf_sysctl_get_name,
2089808649fbSAndrey Ignatov 	.gpl_only	= false,
2090808649fbSAndrey Ignatov 	.ret_type	= RET_INTEGER,
2091808649fbSAndrey Ignatov 	.arg1_type	= ARG_PTR_TO_CTX,
2092808649fbSAndrey Ignatov 	.arg2_type	= ARG_PTR_TO_MEM,
2093808649fbSAndrey Ignatov 	.arg3_type	= ARG_CONST_SIZE,
2094808649fbSAndrey Ignatov 	.arg4_type	= ARG_ANYTHING,
2095808649fbSAndrey Ignatov };
2096808649fbSAndrey Ignatov 
/*
 * Copy a sysctl value of @src_len bytes from @src into the @dst_len byte
 * buffer @dst.
 *
 * When @dst is strictly larger than the value, the value is copied and the
 * remainder of @dst is zero-filled; @src_len is returned. Otherwise the
 * value is truncated to fit, the last byte of @dst is set to '\0' and
 * -E2BIG is returned.
 *
 * Returns -EINVAL if @dst is NULL, or if there is no source value (in which
 * case @dst is zeroed), and -E2BIG if @dst_len is 0.
 */
static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	if (dst_len > src_len) {
		/* Whole value fits with room to spare: copy and pad. */
		memcpy(dst, src, src_len);
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	/* Truncate, keeping the result NUL-terminated. */
	memcpy(dst, src, dst_len);
	dst[dst_len - 1] = '\0';

	return -E2BIG;
}
21221d11b301SAndrey Ignatov 
/*
 * BPF helper: copy the current sysctl value stored in the context
 * (ctx->cur_val / ctx->cur_len) into @buf; see copy_sysctl_value() for the
 * return values.
 */
BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	char *cur_val = ctx->cur_val;
	size_t cur_len = ctx->cur_len;

	return copy_sysctl_value(buf, buf_len, cur_val, cur_len);
}
21281d11b301SAndrey Ignatov 
21291d11b301SAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
21301d11b301SAndrey Ignatov 	.func		= bpf_sysctl_get_current_value,
21311d11b301SAndrey Ignatov 	.gpl_only	= false,
21321d11b301SAndrey Ignatov 	.ret_type	= RET_INTEGER,
21331d11b301SAndrey Ignatov 	.arg1_type	= ARG_PTR_TO_CTX,
21341d11b301SAndrey Ignatov 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
21351d11b301SAndrey Ignatov 	.arg3_type	= ARG_CONST_SIZE,
21361d11b301SAndrey Ignatov };
21371d11b301SAndrey Ignatov 
/*
 * BPF helper: copy the value being written to the sysctl
 * (ctx->new_val / ctx->new_len) into @buf. On a read access there is no new
 * value: @buf is zeroed (when usable) and -EINVAL is returned.
 */
BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (ctx->write)
		return copy_sysctl_value(buf, buf_len, ctx->new_val,
					 ctx->new_len);

	if (buf && buf_len)
		memset(buf, '\0', buf_len);

	return -EINVAL;
}
21484e63acdfSAndrey Ignatov 
21494e63acdfSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
21504e63acdfSAndrey Ignatov 	.func		= bpf_sysctl_get_new_value,
21514e63acdfSAndrey Ignatov 	.gpl_only	= false,
21524e63acdfSAndrey Ignatov 	.ret_type	= RET_INTEGER,
21534e63acdfSAndrey Ignatov 	.arg1_type	= ARG_PTR_TO_CTX,
21544e63acdfSAndrey Ignatov 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
21554e63acdfSAndrey Ignatov 	.arg3_type	= ARG_CONST_SIZE,
21564e63acdfSAndrey Ignatov };
21574e63acdfSAndrey Ignatov 
/*
 * BPF helper: replace the value being written to the sysctl with the
 * @buf_len bytes at @buf and mark the context as updated.
 *
 * Returns 0 on success, -EINVAL when this is not a write, no new-value
 * buffer is available or @buf is empty, and -E2BIG when @buf_len is
 * PAGE_SIZE or more.
 */
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	/* Only a write access with a usable new-value buffer can be overridden. */
	if (!ctx->write || !ctx->new_val || !ctx->new_len)
		return -EINVAL;

	if (!buf || !buf_len)
		return -EINVAL;

	if (buf_len >= PAGE_SIZE)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	ctx->new_updated = 1;

	return 0;
}
21734e63acdfSAndrey Ignatov 
21744e63acdfSAndrey Ignatov static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
21754e63acdfSAndrey Ignatov 	.func		= bpf_sysctl_set_new_value,
21764e63acdfSAndrey Ignatov 	.gpl_only	= false,
21774e63acdfSAndrey Ignatov 	.ret_type	= RET_INTEGER,
21784e63acdfSAndrey Ignatov 	.arg1_type	= ARG_PTR_TO_CTX,
2179216e3cd2SHao Luo 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
21804e63acdfSAndrey Ignatov 	.arg3_type	= ARG_CONST_SIZE,
21814e63acdfSAndrey Ignatov };
21824e63acdfSAndrey Ignatov 
21837b146cebSAndrey Ignatov static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)21847b146cebSAndrey Ignatov sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
21857b146cebSAndrey Ignatov {
2186dea6a4e1SStanislav Fomichev 	const struct bpf_func_proto *func_proto;
2187dea6a4e1SStanislav Fomichev 
2188dea6a4e1SStanislav Fomichev 	func_proto = cgroup_common_func_proto(func_id, prog);
2189dea6a4e1SStanislav Fomichev 	if (func_proto)
2190dea6a4e1SStanislav Fomichev 		return func_proto;
2191dea6a4e1SStanislav Fomichev 
2192dea6a4e1SStanislav Fomichev 	func_proto = cgroup_current_func_proto(func_id, prog);
2193dea6a4e1SStanislav Fomichev 	if (func_proto)
2194dea6a4e1SStanislav Fomichev 		return func_proto;
2195dea6a4e1SStanislav Fomichev 
2196808649fbSAndrey Ignatov 	switch (func_id) {
2197808649fbSAndrey Ignatov 	case BPF_FUNC_sysctl_get_name:
2198808649fbSAndrey Ignatov 		return &bpf_sysctl_get_name_proto;
21991d11b301SAndrey Ignatov 	case BPF_FUNC_sysctl_get_current_value:
22001d11b301SAndrey Ignatov 		return &bpf_sysctl_get_current_value_proto;
22014e63acdfSAndrey Ignatov 	case BPF_FUNC_sysctl_get_new_value:
22024e63acdfSAndrey Ignatov 		return &bpf_sysctl_get_new_value_proto;
22034e63acdfSAndrey Ignatov 	case BPF_FUNC_sysctl_set_new_value:
22044e63acdfSAndrey Ignatov 		return &bpf_sysctl_set_new_value_proto;
22055e0bc308SDmitrii Banshchikov 	case BPF_FUNC_ktime_get_coarse_ns:
22065e0bc308SDmitrii Banshchikov 		return &bpf_ktime_get_coarse_ns_proto;
2207dea6a4e1SStanislav Fomichev 	case BPF_FUNC_perf_event_output:
2208dea6a4e1SStanislav Fomichev 		return &bpf_event_output_data_proto;
2209808649fbSAndrey Ignatov 	default:
2210dea6a4e1SStanislav Fomichev 		return bpf_base_func_proto(func_id);
22117b146cebSAndrey Ignatov 	}
2212808649fbSAndrey Ignatov }
22137b146cebSAndrey Ignatov 
sysctl_is_valid_access(int off,int size,enum bpf_access_type type,const struct bpf_prog * prog,struct bpf_insn_access_aux * info)22147b146cebSAndrey Ignatov static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
22157b146cebSAndrey Ignatov 				   const struct bpf_prog *prog,
22167b146cebSAndrey Ignatov 				   struct bpf_insn_access_aux *info)
22177b146cebSAndrey Ignatov {
22187b146cebSAndrey Ignatov 	const int size_default = sizeof(__u32);
22197b146cebSAndrey Ignatov 
2220e1550bfeSAndrey Ignatov 	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
22217b146cebSAndrey Ignatov 		return false;
22227b146cebSAndrey Ignatov 
22237b146cebSAndrey Ignatov 	switch (off) {
22247541c87cSIlya Leoshkevich 	case bpf_ctx_range(struct bpf_sysctl, write):
2225e1550bfeSAndrey Ignatov 		if (type != BPF_READ)
2226e1550bfeSAndrey Ignatov 			return false;
22277b146cebSAndrey Ignatov 		bpf_ctx_record_field_size(info, size_default);
22287b146cebSAndrey Ignatov 		return bpf_ctx_narrow_access_ok(off, size, size_default);
22297541c87cSIlya Leoshkevich 	case bpf_ctx_range(struct bpf_sysctl, file_pos):
2230e1550bfeSAndrey Ignatov 		if (type == BPF_READ) {
2231e1550bfeSAndrey Ignatov 			bpf_ctx_record_field_size(info, size_default);
2232e1550bfeSAndrey Ignatov 			return bpf_ctx_narrow_access_ok(off, size, size_default);
2233e1550bfeSAndrey Ignatov 		} else {
2234e1550bfeSAndrey Ignatov 			return size == size_default;
2235e1550bfeSAndrey Ignatov 		}
22367b146cebSAndrey Ignatov 	default:
22377b146cebSAndrey Ignatov 		return false;
22387b146cebSAndrey Ignatov 	}
22397b146cebSAndrey Ignatov }
22407b146cebSAndrey Ignatov 
/*
 * Translate one access to the program-visible struct bpf_sysctl into
 * instructions operating on struct bpf_sysctl_kern.
 *
 * @type:        BPF_READ or BPF_WRITE
 * @si:          the original instruction being rewritten
 * @insn_buf:    output buffer for the replacement instructions
 * @prog:        unused here
 * @target_size: receives the size of the kernel-side field being accessed
 *
 * Returns the number of instructions written to @insn_buf (0 when @si->off
 * matches neither handled field).
 */
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	u32 cnt = 0;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		/* Plain load of the kernel-side 'write' member. */
		insn_buf[cnt++] = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       sizeof_field(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	case offsetof(struct bpf_sysctl, file_pos): {
		const int ppos_off = offsetof(struct bpf_sysctl_kern, ppos);
		const int spill_off = offsetof(struct bpf_sysctl_kern, tmp_reg);

		/*
		 * The kernel context holds a pointer (ppos), so the value
		 * has to be reached through it with a second memory access.
		 */
		if (type == BPF_WRITE) {
			/*
			 * A store may clobber neither src_reg nor dst_reg,
			 * so borrow a third register: start at R9 and step
			 * down (at most twice) past the two in use.
			 */
			int scratch = BPF_REG_9;
			int pass;

			for (pass = 0; pass < 2; pass++) {
				if (si->src_reg == scratch ||
				    si->dst_reg == scratch)
					scratch--;
			}

			/* Save the borrowed register in the context. */
			insn_buf[cnt++] = BPF_STX_MEM(
				BPF_DW, si->dst_reg, scratch, spill_off);
			/* scratch = ctx->ppos */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				scratch, si->dst_reg, ppos_off);
			/* 32-bit store through ppos, keeping the insn class. */
			insn_buf[cnt++] = BPF_RAW_INSN(
				BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
				scratch, si->src_reg,
				bpf_ctx_narrow_access_offset(
					0, sizeof(u32), sizeof(loff_t)),
				si->imm);
			/* Restore the borrowed register. */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_DW, scratch, si->dst_reg, spill_off);
		} else {
			u32 load_bytes = bpf_size_to_bytes(BPF_SIZE(si->code));

			/* dst = ctx->ppos, then dst = *dst at the read width. */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				si->dst_reg, si->src_reg, ppos_off);
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
				bpf_ctx_narrow_access_offset(
					0, load_bytes, sizeof(loff_t)));
		}
		*target_size = sizeof(u32);
		break;
	}
	}

	return cnt;
}
23047b146cebSAndrey Ignatov 
23057b146cebSAndrey Ignatov const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
23067b146cebSAndrey Ignatov 	.get_func_proto		= sysctl_func_proto,
23077b146cebSAndrey Ignatov 	.is_valid_access	= sysctl_is_valid_access,
23087b146cebSAndrey Ignatov 	.convert_ctx_access	= sysctl_convert_ctx_access,
23097b146cebSAndrey Ignatov };
23107b146cebSAndrey Ignatov 
23117b146cebSAndrey Ignatov const struct bpf_prog_ops cg_sysctl_prog_ops = {
23127b146cebSAndrey Ignatov };
23130d01da6aSStanislav Fomichev 
#ifdef CONFIG_NET
/*
 * Helper: return the cookie of a network namespace.
 *
 * With a non-NULL @ctx the namespace is that of ctx->sk; with a NULL @ctx
 * the cookie of init_net is returned.
 */
BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
{
	const struct net *ns = &init_net;

	if (ctx)
		ns = sock_net(ctx->sk);

	return ns->net_cookie;
}

/* Prototype: integer result, single argument is the context or NULL. */
static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
	.func		= bpf_get_netns_cookie_sockopt,
	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
	.ret_type	= RET_INTEGER,
	.gpl_only	= false,
};
#endif
2329f1248deeSStanislav Fomichev 
23300d01da6aSStanislav Fomichev static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)23310d01da6aSStanislav Fomichev cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
23320d01da6aSStanislav Fomichev {
2333dea6a4e1SStanislav Fomichev 	const struct bpf_func_proto *func_proto;
2334dea6a4e1SStanislav Fomichev 
2335dea6a4e1SStanislav Fomichev 	func_proto = cgroup_common_func_proto(func_id, prog);
2336dea6a4e1SStanislav Fomichev 	if (func_proto)
2337dea6a4e1SStanislav Fomichev 		return func_proto;
2338dea6a4e1SStanislav Fomichev 
2339dea6a4e1SStanislav Fomichev 	func_proto = cgroup_current_func_proto(func_id, prog);
2340dea6a4e1SStanislav Fomichev 	if (func_proto)
2341dea6a4e1SStanislav Fomichev 		return func_proto;
2342dea6a4e1SStanislav Fomichev 
23430d01da6aSStanislav Fomichev 	switch (func_id) {
23446705fea0SYueHaibing #ifdef CONFIG_NET
2345f1248deeSStanislav Fomichev 	case BPF_FUNC_get_netns_cookie:
2346f1248deeSStanislav Fomichev 		return &bpf_get_netns_cookie_sockopt_proto;
23470d01da6aSStanislav Fomichev 	case BPF_FUNC_sk_storage_get:
23480d01da6aSStanislav Fomichev 		return &bpf_sk_storage_get_proto;
23490d01da6aSStanislav Fomichev 	case BPF_FUNC_sk_storage_delete:
23500d01da6aSStanislav Fomichev 		return &bpf_sk_storage_delete_proto;
23512c531639SPrankur Gupta 	case BPF_FUNC_setsockopt:
23522c531639SPrankur Gupta 		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
23532c531639SPrankur Gupta 			return &bpf_sk_setsockopt_proto;
23542c531639SPrankur Gupta 		return NULL;
23552c531639SPrankur Gupta 	case BPF_FUNC_getsockopt:
23562c531639SPrankur Gupta 		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
23572c531639SPrankur Gupta 			return &bpf_sk_getsockopt_proto;
23582c531639SPrankur Gupta 		return NULL;
23596705fea0SYueHaibing #endif
23600d01da6aSStanislav Fomichev #ifdef CONFIG_INET
23610d01da6aSStanislav Fomichev 	case BPF_FUNC_tcp_sock:
23620d01da6aSStanislav Fomichev 		return &bpf_tcp_sock_proto;
23630d01da6aSStanislav Fomichev #endif
2364dea6a4e1SStanislav Fomichev 	case BPF_FUNC_perf_event_output:
2365dea6a4e1SStanislav Fomichev 		return &bpf_event_output_data_proto;
23660d01da6aSStanislav Fomichev 	default:
2367dea6a4e1SStanislav Fomichev 		return bpf_base_func_proto(func_id);
23680d01da6aSStanislav Fomichev 	}
23690d01da6aSStanislav Fomichev }
23700d01da6aSStanislav Fomichev 
cg_sockopt_is_valid_access(int off,int size,enum bpf_access_type type,const struct bpf_prog * prog,struct bpf_insn_access_aux * info)23710d01da6aSStanislav Fomichev static bool cg_sockopt_is_valid_access(int off, int size,
23720d01da6aSStanislav Fomichev 				       enum bpf_access_type type,
23730d01da6aSStanislav Fomichev 				       const struct bpf_prog *prog,
23740d01da6aSStanislav Fomichev 				       struct bpf_insn_access_aux *info)
23750d01da6aSStanislav Fomichev {
23760d01da6aSStanislav Fomichev 	const int size_default = sizeof(__u32);
23770d01da6aSStanislav Fomichev 
23780d01da6aSStanislav Fomichev 	if (off < 0 || off >= sizeof(struct bpf_sockopt))
23790d01da6aSStanislav Fomichev 		return false;
23800d01da6aSStanislav Fomichev 
23810d01da6aSStanislav Fomichev 	if (off % size != 0)
23820d01da6aSStanislav Fomichev 		return false;
23830d01da6aSStanislav Fomichev 
23840d01da6aSStanislav Fomichev 	if (type == BPF_WRITE) {
23850d01da6aSStanislav Fomichev 		switch (off) {
23860d01da6aSStanislav Fomichev 		case offsetof(struct bpf_sockopt, retval):
23870d01da6aSStanislav Fomichev 			if (size != size_default)
23880d01da6aSStanislav Fomichev 				return false;
23890d01da6aSStanislav Fomichev 			return prog->expected_attach_type ==
23900d01da6aSStanislav Fomichev 				BPF_CGROUP_GETSOCKOPT;
23910d01da6aSStanislav Fomichev 		case offsetof(struct bpf_sockopt, optname):
2392df561f66SGustavo A. R. Silva 			fallthrough;
23930d01da6aSStanislav Fomichev 		case offsetof(struct bpf_sockopt, level):
23940d01da6aSStanislav Fomichev 			if (size != size_default)
23950d01da6aSStanislav Fomichev 				return false;
23960d01da6aSStanislav Fomichev 			return prog->expected_attach_type ==
23970d01da6aSStanislav Fomichev 				BPF_CGROUP_SETSOCKOPT;
23980d01da6aSStanislav Fomichev 		case offsetof(struct bpf_sockopt, optlen):
23990d01da6aSStanislav Fomichev 			return size == size_default;
24000d01da6aSStanislav Fomichev 		default:
24010d01da6aSStanislav Fomichev 			return false;
24020d01da6aSStanislav Fomichev 		}
24030d01da6aSStanislav Fomichev 	}
24040d01da6aSStanislav Fomichev 
24050d01da6aSStanislav Fomichev 	switch (off) {
24060d01da6aSStanislav Fomichev 	case offsetof(struct bpf_sockopt, sk):
24070d01da6aSStanislav Fomichev 		if (size != sizeof(__u64))
24080d01da6aSStanislav Fomichev 			return false;
24090d01da6aSStanislav Fomichev 		info->reg_type = PTR_TO_SOCKET;
24100d01da6aSStanislav Fomichev 		break;
24110d01da6aSStanislav Fomichev 	case offsetof(struct bpf_sockopt, optval):
24120d01da6aSStanislav Fomichev 		if (size != sizeof(__u64))
24130d01da6aSStanislav Fomichev 			return false;
24140d01da6aSStanislav Fomichev 		info->reg_type = PTR_TO_PACKET;
24150d01da6aSStanislav Fomichev 		break;
24160d01da6aSStanislav Fomichev 	case offsetof(struct bpf_sockopt, optval_end):
24170d01da6aSStanislav Fomichev 		if (size != sizeof(__u64))
24180d01da6aSStanislav Fomichev 			return false;
24190d01da6aSStanislav Fomichev 		info->reg_type = PTR_TO_PACKET_END;
24200d01da6aSStanislav Fomichev 		break;
24210d01da6aSStanislav Fomichev 	case offsetof(struct bpf_sockopt, retval):
24220d01da6aSStanislav Fomichev 		if (size != size_default)
24230d01da6aSStanislav Fomichev 			return false;
24240d01da6aSStanislav Fomichev 		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
24250d01da6aSStanislav Fomichev 	default:
24260d01da6aSStanislav Fomichev 		if (size != size_default)
24270d01da6aSStanislav Fomichev 			return false;
24280d01da6aSStanislav Fomichev 		break;
24290d01da6aSStanislav Fomichev 	}
24300d01da6aSStanislav Fomichev 	return true;
24310d01da6aSStanislav Fomichev }
24320d01da6aSStanislav Fomichev 
/*
 * Build a load of member F of struct bpf_sockopt_kern into si->dst_reg,
 * using si->src_reg as the base. 'si' is taken from the expansion site.
 */
#define CG_SOCKOPT_READ_FIELD(F)					\
	BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),	\
		    si->dst_reg, si->src_reg,				\
		    offsetof(struct bpf_sockopt_kern, F))

/*
 * Build a store to member F of struct bpf_sockopt_kern. The instruction
 * class and immediate are copied from si, which is taken from the
 * expansion site; the width is that of the kernel-side member.
 */
#define CG_SOCKOPT_WRITE_FIELD(F)					\
	BPF_RAW_INSN((BPF_CLASS(si->code) | BPF_MEM |			\
		      BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F)),	\
		     si->dst_reg, si->src_reg,				\
		     offsetof(struct bpf_sockopt_kern, F),		\
		     si->imm)

24440d80a619SEduard Zingerman 
/*
 * Translate one access to the program-visible struct bpf_sockopt into
 * instructions operating on struct bpf_sockopt_kern.
 *
 * @type:        BPF_READ or BPF_WRITE
 * @si:          the original instruction being rewritten
 * @insn_buf:    output buffer for the replacement instructions
 * @prog:        unused here
 * @target_size: unused here
 *
 * Most fields map one-to-one onto a member of bpf_sockopt_kern. retval is
 * reached through current_task -> bpf_ctx instead.
 *
 * Returns the number of instructions written to @insn_buf (0 when @si->off
 * matches no handled field).
 */
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{
	u32 cnt = 0;

	switch (si->off) {
	case offsetof(struct bpf_sockopt, sk):
		insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(sk);
		break;
	case offsetof(struct bpf_sockopt, level):
		if (type != BPF_WRITE)
			insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(level);
		else
			insn_buf[cnt++] = CG_SOCKOPT_WRITE_FIELD(level);
		break;
	case offsetof(struct bpf_sockopt, optname):
		if (type != BPF_WRITE)
			insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(optname);
		else
			insn_buf[cnt++] = CG_SOCKOPT_WRITE_FIELD(optname);
		break;
	case offsetof(struct bpf_sockopt, optlen):
		if (type != BPF_WRITE)
			insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(optlen);
		else
			insn_buf[cnt++] = CG_SOCKOPT_WRITE_FIELD(optlen);
		break;
	case offsetof(struct bpf_sockopt, retval): {
		const int task_off = offsetof(struct bpf_sockopt_kern,
					      current_task);
		const int spill_off = offsetof(struct bpf_sockopt_kern,
					       tmp_reg);
		const int run_ctx_off = offsetof(struct task_struct, bpf_ctx);
		const int retval_off = offsetof(struct bpf_cg_run_ctx, retval);

		/*
		 * The pointer loaded from task_struct.bpf_ctx is used with
		 * offsets of struct bpf_cg_run_ctx, so run_ctx must be that
		 * struct's first member.
		 */
		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);

		if (type == BPF_WRITE) {
			/*
			 * A store may clobber neither src_reg nor dst_reg,
			 * so borrow a third register: start at R9 and step
			 * down (at most twice) past the two in use.
			 */
			int scratch = BPF_REG_9;
			int pass;

			for (pass = 0; pass < 2; pass++) {
				if (si->src_reg == scratch ||
				    si->dst_reg == scratch)
					scratch--;
			}

			/* Save the borrowed register in the context. */
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, si->dst_reg,
						      scratch, spill_off);
			/* scratch = ctx->current_task */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sockopt_kern,
						 current_task),
				scratch, si->dst_reg, task_off);
			/* scratch = scratch->bpf_ctx */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
				scratch, scratch, run_ctx_off);
			/* Store to ->retval, keeping the insn class. */
			insn_buf[cnt++] = BPF_RAW_INSN(
				BPF_CLASS(si->code) | BPF_MEM |
				BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
				scratch, si->src_reg, retval_off, si->imm);
			/* Restore the borrowed register. */
			insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, scratch,
						      si->dst_reg, spill_off);
		} else {
			/* dst = ctx->current_task->bpf_ctx->retval */
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sockopt_kern,
						 current_task),
				si->dst_reg, si->src_reg, task_off);
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
				si->dst_reg, si->dst_reg, run_ctx_off);
			insn_buf[cnt++] = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
				si->dst_reg, si->dst_reg, retval_off);
		}
		break;
	}
	case offsetof(struct bpf_sockopt, optval):
		insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(optval);
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		insn_buf[cnt++] = CG_SOCKOPT_READ_FIELD(optval_end);
		break;
	}

	return cnt;
}
25220d01da6aSStanislav Fomichev 
/*
 * Prologue generator for cgroup sockopt programs.
 *
 * No prologue instructions are emitted: per the original author's note the
 * sockopt data is kzalloc'ed, so nothing needs to be set up here. All three
 * parameters are unused.
 *
 * Returns 0, the number of instructions written to @insn_buf.
 */
static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{
	return 0;
}
25310d01da6aSStanislav Fomichev 
25320d01da6aSStanislav Fomichev const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
25330d01da6aSStanislav Fomichev 	.get_func_proto		= cg_sockopt_func_proto,
25340d01da6aSStanislav Fomichev 	.is_valid_access	= cg_sockopt_is_valid_access,
25350d01da6aSStanislav Fomichev 	.convert_ctx_access	= cg_sockopt_convert_ctx_access,
25360d01da6aSStanislav Fomichev 	.gen_prologue		= cg_sockopt_get_prologue,
25370d01da6aSStanislav Fomichev };
25380d01da6aSStanislav Fomichev 
25390d01da6aSStanislav Fomichev const struct bpf_prog_ops cg_sockopt_prog_ops = {
25400d01da6aSStanislav Fomichev };
2541dea6a4e1SStanislav Fomichev 
2542dea6a4e1SStanislav Fomichev /* Common helpers for cgroup hooks. */
2543dea6a4e1SStanislav Fomichev const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)2544dea6a4e1SStanislav Fomichev cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2545dea6a4e1SStanislav Fomichev {
2546dea6a4e1SStanislav Fomichev 	switch (func_id) {
2547dea6a4e1SStanislav Fomichev 	case BPF_FUNC_get_local_storage:
2548dea6a4e1SStanislav Fomichev 		return &bpf_get_local_storage_proto;
2549dea6a4e1SStanislav Fomichev 	case BPF_FUNC_get_retval:
2550bed89185SStanislav Fomichev 		switch (prog->expected_attach_type) {
2551bed89185SStanislav Fomichev 		case BPF_CGROUP_INET_INGRESS:
2552bed89185SStanislav Fomichev 		case BPF_CGROUP_INET_EGRESS:
2553bed89185SStanislav Fomichev 		case BPF_CGROUP_SOCK_OPS:
2554bed89185SStanislav Fomichev 		case BPF_CGROUP_UDP4_RECVMSG:
2555bed89185SStanislav Fomichev 		case BPF_CGROUP_UDP6_RECVMSG:
2556bed89185SStanislav Fomichev 		case BPF_CGROUP_INET4_GETPEERNAME:
2557bed89185SStanislav Fomichev 		case BPF_CGROUP_INET6_GETPEERNAME:
2558bed89185SStanislav Fomichev 		case BPF_CGROUP_INET4_GETSOCKNAME:
2559bed89185SStanislav Fomichev 		case BPF_CGROUP_INET6_GETSOCKNAME:
2560bed89185SStanislav Fomichev 			return NULL;
2561bed89185SStanislav Fomichev 		default:
2562dea6a4e1SStanislav Fomichev 			return &bpf_get_retval_proto;
2563bed89185SStanislav Fomichev 		}
2564dea6a4e1SStanislav Fomichev 	case BPF_FUNC_set_retval:
2565bed89185SStanislav Fomichev 		switch (prog->expected_attach_type) {
2566bed89185SStanislav Fomichev 		case BPF_CGROUP_INET_INGRESS:
2567bed89185SStanislav Fomichev 		case BPF_CGROUP_INET_EGRESS:
2568bed89185SStanislav Fomichev 		case BPF_CGROUP_SOCK_OPS:
2569bed89185SStanislav Fomichev 		case BPF_CGROUP_UDP4_RECVMSG:
2570bed89185SStanislav Fomichev 		case BPF_CGROUP_UDP6_RECVMSG:
2571bed89185SStanislav Fomichev 		case BPF_CGROUP_INET4_GETPEERNAME:
2572bed89185SStanislav Fomichev 		case BPF_CGROUP_INET6_GETPEERNAME:
2573bed89185SStanislav Fomichev 		case BPF_CGROUP_INET4_GETSOCKNAME:
2574bed89185SStanislav Fomichev 		case BPF_CGROUP_INET6_GETSOCKNAME:
2575bed89185SStanislav Fomichev 			return NULL;
2576bed89185SStanislav Fomichev 		default:
2577dea6a4e1SStanislav Fomichev 			return &bpf_set_retval_proto;
2578bed89185SStanislav Fomichev 		}
2579dea6a4e1SStanislav Fomichev 	default:
2580dea6a4e1SStanislav Fomichev 		return NULL;
2581dea6a4e1SStanislav Fomichev 	}
2582dea6a4e1SStanislav Fomichev }
2583dea6a4e1SStanislav Fomichev 
2584dea6a4e1SStanislav Fomichev /* Common helpers for cgroup hooks with valid process context. */
2585dea6a4e1SStanislav Fomichev const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)2586dea6a4e1SStanislav Fomichev cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2587dea6a4e1SStanislav Fomichev {
2588dea6a4e1SStanislav Fomichev 	switch (func_id) {
2589dea6a4e1SStanislav Fomichev 	case BPF_FUNC_get_current_uid_gid:
2590dea6a4e1SStanislav Fomichev 		return &bpf_get_current_uid_gid_proto;
2591bed89185SStanislav Fomichev 	case BPF_FUNC_get_current_pid_tgid:
2592bed89185SStanislav Fomichev 		return &bpf_get_current_pid_tgid_proto;
2593bed89185SStanislav Fomichev 	case BPF_FUNC_get_current_comm:
2594bed89185SStanislav Fomichev 		return &bpf_get_current_comm_proto;
2595bed89185SStanislav Fomichev #ifdef CONFIG_CGROUP_NET_CLASSID
2596bed89185SStanislav Fomichev 	case BPF_FUNC_get_cgroup_classid:
2597bed89185SStanislav Fomichev 		return &bpf_get_cgroup_classid_curr_proto;
2598bed89185SStanislav Fomichev #endif
2599dea6a4e1SStanislav Fomichev 	default:
2600dea6a4e1SStanislav Fomichev 		return NULL;
2601dea6a4e1SStanislav Fomichev 	}
2602dea6a4e1SStanislav Fomichev }
2603