xref: /openbmc/linux/kernel/bpf/helpers.c (revision 1a931707ad4a46e79d4ecfee56d8f6e8cc8d4f28)
15b497af4SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2d0003ec0SAlexei Starovoitov /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3d0003ec0SAlexei Starovoitov  */
4d0003ec0SAlexei Starovoitov #include <linux/bpf.h>
53bd916eeSYonghong Song #include <linux/btf.h>
6aef2fedaSJakub Kicinski #include <linux/bpf-cgroup.h>
7fda01efcSDavid Vernet #include <linux/cgroup.h>
8d0003ec0SAlexei Starovoitov #include <linux/rcupdate.h>
903e69b50SDaniel Borkmann #include <linux/random.h>
10c04167ceSDaniel Borkmann #include <linux/smp.h>
112d0e30c3SDaniel Borkmann #include <linux/topology.h>
1217ca8cbfSDaniel Borkmann #include <linux/ktime.h>
13ffeedafbSAlexei Starovoitov #include <linux/sched.h>
14ffeedafbSAlexei Starovoitov #include <linux/uidgid.h>
15f3694e00SDaniel Borkmann #include <linux/filter.h>
16d7a4cb9bSAndrey Ignatov #include <linux/ctype.h>
175576b991SMartin KaFai Lau #include <linux/jiffies.h>
18b4490c5cSCarlos Neira #include <linux/pid_namespace.h>
1947e34cb7SDave Marchevsky #include <linux/poison.h>
20b4490c5cSCarlos Neira #include <linux/proc_ns.h>
21ff40e510SDaniel Borkmann #include <linux/sched/task.h>
22376040e4SKenny Yu #include <linux/security.h>
23958cf2e2SKumar Kartikeya Dwivedi #include <linux/btf_ids.h>
24d7a4cb9bSAndrey Ignatov #include <linux/bpf_mem_alloc.h>
25d7a4cb9bSAndrey Ignatov 
26d0003ec0SAlexei Starovoitov #include "../../lib/kstrtox.h"
27d0003ec0SAlexei Starovoitov 
28d0003ec0SAlexei Starovoitov /* If a kernel subsystem allows eBPF programs to call this function, it should
29d0003ec0SAlexei Starovoitov  * return bpf_map_lookup_elem_proto from its own verifier_ops->get_func_proto()
30d0003ec0SAlexei Starovoitov  * callback, so that the verifier can properly check the arguments.
31d0003ec0SAlexei Starovoitov  *
32d0003ec0SAlexei Starovoitov  * Different map implementations rely on RCU in their lookup/update/delete
33d0003ec0SAlexei Starovoitov  * map methods, therefore eBPF programs must run under an RCU lock whenever
34d0003ec0SAlexei Starovoitov  * a program is allowed to access maps; hence the rcu_read_lock_held() or
35d0003ec0SAlexei Starovoitov  * rcu_read_lock_trace_held() checks in all three functions.
36f3694e00SDaniel Borkmann  */
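/*
 * Illustrative sketch (not part of this file): a subsystem's
 * bpf_verifier_ops::get_func_proto callback would typically expose these map
 * helpers by returning the protos defined below. The callback name here is
 * hypothetical; only the proto symbols come from this file.
 *
 *	static const struct bpf_func_proto *
 *	example_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		case BPF_FUNC_map_delete_elem:
 *			return &bpf_map_delete_elem_proto;
 *		default:
 *			return NULL;
 *		}
 *	}
 */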
37d0003ec0SAlexei Starovoitov BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
38694cea39SToke Høiland-Jørgensen {
39f3694e00SDaniel Borkmann 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
40d0003ec0SAlexei Starovoitov 		     !rcu_read_lock_bh_held());
41d0003ec0SAlexei Starovoitov 	return (unsigned long) map->ops->map_lookup_elem(map, key);
42a2c83fffSDaniel Borkmann }
43d0003ec0SAlexei Starovoitov 
44d0003ec0SAlexei Starovoitov const struct bpf_func_proto bpf_map_lookup_elem_proto = {
4536bbef52SDaniel Borkmann 	.func		= bpf_map_lookup_elem,
46d0003ec0SAlexei Starovoitov 	.gpl_only	= false,
47d0003ec0SAlexei Starovoitov 	.pkt_access	= true,
48d0003ec0SAlexei Starovoitov 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
49d0003ec0SAlexei Starovoitov 	.arg1_type	= ARG_CONST_MAP_PTR,
50d0003ec0SAlexei Starovoitov 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
51f3694e00SDaniel Borkmann };
52f3694e00SDaniel Borkmann 
53d0003ec0SAlexei Starovoitov BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
54694cea39SToke Høiland-Jørgensen 	   void *, value, u64, flags)
55f3694e00SDaniel Borkmann {
56d0003ec0SAlexei Starovoitov 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
57d0003ec0SAlexei Starovoitov 		     !rcu_read_lock_bh_held());
58a2c83fffSDaniel Borkmann 	return map->ops->map_update_elem(map, key, value, flags);
59d0003ec0SAlexei Starovoitov }
60d0003ec0SAlexei Starovoitov 
6136bbef52SDaniel Borkmann const struct bpf_func_proto bpf_map_update_elem_proto = {
62d0003ec0SAlexei Starovoitov 	.func		= bpf_map_update_elem,
63d0003ec0SAlexei Starovoitov 	.gpl_only	= false,
64d0003ec0SAlexei Starovoitov 	.pkt_access	= true,
65d0003ec0SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
66d0003ec0SAlexei Starovoitov 	.arg1_type	= ARG_CONST_MAP_PTR,
67d0003ec0SAlexei Starovoitov 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
68d0003ec0SAlexei Starovoitov 	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
69f3694e00SDaniel Borkmann 	.arg4_type	= ARG_ANYTHING,
70d0003ec0SAlexei Starovoitov };
71694cea39SToke Høiland-Jørgensen 
72d0003ec0SAlexei Starovoitov BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
73d0003ec0SAlexei Starovoitov {
74d0003ec0SAlexei Starovoitov 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
75a2c83fffSDaniel Borkmann 		     !rcu_read_lock_bh_held());
76d0003ec0SAlexei Starovoitov 	return map->ops->map_delete_elem(map, key);
77d0003ec0SAlexei Starovoitov }
7836bbef52SDaniel Borkmann 
79d0003ec0SAlexei Starovoitov const struct bpf_func_proto bpf_map_delete_elem_proto = {
80d0003ec0SAlexei Starovoitov 	.func		= bpf_map_delete_elem,
81d0003ec0SAlexei Starovoitov 	.gpl_only	= false,
82d0003ec0SAlexei Starovoitov 	.pkt_access	= true,
8303e69b50SDaniel Borkmann 	.ret_type	= RET_INTEGER,
84f1a2e44aSMauricio Vasquez B 	.arg1_type	= ARG_CONST_MAP_PTR,
85f1a2e44aSMauricio Vasquez B 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
86f1a2e44aSMauricio Vasquez B };
87f1a2e44aSMauricio Vasquez B 
88f1a2e44aSMauricio Vasquez B BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
89f1a2e44aSMauricio Vasquez B {
90f1a2e44aSMauricio Vasquez B 	return map->ops->map_push_elem(map, value, flags);
91f1a2e44aSMauricio Vasquez B }
92f1a2e44aSMauricio Vasquez B 
93f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_push_elem_proto = {
94f1a2e44aSMauricio Vasquez B 	.func		= bpf_map_push_elem,
95f1a2e44aSMauricio Vasquez B 	.gpl_only	= false,
96f1a2e44aSMauricio Vasquez B 	.pkt_access	= true,
97f1a2e44aSMauricio Vasquez B 	.ret_type	= RET_INTEGER,
98f1a2e44aSMauricio Vasquez B 	.arg1_type	= ARG_CONST_MAP_PTR,
99f1a2e44aSMauricio Vasquez B 	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
100f1a2e44aSMauricio Vasquez B 	.arg3_type	= ARG_ANYTHING,
101f1a2e44aSMauricio Vasquez B };
102f1a2e44aSMauricio Vasquez B 
103f1a2e44aSMauricio Vasquez B BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
104f1a2e44aSMauricio Vasquez B {
105f1a2e44aSMauricio Vasquez B 	return map->ops->map_pop_elem(map, value);
106f1a2e44aSMauricio Vasquez B }
107f1a2e44aSMauricio Vasquez B 
108f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_pop_elem_proto = {
10916d1e00cSJoanne Koong 	.func		= bpf_map_pop_elem,
110f1a2e44aSMauricio Vasquez B 	.gpl_only	= false,
111f1a2e44aSMauricio Vasquez B 	.ret_type	= RET_INTEGER,
112f1a2e44aSMauricio Vasquez B 	.arg1_type	= ARG_CONST_MAP_PTR,
113f1a2e44aSMauricio Vasquez B 	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
114f1a2e44aSMauricio Vasquez B };
115f1a2e44aSMauricio Vasquez B 
116f1a2e44aSMauricio Vasquez B BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
117f1a2e44aSMauricio Vasquez B {
118301a33d5SMircea Cirjaliu 	return map->ops->map_peek_elem(map, value);
119f1a2e44aSMauricio Vasquez B }
120f1a2e44aSMauricio Vasquez B 
121f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_peek_elem_proto = {
12216d1e00cSJoanne Koong 	.func		= bpf_map_peek_elem,
123f1a2e44aSMauricio Vasquez B 	.gpl_only	= false,
124f1a2e44aSMauricio Vasquez B 	.ret_type	= RET_INTEGER,
12507343110SFeng Zhou 	.arg1_type	= ARG_CONST_MAP_PTR,
12607343110SFeng Zhou 	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
12707343110SFeng Zhou };
12807343110SFeng Zhou 
12907343110SFeng Zhou BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
13007343110SFeng Zhou {
13107343110SFeng Zhou 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
13207343110SFeng Zhou 	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
13307343110SFeng Zhou }
13407343110SFeng Zhou 
13507343110SFeng Zhou const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
13607343110SFeng Zhou 	.func		= bpf_map_lookup_percpu_elem,
13707343110SFeng Zhou 	.gpl_only	= false,
13807343110SFeng Zhou 	.pkt_access	= true,
13907343110SFeng Zhou 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
14007343110SFeng Zhou 	.arg1_type	= ARG_CONST_MAP_PTR,
14103e69b50SDaniel Borkmann 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
1423ad00405SDaniel Borkmann 	.arg3_type	= ARG_ANYTHING,
14303e69b50SDaniel Borkmann };
14403e69b50SDaniel Borkmann 
14503e69b50SDaniel Borkmann const struct bpf_func_proto bpf_get_prandom_u32_proto = {
146c04167ceSDaniel Borkmann 	.func		= bpf_user_rnd_u32,
147f3694e00SDaniel Borkmann 	.gpl_only	= false,
148c04167ceSDaniel Borkmann 	.ret_type	= RET_INTEGER,
14980b48c44SDaniel Borkmann };
150c04167ceSDaniel Borkmann 
151c04167ceSDaniel Borkmann BPF_CALL_0(bpf_get_smp_processor_id)
152c04167ceSDaniel Borkmann {
153c04167ceSDaniel Borkmann 	return smp_processor_id();
154c04167ceSDaniel Borkmann }
155c04167ceSDaniel Borkmann 
156c04167ceSDaniel Borkmann const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
15717ca8cbfSDaniel Borkmann 	.func		= bpf_get_smp_processor_id,
1582d0e30c3SDaniel Borkmann 	.gpl_only	= false,
1592d0e30c3SDaniel Borkmann 	.ret_type	= RET_INTEGER,
1602d0e30c3SDaniel Borkmann };
1612d0e30c3SDaniel Borkmann 
1622d0e30c3SDaniel Borkmann BPF_CALL_0(bpf_get_numa_node_id)
1632d0e30c3SDaniel Borkmann {
1642d0e30c3SDaniel Borkmann 	return numa_node_id();
1652d0e30c3SDaniel Borkmann }
1662d0e30c3SDaniel Borkmann 
1672d0e30c3SDaniel Borkmann const struct bpf_func_proto bpf_get_numa_node_id_proto = {
1682d0e30c3SDaniel Borkmann 	.func		= bpf_get_numa_node_id,
169f3694e00SDaniel Borkmann 	.gpl_only	= false,
17017ca8cbfSDaniel Borkmann 	.ret_type	= RET_INTEGER,
17117ca8cbfSDaniel Borkmann };
17217ca8cbfSDaniel Borkmann 
17317ca8cbfSDaniel Borkmann BPF_CALL_0(bpf_ktime_get_ns)
17417ca8cbfSDaniel Borkmann {
17517ca8cbfSDaniel Borkmann 	/* NMI safe access to clock monotonic */
17617ca8cbfSDaniel Borkmann 	return ktime_get_mono_fast_ns();
177082b57e3SMaciej Żenczykowski }
17817ca8cbfSDaniel Borkmann 
17917ca8cbfSDaniel Borkmann const struct bpf_func_proto bpf_ktime_get_ns_proto = {
180ffeedafbSAlexei Starovoitov 	.func		= bpf_ktime_get_ns,
18171d19214SMaciej Żenczykowski 	.gpl_only	= false,
18271d19214SMaciej Żenczykowski 	.ret_type	= RET_INTEGER,
18371d19214SMaciej Żenczykowski };
18471d19214SMaciej Żenczykowski 
18571d19214SMaciej Żenczykowski BPF_CALL_0(bpf_ktime_get_boot_ns)
18671d19214SMaciej Żenczykowski {
18771d19214SMaciej Żenczykowski 	/* NMI safe access to clock boottime */
18871d19214SMaciej Żenczykowski 	return ktime_get_boot_fast_ns();
18971d19214SMaciej Żenczykowski }
19071d19214SMaciej Żenczykowski 
19171d19214SMaciej Żenczykowski const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
19271d19214SMaciej Żenczykowski 	.func		= bpf_ktime_get_boot_ns,
193d0551261SDmitrii Banshchikov 	.gpl_only	= false,
194d0551261SDmitrii Banshchikov 	.ret_type	= RET_INTEGER,
195d0551261SDmitrii Banshchikov };
196d0551261SDmitrii Banshchikov 
197d0551261SDmitrii Banshchikov BPF_CALL_0(bpf_ktime_get_coarse_ns)
198d0551261SDmitrii Banshchikov {
199d0551261SDmitrii Banshchikov 	return ktime_get_coarse_ns();
200d0551261SDmitrii Banshchikov }
201d0551261SDmitrii Banshchikov 
202d0551261SDmitrii Banshchikov const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
203d0551261SDmitrii Banshchikov 	.func		= bpf_ktime_get_coarse_ns,
204c8996c98SJesper Dangaard Brouer 	.gpl_only	= false,
205c8996c98SJesper Dangaard Brouer 	.ret_type	= RET_INTEGER,
206c8996c98SJesper Dangaard Brouer };
207c8996c98SJesper Dangaard Brouer 
208c8996c98SJesper Dangaard Brouer BPF_CALL_0(bpf_ktime_get_tai_ns)
209c8996c98SJesper Dangaard Brouer {
210c8996c98SJesper Dangaard Brouer 	/* NMI safe access to clock tai */
211c8996c98SJesper Dangaard Brouer 	return ktime_get_tai_fast_ns();
212c8996c98SJesper Dangaard Brouer }
213c8996c98SJesper Dangaard Brouer 
214c8996c98SJesper Dangaard Brouer const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
215c8996c98SJesper Dangaard Brouer 	.func		= bpf_ktime_get_tai_ns,
216f3694e00SDaniel Borkmann 	.gpl_only	= false,
217ffeedafbSAlexei Starovoitov 	.ret_type	= RET_INTEGER,
218ffeedafbSAlexei Starovoitov };
219ffeedafbSAlexei Starovoitov 
2206088b582SDaniel Borkmann BPF_CALL_0(bpf_get_current_pid_tgid)
221ffeedafbSAlexei Starovoitov {
222ffeedafbSAlexei Starovoitov 	struct task_struct *task = current;
223ffeedafbSAlexei Starovoitov 
224ffeedafbSAlexei Starovoitov 	if (unlikely(!task))
225ffeedafbSAlexei Starovoitov 		return -EINVAL;
226ffeedafbSAlexei Starovoitov 
227ffeedafbSAlexei Starovoitov 	return (u64) task->tgid << 32 | task->pid;
228ffeedafbSAlexei Starovoitov }
229ffeedafbSAlexei Starovoitov 
230ffeedafbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
231ffeedafbSAlexei Starovoitov 	.func		= bpf_get_current_pid_tgid,
232f3694e00SDaniel Borkmann 	.gpl_only	= false,
233ffeedafbSAlexei Starovoitov 	.ret_type	= RET_INTEGER,
234ffeedafbSAlexei Starovoitov };
235ffeedafbSAlexei Starovoitov 
236ffeedafbSAlexei Starovoitov BPF_CALL_0(bpf_get_current_uid_gid)
237ffeedafbSAlexei Starovoitov {
2386088b582SDaniel Borkmann 	struct task_struct *task = current;
239ffeedafbSAlexei Starovoitov 	kuid_t uid;
240ffeedafbSAlexei Starovoitov 	kgid_t gid;
241ffeedafbSAlexei Starovoitov 
242ffeedafbSAlexei Starovoitov 	if (unlikely(!task))
243ffeedafbSAlexei Starovoitov 		return -EINVAL;
244ffeedafbSAlexei Starovoitov 
245ffeedafbSAlexei Starovoitov 	current_uid_gid(&uid, &gid);
246ffeedafbSAlexei Starovoitov 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
247ffeedafbSAlexei Starovoitov 		     from_kuid(&init_user_ns, uid);
248ffeedafbSAlexei Starovoitov }
249ffeedafbSAlexei Starovoitov 
250ffeedafbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
251ffeedafbSAlexei Starovoitov 	.func		= bpf_get_current_uid_gid,
252f3694e00SDaniel Borkmann 	.gpl_only	= false,
253ffeedafbSAlexei Starovoitov 	.ret_type	= RET_INTEGER,
254ffeedafbSAlexei Starovoitov };
255ffeedafbSAlexei Starovoitov 
256074f528eSDaniel Borkmann BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
257074f528eSDaniel Borkmann {
258ffeedafbSAlexei Starovoitov 	struct task_struct *task = current;
25903b9c7faSYuntao Wang 
26003b9c7faSYuntao Wang 	if (unlikely(!task))
261ffeedafbSAlexei Starovoitov 		goto err_clear;
262074f528eSDaniel Borkmann 
263074f528eSDaniel Borkmann 	/* Verifier guarantees that size > 0 */
264074f528eSDaniel Borkmann 	strscpy_pad(buf, task->comm, size);
265ffeedafbSAlexei Starovoitov 	return 0;
266ffeedafbSAlexei Starovoitov err_clear:
267ffeedafbSAlexei Starovoitov 	memset(buf, 0, size);
268ffeedafbSAlexei Starovoitov 	return -EINVAL;
269ffeedafbSAlexei Starovoitov }
270ffeedafbSAlexei Starovoitov 
27139f19ebbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_comm_proto = {
27239f19ebbSAlexei Starovoitov 	.func		= bpf_get_current_comm,
273ffeedafbSAlexei Starovoitov 	.gpl_only	= false,
274bf6fa2c8SYonghong Song 	.ret_type	= RET_INTEGER,
275d83525caSAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
276d83525caSAlexei Starovoitov 	.arg2_type	= ARG_CONST_SIZE,
277d83525caSAlexei Starovoitov };
278d83525caSAlexei Starovoitov 
279d83525caSAlexei Starovoitov #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
280d83525caSAlexei Starovoitov 
281d83525caSAlexei Starovoitov static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
282d83525caSAlexei Starovoitov {
283d83525caSAlexei Starovoitov 	arch_spinlock_t *l = (void *)lock;
284d83525caSAlexei Starovoitov 	union {
285d83525caSAlexei Starovoitov 		__u32 val;
286d83525caSAlexei Starovoitov 		arch_spinlock_t lock;
287d83525caSAlexei Starovoitov 	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
288d83525caSAlexei Starovoitov 
289d83525caSAlexei Starovoitov 	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
290d83525caSAlexei Starovoitov 	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
291d83525caSAlexei Starovoitov 	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
292d83525caSAlexei Starovoitov 	preempt_disable();
293d83525caSAlexei Starovoitov 	arch_spin_lock(l);
294d83525caSAlexei Starovoitov }
295d83525caSAlexei Starovoitov 
296d83525caSAlexei Starovoitov static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
297d83525caSAlexei Starovoitov {
298d83525caSAlexei Starovoitov 	arch_spinlock_t *l = (void *)lock;
299d83525caSAlexei Starovoitov 
300d83525caSAlexei Starovoitov 	arch_spin_unlock(l);
301d83525caSAlexei Starovoitov 	preempt_enable();
302d83525caSAlexei Starovoitov }
303d83525caSAlexei Starovoitov 
304d83525caSAlexei Starovoitov #else
305d83525caSAlexei Starovoitov 
306d83525caSAlexei Starovoitov static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
307d83525caSAlexei Starovoitov {
308d83525caSAlexei Starovoitov 	atomic_t *l = (void *)lock;
309d83525caSAlexei Starovoitov 
310d83525caSAlexei Starovoitov 	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
311d83525caSAlexei Starovoitov 	do {
312d83525caSAlexei Starovoitov 		atomic_cond_read_relaxed(l, !VAL);
313d83525caSAlexei Starovoitov 	} while (atomic_xchg(l, 1));
314d83525caSAlexei Starovoitov }
315d83525caSAlexei Starovoitov 
316d83525caSAlexei Starovoitov static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
317d83525caSAlexei Starovoitov {
318d83525caSAlexei Starovoitov 	atomic_t *l = (void *)lock;
319d83525caSAlexei Starovoitov 
320d83525caSAlexei Starovoitov 	atomic_set_release(l, 0);
321c1b3fed3SAlexei Starovoitov }
322d83525caSAlexei Starovoitov 
323d83525caSAlexei Starovoitov #endif
324d83525caSAlexei Starovoitov 
325d83525caSAlexei Starovoitov static DEFINE_PER_CPU(unsigned long, irqsave_flags);
326d83525caSAlexei Starovoitov 
327d83525caSAlexei Starovoitov static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
328c1b3fed3SAlexei Starovoitov {
329c1b3fed3SAlexei Starovoitov 	unsigned long flags;
330c1b3fed3SAlexei Starovoitov 
331c1b3fed3SAlexei Starovoitov 	local_irq_save(flags);
332c1b3fed3SAlexei Starovoitov 	__bpf_spin_lock(lock);
333d83525caSAlexei Starovoitov 	__this_cpu_write(irqsave_flags, flags);
334d83525caSAlexei Starovoitov }
335d83525caSAlexei Starovoitov 
336d83525caSAlexei Starovoitov NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
337d83525caSAlexei Starovoitov {
338d83525caSAlexei Starovoitov 	__bpf_spin_lock_irqsave(lock);
339d83525caSAlexei Starovoitov 	return 0;
340d83525caSAlexei Starovoitov }
3414e814da0SKumar Kartikeya Dwivedi 
342d83525caSAlexei Starovoitov const struct bpf_func_proto bpf_spin_lock_proto = {
343d83525caSAlexei Starovoitov 	.func		= bpf_spin_lock,
344c1b3fed3SAlexei Starovoitov 	.gpl_only	= false,
345d83525caSAlexei Starovoitov 	.ret_type	= RET_VOID,
346d83525caSAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
347d83525caSAlexei Starovoitov 	.arg1_btf_id    = BPF_PTR_POISON,
348d83525caSAlexei Starovoitov };
349d83525caSAlexei Starovoitov 
350d83525caSAlexei Starovoitov static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
351c1b3fed3SAlexei Starovoitov {
352c1b3fed3SAlexei Starovoitov 	unsigned long flags;
353c1b3fed3SAlexei Starovoitov 
354c1b3fed3SAlexei Starovoitov 	flags = __this_cpu_read(irqsave_flags);
355c1b3fed3SAlexei Starovoitov 	__bpf_spin_unlock(lock);
356d83525caSAlexei Starovoitov 	local_irq_restore(flags);
357d83525caSAlexei Starovoitov }
358d83525caSAlexei Starovoitov 
359d83525caSAlexei Starovoitov NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
360d83525caSAlexei Starovoitov {
361d83525caSAlexei Starovoitov 	__bpf_spin_unlock_irqrestore(lock);
362d83525caSAlexei Starovoitov 	return 0;
363d83525caSAlexei Starovoitov }
3644e814da0SKumar Kartikeya Dwivedi 
365d83525caSAlexei Starovoitov const struct bpf_func_proto bpf_spin_unlock_proto = {
366d83525caSAlexei Starovoitov 	.func		= bpf_spin_unlock,
36796049f3aSAlexei Starovoitov 	.gpl_only	= false,
36896049f3aSAlexei Starovoitov 	.ret_type	= RET_VOID,
36996049f3aSAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
37096049f3aSAlexei Starovoitov 	.arg1_btf_id    = BPF_PTR_POISON,
37196049f3aSAlexei Starovoitov };
37296049f3aSAlexei Starovoitov 
373db559117SKumar Kartikeya Dwivedi void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
37496049f3aSAlexei Starovoitov 			   bool lock_src)
375db559117SKumar Kartikeya Dwivedi {
37696049f3aSAlexei Starovoitov 	struct bpf_spin_lock *lock;
377c1b3fed3SAlexei Starovoitov 
37896049f3aSAlexei Starovoitov 	if (lock_src)
379c1b3fed3SAlexei Starovoitov 		lock = src + map->record->spin_lock_off;
38096049f3aSAlexei Starovoitov 	else
38196049f3aSAlexei Starovoitov 		lock = dst + map->record->spin_lock_off;
38296049f3aSAlexei Starovoitov 	preempt_disable();
3835576b991SMartin KaFai Lau 	__bpf_spin_lock_irqsave(lock);
3845576b991SMartin KaFai Lau 	copy_map_value(map, dst, src);
3855576b991SMartin KaFai Lau 	__bpf_spin_unlock_irqrestore(lock);
3865576b991SMartin KaFai Lau 	preempt_enable();
3875576b991SMartin KaFai Lau }
3885576b991SMartin KaFai Lau 
3895576b991SMartin KaFai Lau BPF_CALL_0(bpf_jiffies64)
3905576b991SMartin KaFai Lau {
3915576b991SMartin KaFai Lau 	return get_jiffies_64();
3925576b991SMartin KaFai Lau }
3935576b991SMartin KaFai Lau 
394bf6fa2c8SYonghong Song const struct bpf_func_proto bpf_jiffies64_proto = {
395bf6fa2c8SYonghong Song 	.func		= bpf_jiffies64,
396bf6fa2c8SYonghong Song 	.gpl_only	= false,
3972d3a1e36SYonghong Song 	.ret_type	= RET_INTEGER,
3982d3a1e36SYonghong Song };
399bf6fa2c8SYonghong Song 
4002d3a1e36SYonghong Song #ifdef CONFIG_CGROUPS
4012d3a1e36SYonghong Song BPF_CALL_0(bpf_get_current_cgroup_id)
4022d3a1e36SYonghong Song {
4032d3a1e36SYonghong Song 	struct cgroup *cgrp;
4042d3a1e36SYonghong Song 	u64 cgrp_id;
4052d3a1e36SYonghong Song 
406bf6fa2c8SYonghong Song 	rcu_read_lock();
407bf6fa2c8SYonghong Song 	cgrp = task_dfl_cgroup(current);
408bf6fa2c8SYonghong Song 	cgrp_id = cgroup_id(cgrp);
409bf6fa2c8SYonghong Song 	rcu_read_unlock();
410bf6fa2c8SYonghong Song 
411bf6fa2c8SYonghong Song 	return cgrp_id;
412bf6fa2c8SYonghong Song }
413cd339431SRoman Gushchin 
4140f09abd1SDaniel Borkmann const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
4150f09abd1SDaniel Borkmann 	.func		= bpf_get_current_cgroup_id,
4162d3a1e36SYonghong Song 	.gpl_only	= false,
4170f09abd1SDaniel Borkmann 	.ret_type	= RET_INTEGER,
4182d3a1e36SYonghong Song };
4190f09abd1SDaniel Borkmann 
4202d3a1e36SYonghong Song BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
4212d3a1e36SYonghong Song {
4220f09abd1SDaniel Borkmann 	struct cgroup *cgrp;
4232d3a1e36SYonghong Song 	struct cgroup *ancestor;
4242d3a1e36SYonghong Song 	u64 cgrp_id;
4252d3a1e36SYonghong Song 
4262d3a1e36SYonghong Song 	rcu_read_lock();
4270f09abd1SDaniel Borkmann 	cgrp = task_dfl_cgroup(current);
4280f09abd1SDaniel Borkmann 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
4290f09abd1SDaniel Borkmann 	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
4300f09abd1SDaniel Borkmann 	rcu_read_unlock();
4310f09abd1SDaniel Borkmann 
4320f09abd1SDaniel Borkmann 	return cgrp_id;
4330f09abd1SDaniel Borkmann }
4340f09abd1SDaniel Borkmann 
4358a67f2deSStanislav Fomichev const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
4360f09abd1SDaniel Borkmann 	.func		= bpf_get_current_ancestor_cgroup_id,
437d7a4cb9bSAndrey Ignatov 	.gpl_only	= false,
438d7a4cb9bSAndrey Ignatov 	.ret_type	= RET_INTEGER,
439d7a4cb9bSAndrey Ignatov 	.arg1_type	= ARG_ANYTHING,
440d7a4cb9bSAndrey Ignatov };
441d7a4cb9bSAndrey Ignatov #endif /* CONFIG_CGROUPS */
442d7a4cb9bSAndrey Ignatov 
443d7a4cb9bSAndrey Ignatov #define BPF_STRTOX_BASE_MASK 0x1F
444d7a4cb9bSAndrey Ignatov 
445d7a4cb9bSAndrey Ignatov static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
446d7a4cb9bSAndrey Ignatov 			  unsigned long long *res, bool *is_negative)
447d7a4cb9bSAndrey Ignatov {
448d7a4cb9bSAndrey Ignatov 	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
449d7a4cb9bSAndrey Ignatov 	const char *cur_buf = buf;
450d7a4cb9bSAndrey Ignatov 	size_t cur_len = buf_len;
451d7a4cb9bSAndrey Ignatov 	unsigned int consumed;
452d7a4cb9bSAndrey Ignatov 	size_t val_len;
453d7a4cb9bSAndrey Ignatov 	char str[64];
454d7a4cb9bSAndrey Ignatov 
455d7a4cb9bSAndrey Ignatov 	if (!buf || !buf_len || !res || !is_negative)
456d7a4cb9bSAndrey Ignatov 		return -EINVAL;
457d7a4cb9bSAndrey Ignatov 
458d7a4cb9bSAndrey Ignatov 	if (base != 0 && base != 8 && base != 10 && base != 16)
459d7a4cb9bSAndrey Ignatov 		return -EINVAL;
460d7a4cb9bSAndrey Ignatov 
461d7a4cb9bSAndrey Ignatov 	if (flags & ~BPF_STRTOX_BASE_MASK)
462d7a4cb9bSAndrey Ignatov 		return -EINVAL;
463d7a4cb9bSAndrey Ignatov 
464d7a4cb9bSAndrey Ignatov 	while (cur_buf < buf + buf_len && isspace(*cur_buf))
465d7a4cb9bSAndrey Ignatov 		++cur_buf;
466d7a4cb9bSAndrey Ignatov 
467d7a4cb9bSAndrey Ignatov 	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
468d7a4cb9bSAndrey Ignatov 	if (*is_negative)
469d7a4cb9bSAndrey Ignatov 		++cur_buf;
470d7a4cb9bSAndrey Ignatov 
471d7a4cb9bSAndrey Ignatov 	consumed = cur_buf - buf;
472d7a4cb9bSAndrey Ignatov 	cur_len -= consumed;
473d7a4cb9bSAndrey Ignatov 	if (!cur_len)
474d7a4cb9bSAndrey Ignatov 		return -EINVAL;
475d7a4cb9bSAndrey Ignatov 
476d7a4cb9bSAndrey Ignatov 	cur_len = min(cur_len, sizeof(str) - 1);
477d7a4cb9bSAndrey Ignatov 	memcpy(str, cur_buf, cur_len);
478d7a4cb9bSAndrey Ignatov 	str[cur_len] = '\0';
479d7a4cb9bSAndrey Ignatov 	cur_buf = str;
480d7a4cb9bSAndrey Ignatov 
481d7a4cb9bSAndrey Ignatov 	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
482d7a4cb9bSAndrey Ignatov 	val_len = _parse_integer(cur_buf, base, res);
483d7a4cb9bSAndrey Ignatov 
484d7a4cb9bSAndrey Ignatov 	if (val_len & KSTRTOX_OVERFLOW)
485d7a4cb9bSAndrey Ignatov 		return -ERANGE;
486d7a4cb9bSAndrey Ignatov 
487d7a4cb9bSAndrey Ignatov 	if (val_len == 0)
488d7a4cb9bSAndrey Ignatov 		return -EINVAL;
489d7a4cb9bSAndrey Ignatov 
490d7a4cb9bSAndrey Ignatov 	cur_buf += val_len;
491d7a4cb9bSAndrey Ignatov 	consumed += cur_buf - str;
492d7a4cb9bSAndrey Ignatov 
493d7a4cb9bSAndrey Ignatov 	return consumed;
494d7a4cb9bSAndrey Ignatov }
495d7a4cb9bSAndrey Ignatov 
496d7a4cb9bSAndrey Ignatov static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
497d7a4cb9bSAndrey Ignatov 			 long long *res)
498d7a4cb9bSAndrey Ignatov {
499d7a4cb9bSAndrey Ignatov 	unsigned long long _res;
500d7a4cb9bSAndrey Ignatov 	bool is_negative;
501d7a4cb9bSAndrey Ignatov 	int err;
502d7a4cb9bSAndrey Ignatov 
503d7a4cb9bSAndrey Ignatov 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
504d7a4cb9bSAndrey Ignatov 	if (err < 0)
505d7a4cb9bSAndrey Ignatov 		return err;
506d7a4cb9bSAndrey Ignatov 	if (is_negative) {
507d7a4cb9bSAndrey Ignatov 		if ((long long)-_res > 0)
508d7a4cb9bSAndrey Ignatov 			return -ERANGE;
509d7a4cb9bSAndrey Ignatov 		*res = -_res;
510d7a4cb9bSAndrey Ignatov 	} else {
511d7a4cb9bSAndrey Ignatov 		if ((long long)_res < 0)
512d7a4cb9bSAndrey Ignatov 			return -ERANGE;
513d7a4cb9bSAndrey Ignatov 		*res = _res;
514d7a4cb9bSAndrey Ignatov 	}
515d7a4cb9bSAndrey Ignatov 	return err;
516d7a4cb9bSAndrey Ignatov }
517d7a4cb9bSAndrey Ignatov 
518d7a4cb9bSAndrey Ignatov BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
519d7a4cb9bSAndrey Ignatov 	   s64 *, res)
520d7a4cb9bSAndrey Ignatov {
521d7a4cb9bSAndrey Ignatov 	long long _res;
522d7a4cb9bSAndrey Ignatov 	int err;
523d7a4cb9bSAndrey Ignatov 
524d7a4cb9bSAndrey Ignatov 	*res = 0;
525d7a4cb9bSAndrey Ignatov 	err = __bpf_strtoll(buf, buf_len, flags, &_res);
526d7a4cb9bSAndrey Ignatov 	if (err < 0)
527d7a4cb9bSAndrey Ignatov 		return err;
528d7a4cb9bSAndrey Ignatov 	if (_res != (long)_res)
529d7a4cb9bSAndrey Ignatov 		return -ERANGE;
530d7a4cb9bSAndrey Ignatov 	*res = _res;
531216e3cd2SHao Luo 	return err;
532d7a4cb9bSAndrey Ignatov }
533d7a4cb9bSAndrey Ignatov 
534d7a4cb9bSAndrey Ignatov const struct bpf_func_proto bpf_strtol_proto = {
535d7a4cb9bSAndrey Ignatov 	.func		= bpf_strtol,
536d7a4cb9bSAndrey Ignatov 	.gpl_only	= false,
537d7a4cb9bSAndrey Ignatov 	.ret_type	= RET_INTEGER,
538d7a4cb9bSAndrey Ignatov 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
539d7a4cb9bSAndrey Ignatov 	.arg2_type	= ARG_CONST_SIZE,
540d7a4cb9bSAndrey Ignatov 	.arg3_type	= ARG_ANYTHING,
541d7a4cb9bSAndrey Ignatov 	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
542d7a4cb9bSAndrey Ignatov 	.arg4_size	= sizeof(s64),
543d7a4cb9bSAndrey Ignatov };
544d7a4cb9bSAndrey Ignatov 
545d7a4cb9bSAndrey Ignatov BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
546d7a4cb9bSAndrey Ignatov 	   u64 *, res)
547d7a4cb9bSAndrey Ignatov {
548d7a4cb9bSAndrey Ignatov 	unsigned long long _res;
549d7a4cb9bSAndrey Ignatov 	bool is_negative;
550d7a4cb9bSAndrey Ignatov 	int err;
551d7a4cb9bSAndrey Ignatov 
552d7a4cb9bSAndrey Ignatov 	*res = 0;
553d7a4cb9bSAndrey Ignatov 	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
554d7a4cb9bSAndrey Ignatov 	if (err < 0)
555d7a4cb9bSAndrey Ignatov 		return err;
556d7a4cb9bSAndrey Ignatov 	if (is_negative)
557d7a4cb9bSAndrey Ignatov 		return -EINVAL;
558d7a4cb9bSAndrey Ignatov 	if (_res != (unsigned long)_res)
559216e3cd2SHao Luo 		return -ERANGE;
560d7a4cb9bSAndrey Ignatov 	*res = _res;
561d7a4cb9bSAndrey Ignatov 	return err;
562d7a4cb9bSAndrey Ignatov }
563d7a4cb9bSAndrey Ignatov 
564b4490c5cSCarlos Neira const struct bpf_func_proto bpf_strtoul_proto = {
565c5fb1993SHou Tao 	.func		= bpf_strtoul,
566c5fb1993SHou Tao 	.gpl_only	= false,
567c5fb1993SHou Tao 	.ret_type	= RET_INTEGER,
568c5fb1993SHou Tao 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
569c5fb1993SHou Tao 	.arg2_type	= ARG_CONST_SIZE,
570dc368e1cSJoanne Koong 	.arg3_type	= ARG_ANYTHING,
571c5fb1993SHou Tao 	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
572c5fb1993SHou Tao 	.arg4_size	= sizeof(u64),
573c5fb1993SHou Tao };
574c5fb1993SHou Tao 
575c5fb1993SHou Tao BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
576c5fb1993SHou Tao {
577c5fb1993SHou Tao 	return strncmp(s1, s2, s1_sz);
578c5fb1993SHou Tao }
579b4490c5cSCarlos Neira 
580b4490c5cSCarlos Neira static const struct bpf_func_proto bpf_strncmp_proto = {
581b4490c5cSCarlos Neira 	.func		= bpf_strncmp,
582b4490c5cSCarlos Neira 	.gpl_only	= false,
583b4490c5cSCarlos Neira 	.ret_type	= RET_INTEGER,
584b4490c5cSCarlos Neira 	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
585b4490c5cSCarlos Neira 	.arg2_type	= ARG_CONST_SIZE,
586b4490c5cSCarlos Neira 	.arg3_type	= ARG_PTR_TO_CONST_STR,
587b4490c5cSCarlos Neira };
588b4490c5cSCarlos Neira 
589b4490c5cSCarlos Neira BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
590b4490c5cSCarlos Neira 	   struct bpf_pidns_info *, nsdata, u32, size)
591b4490c5cSCarlos Neira {
592b4490c5cSCarlos Neira 	struct task_struct *task = current;
593b4490c5cSCarlos Neira 	struct pid_namespace *pidns;
594b4490c5cSCarlos Neira 	int err = -EINVAL;
595b4490c5cSCarlos Neira 
596b4490c5cSCarlos Neira 	if (unlikely(size != sizeof(struct bpf_pidns_info)))
597b4490c5cSCarlos Neira 		goto clear;
598b4490c5cSCarlos Neira 
599b4490c5cSCarlos Neira 	if (unlikely((u64)(dev_t)dev != dev))
600b4490c5cSCarlos Neira 		goto clear;
601b4490c5cSCarlos Neira 
602b4490c5cSCarlos Neira 	if (unlikely(!task))
603b4490c5cSCarlos Neira 		goto clear;
604b4490c5cSCarlos Neira 
605b4490c5cSCarlos Neira 	pidns = task_active_pid_ns(task);
606b4490c5cSCarlos Neira 	if (unlikely(!pidns)) {
607b4490c5cSCarlos Neira 		err = -ENOENT;
608b4490c5cSCarlos Neira 		goto clear;
609b4490c5cSCarlos Neira 	}
610b4490c5cSCarlos Neira 
611b4490c5cSCarlos Neira 	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
612b4490c5cSCarlos Neira 		goto clear;
613b4490c5cSCarlos Neira 
614b4490c5cSCarlos Neira 	nsdata->pid = task_pid_nr_ns(task, pidns);
615b4490c5cSCarlos Neira 	nsdata->tgid = task_tgid_nr_ns(task, pidns);
616b4490c5cSCarlos Neira 	return 0;
617b4490c5cSCarlos Neira clear:
618b4490c5cSCarlos Neira 	memset((void *)nsdata, 0, (size_t) size);
619b4490c5cSCarlos Neira 	return err;
620b4490c5cSCarlos Neira }
6216890896bSStanislav Fomichev 
6226890896bSStanislav Fomichev const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
6236890896bSStanislav Fomichev 	.func		= bpf_get_ns_current_pid_tgid,
6246890896bSStanislav Fomichev 	.gpl_only	= false,
6256890896bSStanislav Fomichev 	.ret_type	= RET_INTEGER,
6266890896bSStanislav Fomichev 	.arg1_type	= ARG_ANYTHING,
6276890896bSStanislav Fomichev 	.arg2_type	= ARG_ANYTHING,
6286890896bSStanislav Fomichev 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
6296890896bSStanislav Fomichev 	.arg4_type      = ARG_CONST_SIZE,
6306890896bSStanislav Fomichev };
6316890896bSStanislav Fomichev 
6326890896bSStanislav Fomichev static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
6336890896bSStanislav Fomichev 	.func		= bpf_get_raw_cpu_id,
6346890896bSStanislav Fomichev 	.gpl_only	= false,
6356890896bSStanislav Fomichev 	.ret_type	= RET_INTEGER,
6366890896bSStanislav Fomichev };
6376890896bSStanislav Fomichev 
6386890896bSStanislav Fomichev BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
6396890896bSStanislav Fomichev 	   u64, flags, void *, data, u64, size)
6406890896bSStanislav Fomichev {
6416890896bSStanislav Fomichev 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
6426890896bSStanislav Fomichev 		return -EINVAL;
6436890896bSStanislav Fomichev 
644216e3cd2SHao Luo 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
6456890896bSStanislav Fomichev }
6466890896bSStanislav Fomichev 
6476890896bSStanislav Fomichev const struct bpf_func_proto bpf_event_output_data_proto =  {
64807be4c4aSAlexei Starovoitov 	.func		= bpf_event_output_data,
64907be4c4aSAlexei Starovoitov 	.gpl_only       = true,
65007be4c4aSAlexei Starovoitov 	.ret_type       = RET_INTEGER,
65107be4c4aSAlexei Starovoitov 	.arg1_type      = ARG_PTR_TO_CTX,
65207be4c4aSAlexei Starovoitov 	.arg2_type      = ARG_CONST_MAP_PTR,
65307be4c4aSAlexei Starovoitov 	.arg3_type      = ARG_ANYTHING,
65407be4c4aSAlexei Starovoitov 	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
65507be4c4aSAlexei Starovoitov 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
65607be4c4aSAlexei Starovoitov };
65707be4c4aSAlexei Starovoitov 
65807be4c4aSAlexei Starovoitov BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
65907be4c4aSAlexei Starovoitov 	   const void __user *, user_ptr)
66007be4c4aSAlexei Starovoitov {
66107be4c4aSAlexei Starovoitov 	int ret = copy_from_user(dst, user_ptr, size);
66207be4c4aSAlexei Starovoitov 
66307be4c4aSAlexei Starovoitov 	if (unlikely(ret)) {
66401685c5bSYonghong Song 		memset(dst, 0, size);
66507be4c4aSAlexei Starovoitov 		ret = -EFAULT;
66607be4c4aSAlexei Starovoitov 	}
66707be4c4aSAlexei Starovoitov 
66807be4c4aSAlexei Starovoitov 	return ret;
66907be4c4aSAlexei Starovoitov }
67007be4c4aSAlexei Starovoitov 
671376040e4SKenny Yu const struct bpf_func_proto bpf_copy_from_user_proto = {
672376040e4SKenny Yu 	.func		= bpf_copy_from_user,
673376040e4SKenny Yu 	.gpl_only	= false,
674376040e4SKenny Yu 	.might_sleep	= true,
675376040e4SKenny Yu 	.ret_type	= RET_INTEGER,
676376040e4SKenny Yu 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
677376040e4SKenny Yu 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
678376040e4SKenny Yu 	.arg3_type	= ARG_ANYTHING,
679376040e4SKenny Yu };
680376040e4SKenny Yu 
681376040e4SKenny Yu BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
682376040e4SKenny Yu 	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
683376040e4SKenny Yu {
684376040e4SKenny Yu 	int ret;
685376040e4SKenny Yu 
686376040e4SKenny Yu 	/* flags is not used yet */
687376040e4SKenny Yu 	if (unlikely(flags))
688376040e4SKenny Yu 		return -EINVAL;
689376040e4SKenny Yu 
690376040e4SKenny Yu 	if (unlikely(!size))
691376040e4SKenny Yu 		return 0;
692376040e4SKenny Yu 
693376040e4SKenny Yu 	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
6940407a65fSKenta Tada 	if (ret == size)
69501685c5bSYonghong Song 		return 0;
696376040e4SKenny Yu 
697376040e4SKenny Yu 	memset(dst, 0, size);
698376040e4SKenny Yu 	/* Return -EFAULT for partial read */
699376040e4SKenny Yu 	return ret < 0 ? ret : -EFAULT;
700376040e4SKenny Yu }
701376040e4SKenny Yu 
702376040e4SKenny Yu const struct bpf_func_proto bpf_copy_from_user_task_proto = {
703376040e4SKenny Yu 	.func		= bpf_copy_from_user_task,
704376040e4SKenny Yu 	.gpl_only	= true,
705eaa6bcb7SHao Luo 	.might_sleep	= true,
706eaa6bcb7SHao Luo 	.ret_type	= RET_INTEGER,
707eaa6bcb7SHao Luo 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
708eaa6bcb7SHao Luo 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
709eaa6bcb7SHao Luo 	.arg3_type	= ARG_ANYTHING,
710eaa6bcb7SHao Luo 	.arg4_type	= ARG_PTR_TO_BTF_ID,
711eaa6bcb7SHao Luo 	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
712eaa6bcb7SHao Luo 	.arg5_type	= ARG_ANYTHING
713eaa6bcb7SHao Luo };
714eaa6bcb7SHao Luo 
715eaa6bcb7SHao Luo BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
71634d3a78cSHao Luo {
717eaa6bcb7SHao Luo 	if (cpu >= nr_cpu_ids)
718eaa6bcb7SHao Luo 		return (unsigned long)NULL;
719eaa6bcb7SHao Luo 
720eaa6bcb7SHao Luo 	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
72163d9b80dSHao Luo }
72263d9b80dSHao Luo 
72363d9b80dSHao Luo const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
72463d9b80dSHao Luo 	.func		= bpf_per_cpu_ptr,
72563d9b80dSHao Luo 	.gpl_only	= false,
72663d9b80dSHao Luo 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
72763d9b80dSHao Luo 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
72863d9b80dSHao Luo 	.arg2_type	= ARG_ANYTHING,
72934d3a78cSHao Luo };
73063d9b80dSHao Luo 
73163d9b80dSHao Luo BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
73263d9b80dSHao Luo {
733d9c9e4dbSFlorent Revest 	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
734d9c9e4dbSFlorent Revest }
735d9c9e4dbSFlorent Revest 
736d9c9e4dbSFlorent Revest const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
737d9c9e4dbSFlorent Revest 	.func		= bpf_this_cpu_ptr,
738d9c9e4dbSFlorent Revest 	.gpl_only	= false,
739d9c9e4dbSFlorent Revest 	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
740d9c9e4dbSFlorent Revest 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
741d9c9e4dbSFlorent Revest };
742d9c9e4dbSFlorent Revest 
743d9c9e4dbSFlorent Revest static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
744d9c9e4dbSFlorent Revest 		size_t bufsz)
745d9c9e4dbSFlorent Revest {
746d9c9e4dbSFlorent Revest 	void __user *user_ptr = (__force void __user *)unsafe_ptr;
747d9c9e4dbSFlorent Revest 
748d9c9e4dbSFlorent Revest 	buf[0] = 0;
749d9c9e4dbSFlorent Revest 
750d9c9e4dbSFlorent Revest 	switch (fmt_ptype) {
751d9c9e4dbSFlorent Revest 	case 's':
752d9c9e4dbSFlorent Revest #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
753d9c9e4dbSFlorent Revest 		if ((unsigned long)unsafe_ptr < TASK_SIZE)
754d9c9e4dbSFlorent Revest 			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
755d9c9e4dbSFlorent Revest 		fallthrough;
7568afcc19fSFlorent Revest #endif
7578afcc19fSFlorent Revest 	case 'k':
758d9c9e4dbSFlorent Revest 		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
7598afcc19fSFlorent Revest 	case 'u':
760d9c9e4dbSFlorent Revest 		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
761e2d5b2bbSFlorent Revest 	}
7620af02eb2SFlorent Revest 
763e2d5b2bbSFlorent Revest 	return -EINVAL;
7640af02eb2SFlorent Revest }
765d9c9e4dbSFlorent Revest 
766e2d5b2bbSFlorent Revest /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
767e2d5b2bbSFlorent Revest  * arguments representation.
768d9c9e4dbSFlorent Revest  */
769d9c9e4dbSFlorent Revest #define MAX_BPRINTF_BIN_ARGS	512
770d9c9e4dbSFlorent Revest 
771e2d5b2bbSFlorent Revest /* Support executing three nested bprintf helper calls on a given CPU */
772e2d5b2bbSFlorent Revest #define MAX_BPRINTF_NEST_LEVEL	3
773d9c9e4dbSFlorent Revest struct bpf_bprintf_buffers {
774d9c9e4dbSFlorent Revest 	char bin_args[MAX_BPRINTF_BIN_ARGS];
775e2d5b2bbSFlorent Revest 	char buf[MAX_BPRINTF_BUF];
7760af02eb2SFlorent Revest };
777e2d5b2bbSFlorent Revest 
778d9c9e4dbSFlorent Revest static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
779d9c9e4dbSFlorent Revest static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
780d9c9e4dbSFlorent Revest 
781e2d5b2bbSFlorent Revest static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
782e2d5b2bbSFlorent Revest {
783d9c9e4dbSFlorent Revest 	int nest_level;
784d9c9e4dbSFlorent Revest 
785d9c9e4dbSFlorent Revest 	preempt_disable();
786d9c9e4dbSFlorent Revest 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
78748cac3f4SFlorent Revest 	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
788d9c9e4dbSFlorent Revest 		this_cpu_dec(bpf_bprintf_nest_level);
789e2d5b2bbSFlorent Revest 		preempt_enable();
790e2d5b2bbSFlorent Revest 		return -EBUSY;
791d9c9e4dbSFlorent Revest 	}
792d9c9e4dbSFlorent Revest 	*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
793d9c9e4dbSFlorent Revest 
794d9c9e4dbSFlorent Revest 	return 0;
795d9c9e4dbSFlorent Revest }
79648cac3f4SFlorent Revest 
797d9c9e4dbSFlorent Revest void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
798d9c9e4dbSFlorent Revest {
799d9c9e4dbSFlorent Revest 	if (!data->bin_args && !data->buf)
800d9c9e4dbSFlorent Revest 		return;
80148cac3f4SFlorent Revest 	if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
802d9c9e4dbSFlorent Revest 		return;
80348cac3f4SFlorent Revest 	this_cpu_dec(bpf_bprintf_nest_level);
80448cac3f4SFlorent Revest 	preempt_enable();
805d9c9e4dbSFlorent Revest }
806d9c9e4dbSFlorent Revest 
80748cac3f4SFlorent Revest /*
808d9c9e4dbSFlorent Revest  * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
80948cac3f4SFlorent Revest  *
81048cac3f4SFlorent Revest  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
811d9c9e4dbSFlorent Revest  *
81248cac3f4SFlorent Revest  * This can be used in two ways:
81348cac3f4SFlorent Revest  * - Format string verification only: when data->get_bin_args is false
81448cac3f4SFlorent Revest  * - Arguments preparation: in addition to the above verification, it writes in
815d9c9e4dbSFlorent Revest  *   data->bin_args a binary representation of arguments usable by bstr_printf
81648cac3f4SFlorent Revest  *   where pointers from BPF have been sanitized.
817d9c9e4dbSFlorent Revest  *
818d9c9e4dbSFlorent Revest  * In argument preparation mode, if 0 is returned, safe temporary buffers are
819d9c9e4dbSFlorent Revest  * allocated and bpf_bprintf_cleanup should be called to free them after use.
820d9c9e4dbSFlorent Revest  */
821d9c9e4dbSFlorent Revest int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
822d9c9e4dbSFlorent Revest 			u32 num_args, struct bpf_bprintf_data *data)
82348cac3f4SFlorent Revest {
82448cac3f4SFlorent Revest 	bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
82548cac3f4SFlorent Revest 	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
82648cac3f4SFlorent Revest 	struct bpf_bprintf_buffers *buffers = NULL;
8278afcc19fSFlorent Revest 	size_t sizeof_cur_arg, sizeof_cur_ip;
82848cac3f4SFlorent Revest 	int err, i, num_spec = 0;
82948cac3f4SFlorent Revest 	u64 cur_arg;
83048cac3f4SFlorent Revest 	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
831d9c9e4dbSFlorent Revest 
832d9c9e4dbSFlorent Revest 	fmt_end = strnchr(fmt, fmt_size, 0);
833d9c9e4dbSFlorent Revest 	if (!fmt_end)
83448cac3f4SFlorent Revest 		return -EINVAL;
835d9c9e4dbSFlorent Revest 	fmt_size = fmt_end - fmt;
836d9c9e4dbSFlorent Revest 
837d9c9e4dbSFlorent Revest 	if (get_buffers && try_get_buffers(&buffers))
838d9c9e4dbSFlorent Revest 		return -EBUSY;
839d9c9e4dbSFlorent Revest 
840d9c9e4dbSFlorent Revest 	if (data->get_bin_args) {
841d9c9e4dbSFlorent Revest 		if (num_args)
842d9c9e4dbSFlorent Revest 			tmp_buf = buffers->bin_args;
843d9c9e4dbSFlorent Revest 		tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
844d9c9e4dbSFlorent Revest 		data->bin_args = (u32 *)tmp_buf;
845d9c9e4dbSFlorent Revest 	}
846d9c9e4dbSFlorent Revest 
84748cac3f4SFlorent Revest 	if (data->get_buf)
848d9c9e4dbSFlorent Revest 		data->buf = buffers->buf;
849d9c9e4dbSFlorent Revest 
850d9c9e4dbSFlorent Revest 	for (i = 0; i < fmt_size; i++) {
851d9c9e4dbSFlorent Revest 		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
852d9c9e4dbSFlorent Revest 			err = -EINVAL;
853d9c9e4dbSFlorent Revest 			goto out;
854d9c9e4dbSFlorent Revest 		}
855d9c9e4dbSFlorent Revest 
856d9c9e4dbSFlorent Revest 		if (fmt[i] != '%')
857d9c9e4dbSFlorent Revest 			continue;
858d9c9e4dbSFlorent Revest 
859d9c9e4dbSFlorent Revest 		if (fmt[i + 1] == '%') {
860d9c9e4dbSFlorent Revest 			i++;
861d9c9e4dbSFlorent Revest 			continue;
862d9c9e4dbSFlorent Revest 		}
863d9c9e4dbSFlorent Revest 
864d9c9e4dbSFlorent Revest 		if (num_spec >= num_args) {
865d9c9e4dbSFlorent Revest 			err = -EINVAL;
86648cac3f4SFlorent Revest 			goto out;
867d9c9e4dbSFlorent Revest 		}
868d9c9e4dbSFlorent Revest 
869d9c9e4dbSFlorent Revest 		/* The string is zero-terminated so if fmt[i] != 0, we can
870d9c9e4dbSFlorent Revest 		 * always access fmt[i + 1], in the worst case it will be a 0
871d9c9e4dbSFlorent Revest 		 */
872d9c9e4dbSFlorent Revest 		i++;
873d9c9e4dbSFlorent Revest 
874d9c9e4dbSFlorent Revest 		/* skip optional "[0 +-][num]" width formatting field */
875d9c9e4dbSFlorent Revest 		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
876d9c9e4dbSFlorent Revest 		       fmt[i] == ' ')
87748cac3f4SFlorent Revest 			i++;
87848cac3f4SFlorent Revest 		if (fmt[i] >= '1' && fmt[i] <= '9') {
879d9c9e4dbSFlorent Revest 			i++;
88048cac3f4SFlorent Revest 			while (fmt[i] >= '0' && fmt[i] <= '9')
881d9c9e4dbSFlorent Revest 				i++;
88248cac3f4SFlorent Revest 		}
88348cac3f4SFlorent Revest 
88448cac3f4SFlorent Revest 		if (fmt[i] == 'p') {
88548cac3f4SFlorent Revest 			sizeof_cur_arg = sizeof(long);
88648cac3f4SFlorent Revest 
88748cac3f4SFlorent Revest 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
88848cac3f4SFlorent Revest 			    fmt[i + 2] == 's') {
88948cac3f4SFlorent Revest 				fmt_ptype = fmt[i + 1];
89048cac3f4SFlorent Revest 				i += 2;
89148cac3f4SFlorent Revest 				goto fmt_str;
89248cac3f4SFlorent Revest 			}
89348cac3f4SFlorent Revest 
89448cac3f4SFlorent Revest 			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
89548cac3f4SFlorent Revest 			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
89648cac3f4SFlorent Revest 			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
89748cac3f4SFlorent Revest 			    fmt[i + 1] == 'S') {
898d9c9e4dbSFlorent Revest 				/* just kernel pointers */
899d9c9e4dbSFlorent Revest 				if (tmp_buf)
900d9c9e4dbSFlorent Revest 					cur_arg = raw_args[num_spec];
901d9c9e4dbSFlorent Revest 				i++;
902d9c9e4dbSFlorent Revest 				goto nocopy_fmt;
903d9c9e4dbSFlorent Revest 			}
904d9c9e4dbSFlorent Revest 
905d9c9e4dbSFlorent Revest 			if (fmt[i + 1] == 'B') {
906d9c9e4dbSFlorent Revest 				if (tmp_buf)  {
90748cac3f4SFlorent Revest 					err = snprintf(tmp_buf,
90848cac3f4SFlorent Revest 						       (tmp_buf_end - tmp_buf),
90948cac3f4SFlorent Revest 						       "%pB",
91048cac3f4SFlorent Revest 						       (void *)(long)raw_args[num_spec]);
91148cac3f4SFlorent Revest 					tmp_buf += (err + 1);
91248cac3f4SFlorent Revest 				}
913d9c9e4dbSFlorent Revest 
91448cac3f4SFlorent Revest 				i++;
915d9c9e4dbSFlorent Revest 				num_spec++;
916d9c9e4dbSFlorent Revest 				continue;
917d9c9e4dbSFlorent Revest 			}
91848cac3f4SFlorent Revest 
91948cac3f4SFlorent Revest 			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
920d9c9e4dbSFlorent Revest 			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
92148cac3f4SFlorent Revest 			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
922d9c9e4dbSFlorent Revest 				err = -EINVAL;
92348cac3f4SFlorent Revest 				goto out;
92448cac3f4SFlorent Revest 			}
92548cac3f4SFlorent Revest 
92648cac3f4SFlorent Revest 			i += 2;
92748cac3f4SFlorent Revest 			if (!tmp_buf)
92848cac3f4SFlorent Revest 				goto nocopy_fmt;
92948cac3f4SFlorent Revest 
93048cac3f4SFlorent Revest 			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
93148cac3f4SFlorent Revest 			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
93248cac3f4SFlorent Revest 				err = -ENOSPC;
93348cac3f4SFlorent Revest 				goto out;
93448cac3f4SFlorent Revest 			}
93548cac3f4SFlorent Revest 
936d9c9e4dbSFlorent Revest 			unsafe_ptr = (char *)(long)raw_args[num_spec];
937d9c9e4dbSFlorent Revest 			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
938d9c9e4dbSFlorent Revest 						       sizeof_cur_ip);
939d9c9e4dbSFlorent Revest 			if (err < 0)
940d9c9e4dbSFlorent Revest 				memset(cur_ip, 0, sizeof_cur_ip);
941d9c9e4dbSFlorent Revest 
942d9c9e4dbSFlorent Revest 			/* hack: bstr_printf expects IP addresses to be
943d9c9e4dbSFlorent Revest 			 * pre-formatted as strings; ironically, the easiest way
944d9c9e4dbSFlorent Revest 			 * to do that is to call snprintf.
945d9c9e4dbSFlorent Revest 			 */
94648cac3f4SFlorent Revest 			ip_spec[2] = fmt[i - 1];
94748cac3f4SFlorent Revest 			ip_spec[3] = fmt[i];
94848cac3f4SFlorent Revest 			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
94948cac3f4SFlorent Revest 				       ip_spec, &cur_ip);
950d9c9e4dbSFlorent Revest 
95148cac3f4SFlorent Revest 			tmp_buf += err + 1;
952d9c9e4dbSFlorent Revest 			num_spec++;
953d9c9e4dbSFlorent Revest 
954d9c9e4dbSFlorent Revest 			continue;
955d9c9e4dbSFlorent Revest 		} else if (fmt[i] == 's') {
95648cac3f4SFlorent Revest 			fmt_ptype = fmt[i];
95748cac3f4SFlorent Revest fmt_str:
958d9c9e4dbSFlorent Revest 			if (fmt[i + 1] != 0 &&
959d9c9e4dbSFlorent Revest 			    !isspace(fmt[i + 1]) &&
960d9c9e4dbSFlorent Revest 			    !ispunct(fmt[i + 1])) {
961d9c9e4dbSFlorent Revest 				err = -EINVAL;
962d9c9e4dbSFlorent Revest 				goto out;
963d9c9e4dbSFlorent Revest 			}
96448cac3f4SFlorent Revest 
965d9c9e4dbSFlorent Revest 			if (!tmp_buf)
96648cac3f4SFlorent Revest 				goto nocopy_fmt;
9673478cfcfSKuniyuki Iwashima 
9683478cfcfSKuniyuki Iwashima 			if (tmp_buf_end == tmp_buf) {
9693478cfcfSKuniyuki Iwashima 				err = -ENOSPC;
9703478cfcfSKuniyuki Iwashima 				goto out;
9713478cfcfSKuniyuki Iwashima 			}
9723478cfcfSKuniyuki Iwashima 
9733478cfcfSKuniyuki Iwashima 			unsafe_ptr = (char *)(long)raw_args[num_spec];
9743478cfcfSKuniyuki Iwashima 			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
9753478cfcfSKuniyuki Iwashima 						    fmt_ptype,
9763478cfcfSKuniyuki Iwashima 						    tmp_buf_end - tmp_buf);
9773478cfcfSKuniyuki Iwashima 			if (err < 0) {
9783478cfcfSKuniyuki Iwashima 				tmp_buf[0] = '\0';
9793478cfcfSKuniyuki Iwashima 				err = 1;
9803478cfcfSKuniyuki Iwashima 			}
981d9c9e4dbSFlorent Revest 
982d9c9e4dbSFlorent Revest 			tmp_buf += err;
98348cac3f4SFlorent Revest 			num_spec++;
984d9c9e4dbSFlorent Revest 
985d9c9e4dbSFlorent Revest 			continue;
98648cac3f4SFlorent Revest 		} else if (fmt[i] == 'c') {
987d9c9e4dbSFlorent Revest 			if (!tmp_buf)
988d9c9e4dbSFlorent Revest 				goto nocopy_fmt;
989d9c9e4dbSFlorent Revest 
99048cac3f4SFlorent Revest 			if (tmp_buf_end == tmp_buf) {
991d9c9e4dbSFlorent Revest 				err = -ENOSPC;
992d9c9e4dbSFlorent Revest 				goto out;
993d9c9e4dbSFlorent Revest 			}
994d9c9e4dbSFlorent Revest 
995d9c9e4dbSFlorent Revest 			*tmp_buf = raw_args[num_spec];
996d9c9e4dbSFlorent Revest 			tmp_buf++;
99748cac3f4SFlorent Revest 			num_spec++;
998d9c9e4dbSFlorent Revest 
999d9c9e4dbSFlorent Revest 			continue;
100048cac3f4SFlorent Revest 		}
1001d9c9e4dbSFlorent Revest 
100248cac3f4SFlorent Revest 		sizeof_cur_arg = sizeof(int);
100348cac3f4SFlorent Revest 
100448cac3f4SFlorent Revest 		if (fmt[i] == 'l') {
100548cac3f4SFlorent Revest 			sizeof_cur_arg = sizeof(long);
100648cac3f4SFlorent Revest 			i++;
100748cac3f4SFlorent Revest 		}
100848cac3f4SFlorent Revest 		if (fmt[i] == 'l') {
100948cac3f4SFlorent Revest 			sizeof_cur_arg = sizeof(long long);
101048cac3f4SFlorent Revest 			i++;
101148cac3f4SFlorent Revest 		}
101248cac3f4SFlorent Revest 
101348cac3f4SFlorent Revest 		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
101448cac3f4SFlorent Revest 		    fmt[i] != 'x' && fmt[i] != 'X') {
101548cac3f4SFlorent Revest 			err = -EINVAL;
101648cac3f4SFlorent Revest 			goto out;
1017d9c9e4dbSFlorent Revest 		}
1018d9c9e4dbSFlorent Revest 
1019d9c9e4dbSFlorent Revest 		if (tmp_buf)
1020d9c9e4dbSFlorent Revest 			cur_arg = raw_args[num_spec];
1021d9c9e4dbSFlorent Revest nocopy_fmt:
1022d9c9e4dbSFlorent Revest 		if (tmp_buf) {
102348cac3f4SFlorent Revest 			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
102448cac3f4SFlorent Revest 			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
1025d9c9e4dbSFlorent Revest 				err = -ENOSPC;
1026d9c9e4dbSFlorent Revest 				goto out;
1027d9c9e4dbSFlorent Revest 			}
10287b15523aSFlorent Revest 
10297b15523aSFlorent Revest 			if (sizeof_cur_arg == 8) {
10307b15523aSFlorent Revest 				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
10317b15523aSFlorent Revest 				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
103248cac3f4SFlorent Revest 			} else {
10337b15523aSFlorent Revest 				*(u32 *)tmp_buf = (u32)(long)cur_arg;
1034335ff499SDave Marchevsky 			}
10357b15523aSFlorent Revest 			tmp_buf += sizeof_cur_arg;
10367b15523aSFlorent Revest 		}
10377b15523aSFlorent Revest 		num_spec++;
10387b15523aSFlorent Revest 	}
10397b15523aSFlorent Revest 
10407b15523aSFlorent Revest 	err = 0;
10417b15523aSFlorent Revest out:
104248cac3f4SFlorent Revest 	if (err)
10437b15523aSFlorent Revest 		bpf_bprintf_cleanup(data);
10447b15523aSFlorent Revest 	return err;
10457b15523aSFlorent Revest }
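/*
 * Illustrative sketch (not part of this file): the format-verification-only
 * mode of bpf_bprintf_prepare() described in the comment above amounts to
 * calling it without requesting bin_args or buf (fmt must be NUL-terminated
 * within fmt_size; num_args is the number of u64 arguments the caller will
 * eventually pass):
 *
 *	struct bpf_bprintf_data data = { .get_bin_args = false };
 *	int err = bpf_bprintf_prepare(fmt, fmt_size, NULL, num_args, &data);
 *
 * No per-cpu buffers are taken in this mode, so no bpf_bprintf_cleanup() call
 * is needed on success; on failure bpf_bprintf_prepare() cleans up by itself.
 * bpf_snprintf() below shows the argument-preparation mode.
 */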
104648cac3f4SFlorent Revest 
10477b15523aSFlorent Revest BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
104848cac3f4SFlorent Revest 	   const void *, args, u32, data_len)
10497b15523aSFlorent Revest {
10507b15523aSFlorent Revest 	struct bpf_bprintf_data data = {
10517b15523aSFlorent Revest 		.get_bin_args	= true,
10527b15523aSFlorent Revest 	};
10537b15523aSFlorent Revest 	int err, num_args;
10547b15523aSFlorent Revest 
10557b15523aSFlorent Revest 	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
10567b15523aSFlorent Revest 	    (data_len && !args))
10577b15523aSFlorent Revest 		return -EINVAL;
10587b15523aSFlorent Revest 	num_args = data_len / 8;
10597b15523aSFlorent Revest 
1060216e3cd2SHao Luo 	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
10617b15523aSFlorent Revest 	 * can safely give an unbounded size.
10627b15523aSFlorent Revest 	 */
10637b15523aSFlorent Revest 	err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
1064b00628b1SAlexei Starovoitov 	if (err < 0)
1065b00628b1SAlexei Starovoitov 		return err;
1066b00628b1SAlexei Starovoitov 
1067b00628b1SAlexei Starovoitov 	err = bstr_printf(str, str_size, fmt, data.bin_args);
1068b00628b1SAlexei Starovoitov 
1069b00628b1SAlexei Starovoitov 	bpf_bprintf_cleanup(&data);
1070b00628b1SAlexei Starovoitov 
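	/* bstr_printf() returns the number of characters that would have been
	 * written, excluding the trailing NUL; the helper's contract is to
	 * report the size including the NUL byte, hence the + 1.
	 */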
1071b00628b1SAlexei Starovoitov 	return err + 1;
1072b00628b1SAlexei Starovoitov }
1073b00628b1SAlexei Starovoitov 
1074b00628b1SAlexei Starovoitov const struct bpf_func_proto bpf_snprintf_proto = {
1075b00628b1SAlexei Starovoitov 	.func		= bpf_snprintf,
1076b00628b1SAlexei Starovoitov 	.gpl_only	= true,
1077b00628b1SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
1078b00628b1SAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
1079b00628b1SAlexei Starovoitov 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1080b00628b1SAlexei Starovoitov 	.arg3_type	= ARG_PTR_TO_CONST_STR,
1081b00628b1SAlexei Starovoitov 	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
1082b00628b1SAlexei Starovoitov 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
1083b00628b1SAlexei Starovoitov };
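/* Illustrative BPF-program-side use of bpf_snprintf() (a sketch, not part of
 * this file; 'pid' and 'uid' are hypothetical u64 variables):
 *
 *	static const char fmt[] = "pid %d uid %u";
 *	u64 args[] = { pid, uid };
 *	char out[64];
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 *
 * The format string must live in read-only memory (ARG_PTR_TO_CONST_STR) and
 * data_len must be a multiple of 8, matching the checks above.
 */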
1084b00628b1SAlexei Starovoitov 
1085b00628b1SAlexei Starovoitov struct bpf_async_cb {
1086b00628b1SAlexei Starovoitov 	struct bpf_map *map;
1087b00628b1SAlexei Starovoitov 	struct bpf_prog *prog;
1088b00628b1SAlexei Starovoitov 	void __rcu *callback_fn;
1089b00628b1SAlexei Starovoitov 	void *value;
1090b00628b1SAlexei Starovoitov 	struct rcu_head rcu;
1091b00628b1SAlexei Starovoitov 	u64 flags;
1092c561d110STom Rix };
1093b00628b1SAlexei Starovoitov 
1094b00628b1SAlexei Starovoitov /* BPF map elements can contain 'struct bpf_timer'.
1095b00628b1SAlexei Starovoitov  * Such map owns all of its BPF timers.
1096b00628b1SAlexei Starovoitov  * 'struct bpf_timer' is allocated as part of map element allocation
1097b00628b1SAlexei Starovoitov  * and it's zero initialized.
1098b00628b1SAlexei Starovoitov  * That space is used to keep 'struct bpf_async_kern'.
1099b00628b1SAlexei Starovoitov  * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1100b00628b1SAlexei Starovoitov  * remembers 'struct bpf_map *' pointer it's part of.
1101b00628b1SAlexei Starovoitov  * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
1102b00628b1SAlexei Starovoitov  * bpf_timer_start() arms the timer.
1103b00628b1SAlexei Starovoitov  * If user space reference to a map goes to zero at this point
1104b00628b1SAlexei Starovoitov  * ops->map_release_uref callback is responsible for cancelling the timers,
1105102acbacSKees Cook  * freeing their memory, and decrementing prog's refcnts.
1106b00628b1SAlexei Starovoitov  * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
1107b00628b1SAlexei Starovoitov  * Inner maps can contain bpf timers as well. ops->map_release_uref is
1108b00628b1SAlexei Starovoitov  * responsible for freeing the timers when an inner map is replaced or deleted by user space.
11093bd916eeSYonghong Song  */
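/* A minimal BPF-program-side sketch of that lifecycle (illustrative only;
 * assumes a GPL program, libbpf, and a map 'hmap' whose value embeds a
 * struct bpf_timer):
 *
 *	struct elem { struct bpf_timer t; };
 *
 *	static int timer_cb(void *map, int *key, struct elem *val)
 *	{
 *		return 0;	(callbacks must return 0, enforced by the verifier)
 *	}
 *
 *	With 'val' looked up from 'hmap':
 *	bpf_timer_init(&val->t, &hmap, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	(fires roughly 1 ms later)
 */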
1110b00628b1SAlexei Starovoitov struct bpf_hrtimer {
1111b00628b1SAlexei Starovoitov 	struct bpf_async_cb cb;
1112b00628b1SAlexei Starovoitov 	struct hrtimer timer;
1113b00628b1SAlexei Starovoitov 	atomic_t cancelling;
1114b00628b1SAlexei Starovoitov };
1115b00628b1SAlexei Starovoitov 
1116b00628b1SAlexei Starovoitov /* the actual struct hidden inside uapi struct bpf_timer */
1117b00628b1SAlexei Starovoitov struct bpf_async_kern {
1118b00628b1SAlexei Starovoitov 	union {
1119b00628b1SAlexei Starovoitov 		struct bpf_async_cb *cb;
1120b00628b1SAlexei Starovoitov 		struct bpf_hrtimer *timer;
1121b00628b1SAlexei Starovoitov 	};
1122b00628b1SAlexei Starovoitov 	/* bpf_spin_lock is used here instead of spinlock_t to make
1123b00628b1SAlexei Starovoitov 	 * sure that it always fits into space reserved by struct bpf_timer
1124b00628b1SAlexei Starovoitov 	 * regardless of LOCKDEP and spinlock debug flags.
1125b00628b1SAlexei Starovoitov 	 */
1126b00628b1SAlexei Starovoitov 	struct bpf_spin_lock lock;
1127b00628b1SAlexei Starovoitov } __attribute__((aligned(8)));
1128b00628b1SAlexei Starovoitov 
1129b00628b1SAlexei Starovoitov enum bpf_async_type {
1130b00628b1SAlexei Starovoitov 	BPF_ASYNC_TYPE_TIMER = 0,
1131102acbacSKees Cook };
1132bfc6bb74SAlexei Starovoitov 
1133b00628b1SAlexei Starovoitov static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1134b00628b1SAlexei Starovoitov 
bpf_timer_cb(struct hrtimer * hrtimer)1135b00628b1SAlexei Starovoitov static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1136b00628b1SAlexei Starovoitov {
1137b00628b1SAlexei Starovoitov 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1138b00628b1SAlexei Starovoitov 	struct bpf_map *map = t->cb.map;
1139b00628b1SAlexei Starovoitov 	void *value = t->cb.value;
1140b00628b1SAlexei Starovoitov 	bpf_callback_t callback_fn;
1141b00628b1SAlexei Starovoitov 	void *key;
1142b00628b1SAlexei Starovoitov 	u32 idx;
1143b00628b1SAlexei Starovoitov 
1144b00628b1SAlexei Starovoitov 	BTF_TYPE_EMIT(struct bpf_timer);
1145b00628b1SAlexei Starovoitov 	callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held());
1146b00628b1SAlexei Starovoitov 	if (!callback_fn)
1147b00628b1SAlexei Starovoitov 		goto out;
1148b00628b1SAlexei Starovoitov 
1149b00628b1SAlexei Starovoitov 	/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1150b00628b1SAlexei Starovoitov 	 * cannot be preempted by another bpf_timer_cb() on the same cpu.
1151b00628b1SAlexei Starovoitov 	 * Remember the timer this callback is servicing to prevent
1152b00628b1SAlexei Starovoitov 	 * deadlock if callback_fn() calls bpf_timer_cancel() or
1153b00628b1SAlexei Starovoitov 	 * bpf_map_delete_elem() on the same timer.
1154b00628b1SAlexei Starovoitov 	 */
1155b00628b1SAlexei Starovoitov 	this_cpu_write(hrtimer_running, t);
1156b00628b1SAlexei Starovoitov 	if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1157b00628b1SAlexei Starovoitov 		struct bpf_array *array = container_of(map, struct bpf_array, map);
1158b00628b1SAlexei Starovoitov 
1159b00628b1SAlexei Starovoitov 		/* compute the key */
1160b00628b1SAlexei Starovoitov 		idx = ((char *)value - array->value) / array->elem_size;
1161b00628b1SAlexei Starovoitov 		key = &idx;
1162b00628b1SAlexei Starovoitov 	} else { /* hash or lru */
1163b00628b1SAlexei Starovoitov 		key = value - round_up(map->key_size, 8);
1164b00628b1SAlexei Starovoitov 	}
1165b00628b1SAlexei Starovoitov 
1166b00628b1SAlexei Starovoitov 	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1167b00628b1SAlexei Starovoitov 	/* The verifier checked that the return value is zero. */
1168b00628b1SAlexei Starovoitov 
1169b00628b1SAlexei Starovoitov 	this_cpu_write(hrtimer_running, NULL);
1170b00628b1SAlexei Starovoitov out:
1171b00628b1SAlexei Starovoitov 	return HRTIMER_NORESTART;
1172b00628b1SAlexei Starovoitov }
1173b00628b1SAlexei Starovoitov 
__bpf_async_init(struct bpf_async_kern * async,struct bpf_map * map,u64 flags,enum bpf_async_type type)1174b00628b1SAlexei Starovoitov static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
1175b00628b1SAlexei Starovoitov 			    enum bpf_async_type type)
1176b00628b1SAlexei Starovoitov {
1177b00628b1SAlexei Starovoitov 	struct bpf_async_cb *cb;
1178db559117SKumar Kartikeya Dwivedi 	struct bpf_hrtimer *t;
1179b00628b1SAlexei Starovoitov 	clockid_t clockid;
1180b00628b1SAlexei Starovoitov 	size_t size;
1181b00628b1SAlexei Starovoitov 	int ret = 0;
1182b00628b1SAlexei Starovoitov 
1183b00628b1SAlexei Starovoitov 	if (in_nmi())
1184b00628b1SAlexei Starovoitov 		return -EOPNOTSUPP;
1185b00628b1SAlexei Starovoitov 
1186b00628b1SAlexei Starovoitov 	switch (type) {
1187b00628b1SAlexei Starovoitov 	case BPF_ASYNC_TYPE_TIMER:
1188b00628b1SAlexei Starovoitov 		size = sizeof(struct bpf_hrtimer);
1189b00628b1SAlexei Starovoitov 		break;
1190b00628b1SAlexei Starovoitov 	default:
1191b00628b1SAlexei Starovoitov 		return -EINVAL;
1192b00628b1SAlexei Starovoitov 	}
1193b00628b1SAlexei Starovoitov 
1194b00628b1SAlexei Starovoitov 	__bpf_spin_lock_irqsave(&async->lock);
1195b00628b1SAlexei Starovoitov 	t = async->timer;
1196b00628b1SAlexei Starovoitov 	if (t) {
1197b00628b1SAlexei Starovoitov 		ret = -EBUSY;
1198b00628b1SAlexei Starovoitov 		goto out;
1199b00628b1SAlexei Starovoitov 	}
1200b00628b1SAlexei Starovoitov 
1201b00628b1SAlexei Starovoitov 	/* allocate hrtimer via map_kmalloc to use memcg accounting */
1202b00628b1SAlexei Starovoitov 	cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
1203b00628b1SAlexei Starovoitov 	if (!cb) {
1204b00628b1SAlexei Starovoitov 		ret = -ENOMEM;
1205b00628b1SAlexei Starovoitov 		goto out;
1206b00628b1SAlexei Starovoitov 	}
1207b00628b1SAlexei Starovoitov 
1208b00628b1SAlexei Starovoitov 	if (type == BPF_ASYNC_TYPE_TIMER) {
1209b00628b1SAlexei Starovoitov 		clockid = flags & (MAX_CLOCKS - 1);
1210b00628b1SAlexei Starovoitov 		t = (struct bpf_hrtimer *)cb;
1211b00628b1SAlexei Starovoitov 
1212b00628b1SAlexei Starovoitov 		atomic_set(&t->cancelling, 0);
1213b00628b1SAlexei Starovoitov 		hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1214b00628b1SAlexei Starovoitov 		t->timer.function = bpf_timer_cb;
1215b00628b1SAlexei Starovoitov 		cb->value = (void *)async - map->record->timer_off;
1216b00628b1SAlexei Starovoitov 	}
1217b00628b1SAlexei Starovoitov 	cb->map = map;
1218b00628b1SAlexei Starovoitov 	cb->prog = NULL;
1219b00628b1SAlexei Starovoitov 	cb->flags = flags;
1220b00628b1SAlexei Starovoitov 	rcu_assign_pointer(cb->callback_fn, NULL);
1221b00628b1SAlexei Starovoitov 
1222b00628b1SAlexei Starovoitov 	WRITE_ONCE(async->cb, cb);
1223b00628b1SAlexei Starovoitov 	/* Guarantee the order between async->cb and map->usercnt. So
1224b00628b1SAlexei Starovoitov 	 * when there are concurrent uref release and bpf timer init, either
1225b00628b1SAlexei Starovoitov 	 * bpf_timer_cancel_and_free() called by uref release reads a non-NULL
1226b00628b1SAlexei Starovoitov 	 * timer or atomic64_read() below returns a zero usercnt.
1227b00628b1SAlexei Starovoitov 	 */
1228b00628b1SAlexei Starovoitov 	smp_mb();
1229b00628b1SAlexei Starovoitov 	if (!atomic64_read(&map->usercnt)) {
1230b00628b1SAlexei Starovoitov 		/* maps with timers must be either held by user space
1231b00628b1SAlexei Starovoitov 		 * or pinned in bpffs.
1232b00628b1SAlexei Starovoitov 		 */
1233b00628b1SAlexei Starovoitov 		WRITE_ONCE(async->cb, NULL);
1234b00628b1SAlexei Starovoitov 		kfree(cb);
1235b00628b1SAlexei Starovoitov 		ret = -EPERM;
1236b00628b1SAlexei Starovoitov 	}
1237b00628b1SAlexei Starovoitov out:
1238b00628b1SAlexei Starovoitov 	__bpf_spin_unlock_irqrestore(&async->lock);
1239b00628b1SAlexei Starovoitov 	return ret;
1240b00628b1SAlexei Starovoitov }
1241b00628b1SAlexei Starovoitov 
BPF_CALL_3(bpf_timer_init,struct bpf_async_kern *,timer,struct bpf_map *,map,u64,flags)1242b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map,
1243b00628b1SAlexei Starovoitov 	   u64, flags)
1244b00628b1SAlexei Starovoitov {
1245b00628b1SAlexei Starovoitov 	clockid_t clockid = flags & (MAX_CLOCKS - 1);
1246b00628b1SAlexei Starovoitov 
1247b00628b1SAlexei Starovoitov 	BUILD_BUG_ON(MAX_CLOCKS != 16);
1248b00628b1SAlexei Starovoitov 	BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer));
1249b00628b1SAlexei Starovoitov 	BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer));
1250b00628b1SAlexei Starovoitov 
1251b00628b1SAlexei Starovoitov 	if (flags >= MAX_CLOCKS ||
1252b00628b1SAlexei Starovoitov 	    /* similar to timerfd except _ALARM variants are not supported */
1253b00628b1SAlexei Starovoitov 	    (clockid != CLOCK_MONOTONIC &&
1254b00628b1SAlexei Starovoitov 	     clockid != CLOCK_REALTIME &&
1255b00628b1SAlexei Starovoitov 	     clockid != CLOCK_BOOTTIME))
1256b00628b1SAlexei Starovoitov 		return -EINVAL;
1257b00628b1SAlexei Starovoitov 
1258b00628b1SAlexei Starovoitov 	return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER);
1259b00628b1SAlexei Starovoitov }
1260b00628b1SAlexei Starovoitov 
1261b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_init_proto = {
1262b00628b1SAlexei Starovoitov 	.func		= bpf_timer_init,
1263b00628b1SAlexei Starovoitov 	.gpl_only	= true,
1264b00628b1SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
1265b00628b1SAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_TIMER,
1266b00628b1SAlexei Starovoitov 	.arg2_type	= ARG_CONST_MAP_PTR,
1267b00628b1SAlexei Starovoitov 	.arg3_type	= ARG_ANYTHING,
1268b00628b1SAlexei Starovoitov };
1269b00628b1SAlexei Starovoitov 
BPF_CALL_3(bpf_timer_set_callback,struct bpf_async_kern *,timer,void *,callback_fn,struct bpf_prog_aux *,aux)1270b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
1271b00628b1SAlexei Starovoitov 	   struct bpf_prog_aux *, aux)
1272b00628b1SAlexei Starovoitov {
1273b00628b1SAlexei Starovoitov 	struct bpf_prog *prev, *prog = aux->prog;
1274b00628b1SAlexei Starovoitov 	struct bpf_hrtimer *t;
1275b00628b1SAlexei Starovoitov 	int ret = 0;
1276b00628b1SAlexei Starovoitov 
1277b00628b1SAlexei Starovoitov 	if (in_nmi())
1278b00628b1SAlexei Starovoitov 		return -EOPNOTSUPP;
1279b00628b1SAlexei Starovoitov 	__bpf_spin_lock_irqsave(&timer->lock);
1280b00628b1SAlexei Starovoitov 	t = timer->timer;
1281b00628b1SAlexei Starovoitov 	if (!t) {
1282b00628b1SAlexei Starovoitov 		ret = -EINVAL;
1283b00628b1SAlexei Starovoitov 		goto out;
1284b00628b1SAlexei Starovoitov 	}
1285b00628b1SAlexei Starovoitov 	if (!atomic64_read(&t->cb.map->usercnt)) {
1286b00628b1SAlexei Starovoitov 		/* maps with timers must be either held by user space
1287b00628b1SAlexei Starovoitov 		 * or pinned in bpffs. Otherwise timer might still be
1288b00628b1SAlexei Starovoitov 		 * running even when bpf prog is detached and user space
1289b00628b1SAlexei Starovoitov 		 * is gone, since map_release_uref won't ever be called.
1290b00628b1SAlexei Starovoitov 		 */
1291b00628b1SAlexei Starovoitov 		ret = -EPERM;
1292b00628b1SAlexei Starovoitov 		goto out;
1293b00628b1SAlexei Starovoitov 	}
1294b00628b1SAlexei Starovoitov 	prev = t->cb.prog;
1295b00628b1SAlexei Starovoitov 	if (prev != prog) {
1296b00628b1SAlexei Starovoitov 		/* Bump prog refcnt once. Every bpf_timer_set_callback()
1297b00628b1SAlexei Starovoitov 		 * can pick different callback_fn-s within the same prog.
1298b00628b1SAlexei Starovoitov 		 */
1299b00628b1SAlexei Starovoitov 		prog = bpf_prog_inc_not_zero(prog);
1300b00628b1SAlexei Starovoitov 		if (IS_ERR(prog)) {
1301b00628b1SAlexei Starovoitov 			ret = PTR_ERR(prog);
1302b00628b1SAlexei Starovoitov 			goto out;
1303b00628b1SAlexei Starovoitov 		}
1304b00628b1SAlexei Starovoitov 		if (prev)
1305b00628b1SAlexei Starovoitov 			/* Drop prev prog refcnt when swapping with new prog */
1306b00628b1SAlexei Starovoitov 			bpf_prog_put(prev);
1307b00628b1SAlexei Starovoitov 		t->cb.prog = prog;
1308b00628b1SAlexei Starovoitov 	}
1309b00628b1SAlexei Starovoitov 	rcu_assign_pointer(t->cb.callback_fn, callback_fn);
1310b00628b1SAlexei Starovoitov out:
1311b00628b1SAlexei Starovoitov 	__bpf_spin_unlock_irqrestore(&timer->lock);
1312b00628b1SAlexei Starovoitov 	return ret;
1313b00628b1SAlexei Starovoitov }
1314b00628b1SAlexei Starovoitov 
1315b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1316b00628b1SAlexei Starovoitov 	.func		= bpf_timer_set_callback,
1317b00628b1SAlexei Starovoitov 	.gpl_only	= true,
1318b00628b1SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
1319b00628b1SAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_TIMER,
1320b00628b1SAlexei Starovoitov 	.arg2_type	= ARG_PTR_TO_FUNC,
1321b00628b1SAlexei Starovoitov };
1322b00628b1SAlexei Starovoitov 
BPF_CALL_3(bpf_timer_start,struct bpf_async_kern *,timer,u64,nsecs,u64,flags)1323b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags)
1324b00628b1SAlexei Starovoitov {
1325b00628b1SAlexei Starovoitov 	struct bpf_hrtimer *t;
1326b00628b1SAlexei Starovoitov 	int ret = 0;
1327b00628b1SAlexei Starovoitov 	enum hrtimer_mode mode;
1328b00628b1SAlexei Starovoitov 
1329b00628b1SAlexei Starovoitov 	if (in_nmi())
1330b00628b1SAlexei Starovoitov 		return -EOPNOTSUPP;
1331b00628b1SAlexei Starovoitov 	if (flags > BPF_F_TIMER_ABS)
1332b00628b1SAlexei Starovoitov 		return -EINVAL;
1333b00628b1SAlexei Starovoitov 	__bpf_spin_lock_irqsave(&timer->lock);
1334b00628b1SAlexei Starovoitov 	t = timer->timer;
1335b00628b1SAlexei Starovoitov 	if (!t || !t->cb.prog) {
1336b00628b1SAlexei Starovoitov 		ret = -EINVAL;
1337b00628b1SAlexei Starovoitov 		goto out;
1338b00628b1SAlexei Starovoitov 	}
1339b00628b1SAlexei Starovoitov 
1340b00628b1SAlexei Starovoitov 	if (flags & BPF_F_TIMER_ABS)
1341b00628b1SAlexei Starovoitov 		mode = HRTIMER_MODE_ABS_SOFT;
1342b00628b1SAlexei Starovoitov 	else
1343b00628b1SAlexei Starovoitov 		mode = HRTIMER_MODE_REL_SOFT;
1344b00628b1SAlexei Starovoitov 
1345b00628b1SAlexei Starovoitov 	hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
1346b00628b1SAlexei Starovoitov out:
1347b00628b1SAlexei Starovoitov 	__bpf_spin_unlock_irqrestore(&timer->lock);
1348b00628b1SAlexei Starovoitov 	return ret;
1349b00628b1SAlexei Starovoitov }
1350b00628b1SAlexei Starovoitov 
1351b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_start_proto = {
1352b00628b1SAlexei Starovoitov 	.func		= bpf_timer_start,
1353b00628b1SAlexei Starovoitov 	.gpl_only	= true,
1354b00628b1SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
1355b00628b1SAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_TIMER,
1356b00628b1SAlexei Starovoitov 	.arg2_type	= ARG_ANYTHING,
1357b00628b1SAlexei Starovoitov 	.arg3_type	= ARG_ANYTHING,
1358b00628b1SAlexei Starovoitov };
1359b00628b1SAlexei Starovoitov 
drop_prog_refcnt(struct bpf_async_cb * async)1360b00628b1SAlexei Starovoitov static void drop_prog_refcnt(struct bpf_async_cb *async)
1361b00628b1SAlexei Starovoitov {
1362b00628b1SAlexei Starovoitov 	struct bpf_prog *prog = async->prog;
1363b00628b1SAlexei Starovoitov 
1364b00628b1SAlexei Starovoitov 	if (prog) {
1365b00628b1SAlexei Starovoitov 		bpf_prog_put(prog);
1366b00628b1SAlexei Starovoitov 		async->prog = NULL;
1367b00628b1SAlexei Starovoitov 		rcu_assign_pointer(async->callback_fn, NULL);
1368b00628b1SAlexei Starovoitov 	}
1369b00628b1SAlexei Starovoitov }
1370b00628b1SAlexei Starovoitov 
BPF_CALL_1(bpf_timer_cancel,struct bpf_async_kern *,timer)1371b00628b1SAlexei Starovoitov BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
1372b00628b1SAlexei Starovoitov {
1373b00628b1SAlexei Starovoitov 	struct bpf_hrtimer *t, *cur_t;
1374b00628b1SAlexei Starovoitov 	bool inc = false;
1375b00628b1SAlexei Starovoitov 	int ret = 0;
1376b00628b1SAlexei Starovoitov 
1377b00628b1SAlexei Starovoitov 	if (in_nmi())
1378c0a5a21cSKumar Kartikeya Dwivedi 		return -EOPNOTSUPP;
1379c0a5a21cSKumar Kartikeya Dwivedi 	rcu_read_lock();
1380c0a5a21cSKumar Kartikeya Dwivedi 	__bpf_spin_lock_irqsave(&timer->lock);
1381c0a5a21cSKumar Kartikeya Dwivedi 	t = timer->timer;
1382c0a5a21cSKumar Kartikeya Dwivedi 	if (!t) {
1383c0a5a21cSKumar Kartikeya Dwivedi 		ret = -EINVAL;
1384c0a5a21cSKumar Kartikeya Dwivedi 		goto out;
1385c0a5a21cSKumar Kartikeya Dwivedi 	}
138647e34cb7SDave Marchevsky 
138747e34cb7SDave Marchevsky 	cur_t = this_cpu_read(hrtimer_running);
1388c0a5a21cSKumar Kartikeya Dwivedi 	if (cur_t == t) {
1389dc368e1cSJoanne Koong 		/* If bpf callback_fn is trying to bpf_timer_cancel()
1390c0a5a21cSKumar Kartikeya Dwivedi 		 * its own timer the hrtimer_cancel() will deadlock
1391c0a5a21cSKumar Kartikeya Dwivedi 		 * since it waits for callback_fn to finish.
1392c0a5a21cSKumar Kartikeya Dwivedi 		 */
1393c0a5a21cSKumar Kartikeya Dwivedi 		ret = -EDEADLK;
1394c0a5a21cSKumar Kartikeya Dwivedi 		goto out;
1395c0a5a21cSKumar Kartikeya Dwivedi 	}
1396c0a5a21cSKumar Kartikeya Dwivedi 
1397c0a5a21cSKumar Kartikeya Dwivedi 	/* Only account in-flight cancellations when invoked from a timer
1398c0a5a21cSKumar Kartikeya Dwivedi 	 * callback, since we want to avoid waiting only if other _callbacks_
1399263ae152SJoanne Koong 	 * are waiting on us, to avoid introducing lockups. Non-callback paths
1400263ae152SJoanne Koong 	 * are ok, since nobody would synchronously wait for their completion.
1401263ae152SJoanne Koong 	 */
1402263ae152SJoanne Koong 	if (!cur_t)
1403263ae152SJoanne Koong 		goto drop;
140413bbbfbeSJoanne Koong 	atomic_inc(&t->cancelling);
140513bbbfbeSJoanne Koong 	/* Need full barrier after relaxed atomic_inc */
140613bbbfbeSJoanne Koong 	smp_mb__after_atomic();
140727060531SKumar Kartikeya Dwivedi 	inc = true;
140813bbbfbeSJoanne Koong 	if (atomic_read(&cur_t->cancelling)) {
140913bbbfbeSJoanne Koong 		/* We're cancelling timer t, while some other timer callback is
141013bbbfbeSJoanne Koong 		 * attempting to cancel us. In such a case, it might be possible
1411263ae152SJoanne Koong 		 * that timer t belongs to the other callback, or some other
1412263ae152SJoanne Koong 		 * callback waiting upon it (creating transitive dependencies
1413263ae152SJoanne Koong 		 * upon us), and we will enter a deadlock if we continue
1414263ae152SJoanne Koong 		 * cancelling and waiting for it synchronously, since it might
1415263ae152SJoanne Koong 		 * do the same. Bail!
1416263ae152SJoanne Koong 		 */
141727060531SKumar Kartikeya Dwivedi 		ret = -EDEADLK;
141813bbbfbeSJoanne Koong 		goto out;
141913bbbfbeSJoanne Koong 	}
142013bbbfbeSJoanne Koong drop:
142113bbbfbeSJoanne Koong 	drop_prog_refcnt(&t->cb);
1422bc34dee6SJoanne Koong out:
1423263ae152SJoanne Koong 	__bpf_spin_unlock_irqrestore(&timer->lock);
1424263ae152SJoanne Koong 	/* Cancel the timer and wait for associated callback to finish
1425263ae152SJoanne Koong 	 * if it was running.
1426263ae152SJoanne Koong 	 */
1427bc34dee6SJoanne Koong 	ret = ret ?: hrtimer_cancel(&t->timer);
1428263ae152SJoanne Koong 	if (inc)
1429263ae152SJoanne Koong 		atomic_dec(&t->cancelling);
1430263ae152SJoanne Koong 	rcu_read_unlock();
1431263ae152SJoanne Koong 	return ret;
1432263ae152SJoanne Koong }
1433263ae152SJoanne Koong 
1434263ae152SJoanne Koong static const struct bpf_func_proto bpf_timer_cancel_proto = {
1435263ae152SJoanne Koong 	.func		= bpf_timer_cancel,
1436bc34dee6SJoanne Koong 	.gpl_only	= true,
1437263ae152SJoanne Koong 	.ret_type	= RET_INTEGER,
1438263ae152SJoanne Koong 	.arg1_type	= ARG_PTR_TO_TIMER,
1439263ae152SJoanne Koong };
1440263ae152SJoanne Koong 
144127060531SKumar Kartikeya Dwivedi /* This function is called by map_delete/update_elem for individual element and
144213bbbfbeSJoanne Koong  * by ops->map_release_uref when the user space reference to a map reaches zero.
144313bbbfbeSJoanne Koong  */
bpf_timer_cancel_and_free(void * val)144413bbbfbeSJoanne Koong void bpf_timer_cancel_and_free(void *val)
144513bbbfbeSJoanne Koong {
144613bbbfbeSJoanne Koong 	struct bpf_async_kern *timer = val;
144713bbbfbeSJoanne Koong 	struct bpf_hrtimer *t;
144813bbbfbeSJoanne Koong 
144913bbbfbeSJoanne Koong 	/* Performance optimization: read timer->timer without lock first. */
145013bbbfbeSJoanne Koong 	if (!READ_ONCE(timer->timer))
1451263ae152SJoanne Koong 		return;
1452263ae152SJoanne Koong 
1453263ae152SJoanne Koong 	__bpf_spin_lock_irqsave(&timer->lock);
1454263ae152SJoanne Koong 	/* re-read it under lock */
145500f14641SRoberto Sassu 	t = timer->timer;
145600f14641SRoberto Sassu 	if (!t)
1457263ae152SJoanne Koong 		goto out;
1458263ae152SJoanne Koong 	drop_prog_refcnt(&t->cb);
1459263ae152SJoanne Koong 	/* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1460263ae152SJoanne Koong 	 * this timer, since it won't be initialized.
1461263ae152SJoanne Koong 	 */
1462263ae152SJoanne Koong 	WRITE_ONCE(timer->timer, NULL);
1463263ae152SJoanne Koong out:
1464263ae152SJoanne Koong 	__bpf_spin_unlock_irqrestore(&timer->lock);
1465263ae152SJoanne Koong 	if (!t)
1466263ae152SJoanne Koong 		return;
1467263ae152SJoanne Koong 	/* Cancel the timer and wait for callback to complete if it was running.
1468263ae152SJoanne Koong 	 * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1469263ae152SJoanne Koong 	 * right after for both preallocated and non-preallocated maps.
1470263ae152SJoanne Koong 	 * The timer->timer = NULL was already done and no code path can
1471263ae152SJoanne Koong 	 * see address 't' anymore.
1472263ae152SJoanne Koong 	 *
1473263ae152SJoanne Koong 	 * Check that bpf_map_delete/update_elem() wasn't called from timer
1474263ae152SJoanne Koong 	 * callback_fn. In such case don't call hrtimer_cancel() (since it will
1475263ae152SJoanne Koong 	 * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1476dc368e1cSJoanne Koong 	 * return -1). Though callback_fn is still running on this cpu it's
1477263ae152SJoanne Koong 	 * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1478263ae152SJoanne Koong 	 * from 't'. The bpf subprog callback_fn won't be able to access 't',
1479263ae152SJoanne Koong 	 * since timer->timer = NULL was already done. The timer will be
1480263ae152SJoanne Koong 	 * effectively cancelled because bpf_timer_cb() will return
1481263ae152SJoanne Koong 	 * HRTIMER_NORESTART.
1482263ae152SJoanne Koong 	 */
1483263ae152SJoanne Koong 	if (this_cpu_read(hrtimer_running) != t)
1484263ae152SJoanne Koong 		hrtimer_cancel(&t->timer);
1485263ae152SJoanne Koong 	kfree_rcu(t, cb.rcu);
148627060531SKumar Kartikeya Dwivedi }
1487f8d3da4eSJoanne Koong 
BPF_CALL_2(bpf_kptr_xchg,void *,map_value,void *,ptr)148813bbbfbeSJoanne Koong BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
148913bbbfbeSJoanne Koong {
149013bbbfbeSJoanne Koong 	unsigned long *kptr = map_value;
1491f8d3da4eSJoanne Koong 
149213bbbfbeSJoanne Koong 	return xchg(kptr, (unsigned long)ptr);
149313bbbfbeSJoanne Koong }
149413bbbfbeSJoanne Koong 
149513bbbfbeSJoanne Koong /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
149613bbbfbeSJoanne Koong  * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to
149713bbbfbeSJoanne Koong  * denote type that verifier will determine.
1498*76d16077SKumar Kartikeya Dwivedi  */
1499*76d16077SKumar Kartikeya Dwivedi static const struct bpf_func_proto bpf_kptr_xchg_proto = {
1500*76d16077SKumar Kartikeya Dwivedi 	.func         = bpf_kptr_xchg,
1501*76d16077SKumar Kartikeya Dwivedi 	.gpl_only     = false,
1502*76d16077SKumar Kartikeya Dwivedi 	.ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
150313bbbfbeSJoanne Koong 	.ret_btf_id   = BPF_PTR_POISON,
150413bbbfbeSJoanne Koong 	.arg1_type    = ARG_PTR_TO_KPTR,
150513bbbfbeSJoanne Koong 	.arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
150613bbbfbeSJoanne Koong 	.arg2_btf_id  = BPF_PTR_POISON,
1507dc368e1cSJoanne Koong };
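/* Illustrative BPF-program-side pattern (a sketch; 'struct foo' and the map
 * value layout are hypothetical, and bpf_obj_new()/bpf_obj_drop() are the
 * wrappers from the selftests' bpf_experimental.h):
 *
 *	struct map_val { struct foo __kptr *ptr; };
 *
 *	new = bpf_obj_new(typeof(*new));
 *	old = bpf_kptr_xchg(&v->ptr, new);
 *	if (old)
 *		bpf_obj_drop(old);
 */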
150813bbbfbeSJoanne Koong 
150913bbbfbeSJoanne Koong /* Since the upper 8 bits of dynptr->size are reserved, the
151013bbbfbeSJoanne Koong  * maximum supported size is 2^24 - 1.
151113bbbfbeSJoanne Koong  */
151213bbbfbeSJoanne Koong #define DYNPTR_MAX_SIZE	((1UL << 24) - 1)
151327060531SKumar Kartikeya Dwivedi #define DYNPTR_TYPE_SHIFT	28
151413bbbfbeSJoanne Koong #define DYNPTR_SIZE_MASK	0xFFFFFF
1515f8d3da4eSJoanne Koong #define DYNPTR_RDONLY_BIT	BIT(31)
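/* Resulting layout of dynptr->size: bits 0-23 hold the size, bits 28-30 the
 * dynptr type and bit 31 the read-only flag; bits 24-27 are currently unused.
 */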
151613bbbfbeSJoanne Koong 
__bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern * ptr)151713bbbfbeSJoanne Koong static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
151827060531SKumar Kartikeya Dwivedi {
1519f8d3da4eSJoanne Koong 	return ptr->size & DYNPTR_RDONLY_BIT;
152013bbbfbeSJoanne Koong }
152113bbbfbeSJoanne Koong 
bpf_dynptr_set_rdonly(struct bpf_dynptr_kern * ptr)152213bbbfbeSJoanne Koong void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
1523f8d3da4eSJoanne Koong {
152413bbbfbeSJoanne Koong 	ptr->size |= DYNPTR_RDONLY_BIT;
152513bbbfbeSJoanne Koong }
152613bbbfbeSJoanne Koong 
bpf_dynptr_set_type(struct bpf_dynptr_kern * ptr,enum bpf_dynptr_type type)152713bbbfbeSJoanne Koong static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
152813bbbfbeSJoanne Koong {
152913bbbfbeSJoanne Koong 	ptr->size |= type << DYNPTR_TYPE_SHIFT;
1530*76d16077SKumar Kartikeya Dwivedi }
1531*76d16077SKumar Kartikeya Dwivedi 
bpf_dynptr_get_type(const struct bpf_dynptr_kern * ptr)1532*76d16077SKumar Kartikeya Dwivedi static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
1533*76d16077SKumar Kartikeya Dwivedi {
1534*76d16077SKumar Kartikeya Dwivedi 	return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
153513bbbfbeSJoanne Koong }
153613bbbfbeSJoanne Koong 
__bpf_dynptr_size(const struct bpf_dynptr_kern * ptr)153713bbbfbeSJoanne Koong u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
153813bbbfbeSJoanne Koong {
1539dc368e1cSJoanne Koong 	return ptr->size & DYNPTR_SIZE_MASK;
154013bbbfbeSJoanne Koong }
154113bbbfbeSJoanne Koong 
bpf_dynptr_set_size(struct bpf_dynptr_kern * ptr,u32 new_size)154213bbbfbeSJoanne Koong static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
154327060531SKumar Kartikeya Dwivedi {
154413bbbfbeSJoanne Koong 	u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
154513bbbfbeSJoanne Koong 
154613bbbfbeSJoanne Koong 	ptr->size = new_size | metadata;
1547f8d3da4eSJoanne Koong }
154813bbbfbeSJoanne Koong 
bpf_dynptr_check_size(u32 size)154913bbbfbeSJoanne Koong int bpf_dynptr_check_size(u32 size)
155027060531SKumar Kartikeya Dwivedi {
155134d4ef57SJoanne Koong 	return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
155234d4ef57SJoanne Koong }
155334d4ef57SJoanne Koong 
bpf_dynptr_init(struct bpf_dynptr_kern * ptr,void * data,enum bpf_dynptr_type type,u32 offset,u32 size)155434d4ef57SJoanne Koong void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
155534d4ef57SJoanne Koong 		     enum bpf_dynptr_type type, u32 offset, u32 size)
155634d4ef57SJoanne Koong {
155734d4ef57SJoanne Koong 	ptr->data = data;
155834d4ef57SJoanne Koong 	ptr->offset = offset;
155934d4ef57SJoanne Koong 	ptr->size = size;
156034d4ef57SJoanne Koong 	bpf_dynptr_set_type(ptr, type);
156134d4ef57SJoanne Koong }
156234d4ef57SJoanne Koong 
bpf_dynptr_set_null(struct bpf_dynptr_kern * ptr)156334d4ef57SJoanne Koong void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
156434d4ef57SJoanne Koong {
156534d4ef57SJoanne Koong 	memset(ptr, 0, sizeof(*ptr));
156634d4ef57SJoanne Koong }
1567dc368e1cSJoanne Koong 
bpf_dynptr_check_off_len(const struct bpf_dynptr_kern * ptr,u32 offset,u32 len)156834d4ef57SJoanne Koong static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
156934d4ef57SJoanne Koong {
157034d4ef57SJoanne Koong 	u32 size = __bpf_dynptr_size(ptr);
157127060531SKumar Kartikeya Dwivedi 
157234d4ef57SJoanne Koong 	if (len > size || offset > size - len)
157334d4ef57SJoanne Koong 		return -E2BIG;
157434d4ef57SJoanne Koong 
157534d4ef57SJoanne Koong 	return 0;
1576f470378cSJohn Fastabend }
1577a396eda5SDaniel Xu 
BPF_CALL_4(bpf_dynptr_from_mem,void *,data,u32,size,u64,flags,struct bpf_dynptr_kern *,ptr)1578f470378cSJohn Fastabend BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
1579f470378cSJohn Fastabend {
1580f470378cSJohn Fastabend 	int err;
1581f470378cSJohn Fastabend 
1582dd6e10fbSDaniel Xu 	BTF_TYPE_EMIT(struct bpf_dynptr);
1583f470378cSJohn Fastabend 
15846890896bSStanislav Fomichev 	err = bpf_dynptr_check_size(size);
15856890896bSStanislav Fomichev 	if (err)
15866890896bSStanislav Fomichev 		goto error;
15876890896bSStanislav Fomichev 
15886890896bSStanislav Fomichev 	/* flags is currently unsupported */
15896890896bSStanislav Fomichev 	if (flags) {
15906890896bSStanislav Fomichev 		err = -EINVAL;
15916890896bSStanislav Fomichev 		goto error;
15926890896bSStanislav Fomichev 	}
15936890896bSStanislav Fomichev 
15946890896bSStanislav Fomichev 	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
15956890896bSStanislav Fomichev 
15966890896bSStanislav Fomichev 	return 0;
15976890896bSStanislav Fomichev 
15986890896bSStanislav Fomichev error:
15996890896bSStanislav Fomichev 	bpf_dynptr_set_null(ptr);
160007343110SFeng Zhou 	return err;
160107343110SFeng Zhou }
16026890896bSStanislav Fomichev 
16036890896bSStanislav Fomichev static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
16046890896bSStanislav Fomichev 	.func		= bpf_dynptr_from_mem,
16056890896bSStanislav Fomichev 	.gpl_only	= false,
16066890896bSStanislav Fomichev 	.ret_type	= RET_INTEGER,
16076890896bSStanislav Fomichev 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
16086890896bSStanislav Fomichev 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
16096890896bSStanislav Fomichev 	.arg3_type	= ARG_ANYTHING,
16106890896bSStanislav Fomichev 	.arg4_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
16116890896bSStanislav Fomichev };
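/* Illustrative BPF-program-side use (a sketch, not part of this file; 'buf'
 * and 'val' are hypothetical program-local variables):
 *
 *	struct bpf_dynptr dptr;
 *	char buf[64];
 *
 *	if (!bpf_dynptr_from_mem(buf, sizeof(buf), 0, &dptr))
 *		bpf_dynptr_write(&dptr, 0, &val, sizeof(val), 0);
 */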
161271d19214SMaciej Żenczykowski 
BPF_CALL_5(bpf_dynptr_read,void *,dst,u32,len,const struct bpf_dynptr_kern *,src,u32,offset,u64,flags)161371d19214SMaciej Żenczykowski BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
1614c8996c98SJesper Dangaard Brouer 	   u32, offset, u64, flags)
1615c8996c98SJesper Dangaard Brouer {
1616457f4436SAndrii Nakryiko 	enum bpf_dynptr_type type;
1617457f4436SAndrii Nakryiko 	int err;
1618457f4436SAndrii Nakryiko 
1619457f4436SAndrii Nakryiko 	if (!src->data || flags)
1620457f4436SAndrii Nakryiko 		return -EINVAL;
1621457f4436SAndrii Nakryiko 
1622457f4436SAndrii Nakryiko 	err = bpf_dynptr_check_off_len(src, offset, len);
1623457f4436SAndrii Nakryiko 	if (err)
1624457f4436SAndrii Nakryiko 		return err;
1625457f4436SAndrii Nakryiko 
1626c5fb1993SHou Tao 	type = bpf_dynptr_get_type(src);
1627c5fb1993SHou Tao 
16288a67f2deSStanislav Fomichev 	switch (type) {
16298a67f2deSStanislav Fomichev 	case BPF_DYNPTR_TYPE_LOCAL:
16308a67f2deSStanislav Fomichev 	case BPF_DYNPTR_TYPE_RINGBUF:
16318a67f2deSStanislav Fomichev 		/* Source and destination may possibly overlap, hence use memmove to
16326890896bSStanislav Fomichev 		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
16336890896bSStanislav Fomichev 		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
16346890896bSStanislav Fomichev 		 */
16356890896bSStanislav Fomichev 		memmove(dst, src->data + src->offset + offset, len);
16362c78ee89SAlexei Starovoitov 		return 0;
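	/* skb and xdp payloads may be non-linear, so defer to the byte-copy
	 * helpers below instead of a direct memmove().
	 */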
16376890896bSStanislav Fomichev 	case BPF_DYNPTR_TYPE_SKB:
16386890896bSStanislav Fomichev 		return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
16396890896bSStanislav Fomichev 	case BPF_DYNPTR_TYPE_XDP:
16406890896bSStanislav Fomichev 		return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
16416890896bSStanislav Fomichev 	default:
16426890896bSStanislav Fomichev 		WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
16436890896bSStanislav Fomichev 		return -EFAULT;
16446890896bSStanislav Fomichev 	}
16456890896bSStanislav Fomichev }
1646b7906b70SAndrii Nakryiko 
1647eaa6bcb7SHao Luo static const struct bpf_func_proto bpf_dynptr_read_proto = {
1648b7906b70SAndrii Nakryiko 	.func		= bpf_dynptr_read,
164963d9b80dSHao Luo 	.gpl_only	= false,
1650b00628b1SAlexei Starovoitov 	.ret_type	= RET_INTEGER,
1651b00628b1SAlexei Starovoitov 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
1652b00628b1SAlexei Starovoitov 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
1653b00628b1SAlexei Starovoitov 	.arg3_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1654b00628b1SAlexei Starovoitov 	.arg4_type	= ARG_ANYTHING,
1655b00628b1SAlexei Starovoitov 	.arg5_type	= ARG_ANYTHING,
1656b00628b1SAlexei Starovoitov };
1657b00628b1SAlexei Starovoitov 
BPF_CALL_5(bpf_dynptr_write,const struct bpf_dynptr_kern *,dst,u32,offset,void *,src,u32,len,u64,flags)1658c0a5a21cSKumar Kartikeya Dwivedi BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
1659c0a5a21cSKumar Kartikeya Dwivedi 	   u32, len, u64, flags)
16605679ff2fSKumar Kartikeya Dwivedi {
16615679ff2fSKumar Kartikeya Dwivedi 	enum bpf_dynptr_type type;
16625679ff2fSKumar Kartikeya Dwivedi 	int err;
16635679ff2fSKumar Kartikeya Dwivedi 
166420571567SDavid Vernet 	if (!dst->data || __bpf_dynptr_is_rdonly(dst))
166520571567SDavid Vernet 		return -EINVAL;
16668addbfc7SKumar Kartikeya Dwivedi 
16678addbfc7SKumar Kartikeya Dwivedi 	err = bpf_dynptr_check_off_len(dst, offset, len);
16688addbfc7SKumar Kartikeya Dwivedi 	if (err)
16698addbfc7SKumar Kartikeya Dwivedi 		return err;
16708addbfc7SKumar Kartikeya Dwivedi 
16718addbfc7SKumar Kartikeya Dwivedi 	type = bpf_dynptr_get_type(dst);
16728addbfc7SKumar Kartikeya Dwivedi 
16738addbfc7SKumar Kartikeya Dwivedi 	switch (type) {
16748addbfc7SKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_LOCAL:
16758addbfc7SKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_RINGBUF:
16768addbfc7SKumar Kartikeya Dwivedi 		if (flags)
16778addbfc7SKumar Kartikeya Dwivedi 			return -EINVAL;
16788addbfc7SKumar Kartikeya Dwivedi 		/* Source and destination may possibly overlap, hence use memmove to
16798addbfc7SKumar Kartikeya Dwivedi 		 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1680c4bcfb38SYonghong Song 		 * pointing to overlapping PTR_TO_MAP_VALUE regions.
1681c4bcfb38SYonghong Song 		 */
1682c4bcfb38SYonghong Song 		memmove(dst->data + dst->offset + offset, src, len);
1683c4bcfb38SYonghong Song 		return 0;
1684c4bcfb38SYonghong Song 	case BPF_DYNPTR_TYPE_SKB:
1685c4bcfb38SYonghong Song 		return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
16866890896bSStanislav Fomichev 					     flags);
1687f470378cSJohn Fastabend 	case BPF_DYNPTR_TYPE_XDP:
1688f470378cSJohn Fastabend 		if (flags)
1689f470378cSJohn Fastabend 			return -EINVAL;
1690f470378cSJohn Fastabend 		return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
1691f470378cSJohn Fastabend 	default:
1692f470378cSJohn Fastabend 		WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
1693f470378cSJohn Fastabend 		return -EFAULT;
169461ca36c8STobias Klauser 	}
169561ca36c8STobias Klauser }
1696f470378cSJohn Fastabend 
1697f470378cSJohn Fastabend static const struct bpf_func_proto bpf_dynptr_write_proto = {
1698a396eda5SDaniel Xu 	.func		= bpf_dynptr_write,
1699a396eda5SDaniel Xu 	.gpl_only	= false,
1700f470378cSJohn Fastabend 	.ret_type	= RET_INTEGER,
1701f470378cSJohn Fastabend 	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1702f470378cSJohn Fastabend 	.arg2_type	= ARG_ANYTHING,
170371330842SDaniel Borkmann 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1704ff40e510SDaniel Borkmann 	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
1705f470378cSJohn Fastabend 	.arg5_type	= ARG_ANYTHING,
1706f470378cSJohn Fastabend };
1707f470378cSJohn Fastabend 
BPF_CALL_3(bpf_dynptr_data,const struct bpf_dynptr_kern *,ptr,u32,offset,u32,len)170871330842SDaniel Borkmann BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
1709ff40e510SDaniel Borkmann {
171061ca36c8STobias Klauser 	enum bpf_dynptr_type type;
171161ca36c8STobias Klauser 	int err;
17127b15523aSFlorent Revest 
17137b15523aSFlorent Revest 	if (!ptr->data)
1714dd6e10fbSDaniel Xu 		return 0;
1715dd6e10fbSDaniel Xu 
171610aceb62SDave Marchevsky 	err = bpf_dynptr_check_off_len(ptr, offset, len);
171710aceb62SDave Marchevsky 	if (err)
1718f470378cSJohn Fastabend 		return 0;
17196890896bSStanislav Fomichev 
17206890896bSStanislav Fomichev 	if (__bpf_dynptr_is_rdonly(ptr))
17216890896bSStanislav Fomichev 		return 0;
172213379059SArtem Savkov 
1723f0c5941fSKumar Kartikeya Dwivedi 	type = bpf_dynptr_get_type(ptr);
1724f0c5941fSKumar Kartikeya Dwivedi 
1725f0c5941fSKumar Kartikeya Dwivedi 	switch (type) {
1726f0c5941fSKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_LOCAL:
1727f0c5941fSKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_RINGBUF:
1728f0c5941fSKumar Kartikeya Dwivedi 		return (unsigned long)(ptr->data + ptr->offset + offset);
1729f0c5941fSKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_SKB:
1730f0c5941fSKumar Kartikeya Dwivedi 	case BPF_DYNPTR_TYPE_XDP:
1731f0c5941fSKumar Kartikeya Dwivedi 		/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
1732f0c5941fSKumar Kartikeya Dwivedi 		return 0;
1733f0c5941fSKumar Kartikeya Dwivedi 	default:
1734f0c5941fSKumar Kartikeya Dwivedi 		WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
1735f0c5941fSKumar Kartikeya Dwivedi 		return 0;
1736f0c5941fSKumar Kartikeya Dwivedi 	}
1737f0c5941fSKumar Kartikeya Dwivedi }
1738f0c5941fSKumar Kartikeya Dwivedi 
1739f0c5941fSKumar Kartikeya Dwivedi static const struct bpf_func_proto bpf_dynptr_data_proto = {
1740f0c5941fSKumar Kartikeya Dwivedi 	.func		= bpf_dynptr_data,
1741f0c5941fSKumar Kartikeya Dwivedi 	.gpl_only	= false,
1742f0c5941fSKumar Kartikeya Dwivedi 	.ret_type	= RET_PTR_TO_DYNPTR_MEM_OR_NULL,
1743f0c5941fSKumar Kartikeya Dwivedi 	.arg1_type	= ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1744f0c5941fSKumar Kartikeya Dwivedi 	.arg2_type	= ARG_ANYTHING,
1745f0c5941fSKumar Kartikeya Dwivedi 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
1746f0c5941fSKumar Kartikeya Dwivedi };
1747f0c5941fSKumar Kartikeya Dwivedi 
1748f0c5941fSKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_get_current_task_proto __weak;
1749f0c5941fSKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1750958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1751958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1752958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1753958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1754958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1755958cf2e2SKumar Kartikeya Dwivedi 
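/* The first switch below covers helpers available without any extra
 * capability; the second group additionally requires bpf_capable() and the
 * final group perfmon_capable().
 */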
1756958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)1757958cf2e2SKumar Kartikeya Dwivedi bpf_base_func_proto(enum bpf_func_id func_id)
1758958cf2e2SKumar Kartikeya Dwivedi {
1759958cf2e2SKumar Kartikeya Dwivedi 	switch (func_id) {
1760958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_lookup_elem:
1761f0c5941fSKumar Kartikeya Dwivedi 		return &bpf_map_lookup_elem_proto;
1762f0c5941fSKumar Kartikeya Dwivedi 	case BPF_FUNC_map_update_elem:
1763f0c5941fSKumar Kartikeya Dwivedi 		return &bpf_map_update_elem_proto;
1764958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_delete_elem:
1765958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_map_delete_elem_proto;
1766958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_push_elem:
1767958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_map_push_elem_proto;
1768958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_pop_elem:
1769958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_map_pop_elem_proto;
1770958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_peek_elem:
1771958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_map_peek_elem_proto;
1772958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_map_lookup_percpu_elem:
1773958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_map_lookup_percpu_elem_proto;
1774958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_get_prandom_u32:
1775958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_get_prandom_u32_proto;
1776958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_get_smp_processor_id:
1777958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_get_raw_smp_processor_id_proto;
1778958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_get_numa_node_id:
1779958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_get_numa_node_id_proto;
1780958cf2e2SKumar Kartikeya Dwivedi 	case BPF_FUNC_tail_call:
1781958cf2e2SKumar Kartikeya Dwivedi 		return &bpf_tail_call_proto;
1782ac9f0605SKumar Kartikeya Dwivedi 	case BPF_FUNC_ktime_get_ns:
1783ac9f0605SKumar Kartikeya Dwivedi 		return &bpf_ktime_get_ns_proto;
1784ac9f0605SKumar Kartikeya Dwivedi 	case BPF_FUNC_ktime_get_boot_ns:
1785ac9f0605SKumar Kartikeya Dwivedi 		return &bpf_ktime_get_boot_ns_proto;
1786ac9f0605SKumar Kartikeya Dwivedi 	case BPF_FUNC_ktime_get_tai_ns:
1787ac9f0605SKumar Kartikeya Dwivedi 		return &bpf_ktime_get_tai_ns_proto;
1788ac9f0605SKumar Kartikeya Dwivedi 	case BPF_FUNC_ringbuf_output:
1789ac9f0605SKumar Kartikeya Dwivedi 		return &bpf_ringbuf_output_proto;
1790ac9f0605SKumar Kartikeya Dwivedi 	case BPF_FUNC_ringbuf_reserve:
1791ac9f0605SKumar Kartikeya Dwivedi 		return &bpf_ringbuf_reserve_proto;
17928cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_ringbuf_submit:
17938cab76ecSKumar Kartikeya Dwivedi 		return &bpf_ringbuf_submit_proto;
17948cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_ringbuf_discard:
17958cab76ecSKumar Kartikeya Dwivedi 		return &bpf_ringbuf_discard_proto;
17968cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_ringbuf_query:
17978cab76ecSKumar Kartikeya Dwivedi 		return &bpf_ringbuf_query_proto;
17988cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_strncmp:
17998cab76ecSKumar Kartikeya Dwivedi 		return &bpf_strncmp_proto;
18008cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_strtol:
18018cab76ecSKumar Kartikeya Dwivedi 		return &bpf_strtol_proto;
18028cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_strtoul:
18038cab76ecSKumar Kartikeya Dwivedi 		return &bpf_strtoul_proto;
18048cab76ecSKumar Kartikeya Dwivedi 	default:
18058cab76ecSKumar Kartikeya Dwivedi 		break;
18068cab76ecSKumar Kartikeya Dwivedi 	}
18078cab76ecSKumar Kartikeya Dwivedi 
18088cab76ecSKumar Kartikeya Dwivedi 	if (!bpf_capable())
18098cab76ecSKumar Kartikeya Dwivedi 		return NULL;
18108cab76ecSKumar Kartikeya Dwivedi 
18118cab76ecSKumar Kartikeya Dwivedi 	switch (func_id) {
18128cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_spin_lock:
18138cab76ecSKumar Kartikeya Dwivedi 		return &bpf_spin_lock_proto;
18148cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_spin_unlock:
18158cab76ecSKumar Kartikeya Dwivedi 		return &bpf_spin_unlock_proto;
18168cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_jiffies64:
18178cab76ecSKumar Kartikeya Dwivedi 		return &bpf_jiffies64_proto;
18188cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_per_cpu_ptr:
18198cab76ecSKumar Kartikeya Dwivedi 		return &bpf_per_cpu_ptr_proto;
18208cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_this_cpu_ptr:
18218cab76ecSKumar Kartikeya Dwivedi 		return &bpf_this_cpu_ptr_proto;
18228cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_timer_init:
18238cab76ecSKumar Kartikeya Dwivedi 		return &bpf_timer_init_proto;
18248cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_timer_set_callback:
18258cab76ecSKumar Kartikeya Dwivedi 		return &bpf_timer_set_callback_proto;
18268cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_timer_start:
18278cab76ecSKumar Kartikeya Dwivedi 		return &bpf_timer_start_proto;
18288cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_timer_cancel:
18298cab76ecSKumar Kartikeya Dwivedi 		return &bpf_timer_cancel_proto;
18308cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_kptr_xchg:
18318cab76ecSKumar Kartikeya Dwivedi 		return &bpf_kptr_xchg_proto;
18328cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_for_each_map_elem:
18338cab76ecSKumar Kartikeya Dwivedi 		return &bpf_for_each_map_elem_proto;
18348cab76ecSKumar Kartikeya Dwivedi 	case BPF_FUNC_loop:
18358cab76ecSKumar Kartikeya Dwivedi 		return &bpf_loop_proto;
183690660309SDavid Vernet 	case BPF_FUNC_user_ringbuf_drain:
183790660309SDavid Vernet 		return &bpf_user_ringbuf_drain_proto;
183890660309SDavid Vernet 	case BPF_FUNC_ringbuf_reserve_dynptr:
183990660309SDavid Vernet 		return &bpf_ringbuf_reserve_dynptr_proto;
184090660309SDavid Vernet 	case BPF_FUNC_ringbuf_submit_dynptr:
184190660309SDavid Vernet 		return &bpf_ringbuf_submit_dynptr_proto;
184290660309SDavid Vernet 	case BPF_FUNC_ringbuf_discard_dynptr:
184390660309SDavid Vernet 		return &bpf_ringbuf_discard_dynptr_proto;
1844156ed20dSDavid Vernet 	case BPF_FUNC_dynptr_from_mem:
184590660309SDavid Vernet 		return &bpf_dynptr_from_mem_proto;
184690660309SDavid Vernet 	case BPF_FUNC_dynptr_read:
184790660309SDavid Vernet 		return &bpf_dynptr_read_proto;
1848fca1aa75SYonghong Song 	case BPF_FUNC_dynptr_write:
1849fca1aa75SYonghong Song 		return &bpf_dynptr_write_proto;
1850fca1aa75SYonghong Song 	case BPF_FUNC_dynptr_data:
1851fca1aa75SYonghong Song 		return &bpf_dynptr_data_proto;
1852fca1aa75SYonghong Song #ifdef CONFIG_CGROUPS
1853fca1aa75SYonghong Song 	case BPF_FUNC_cgrp_storage_get:
1854fca1aa75SYonghong Song 		return &bpf_cgrp_storage_get_proto;
1855156ed20dSDavid Vernet 	case BPF_FUNC_cgrp_storage_delete:
1856156ed20dSDavid Vernet 		return &bpf_cgrp_storage_delete_proto;
1857156ed20dSDavid Vernet 	case BPF_FUNC_get_current_cgroup_id:
1858156ed20dSDavid Vernet 		return &bpf_get_current_cgroup_id_proto;
1859156ed20dSDavid Vernet 	case BPF_FUNC_get_current_ancestor_cgroup_id:
1860156ed20dSDavid Vernet 		return &bpf_get_current_ancestor_cgroup_id_proto;
1861156ed20dSDavid Vernet #endif
1862156ed20dSDavid Vernet 	default:
1863156ed20dSDavid Vernet 		break;
1864156ed20dSDavid Vernet 	}
1865156ed20dSDavid Vernet 
1866156ed20dSDavid Vernet 	if (!perfmon_capable())
1867156ed20dSDavid Vernet 		return NULL;
1868156ed20dSDavid Vernet 
1869156ed20dSDavid Vernet 	switch (func_id) {
1870156ed20dSDavid Vernet 	case BPF_FUNC_trace_printk:
1871156ed20dSDavid Vernet 		return bpf_get_trace_printk_proto();
1872156ed20dSDavid Vernet 	case BPF_FUNC_get_current_task:
1873156ed20dSDavid Vernet 		return &bpf_get_current_task_proto;
1874156ed20dSDavid Vernet 	case BPF_FUNC_get_current_task_btf:
1875156ed20dSDavid Vernet 		return &bpf_get_current_task_btf_proto;
1876156ed20dSDavid Vernet 	case BPF_FUNC_probe_read_user:
1877156ed20dSDavid Vernet 		return &bpf_probe_read_user_proto;
1878156ed20dSDavid Vernet 	case BPF_FUNC_probe_read_kernel:
1879156ed20dSDavid Vernet 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1880156ed20dSDavid Vernet 		       NULL : &bpf_probe_read_kernel_proto;
1881156ed20dSDavid Vernet 	case BPF_FUNC_probe_read_user_str:
1882156ed20dSDavid Vernet 		return &bpf_probe_read_user_str_proto;
1883156ed20dSDavid Vernet 	case BPF_FUNC_probe_read_kernel_str:
1884156ed20dSDavid Vernet 		return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1885156ed20dSDavid Vernet 		       NULL : &bpf_probe_read_kernel_str_proto;
1886156ed20dSDavid Vernet 	case BPF_FUNC_snprintf_btf:
1887156ed20dSDavid Vernet 		return &bpf_snprintf_btf_proto;
1888156ed20dSDavid Vernet 	case BPF_FUNC_snprintf:
1889156ed20dSDavid Vernet 		return &bpf_snprintf_proto;
1890156ed20dSDavid Vernet 	case BPF_FUNC_task_pt_regs:
1891156ed20dSDavid Vernet 		return &bpf_task_pt_regs_proto;
1892156ed20dSDavid Vernet 	case BPF_FUNC_trace_vprintk:
1893156ed20dSDavid Vernet 		return bpf_get_trace_vprintk_proto();
1894156ed20dSDavid Vernet 	default:
1895156ed20dSDavid Vernet 		return NULL;
1896fca1aa75SYonghong Song 	}
1897fca1aa75SYonghong Song }
1898fca1aa75SYonghong Song 
1899fca1aa75SYonghong Song void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
190090660309SDavid Vernet 
bpf_list_head_free(const struct btf_field * field,void * list_head,struct bpf_spin_lock * spin_lock)190190660309SDavid Vernet void bpf_list_head_free(const struct btf_field *field, void *list_head,
190290660309SDavid Vernet 			struct bpf_spin_lock *spin_lock)
190390660309SDavid Vernet {
190490660309SDavid Vernet 	struct list_head *head = list_head, *orig_head = list_head;
190590660309SDavid Vernet 
190690660309SDavid Vernet 	BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
1907156ed20dSDavid Vernet 	BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
1908156ed20dSDavid Vernet 
1909156ed20dSDavid Vernet 	/* Do the actual list draining outside the lock to not hold the lock for
191090660309SDavid Vernet 	 * too long, and also prevent deadlocks if tracing programs end up
1911156ed20dSDavid Vernet 	 * executing on entry/exit of functions called inside the critical
191290660309SDavid Vernet 	 * section, and end up doing map ops that call bpf_list_head_free for
191390660309SDavid Vernet 	 * the same map value again.
191490660309SDavid Vernet 	 */
191525c5e92dSDavid Vernet 	__bpf_spin_lock_irqsave(spin_lock);
191690660309SDavid Vernet 	if (!head->next || list_empty(head))
191790660309SDavid Vernet 		goto unlock;
191890660309SDavid Vernet 	head = head->next;
191990660309SDavid Vernet unlock:
192090660309SDavid Vernet 	INIT_LIST_HEAD(orig_head);
192190660309SDavid Vernet 	__bpf_spin_unlock_irqrestore(spin_lock);
192290660309SDavid Vernet 
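	/* The list was detached above: 'head' now points at its first element
	 * and the last element still links back to 'orig_head', which is what
	 * terminates the walk below.
	 */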
1923156ed20dSDavid Vernet 	while (head != orig_head) {
192490660309SDavid Vernet 		void *obj = head;
192590660309SDavid Vernet 
1926fda01efcSDavid Vernet 		obj -= field->graph_root.node_offset;
1927fda01efcSDavid Vernet 		head = head->next;
1928fda01efcSDavid Vernet 		/* The contained type can also have resources, including a
1929fda01efcSDavid Vernet 		 * bpf_list_head which needs to be freed.
1930fda01efcSDavid Vernet 		 */
1931fda01efcSDavid Vernet 		migrate_disable();
1932fda01efcSDavid Vernet 		__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
1933fda01efcSDavid Vernet 		migrate_enable();
1934fda01efcSDavid Vernet 	}
1935fda01efcSDavid Vernet }
1936fda01efcSDavid Vernet 
1937fda01efcSDavid Vernet /* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
1938fda01efcSDavid Vernet  * 'rb_node *', so field name of rb_node within containing struct is not
1939fda01efcSDavid Vernet  * needed.
1940fda01efcSDavid Vernet  *
1941fda01efcSDavid Vernet  * Since bpf_rb_tree's node type has a corresponding struct btf_field with
1942fda01efcSDavid Vernet  * graph_root.node_offset, it's not necessary to know field name
1943fda01efcSDavid Vernet  * or type of node struct
1944fda01efcSDavid Vernet  */
1945fda01efcSDavid Vernet #define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
1946fda01efcSDavid Vernet 	for (pos = rb_first_postorder(root); \
1947fda01efcSDavid Vernet 	    pos && ({ n = rb_next_postorder(pos); 1; }); \
1948fda01efcSDavid Vernet 	    pos = n)
1949fda01efcSDavid Vernet 
bpf_rb_root_free(const struct btf_field * field,void * rb_root,struct bpf_spin_lock * spin_lock)1950fda01efcSDavid Vernet void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
1951fda01efcSDavid Vernet 		      struct bpf_spin_lock *spin_lock)
1952fda01efcSDavid Vernet {
1953fda01efcSDavid Vernet 	struct rb_root_cached orig_root, *root = rb_root;
1954fda01efcSDavid Vernet 	struct rb_node *pos, *n;
1955fda01efcSDavid Vernet 	void *obj;
1956fda01efcSDavid Vernet 
1957fda01efcSDavid Vernet 	BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
1958fda01efcSDavid Vernet 	BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
1959fda01efcSDavid Vernet 
1960fda01efcSDavid Vernet 	__bpf_spin_lock_irqsave(spin_lock);
1961fda01efcSDavid Vernet 	orig_root = *root;
1962fda01efcSDavid Vernet 	*root = RB_ROOT_CACHED;
1963fda01efcSDavid Vernet 	__bpf_spin_unlock_irqrestore(spin_lock);
1964fda01efcSDavid Vernet 
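	/* Walk the detached tree in postorder: the next node is computed before
	 * the current one is dropped, so every node can be freed during the
	 * traversal without rebalancing the (now unreachable) tree.
	 */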
1965fda01efcSDavid Vernet 	bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
1966fda01efcSDavid Vernet 		obj = pos;
1967fda01efcSDavid Vernet 		obj -= field->graph_root.node_offset;
1968fda01efcSDavid Vernet 
1969fda01efcSDavid Vernet 
1970fda01efcSDavid Vernet 		migrate_disable();
197136aa10ffSDavid Vernet 		__bpf_obj_drop_impl(obj, field->graph_root.value_rec);
1972fda01efcSDavid Vernet 		migrate_enable();
1973fda01efcSDavid Vernet 	}
1974fda01efcSDavid Vernet }
1975fda01efcSDavid Vernet 
1976fda01efcSDavid Vernet __diag_push();
1977fda01efcSDavid Vernet __diag_ignore_all("-Wmissing-prototypes",
1978fda01efcSDavid Vernet 		  "Global functions as their definitions will be in vmlinux BTF");
1979fda01efcSDavid Vernet 
bpf_obj_new_impl(u64 local_type_id__k,void * meta__ign)1980fda01efcSDavid Vernet __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
1981fda01efcSDavid Vernet {
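	/* Both arguments are set up by the verifier: local_type_id__k is
	 * rewritten into the size in bytes of the type being allocated, and
	 * meta__ign carries the btf_struct_meta for that type (or NULL).
	 */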
1982fda01efcSDavid Vernet 	struct btf_struct_meta *meta = meta__ign;
1983fda01efcSDavid Vernet 	u64 size = local_type_id__k;
19845ca78670SDavid Vernet 	void *p;
19855ca78670SDavid Vernet 
19865ca78670SDavid Vernet 	p = bpf_mem_alloc(&bpf_global_ma, size);
19875ca78670SDavid Vernet 	if (!p)
19885ca78670SDavid Vernet 		return NULL;
19895ca78670SDavid Vernet 	if (meta)
19905ca78670SDavid Vernet 		bpf_obj_init(meta->record, p);
19915ca78670SDavid Vernet 	return p;
19925ca78670SDavid Vernet }
19935ca78670SDavid Vernet 
19945ca78670SDavid Vernet /* Must be called under migrate_disable(), as required by bpf_mem_free */
__bpf_obj_drop_impl(void * p,const struct btf_record * rec)19955ca78670SDavid Vernet void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
19965ca78670SDavid Vernet {
19975ca78670SDavid Vernet 	if (rec && rec->refcount_off >= 0 &&
19985ca78670SDavid Vernet 	    !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
19995ca78670SDavid Vernet 		/* Object is refcounted and refcount_dec didn't result in 0
20005ca78670SDavid Vernet 		 * refcount. Return without freeing the object
20015ca78670SDavid Vernet 		 */
20025ca78670SDavid Vernet 		return;
2003fda01efcSDavid Vernet 	}
2004fda01efcSDavid Vernet 
20053f0e6f2bSDavid Vernet 	if (rec)
20063f0e6f2bSDavid Vernet 		bpf_obj_free_fields(rec, p);
20073f0e6f2bSDavid Vernet 
20083f0e6f2bSDavid Vernet 	if (rec && rec->refcount_off >= 0)
20093f0e6f2bSDavid Vernet 		bpf_mem_free_rcu(&bpf_global_ma, p);
20103f0e6f2bSDavid Vernet 	else
20113f0e6f2bSDavid Vernet 		bpf_mem_free(&bpf_global_ma, p);
20123f0e6f2bSDavid Vernet }
20133f0e6f2bSDavid Vernet 
bpf_obj_drop_impl(void * p__alloc,void * meta__ign)20143f0e6f2bSDavid Vernet __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
20153f0e6f2bSDavid Vernet {
20163f0e6f2bSDavid Vernet 	struct btf_struct_meta *meta = meta__ign;
20173f0e6f2bSDavid Vernet 	void *p = p__alloc;
20183f0e6f2bSDavid Vernet 
20193f0e6f2bSDavid Vernet 	__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
20203f0e6f2bSDavid Vernet }
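
/* Usage sketch (illustrative, not part of this file): BPF programs normally
 * reach bpf_obj_new_impl()/bpf_obj_drop_impl() through the bpf_obj_new() and
 * bpf_obj_drop() convenience macros in the selftests' bpf_experimental.h,
 * which fill in the hidden local_type_id__k/meta__ign arguments. The struct,
 * section and program names below are assumptions made for the example:
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_list_node node;
 *	};
 *
 *	SEC("tc")
 *	int alloc_and_free(void *ctx)
 *	{
 *		struct node_data *n = bpf_obj_new(typeof(*n));
 *
 *		if (!n)
 *			return 0;
 *		n->key = 42;
 *		bpf_obj_drop(n);	// releases the owned reference
 *		return 0;
 *	}
 */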
20213f0e6f2bSDavid Vernet 
bpf_refcount_acquire_impl(void * p__refcounted_kptr,void * meta__ign)20223f0e6f2bSDavid Vernet __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
20233f0e6f2bSDavid Vernet {
2024fd264ca0SYonghong Song 	struct btf_struct_meta *meta = meta__ign;
2025fd264ca0SYonghong Song 	struct bpf_refcount *ref;
2026fd264ca0SYonghong Song 
2027fd264ca0SYonghong Song 	/* Could just cast directly to refcount_t *, but need some code using
2028fd264ca0SYonghong Song 	 * bpf_refcount type so that it is emitted in vmlinux BTF
2029a35b9af4SYonghong Song 	 */
2030a35b9af4SYonghong Song 	ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
2031a35b9af4SYonghong Song 	if (!refcount_inc_not_zero((refcount_t *)ref))
2032a35b9af4SYonghong Song 		return NULL;
2033a35b9af4SYonghong Song 
20349bb00b28SYonghong Song 	/* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
20359bb00b28SYonghong Song 	 * in verifier.c
20369bb00b28SYonghong Song 	 */
20379bb00b28SYonghong Song 	return (void *)p__refcounted_kptr;
20389bb00b28SYonghong Song }
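
/* Usage sketch (illustrative): embedding a struct bpf_refcount in the node
 * type is what gives its btf_record a valid refcount_off; bpf_refcount_acquire()
 * below is assumed to be the selftests' bpf_experimental.h wrapper around
 * bpf_refcount_acquire_impl():
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_refcount ref;
 *		struct bpf_rb_node node;
 *	};
 *
 *	n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	m = bpf_refcount_acquire(n);	// second owned reference to the object
 *	// ... push n into one collection and m into another ...
 */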
20399bb00b28SYonghong Song 
__bpf_list_add(struct bpf_list_node_kern * node,struct bpf_list_head * head,bool tail,struct btf_record * rec,u64 off)20409bb00b28SYonghong Song static int __bpf_list_add(struct bpf_list_node_kern *node,
20419bb00b28SYonghong Song 			  struct bpf_list_head *head,
20429bb00b28SYonghong Song 			  bool tail, struct btf_record *rec, u64 off)
20439bb00b28SYonghong Song {
2044958cf2e2SKumar Kartikeya Dwivedi 	struct list_head *n = &node->list_head, *h = (void *)head;
2045958cf2e2SKumar Kartikeya Dwivedi 
2046958cf2e2SKumar Kartikeya Dwivedi 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
204713379059SArtem Savkov 	 * called on its fields, so init here
204813379059SArtem Savkov 	 */
204913379059SArtem Savkov 	if (unlikely(!h->next))
2050958cf2e2SKumar Kartikeya Dwivedi 		INIT_LIST_HEAD(h);
2051ac9f0605SKumar Kartikeya Dwivedi 
20528cab76ecSKumar Kartikeya Dwivedi 	/* node->owner != NULL implies !list_empty(n), no need to separately
20538cab76ecSKumar Kartikeya Dwivedi 	 * check the latter
20548cab76ecSKumar Kartikeya Dwivedi 	 */
20558cab76ecSKumar Kartikeya Dwivedi 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
205690660309SDavid Vernet 		/* Only called from BPF prog, no need to migrate_disable */
2057fca1aa75SYonghong Song 		__bpf_obj_drop_impl((void *)n - off, rec);
205890660309SDavid Vernet 		return -EINVAL;
205990660309SDavid Vernet 	}
2060fda01efcSDavid Vernet 
2061fda01efcSDavid Vernet 	tail ? list_add_tail(n, h) : list_add(n, h);
2062fda01efcSDavid Vernet 	WRITE_ONCE(node->owner, head);
2063fda01efcSDavid Vernet 
20645ca78670SDavid Vernet 	return 0;
2065fda01efcSDavid Vernet }
20663f0e6f2bSDavid Vernet 
bpf_list_push_front_impl(struct bpf_list_head * head,struct bpf_list_node * node,void * meta__ign,u64 off)2067958cf2e2SKumar Kartikeya Dwivedi __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
206813379059SArtem Savkov 					 struct bpf_list_node *node,
2069958cf2e2SKumar Kartikeya Dwivedi 					 void *meta__ign, u64 off)
207013379059SArtem Savkov {
2071958cf2e2SKumar Kartikeya Dwivedi 	struct bpf_list_node_kern *n = (void *)node;
207213379059SArtem Savkov 	struct btf_struct_meta *meta = meta__ign;
207313379059SArtem Savkov 
2074cfe14564SYonghong Song 	return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
207590660309SDavid Vernet }
207690660309SDavid Vernet 
bpf_list_push_back_impl(struct bpf_list_head * head,struct bpf_list_node * node,void * meta__ign,u64 off)207790660309SDavid Vernet __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
2078fda01efcSDavid Vernet 					struct bpf_list_node *node,
2079fda01efcSDavid Vernet 					void *meta__ign, u64 off)
2080fda01efcSDavid Vernet {
2081fda01efcSDavid Vernet 	struct bpf_list_node_kern *n = (void *)node;
208290660309SDavid Vernet 	struct btf_struct_meta *meta = meta__ign;
2083cfe14564SYonghong Song 
2084fd264ca0SYonghong Song 	return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
2085a35b9af4SYonghong Song }
20869bb00b28SYonghong Song 
__bpf_list_del(struct bpf_list_head * head,bool tail)20879bb00b28SYonghong Song static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
2088cfe14564SYonghong Song {
2089cfe14564SYonghong Song 	struct list_head *n, *h = (void *)head;
2090cfe14564SYonghong Song 	struct bpf_list_node_kern *node;
2091cfe14564SYonghong Song 
2092cfe14564SYonghong Song 	/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
2093cfe14564SYonghong Song 	 * called on its fields, so init here
2094cfe14564SYonghong Song 	 */
209513379059SArtem Savkov 	if (unlikely(!h->next))
209613379059SArtem Savkov 		INIT_LIST_HEAD(h);
20972fcc6081SDavid Vernet 	if (list_empty(h))
209890660309SDavid Vernet 		return NULL;
209990660309SDavid Vernet 
21002fcc6081SDavid Vernet 	n = tail ? h->prev : h->next;
21012fcc6081SDavid Vernet 	node = container_of(n, struct bpf_list_node_kern, list_head);
210290660309SDavid Vernet 	if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
2103fda01efcSDavid Vernet 		return NULL;
2104fda01efcSDavid Vernet 
21052fcc6081SDavid Vernet 	list_del_init(n);
21062fcc6081SDavid Vernet 	WRITE_ONCE(node->owner, NULL);
2107fda01efcSDavid Vernet 	return (struct bpf_list_node *)n;
2108fda01efcSDavid Vernet }
210990660309SDavid Vernet 
bpf_list_pop_front(struct bpf_list_head * head)21108cab76ecSKumar Kartikeya Dwivedi __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
21118cab76ecSKumar Kartikeya Dwivedi {
211290660309SDavid Vernet 	return __bpf_list_del(head, false);
211390660309SDavid Vernet }
2114cfe14564SYonghong Song 
bpf_list_pop_back(struct bpf_list_head * head)211590660309SDavid Vernet __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
211690660309SDavid Vernet {
2117cfe14564SYonghong Song 	return __bpf_list_del(head, true);
211813379059SArtem Savkov }
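
/* Usage sketch (illustrative): list heads and their spin locks typically live
 * in map values or global storage; the private()/__contains() annotations and
 * the bpf_list_push_back()/bpf_list_pop_front() wrappers are assumed to come
 * from the BPF selftests headers (bpf_experimental.h and friends):
 *
 *	private(A) struct bpf_spin_lock glock;
 *	private(A) struct bpf_list_head ghead __contains(node_data, node);
 *
 *	n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *
 *	bpf_spin_lock(&glock);
 *	bpf_list_push_back(&ghead, &n->node);	// the list now owns the node
 *	pn = bpf_list_pop_front(&ghead);	// returns an owned node or NULL
 *	bpf_spin_unlock(&glock);
 *
 *	if (pn)
 *		bpf_obj_drop(container_of(pn, struct node_data, node));
 */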
211913379059SArtem Savkov 
bpf_rbtree_remove(struct bpf_rb_root * root,struct bpf_rb_node * node)212013379059SArtem Savkov __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
2121 						  struct bpf_rb_node *node)
2122 {
2123 	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2124 	struct rb_root_cached *r = (struct rb_root_cached *)root;
2125 	struct rb_node *n = &node_internal->rb_node;
2126 
2127 	/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
2128 	 * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
2129 	 */
2130 	if (READ_ONCE(node_internal->owner) != root)
2131 		return NULL;
2132 
2133 	rb_erase_cached(n, r);
2134 	RB_CLEAR_NODE(n);
2135 	WRITE_ONCE(node_internal->owner, NULL);
2136 	return (struct bpf_rb_node *)n;
2137 }
2138 
2139 /* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
2140  * program
2141  */
__bpf_rbtree_add(struct bpf_rb_root * root,struct bpf_rb_node_kern * node,void * less,struct btf_record * rec,u64 off)2142 static int __bpf_rbtree_add(struct bpf_rb_root *root,
2143 			    struct bpf_rb_node_kern *node,
2144 			    void *less, struct btf_record *rec, u64 off)
2145 {
2146 	struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
2147 	struct rb_node *parent = NULL, *n = &node->rb_node;
2148 	bpf_callback_t cb = (bpf_callback_t)less;
2149 	bool leftmost = true;
2150 
2151 	/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
2152 	 * check the latter
2153 	 */
2154 	if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
2155 		/* Only called from BPF prog, no need to migrate_disable */
2156 		__bpf_obj_drop_impl((void *)n - off, rec);
2157 		return -EINVAL;
2158 	}
2159 
2160 	while (*link) {
2161 		parent = *link;
2162 		if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
2163 			link = &parent->rb_left;
2164 		} else {
2165 			link = &parent->rb_right;
2166 			leftmost = false;
2167 		}
2168 	}
2169 
2170 	rb_link_node(n, parent, link);
2171 	rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
2172 	WRITE_ONCE(node->owner, root);
2173 	return 0;
2174 }
2175 
bpf_rbtree_add_impl(struct bpf_rb_root * root,struct bpf_rb_node * node,bool (less)(struct bpf_rb_node * a,const struct bpf_rb_node * b),void * meta__ign,u64 off)2176 __bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
2177 				    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
2178 				    void *meta__ign, u64 off)
2179 {
2180 	struct btf_struct_meta *meta = meta__ign;
2181 	struct bpf_rb_node_kern *n = (void *)node;
2182 
2183 	return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
2184 }
2185 
bpf_rbtree_first(struct bpf_rb_root * root)2186 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
2187 {
2188 	struct rb_root_cached *r = (struct rb_root_cached *)root;
2189 
2190 	return (struct bpf_rb_node *)rb_first_cached(r);
2191 }
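
/* Usage sketch (illustrative): the 'less' callback is an ordinary BPF function
 * comparing two nodes; bpf_rbtree_add() below is assumed to be the selftests'
 * bpf_experimental.h wrapper around bpf_rbtree_add_impl(), and all tree
 * operations must run under the bpf_spin_lock associated with the root
 * ('groot', 'glock' and 'node_data' are assumptions for the example):
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct node_data *na = container_of(a, struct node_data, node);
 *		struct node_data *nb = container_of(b, struct node_data, node);
 *
 *		return na->key < nb->key;
 *	}
 *
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	rn = bpf_rbtree_first(&groot);			// leftmost node or NULL
 *	if (rn)
 *		rn = bpf_rbtree_remove(&groot, rn);	// owned node or NULL
 *	bpf_spin_unlock(&glock);
 *
 *	if (rn)
 *		bpf_obj_drop(container_of(rn, struct node_data, node));
 */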
2192 
2193 /**
2194  * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
2195  * kfunc that is not stored in a map as a kptr must be released by calling
2196  * bpf_task_release().
2197  * @p: The task on which a reference is being acquired.
2198  */
bpf_task_acquire(struct task_struct * p)2199 __bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
2200 {
2201 	if (refcount_inc_not_zero(&p->rcu_users))
2202 		return p;
2203 	return NULL;
2204 }
2205 
2206 /**
2207  * bpf_task_release - Release the reference acquired on a task.
2208  * @p: The task on which a reference is being released.
2209  */
bpf_task_release(struct task_struct * p)2210 __bpf_kfunc void bpf_task_release(struct task_struct *p)
2211 {
2212 	put_task_struct_rcu_user(p);
2213 }
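
/* Usage sketch (illustrative; the hook point and program name are assumptions):
 * a tracing program acquiring and releasing a task reference:
 *
 *	SEC("tp_btf/task_newtask")
 *	int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
 *	{
 *		struct task_struct *acquired;
 *
 *		acquired = bpf_task_acquire(task);
 *		if (!acquired)
 *			return 0;
 *		// ... use 'acquired', or bpf_kptr_xchg() it into a map value ...
 *		bpf_task_release(acquired);
 *		return 0;
 *	}
 */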
2214 
2215 #ifdef CONFIG_CGROUPS
2216 /**
2217  * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
2218  * this kfunc that is not stored in a map as a kptr must be released by
2219  * calling bpf_cgroup_release().
2220  * @cgrp: The cgroup on which a reference is being acquired.
2221  */
bpf_cgroup_acquire(struct cgroup * cgrp)2222 __bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
2223 {
2224 	return cgroup_tryget(cgrp) ? cgrp : NULL;
2225 }
2226 
2227 /**
2228  * bpf_cgroup_release - Release the reference acquired on a cgroup.
2229  * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
2230  * not be freed until the current grace period has ended, even if its refcount
2231  * drops to 0.
2232  * @cgrp: The cgroup on which a reference is being released.
2233  */
bpf_cgroup_release(struct cgroup * cgrp)2234 __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
2235 {
2236 	cgroup_put(cgrp);
2237 }
2238 
2239 /**
2240  * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
2241  * array. A cgroup returned by this kfunc that is not subsequently stored in a
2242  * map must be released by calling bpf_cgroup_release().
2243  * @cgrp: The cgroup for which we're performing a lookup.
2244  * @level: The level of ancestor to look up.
2245  */
bpf_cgroup_ancestor(struct cgroup * cgrp,int level)2246 __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
2247 {
2248 	struct cgroup *ancestor;
2249 
2250 	if (level > cgrp->level || level < 0)
2251 		return NULL;
2252 
2253 	/* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
2254 	ancestor = cgrp->ancestors[level];
2255 	if (!cgroup_tryget(ancestor))
2256 		return NULL;
2257 	return ancestor;
2258 }
2259 
2260 /**
2261  * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
2262  * kfunc that is not subsequently stored in a map must be released by calling
2263  * bpf_cgroup_release().
2264  * @cgid: cgroup id.
2265  */
bpf_cgroup_from_id(u64 cgid)2266 __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
2267 {
2268 	struct cgroup *cgrp;
2269 
2270 	cgrp = cgroup_get_from_id(cgid);
2271 	if (IS_ERR(cgrp))
2272 		return NULL;
2273 	return cgrp;
2274 }
2275 
2276 /**
2277  * bpf_task_under_cgroup - Wrap task_under_cgroup_hierarchy() as a kfunc to test
2278  * a task's membership in a cgroup's ancestry.
2279  * @task: the task to be tested
2280  * @ancestor: possible ancestor of @task's cgroup
2281  *
2282  * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
2283  * It follows all the same rules as cgroup_is_descendant, and only applies
2284  * to the default hierarchy.
2285  */
bpf_task_under_cgroup(struct task_struct * task,struct cgroup * ancestor)2286 __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
2287 				       struct cgroup *ancestor)
2288 {
2289 	long ret;
2290 
2291 	rcu_read_lock();
2292 	ret = task_under_cgroup_hierarchy(task, ancestor);
2293 	rcu_read_unlock();
2294 	return ret;
2295 }
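
/* Usage sketch (illustrative; 'cgid' and 'task' are assumed to be available in
 * the calling program, with 'task' a trusted or RCU-protected pointer):
 *
 *	struct cgroup *cgrp, *ancestor;
 *	long under = 0;
 *
 *	cgrp = bpf_cgroup_from_id(cgid);
 *	if (!cgrp)
 *		return 0;
 *	ancestor = bpf_cgroup_ancestor(cgrp, 1);	// level-1 ancestor
 *	if (ancestor) {
 *		under = bpf_task_under_cgroup(task, ancestor);
 *		bpf_cgroup_release(ancestor);
 *	}
 *	bpf_cgroup_release(cgrp);
 */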
2296 #endif /* CONFIG_CGROUPS */
2297 
2298 /**
2299  * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
2300  * in the root pid namespace idr. If a task is returned, it must either be
2301  * stored in a map or released with bpf_task_release().
2302  * @pid: The pid of the task being looked up.
2303  */
bpf_task_from_pid(s32 pid)2304 __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
2305 {
2306 	struct task_struct *p;
2307 
2308 	rcu_read_lock();
2309 	p = find_task_by_pid_ns(pid, &init_pid_ns);
2310 	if (p)
2311 		p = bpf_task_acquire(p);
2312 	rcu_read_unlock();
2313 
2314 	return p;
2315 }
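
/* Usage sketch (illustrative; 'pid' is an assumption, e.g. read from a map or
 * supplied by userspace):
 *
 *	struct task_struct *p;
 *
 *	p = bpf_task_from_pid(pid);
 *	if (!p)
 *		return 0;
 *	bpf_printk("comm: %s", p->comm);
 *	bpf_task_release(p);
 */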
2316 
2317 /**
2318  * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
2319  * @ptr: The dynptr whose data slice to retrieve
2320  * @offset: Offset into the dynptr
2321  * @buffer__opt: User-provided buffer to copy contents into.  May be NULL
2322  * @buffer__szk: Size (in bytes) of the buffer if present. This is the
2323  *               length of the requested slice. This must be a constant.
2324  *
2325  * For non-skb and non-xdp type dynptrs, there is no difference between
2326  * bpf_dynptr_slice and bpf_dynptr_data.
2327  *
2328  * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
2329  *
2330  * If the intention is to write to the data slice, please use
2331  * bpf_dynptr_slice_rdwr.
2332  *
2333  * The user must check that the returned pointer is not null before using it.
2334  *
2335  * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
2336  * does not change the underlying packet data pointers, so a call to
2337  * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
2338  * the bpf program.
2339  *
2340  * Return: NULL if the call failed (e.g. invalid dynptr), otherwise a pointer to
2341  * a read-only data slice (either a direct pointer to the data or a pointer to
2342  * the user-provided buffer, with its contents containing the data, if a direct
2343  * pointer could not be obtained)
2344  */
bpf_dynptr_slice(const struct bpf_dynptr_kern * ptr,u32 offset,void * buffer__opt,u32 buffer__szk)2345 __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
2346 				   void *buffer__opt, u32 buffer__szk)
2347 {
2348 	enum bpf_dynptr_type type;
2349 	u32 len = buffer__szk;
2350 	int err;
2351 
2352 	if (!ptr->data)
2353 		return NULL;
2354 
2355 	err = bpf_dynptr_check_off_len(ptr, offset, len);
2356 	if (err)
2357 		return NULL;
2358 
2359 	type = bpf_dynptr_get_type(ptr);
2360 
2361 	switch (type) {
2362 	case BPF_DYNPTR_TYPE_LOCAL:
2363 	case BPF_DYNPTR_TYPE_RINGBUF:
2364 		return ptr->data + ptr->offset + offset;
2365 	case BPF_DYNPTR_TYPE_SKB:
2366 		if (buffer__opt)
2367 			return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
2368 		else
2369 			return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
2370 	case BPF_DYNPTR_TYPE_XDP:
2371 	{
2372 		void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
2373 		if (!IS_ERR_OR_NULL(xdp_ptr))
2374 			return xdp_ptr;
2375 
2376 		if (!buffer__opt)
2377 			return NULL;
2378 		bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
2379 		return buffer__opt;
2380 	}
2381 	default:
2382 		WARN_ONCE(true, "unknown dynptr type %d\n", type);
2383 		return NULL;
2384 	}
2385 }
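
/* Usage sketch (illustrative): from the BPF program side the kfunc takes a
 * struct bpf_dynptr *; here an Ethernet header is parsed from an skb dynptr,
 * where 'ptr' is assumed to have been initialized with bpf_dynptr_from_skb():
 *
 *	struct ethhdr buf, *eth;
 *
 *	eth = bpf_dynptr_slice(&ptr, 0, &buf, sizeof(buf));
 *	if (!eth)
 *		return TC_ACT_SHOT;
 *	// eth points either into the linear skb data or into 'buf'
 */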
2386 
2387 /**
2388  * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
2389  * @ptr: The dynptr whose data slice to retrieve
2390  * @offset: Offset into the dynptr
2391  * @buffer__opt: User-provided buffer to copy contents into. May be NULL
2392  * @buffer__szk: Size (in bytes) of the buffer if present. This is the
2393  *               length of the requested slice. This must be a constant.
2394  *
2395  * For non-skb and non-xdp type dynptrs, there is no difference between
2396  * bpf_dynptr_slice and bpf_dynptr_data.
2397  *
2398  * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
2399  *
2400  * The returned pointer is writable and may point either directly to the dynptr
2401  * data at the requested offset or to the buffer if a direct data pointer could
2402  * not be obtained (for example: the requested slice is in the paged area of an
2403  * skb packet). In the case where the returned pointer is to the buffer, the
2404  * user is responsible for persisting writes by calling bpf_dynptr_write(). This
2405  * usually looks something like this pattern:
2406  *
2407  * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
2408  * if (!eth)
2409  *	return TC_ACT_SHOT;
2410  *
2411  * // mutate eth header //
2412  *
2413  * if (eth == buffer)
2414  *	bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
2415  *
2416  * Please note that, as in the example above, the user must check that the
2417  * returned pointer is not null before using it.
2418  *
2419  * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
2420  * does not change the underlying packet data pointers, so a call to
2421  * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
2422  * the bpf program.
2423  *
2424  * Return: NULL if the call failed (e.g. invalid dynptr), otherwise a pointer
2425  * to a data slice (either a direct pointer to the data or a pointer to the
2426  * user-provided buffer, with its contents containing the data, if a direct
2427  * pointer could not be obtained)
2428  */
bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern * ptr,u32 offset,void * buffer__opt,u32 buffer__szk)2429 __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
2430 					void *buffer__opt, u32 buffer__szk)
2431 {
2432 	if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
2433 		return NULL;
2434 
2435 	/* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
2436 	 *
2437 	 * For skb-type dynptrs, it is safe to write into the returned pointer
2438 	 * if the bpf program allows skb data writes. There are two possibilities
2439 	 * that may occur when calling bpf_dynptr_slice_rdwr:
2440 	 *
2441 	 * 1) The requested slice is in the head of the skb. In this case, the
2442 	 * returned pointer is directly to skb data, and if the skb is cloned, the
2443 	 * verifier will have uncloned it (see bpf_unclone_prologue()) already.
2444 	 * The pointer can be directly written into.
2445 	 *
2446 	 * 2) Some portion of the requested slice is in the paged buffer area.
2447 	 * In this case, the requested data will be copied out into the buffer
2448 	 * and the returned pointer will be a pointer to the buffer. The skb
2449 	 * will not be pulled. To persist the write, the user will need to call
2450 	 * bpf_dynptr_write(), which will pull the skb and commit the write.
2451 	 *
2452 	 * Similarly for xdp programs, if the requested slice is not across xdp
2453 	 * fragments, then a direct pointer will be returned, otherwise the data
2454 	 * will be copied out into the buffer and the user will need to call
2455 	 * bpf_dynptr_write() to commit changes.
2456 	 */
2457 	return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
2458 }
2459 
bpf_dynptr_adjust(struct bpf_dynptr_kern * ptr,u32 start,u32 end)2460 __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end)
2461 {
2462 	u32 size;
2463 
2464 	if (!ptr->data || start > end)
2465 		return -EINVAL;
2466 
2467 	size = __bpf_dynptr_size(ptr);
2468 
2469 	if (start > size || end > size)
2470 		return -ERANGE;
2471 
2472 	ptr->offset += start;
2473 	bpf_dynptr_set_size(ptr, end - start);
2474 
2475 	return 0;
2476 }
2477 
bpf_dynptr_is_null(struct bpf_dynptr_kern * ptr)2478 __bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr)
2479 {
2480 	return !ptr->data;
2481 }
2482 
bpf_dynptr_is_rdonly(struct bpf_dynptr_kern * ptr)2483 __bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
2484 {
2485 	if (!ptr->data)
2486 		return false;
2487 
2488 	return __bpf_dynptr_is_rdonly(ptr);
2489 }
2490 
bpf_dynptr_size(const struct bpf_dynptr_kern * ptr)2491 __bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
2492 {
2493 	if (!ptr->data)
2494 		return -EINVAL;
2495 
2496 	return __bpf_dynptr_size(ptr);
2497 }
2498 
bpf_dynptr_clone(struct bpf_dynptr_kern * ptr,struct bpf_dynptr_kern * clone__uninit)2499 __bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr,
2500 				 struct bpf_dynptr_kern *clone__uninit)
2501 {
2502 	if (!ptr->data) {
2503 		bpf_dynptr_set_null(clone__uninit);
2504 		return -EINVAL;
2505 	}
2506 
2507 	*clone__uninit = *ptr;
2508 
2509 	return 0;
2510 }
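
/* Usage sketch (illustrative): cloning a dynptr and narrowing the clone to a
 * sub-range without touching the original; 'ptr' is assumed to be an already
 * initialized dynptr of at least 12 bytes:
 *
 *	struct bpf_dynptr clone;
 *	__u32 size;
 *
 *	if (bpf_dynptr_clone(&ptr, &clone))
 *		return 0;
 *	if (bpf_dynptr_adjust(&clone, 4, 12))	// view bytes [4, 12)
 *		return 0;
 *	size = bpf_dynptr_size(&clone);		// 8
 */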
2511 
bpf_cast_to_kern_ctx(void * obj)2512 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
2513 {
2514 	return obj;
2515 }
2516 
bpf_rdonly_cast(void * obj__ign,u32 btf_id__k)2517 __bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
2518 {
2519 	return obj__ign;
2520 }
2521 
bpf_rcu_read_lock(void)2522 __bpf_kfunc void bpf_rcu_read_lock(void)
2523 {
2524 	rcu_read_lock();
2525 }
2526 
bpf_rcu_read_unlock(void)2527 __bpf_kfunc void bpf_rcu_read_unlock(void)
2528 {
2529 	rcu_read_unlock();
2530 }
2531 
2532 __diag_pop();
2533 
2534 BTF_SET8_START(generic_btf_ids)
2535 #ifdef CONFIG_KEXEC_CORE
2536 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
2537 #endif
2538 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
2539 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
2540 BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
2541 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
2542 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
2543 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
2544 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
2545 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
2546 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
2547 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
2548 BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
2549 BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
2550 
2551 #ifdef CONFIG_CGROUPS
2552 BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
2553 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
2554 BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
2555 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
2556 BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
2557 #endif
2558 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
2559 BTF_SET8_END(generic_btf_ids)
2560 
2561 static const struct btf_kfunc_id_set generic_kfunc_set = {
2562 	.owner = THIS_MODULE,
2563 	.set   = &generic_btf_ids,
2564 };
2565 
2566 
2567 BTF_ID_LIST(generic_dtor_ids)
2568 BTF_ID(struct, task_struct)
2569 BTF_ID(func, bpf_task_release)
2570 #ifdef CONFIG_CGROUPS
2571 BTF_ID(struct, cgroup)
2572 BTF_ID(func, bpf_cgroup_release)
2573 #endif
2574 
2575 BTF_SET8_START(common_btf_ids)
2576 BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
2577 BTF_ID_FLAGS(func, bpf_rdonly_cast)
2578 BTF_ID_FLAGS(func, bpf_rcu_read_lock)
2579 BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
2580 BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
2581 BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
2582 BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
2583 BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
2584 BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
2585 BTF_ID_FLAGS(func, bpf_dynptr_adjust)
2586 BTF_ID_FLAGS(func, bpf_dynptr_is_null)
2587 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
2588 BTF_ID_FLAGS(func, bpf_dynptr_size)
2589 BTF_ID_FLAGS(func, bpf_dynptr_clone)
2590 BTF_SET8_END(common_btf_ids)
2591 
2592 static const struct btf_kfunc_id_set common_kfunc_set = {
2593 	.owner = THIS_MODULE,
2594 	.set   = &common_btf_ids,
2595 };
2596 
kfunc_init(void)2597 static int __init kfunc_init(void)
2598 {
2599 	int ret;
2600 	const struct btf_id_dtor_kfunc generic_dtors[] = {
2601 		{
2602 			.btf_id       = generic_dtor_ids[0],
2603 			.kfunc_btf_id = generic_dtor_ids[1]
2604 		},
2605 #ifdef CONFIG_CGROUPS
2606 		{
2607 			.btf_id       = generic_dtor_ids[2],
2608 			.kfunc_btf_id = generic_dtor_ids[3]
2609 		},
2610 #endif
2611 	};
2612 
2613 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
2614 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
2615 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
2616 	ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
2617 						  ARRAY_SIZE(generic_dtors),
2618 						  THIS_MODULE);
2619 	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
2620 }
2621 
2622 late_initcall(kfunc_init);
2623