// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/poison.h>
#include <linux/proc_ns.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>

#include "../../lib/kstrtox.h"

/* If a kernel subsystem allows eBPF programs to call this function, it should
 * return bpf_map_lookup_elem_proto from its own verifier_ops->get_func_proto()
 * callback, so that the verifier can properly check the arguments.
 *
 * Different map implementations rely on RCU in their lookup/update/delete
 * map methods, therefore eBPF programs must run under an RCU lock whenever a
 * program is allowed to access maps; hence the rcu_read_lock_held() or
 * rcu_read_lock_trace_held() checks in all three functions.
 */
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};
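
/* Illustrative sketch only (hypothetical subsystem, compiled out): per the
 * comment above, a subsystem exposes these map helpers to its programs by
 * returning the protos from its verifier_ops->get_func_proto() callback,
 * roughly like this:
 */
#if 0
static const struct bpf_func_proto *
example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	default:
		return NULL;
	}
}
#endif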

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE,
};

BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}

const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
	.func		= bpf_map_lookup_percpu_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_tai_ns)
{
	/* NMI safe access to clock tai */
	return ktime_get_tai_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
	.func		= bpf_ktime_get_tai_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	/* Verifier guarantees that size > 0 */
	strscpy_pad(buf, task->comm, size);
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	preempt_disable();
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
	preempt_enable();
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif

static DEFINE_PER_CPU(unsigned long, irqsave_flags);

static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
}

NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_lock_irqsave(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
	.arg1_btf_id    = BPF_PTR_POISON,
};

static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
}

NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_unlock_irqrestore(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
	.arg1_btf_id    = BPF_PTR_POISON,
};

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->record->spin_lock_off;
	else
		lock = dst + map->record->spin_lock_off;
	preempt_disable();
	__bpf_spin_lock_irqsave(lock);
	copy_map_value(map, dst, src);
	__bpf_spin_unlock_irqrestore(lock);
	preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	cgrp_id = cgroup_id(cgrp);
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp;
	struct cgroup *ancestor;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
#endif /* CONFIG_CGROUPS */

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	cur_len = min(cur_len, sizeof(str) - 1);
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}

static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   s64 *, res)
{
	long long _res;
	int err;

	*res = 0;
	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size	= sizeof(s64),
};
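
/* Usage sketch (illustrative only, BPF program side): the low five bits of
 * 'flags' (BPF_STRTOX_BASE_MASK) select the numeric base, 0 meaning
 * auto-detection from the usual 0/0x prefixes; on success the helper returns
 * the number of bytes consumed:
 *
 *	char str[] = "  -0x1f";
 *	long val;
 *	long n = bpf_strtol(str, sizeof(str), 0, &val);
 *	// expected: n == 7, val == -31
 */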

BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   u64 *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	*res = 0;
	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
	.arg4_size	= sizeof(u64),
};

BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
{
	return strncmp(s1, s2, s1_sz);
}

static const struct bpf_func_proto bpf_strncmp_proto = {
	.func		= bpf_strncmp,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
};

BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
	.arg4_type      = ARG_CONST_SIZE,
};

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto =  {
	.func		= bpf_event_output_data,
	.gpl_only       = true,
	.ret_type       = RET_INTEGER,
	.arg1_type      = ARG_PTR_TO_CTX,
	.arg2_type      = ARG_CONST_MAP_PTR,
	.arg3_type      = ARG_ANYTHING,
	.arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
	   const void __user *, user_ptr)
{
	int ret = copy_from_user(dst, user_ptr, size);

	if (unlikely(ret)) {
		memset(dst, 0, size);
		ret = -EFAULT;
	}

	return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
	.func		= bpf_copy_from_user,
	.gpl_only	= false,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
	int ret;

	/* flags is not used yet */
	if (unlikely(flags))
		return -EINVAL;

	if (unlikely(!size))
		return 0;

	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
	if (ret == size)
		return 0;

	memset(dst, 0, size);
	/* Return -EFAULT for partial read */
	return ret < 0 ? ret : -EFAULT;
}

const struct bpf_func_proto bpf_copy_from_user_task_proto = {
	.func		= bpf_copy_from_user_task,
	.gpl_only	= true,
	.might_sleep	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_BTF_ID,
	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg5_type	= ARG_ANYTHING
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
	if (cpu >= nr_cpu_ids)
		return (unsigned long)NULL;

	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
		size_t bufsz)
{
	void __user *user_ptr = (__force void __user *)unsafe_ptr;

	buf[0] = 0;

	switch (fmt_ptype) {
	case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
		if ((unsigned long)unsafe_ptr < TASK_SIZE)
			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
		fallthrough;
#endif
	case 'k':
		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
	case 'u':
		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
	}

	return -EINVAL;
}

/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
 * arguments representation.
 */
#define MAX_BPRINTF_BIN_ARGS	512

/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL	3
struct bpf_bprintf_buffers {
	char bin_args[MAX_BPRINTF_BIN_ARGS];
	char buf[MAX_BPRINTF_BUF];
};

static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);

static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
	int nest_level;

	preempt_disable();
	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
		this_cpu_dec(bpf_bprintf_nest_level);
		preempt_enable();
		return -EBUSY;
	}
	*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);

	return 0;
}

void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
	if (!data->bin_args && !data->buf)
		return;
	if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
		return;
	this_cpu_dec(bpf_bprintf_nest_level);
	preempt_enable();
}

/*
 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 *
 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 *
 * This can be used in two ways:
 * - Format string verification only: when data->get_bin_args is false
 * - Arguments preparation: in addition to the above verification, it writes in
 *   data->bin_args a binary representation of arguments usable by bstr_printf
 *   where pointers from BPF have been sanitized.
 *
 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 */
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
			u32 num_args, struct bpf_bprintf_data *data)
{
	bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
	struct bpf_bprintf_buffers *buffers = NULL;
	size_t sizeof_cur_arg, sizeof_cur_ip;
	int err, i, num_spec = 0;
	u64 cur_arg;
	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";

	fmt_end = strnchr(fmt, fmt_size, 0);
	if (!fmt_end)
		return -EINVAL;
	fmt_size = fmt_end - fmt;

	if (get_buffers && try_get_buffers(&buffers))
		return -EBUSY;

	if (data->get_bin_args) {
		if (num_args)
			tmp_buf = buffers->bin_args;
		tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
		data->bin_args = (u32 *)tmp_buf;
	}

	if (data->get_buf)
		data->buf = buffers->buf;

	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
			err = -EINVAL;
			goto out;
		}

		if (fmt[i] != '%')
			continue;

		if (fmt[i + 1] == '%') {
			i++;
			continue;
		}

		if (num_spec >= num_args) {
			err = -EINVAL;
			goto out;
		}

		/* The string is zero-terminated so if fmt[i] != 0, we can
		 * always access fmt[i + 1], in the worst case it will be a 0
		 */
		i++;

		/* skip optional "[0 +-][num]" width formatting field */
		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
		       fmt[i] == ' ')
			i++;
		if (fmt[i] >= '1' && fmt[i] <= '9') {
			i++;
			while (fmt[i] >= '0' && fmt[i] <= '9')
				i++;
		}

		if (fmt[i] == 'p') {
			sizeof_cur_arg = sizeof(long);

			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
			    fmt[i + 2] == 's') {
				fmt_ptype = fmt[i + 1];
				i += 2;
				goto fmt_str;
			}

			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
			    fmt[i + 1] == 'S') {
				/* just kernel pointers */
				if (tmp_buf)
					cur_arg = raw_args[num_spec];
				i++;
				goto nocopy_fmt;
			}

			if (fmt[i + 1] == 'B') {
				if (tmp_buf)  {
					err = snprintf(tmp_buf,
						       (tmp_buf_end - tmp_buf),
						       "%pB",
						       (void *)(long)raw_args[num_spec]);
					tmp_buf += (err + 1);
				}

				i++;
				num_spec++;
				continue;
			}

			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
				err = -EINVAL;
				goto out;
			}

			i += 2;
			if (!tmp_buf)
				goto nocopy_fmt;

			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
						       sizeof_cur_ip);
			if (err < 0)
				memset(cur_ip, 0, sizeof_cur_ip);

			/* hack: bstr_printf expects IP addresses to be
			 * pre-formatted as strings, ironically, the easiest way
			 * to do that is to call snprintf.
			 */
			ip_spec[2] = fmt[i - 1];
			ip_spec[3] = fmt[i];
			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
				       ip_spec, &cur_ip);

			tmp_buf += err + 1;
			num_spec++;

			continue;
		} else if (fmt[i] == 's') {
			fmt_ptype = fmt[i];
fmt_str:
			if (fmt[i + 1] != 0 &&
			    !isspace(fmt[i + 1]) &&
			    !ispunct(fmt[i + 1])) {
				err = -EINVAL;
				goto out;
			}

			if (!tmp_buf)
				goto nocopy_fmt;

			if (tmp_buf_end == tmp_buf) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
						    fmt_ptype,
						    tmp_buf_end - tmp_buf);
			if (err < 0) {
				tmp_buf[0] = '\0';
				err = 1;
			}

			tmp_buf += err;
			num_spec++;

			continue;
		} else if (fmt[i] == 'c') {
			if (!tmp_buf)
				goto nocopy_fmt;

			if (tmp_buf_end == tmp_buf) {
				err = -ENOSPC;
				goto out;
			}

			*tmp_buf = raw_args[num_spec];
			tmp_buf++;
			num_spec++;

			continue;
		}

		sizeof_cur_arg = sizeof(int);

		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long);
			i++;
		}
		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long long);
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
		    fmt[i] != 'x' && fmt[i] != 'X') {
			err = -EINVAL;
			goto out;
		}

		if (tmp_buf)
			cur_arg = raw_args[num_spec];
nocopy_fmt:
		if (tmp_buf) {
			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
				err = -ENOSPC;
				goto out;
			}

			if (sizeof_cur_arg == 8) {
				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
			} else {
				*(u32 *)tmp_buf = (u32)(long)cur_arg;
			}
			tmp_buf += sizeof_cur_arg;
		}
		num_spec++;
	}

	err = 0;
out:
	if (err)
		bpf_bprintf_cleanup(data);
	return err;
}
104648cac3f4SFlorent Revest
BPF_CALL_5(bpf_snprintf,char *,str,u32,str_size,char *,fmt,const void *,args,u32,data_len)10477b15523aSFlorent Revest BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
104848cac3f4SFlorent Revest const void *, args, u32, data_len)
10497b15523aSFlorent Revest {
10507b15523aSFlorent Revest struct bpf_bprintf_data data = {
10517b15523aSFlorent Revest .get_bin_args = true,
10527b15523aSFlorent Revest };
10537b15523aSFlorent Revest int err, num_args;
10547b15523aSFlorent Revest
10557b15523aSFlorent Revest if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
10567b15523aSFlorent Revest (data_len && !args))
10577b15523aSFlorent Revest return -EINVAL;
10587b15523aSFlorent Revest num_args = data_len / 8;
10597b15523aSFlorent Revest
1060216e3cd2SHao Luo /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
10617b15523aSFlorent Revest * can safely give an unbounded size.
10627b15523aSFlorent Revest */
10637b15523aSFlorent Revest err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
1064b00628b1SAlexei Starovoitov if (err < 0)
1065b00628b1SAlexei Starovoitov return err;
1066b00628b1SAlexei Starovoitov
1067b00628b1SAlexei Starovoitov err = bstr_printf(str, str_size, fmt, data.bin_args);
1068b00628b1SAlexei Starovoitov
1069b00628b1SAlexei Starovoitov bpf_bprintf_cleanup(&data);
1070b00628b1SAlexei Starovoitov
1071b00628b1SAlexei Starovoitov return err + 1;
1072b00628b1SAlexei Starovoitov }
1073b00628b1SAlexei Starovoitov
1074b00628b1SAlexei Starovoitov const struct bpf_func_proto bpf_snprintf_proto = {
1075b00628b1SAlexei Starovoitov .func = bpf_snprintf,
1076b00628b1SAlexei Starovoitov .gpl_only = true,
1077b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1078b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
1079b00628b1SAlexei Starovoitov .arg2_type = ARG_CONST_SIZE_OR_ZERO,
1080b00628b1SAlexei Starovoitov .arg3_type = ARG_PTR_TO_CONST_STR,
1081b00628b1SAlexei Starovoitov .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
1082b00628b1SAlexei Starovoitov .arg5_type = ARG_CONST_SIZE_OR_ZERO,
1083b00628b1SAlexei Starovoitov };
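/* Illustrative BPF-side sketch (not part of this file): the proto above
 * maps to a program-side call where the args array carries one u64 per
 * conversion specifier and data_len is sizeof(args), a multiple of 8.
 * Names (buf, pid, ts) are invented for the example:
 *
 *	char buf[64];
 *	u64 args[] = { (u64)pid, (u64)ts };
 *	long n = bpf_snprintf(buf, sizeof(buf), "pid %d at %llu\n",
 *			      args, sizeof(args));
 *	// on success n includes the trailing NUL (the err + 1 above)
 */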
1084b00628b1SAlexei Starovoitov
1085b00628b1SAlexei Starovoitov struct bpf_async_cb {
1086b00628b1SAlexei Starovoitov struct bpf_map *map;
1087b00628b1SAlexei Starovoitov struct bpf_prog *prog;
1088b00628b1SAlexei Starovoitov void __rcu *callback_fn;
1089b00628b1SAlexei Starovoitov void *value;
1090b00628b1SAlexei Starovoitov struct rcu_head rcu;
1091b00628b1SAlexei Starovoitov u64 flags;
1092c561d110STom Rix };
1093b00628b1SAlexei Starovoitov
1094b00628b1SAlexei Starovoitov /* BPF map elements can contain 'struct bpf_timer'.
1095b00628b1SAlexei Starovoitov * Such map owns all of its BPF timers.
1096b00628b1SAlexei Starovoitov * 'struct bpf_timer' is allocated as part of map element allocation
1097b00628b1SAlexei Starovoitov * and it's zero initialized.
1098b00628b1SAlexei Starovoitov * That space is used to keep 'struct bpf_async_kern'.
1099b00628b1SAlexei Starovoitov * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
1100b00628b1SAlexei Starovoitov * remembers 'struct bpf_map *' pointer it's part of.
1101b00628b1SAlexei Starovoitov  * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
1102b00628b1SAlexei Starovoitov * bpf_timer_start() arms the timer.
1103b00628b1SAlexei Starovoitov  * If user space reference to a map goes to zero at this point,
1104b00628b1SAlexei Starovoitov * ops->map_release_uref callback is responsible for cancelling the timers,
1105102acbacSKees Cook * freeing their memory, and decrementing prog's refcnts.
1106b00628b1SAlexei Starovoitov * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
1107b00628b1SAlexei Starovoitov * Inner maps can contain bpf timers as well. ops->map_release_uref is
1108b00628b1SAlexei Starovoitov * freeing the timers when inner map is replaced or deleted by user space.
11093bd916eeSYonghong Song */
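/* A minimal BPF-side sketch of the lifecycle described above (illustrative
 * only; map and callback names are invented):
 *
 *	struct map_val { struct bpf_timer t; };
 *
 *	static int timer_cb(void *map, int *key, struct map_val *val)
 *	{
 *		return 0;	// verifier requires a zero return
 *	}
 *
 *	// with val pointing at an element of timer_map:
 *	bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	// fire ~1ms from now
 */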
1110b00628b1SAlexei Starovoitov struct bpf_hrtimer {
1111b00628b1SAlexei Starovoitov struct bpf_async_cb cb;
1112b00628b1SAlexei Starovoitov struct hrtimer timer;
1113b00628b1SAlexei Starovoitov atomic_t cancelling;
1114b00628b1SAlexei Starovoitov };
1115b00628b1SAlexei Starovoitov
1116b00628b1SAlexei Starovoitov /* the actual struct hidden inside uapi struct bpf_timer */
1117b00628b1SAlexei Starovoitov struct bpf_async_kern {
1118b00628b1SAlexei Starovoitov union {
1119b00628b1SAlexei Starovoitov struct bpf_async_cb *cb;
1120b00628b1SAlexei Starovoitov struct bpf_hrtimer *timer;
1121b00628b1SAlexei Starovoitov };
1122b00628b1SAlexei Starovoitov /* bpf_spin_lock is used here instead of spinlock_t to make
1123b00628b1SAlexei Starovoitov * sure that it always fits into space reserved by struct bpf_timer
1124b00628b1SAlexei Starovoitov * regardless of LOCKDEP and spinlock debug flags.
1125b00628b1SAlexei Starovoitov */
1126b00628b1SAlexei Starovoitov struct bpf_spin_lock lock;
1127b00628b1SAlexei Starovoitov } __attribute__((aligned(8)));
1128b00628b1SAlexei Starovoitov
1129b00628b1SAlexei Starovoitov enum bpf_async_type {
1130b00628b1SAlexei Starovoitov BPF_ASYNC_TYPE_TIMER = 0,
1131102acbacSKees Cook };
1132bfc6bb74SAlexei Starovoitov
1133b00628b1SAlexei Starovoitov static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1134b00628b1SAlexei Starovoitov
1135b00628b1SAlexei Starovoitov static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
1136b00628b1SAlexei Starovoitov {
1137b00628b1SAlexei Starovoitov struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
1138b00628b1SAlexei Starovoitov struct bpf_map *map = t->cb.map;
1139b00628b1SAlexei Starovoitov void *value = t->cb.value;
1140b00628b1SAlexei Starovoitov bpf_callback_t callback_fn;
1141b00628b1SAlexei Starovoitov void *key;
1142b00628b1SAlexei Starovoitov u32 idx;
1143b00628b1SAlexei Starovoitov
1144b00628b1SAlexei Starovoitov BTF_TYPE_EMIT(struct bpf_timer);
1145b00628b1SAlexei Starovoitov callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held());
1146b00628b1SAlexei Starovoitov if (!callback_fn)
1147b00628b1SAlexei Starovoitov goto out;
1148b00628b1SAlexei Starovoitov
1149b00628b1SAlexei Starovoitov /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
1150b00628b1SAlexei Starovoitov * cannot be preempted by another bpf_timer_cb() on the same cpu.
1151b00628b1SAlexei Starovoitov * Remember the timer this callback is servicing to prevent
1152b00628b1SAlexei Starovoitov * deadlock if callback_fn() calls bpf_timer_cancel() or
1153b00628b1SAlexei Starovoitov * bpf_map_delete_elem() on the same timer.
1154b00628b1SAlexei Starovoitov */
1155b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, t);
1156b00628b1SAlexei Starovoitov if (map->map_type == BPF_MAP_TYPE_ARRAY) {
1157b00628b1SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map);
1158b00628b1SAlexei Starovoitov
1159b00628b1SAlexei Starovoitov /* compute the key */
1160b00628b1SAlexei Starovoitov idx = ((char *)value - array->value) / array->elem_size;
1161b00628b1SAlexei Starovoitov key = &idx;
1162b00628b1SAlexei Starovoitov } else { /* hash or lru */
1163b00628b1SAlexei Starovoitov key = value - round_up(map->key_size, 8);
1164b00628b1SAlexei Starovoitov }
1165b00628b1SAlexei Starovoitov
1166b00628b1SAlexei Starovoitov callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
1167b00628b1SAlexei Starovoitov /* The verifier checked that return value is zero. */
1168b00628b1SAlexei Starovoitov
1169b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, NULL);
1170b00628b1SAlexei Starovoitov out:
1171b00628b1SAlexei Starovoitov return HRTIMER_NORESTART;
1172b00628b1SAlexei Starovoitov }
1173b00628b1SAlexei Starovoitov
1174b00628b1SAlexei Starovoitov static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
1175b00628b1SAlexei Starovoitov enum bpf_async_type type)
1176b00628b1SAlexei Starovoitov {
1177b00628b1SAlexei Starovoitov struct bpf_async_cb *cb;
1178db559117SKumar Kartikeya Dwivedi struct bpf_hrtimer *t;
1179b00628b1SAlexei Starovoitov clockid_t clockid;
1180b00628b1SAlexei Starovoitov size_t size;
1181b00628b1SAlexei Starovoitov int ret = 0;
1182b00628b1SAlexei Starovoitov
1183b00628b1SAlexei Starovoitov if (in_nmi())
1184b00628b1SAlexei Starovoitov return -EOPNOTSUPP;
1185b00628b1SAlexei Starovoitov
1186b00628b1SAlexei Starovoitov switch (type) {
1187b00628b1SAlexei Starovoitov case BPF_ASYNC_TYPE_TIMER:
1188b00628b1SAlexei Starovoitov size = sizeof(struct bpf_hrtimer);
1189b00628b1SAlexei Starovoitov break;
1190b00628b1SAlexei Starovoitov default:
1191b00628b1SAlexei Starovoitov return -EINVAL;
1192b00628b1SAlexei Starovoitov }
1193b00628b1SAlexei Starovoitov
1194b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&async->lock);
1195b00628b1SAlexei Starovoitov t = async->timer;
1196b00628b1SAlexei Starovoitov if (t) {
1197b00628b1SAlexei Starovoitov ret = -EBUSY;
1198b00628b1SAlexei Starovoitov goto out;
1199b00628b1SAlexei Starovoitov }
1200b00628b1SAlexei Starovoitov
1201b00628b1SAlexei Starovoitov /* allocate hrtimer via map_kmalloc to use memcg accounting */
1202b00628b1SAlexei Starovoitov cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
1203b00628b1SAlexei Starovoitov if (!cb) {
1204b00628b1SAlexei Starovoitov ret = -ENOMEM;
1205b00628b1SAlexei Starovoitov goto out;
1206b00628b1SAlexei Starovoitov }
1207b00628b1SAlexei Starovoitov
1208b00628b1SAlexei Starovoitov if (type == BPF_ASYNC_TYPE_TIMER) {
1209b00628b1SAlexei Starovoitov clockid = flags & (MAX_CLOCKS - 1);
1210b00628b1SAlexei Starovoitov t = (struct bpf_hrtimer *)cb;
1211b00628b1SAlexei Starovoitov
1212b00628b1SAlexei Starovoitov atomic_set(&t->cancelling, 0);
1213b00628b1SAlexei Starovoitov hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
1214b00628b1SAlexei Starovoitov t->timer.function = bpf_timer_cb;
1215b00628b1SAlexei Starovoitov cb->value = (void *)async - map->record->timer_off;
1216b00628b1SAlexei Starovoitov }
1217b00628b1SAlexei Starovoitov cb->map = map;
1218b00628b1SAlexei Starovoitov cb->prog = NULL;
1219b00628b1SAlexei Starovoitov cb->flags = flags;
1220b00628b1SAlexei Starovoitov rcu_assign_pointer(cb->callback_fn, NULL);
1221b00628b1SAlexei Starovoitov
1222b00628b1SAlexei Starovoitov WRITE_ONCE(async->cb, cb);
1223b00628b1SAlexei Starovoitov /* Guarantee the order between async->cb and map->usercnt. So
1224b00628b1SAlexei Starovoitov * when there are concurrent uref release and bpf timer init, either
1225b00628b1SAlexei Starovoitov 	 * bpf_timer_cancel_and_free() called by uref release reads a non-NULL
1226b00628b1SAlexei Starovoitov * timer or atomic64_read() below returns a zero usercnt.
1227b00628b1SAlexei Starovoitov */
1228b00628b1SAlexei Starovoitov smp_mb();
1229b00628b1SAlexei Starovoitov if (!atomic64_read(&map->usercnt)) {
1230b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space
1231b00628b1SAlexei Starovoitov * or pinned in bpffs.
1232b00628b1SAlexei Starovoitov */
1233b00628b1SAlexei Starovoitov WRITE_ONCE(async->cb, NULL);
1234b00628b1SAlexei Starovoitov kfree(cb);
1235b00628b1SAlexei Starovoitov ret = -EPERM;
1236b00628b1SAlexei Starovoitov }
1237b00628b1SAlexei Starovoitov out:
1238b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&async->lock);
1239b00628b1SAlexei Starovoitov return ret;
1240b00628b1SAlexei Starovoitov }
1241b00628b1SAlexei Starovoitov
1242b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map,
1243b00628b1SAlexei Starovoitov u64, flags)
1244b00628b1SAlexei Starovoitov {
1245b00628b1SAlexei Starovoitov 	clockid_t clockid = flags & (MAX_CLOCKS - 1);
1246b00628b1SAlexei Starovoitov
1247b00628b1SAlexei Starovoitov BUILD_BUG_ON(MAX_CLOCKS != 16);
1248b00628b1SAlexei Starovoitov BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer));
1249b00628b1SAlexei Starovoitov BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer));
1250b00628b1SAlexei Starovoitov
1251b00628b1SAlexei Starovoitov if (flags >= MAX_CLOCKS ||
1252b00628b1SAlexei Starovoitov /* similar to timerfd except _ALARM variants are not supported */
1253b00628b1SAlexei Starovoitov (clockid != CLOCK_MONOTONIC &&
1254b00628b1SAlexei Starovoitov clockid != CLOCK_REALTIME &&
1255b00628b1SAlexei Starovoitov clockid != CLOCK_BOOTTIME))
1256b00628b1SAlexei Starovoitov return -EINVAL;
1257b00628b1SAlexei Starovoitov
1258b00628b1SAlexei Starovoitov return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER);
1259b00628b1SAlexei Starovoitov }
1260b00628b1SAlexei Starovoitov
1261b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_init_proto = {
1262b00628b1SAlexei Starovoitov .func = bpf_timer_init,
1263b00628b1SAlexei Starovoitov .gpl_only = true,
1264b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1265b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER,
1266b00628b1SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR,
1267b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING,
1268b00628b1SAlexei Starovoitov };
1269b00628b1SAlexei Starovoitov
1270b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn,
1271b00628b1SAlexei Starovoitov struct bpf_prog_aux *, aux)
1272b00628b1SAlexei Starovoitov {
1273b00628b1SAlexei Starovoitov struct bpf_prog *prev, *prog = aux->prog;
1274b00628b1SAlexei Starovoitov struct bpf_hrtimer *t;
1275b00628b1SAlexei Starovoitov int ret = 0;
1276b00628b1SAlexei Starovoitov
1277b00628b1SAlexei Starovoitov if (in_nmi())
1278b00628b1SAlexei Starovoitov return -EOPNOTSUPP;
1279b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock);
1280b00628b1SAlexei Starovoitov t = timer->timer;
1281b00628b1SAlexei Starovoitov if (!t) {
1282b00628b1SAlexei Starovoitov ret = -EINVAL;
1283b00628b1SAlexei Starovoitov goto out;
1284b00628b1SAlexei Starovoitov }
1285b00628b1SAlexei Starovoitov if (!atomic64_read(&t->cb.map->usercnt)) {
1286b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space
1287b00628b1SAlexei Starovoitov * or pinned in bpffs. Otherwise timer might still be
1288b00628b1SAlexei Starovoitov * running even when bpf prog is detached and user space
1289b00628b1SAlexei Starovoitov * is gone, since map_release_uref won't ever be called.
1290b00628b1SAlexei Starovoitov */
1291b00628b1SAlexei Starovoitov ret = -EPERM;
1292b00628b1SAlexei Starovoitov goto out;
1293b00628b1SAlexei Starovoitov }
1294b00628b1SAlexei Starovoitov prev = t->cb.prog;
1295b00628b1SAlexei Starovoitov if (prev != prog) {
1296b00628b1SAlexei Starovoitov /* Bump prog refcnt once. Every bpf_timer_set_callback()
1297b00628b1SAlexei Starovoitov * can pick different callback_fn-s within the same prog.
1298b00628b1SAlexei Starovoitov */
1299b00628b1SAlexei Starovoitov prog = bpf_prog_inc_not_zero(prog);
1300b00628b1SAlexei Starovoitov if (IS_ERR(prog)) {
1301b00628b1SAlexei Starovoitov ret = PTR_ERR(prog);
1302b00628b1SAlexei Starovoitov goto out;
1303b00628b1SAlexei Starovoitov }
1304b00628b1SAlexei Starovoitov if (prev)
1305b00628b1SAlexei Starovoitov /* Drop prev prog refcnt when swapping with new prog */
1306b00628b1SAlexei Starovoitov bpf_prog_put(prev);
1307b00628b1SAlexei Starovoitov t->cb.prog = prog;
1308b00628b1SAlexei Starovoitov }
1309b00628b1SAlexei Starovoitov rcu_assign_pointer(t->cb.callback_fn, callback_fn);
1310b00628b1SAlexei Starovoitov out:
1311b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock);
1312b00628b1SAlexei Starovoitov return ret;
1313b00628b1SAlexei Starovoitov }
1314b00628b1SAlexei Starovoitov
1315b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_set_callback_proto = {
1316b00628b1SAlexei Starovoitov .func = bpf_timer_set_callback,
1317b00628b1SAlexei Starovoitov .gpl_only = true,
1318b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1319b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER,
1320b00628b1SAlexei Starovoitov .arg2_type = ARG_PTR_TO_FUNC,
1321b00628b1SAlexei Starovoitov };
1322b00628b1SAlexei Starovoitov
1323b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags)
1324b00628b1SAlexei Starovoitov {
1325b00628b1SAlexei Starovoitov struct bpf_hrtimer *t;
1326b00628b1SAlexei Starovoitov int ret = 0;
1327b00628b1SAlexei Starovoitov enum hrtimer_mode mode;
1328b00628b1SAlexei Starovoitov
1329b00628b1SAlexei Starovoitov if (in_nmi())
1330b00628b1SAlexei Starovoitov return -EOPNOTSUPP;
1331b00628b1SAlexei Starovoitov if (flags > BPF_F_TIMER_ABS)
1332b00628b1SAlexei Starovoitov return -EINVAL;
1333b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock);
1334b00628b1SAlexei Starovoitov t = timer->timer;
1335b00628b1SAlexei Starovoitov if (!t || !t->cb.prog) {
1336b00628b1SAlexei Starovoitov ret = -EINVAL;
1337b00628b1SAlexei Starovoitov goto out;
1338b00628b1SAlexei Starovoitov }
1339b00628b1SAlexei Starovoitov
1340b00628b1SAlexei Starovoitov if (flags & BPF_F_TIMER_ABS)
1341b00628b1SAlexei Starovoitov mode = HRTIMER_MODE_ABS_SOFT;
1342b00628b1SAlexei Starovoitov else
1343b00628b1SAlexei Starovoitov mode = HRTIMER_MODE_REL_SOFT;
1344b00628b1SAlexei Starovoitov
1345b00628b1SAlexei Starovoitov hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
1346b00628b1SAlexei Starovoitov out:
1347b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock);
1348b00628b1SAlexei Starovoitov return ret;
1349b00628b1SAlexei Starovoitov }
1350b00628b1SAlexei Starovoitov
1351b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_start_proto = {
1352b00628b1SAlexei Starovoitov .func = bpf_timer_start,
1353b00628b1SAlexei Starovoitov .gpl_only = true,
1354b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1355b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER,
1356b00628b1SAlexei Starovoitov .arg2_type = ARG_ANYTHING,
1357b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING,
1358b00628b1SAlexei Starovoitov };
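/* Usage note (hedged): with flags == 0 the nsecs argument is relative to now;
 * with BPF_F_TIMER_ABS it is an absolute expiry on the clock the timer was
 * initialized with. Reusing the invented map_val sketch above and assuming
 * CLOCK_MONOTONIC:
 *
 *	bpf_timer_start(&val->t, bpf_ktime_get_ns() + 5000000, BPF_F_TIMER_ABS);
 */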
1359b00628b1SAlexei Starovoitov
1360b00628b1SAlexei Starovoitov static void drop_prog_refcnt(struct bpf_async_cb *async)
1361b00628b1SAlexei Starovoitov {
1362b00628b1SAlexei Starovoitov struct bpf_prog *prog = async->prog;
1363b00628b1SAlexei Starovoitov
1364b00628b1SAlexei Starovoitov if (prog) {
1365b00628b1SAlexei Starovoitov bpf_prog_put(prog);
1366b00628b1SAlexei Starovoitov async->prog = NULL;
1367b00628b1SAlexei Starovoitov rcu_assign_pointer(async->callback_fn, NULL);
1368b00628b1SAlexei Starovoitov }
1369b00628b1SAlexei Starovoitov }
1370b00628b1SAlexei Starovoitov
1371b00628b1SAlexei Starovoitov BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
1372b00628b1SAlexei Starovoitov {
1373b00628b1SAlexei Starovoitov struct bpf_hrtimer *t, *cur_t;
1374b00628b1SAlexei Starovoitov bool inc = false;
1375b00628b1SAlexei Starovoitov int ret = 0;
1376b00628b1SAlexei Starovoitov
1377b00628b1SAlexei Starovoitov if (in_nmi())
1378c0a5a21cSKumar Kartikeya Dwivedi return -EOPNOTSUPP;
1379c0a5a21cSKumar Kartikeya Dwivedi rcu_read_lock();
1380c0a5a21cSKumar Kartikeya Dwivedi __bpf_spin_lock_irqsave(&timer->lock);
1381c0a5a21cSKumar Kartikeya Dwivedi t = timer->timer;
1382c0a5a21cSKumar Kartikeya Dwivedi if (!t) {
1383c0a5a21cSKumar Kartikeya Dwivedi ret = -EINVAL;
1384c0a5a21cSKumar Kartikeya Dwivedi goto out;
1385c0a5a21cSKumar Kartikeya Dwivedi }
138647e34cb7SDave Marchevsky
138747e34cb7SDave Marchevsky cur_t = this_cpu_read(hrtimer_running);
1388c0a5a21cSKumar Kartikeya Dwivedi if (cur_t == t) {
1389dc368e1cSJoanne Koong /* If bpf callback_fn is trying to bpf_timer_cancel()
1390c0a5a21cSKumar Kartikeya Dwivedi * its own timer the hrtimer_cancel() will deadlock
1391c0a5a21cSKumar Kartikeya Dwivedi * since it waits for callback_fn to finish.
1392c0a5a21cSKumar Kartikeya Dwivedi */
1393c0a5a21cSKumar Kartikeya Dwivedi ret = -EDEADLK;
1394c0a5a21cSKumar Kartikeya Dwivedi goto out;
1395c0a5a21cSKumar Kartikeya Dwivedi }
1396c0a5a21cSKumar Kartikeya Dwivedi
1397c0a5a21cSKumar Kartikeya Dwivedi /* Only account in-flight cancellations when invoked from a timer
1398c0a5a21cSKumar Kartikeya Dwivedi * callback, since we want to avoid waiting only if other _callbacks_
1399263ae152SJoanne Koong * are waiting on us, to avoid introducing lockups. Non-callback paths
1400263ae152SJoanne Koong * are ok, since nobody would synchronously wait for their completion.
1401263ae152SJoanne Koong */
1402263ae152SJoanne Koong if (!cur_t)
1403263ae152SJoanne Koong goto drop;
140413bbbfbeSJoanne Koong atomic_inc(&t->cancelling);
140513bbbfbeSJoanne Koong /* Need full barrier after relaxed atomic_inc */
140613bbbfbeSJoanne Koong smp_mb__after_atomic();
140727060531SKumar Kartikeya Dwivedi inc = true;
140813bbbfbeSJoanne Koong if (atomic_read(&cur_t->cancelling)) {
140913bbbfbeSJoanne Koong /* We're cancelling timer t, while some other timer callback is
141013bbbfbeSJoanne Koong * attempting to cancel us. In such a case, it might be possible
1411263ae152SJoanne Koong * that timer t belongs to the other callback, or some other
1412263ae152SJoanne Koong * callback waiting upon it (creating transitive dependencies
1413263ae152SJoanne Koong * upon us), and we will enter a deadlock if we continue
1414263ae152SJoanne Koong * cancelling and waiting for it synchronously, since it might
1415263ae152SJoanne Koong * do the same. Bail!
1416263ae152SJoanne Koong */
141727060531SKumar Kartikeya Dwivedi ret = -EDEADLK;
141813bbbfbeSJoanne Koong goto out;
141913bbbfbeSJoanne Koong }
142013bbbfbeSJoanne Koong drop:
142113bbbfbeSJoanne Koong drop_prog_refcnt(&t->cb);
1422bc34dee6SJoanne Koong out:
1423263ae152SJoanne Koong __bpf_spin_unlock_irqrestore(&timer->lock);
1424263ae152SJoanne Koong /* Cancel the timer and wait for associated callback to finish
1425263ae152SJoanne Koong * if it was running.
1426263ae152SJoanne Koong */
1427bc34dee6SJoanne Koong ret = ret ?: hrtimer_cancel(&t->timer);
1428263ae152SJoanne Koong if (inc)
1429263ae152SJoanne Koong atomic_dec(&t->cancelling);
1430263ae152SJoanne Koong rcu_read_unlock();
1431263ae152SJoanne Koong return ret;
1432263ae152SJoanne Koong }
1433263ae152SJoanne Koong
1434263ae152SJoanne Koong static const struct bpf_func_proto bpf_timer_cancel_proto = {
1435263ae152SJoanne Koong .func = bpf_timer_cancel,
1436bc34dee6SJoanne Koong .gpl_only = true,
1437263ae152SJoanne Koong .ret_type = RET_INTEGER,
1438263ae152SJoanne Koong .arg1_type = ARG_PTR_TO_TIMER,
1439263ae152SJoanne Koong };
1440263ae152SJoanne Koong
144127060531SKumar Kartikeya Dwivedi /* This function is called by map_delete/update_elem for individual element and
144213bbbfbeSJoanne Koong * by ops->map_release_uref when the user space reference to a map reaches zero.
144313bbbfbeSJoanne Koong */
144413bbbfbeSJoanne Koong void bpf_timer_cancel_and_free(void *val)
144513bbbfbeSJoanne Koong {
144613bbbfbeSJoanne Koong struct bpf_async_kern *timer = val;
144713bbbfbeSJoanne Koong struct bpf_hrtimer *t;
144813bbbfbeSJoanne Koong
144913bbbfbeSJoanne Koong /* Performance optimization: read timer->timer without lock first. */
145013bbbfbeSJoanne Koong if (!READ_ONCE(timer->timer))
1451263ae152SJoanne Koong return;
1452263ae152SJoanne Koong
1453263ae152SJoanne Koong __bpf_spin_lock_irqsave(&timer->lock);
1454263ae152SJoanne Koong /* re-read it under lock */
145500f14641SRoberto Sassu t = timer->timer;
145600f14641SRoberto Sassu if (!t)
1457263ae152SJoanne Koong goto out;
1458263ae152SJoanne Koong drop_prog_refcnt(&t->cb);
1459263ae152SJoanne Koong /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1460263ae152SJoanne Koong * this timer, since it won't be initialized.
1461263ae152SJoanne Koong */
1462263ae152SJoanne Koong WRITE_ONCE(timer->timer, NULL);
1463263ae152SJoanne Koong out:
1464263ae152SJoanne Koong __bpf_spin_unlock_irqrestore(&timer->lock);
1465263ae152SJoanne Koong if (!t)
1466263ae152SJoanne Koong return;
1467263ae152SJoanne Koong /* Cancel the timer and wait for callback to complete if it was running.
1468263ae152SJoanne Koong * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
1469263ae152SJoanne Koong * right after for both preallocated and non-preallocated maps.
1470263ae152SJoanne Koong * The timer->timer = NULL was already done and no code path can
1471263ae152SJoanne Koong * see address 't' anymore.
1472263ae152SJoanne Koong *
1473263ae152SJoanne Koong * Check that bpf_map_delete/update_elem() wasn't called from timer
1474263ae152SJoanne Koong 	 * callback_fn. In such a case don't call hrtimer_cancel() (since it will
1475263ae152SJoanne Koong * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1476dc368e1cSJoanne Koong * return -1). Though callback_fn is still running on this cpu it's
1477263ae152SJoanne Koong * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1478263ae152SJoanne Koong * from 't'. The bpf subprog callback_fn won't be able to access 't',
1479263ae152SJoanne Koong * since timer->timer = NULL was already done. The timer will be
1480263ae152SJoanne Koong * effectively cancelled because bpf_timer_cb() will return
1481263ae152SJoanne Koong * HRTIMER_NORESTART.
1482263ae152SJoanne Koong */
1483263ae152SJoanne Koong if (this_cpu_read(hrtimer_running) != t)
1484263ae152SJoanne Koong hrtimer_cancel(&t->timer);
1485263ae152SJoanne Koong kfree_rcu(t, cb.rcu);
148627060531SKumar Kartikeya Dwivedi }
1487f8d3da4eSJoanne Koong
148813bbbfbeSJoanne Koong BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
148913bbbfbeSJoanne Koong {
149013bbbfbeSJoanne Koong unsigned long *kptr = map_value;
1491f8d3da4eSJoanne Koong
149213bbbfbeSJoanne Koong return xchg(kptr, (unsigned long)ptr);
149313bbbfbeSJoanne Koong }
149413bbbfbeSJoanne Koong
149513bbbfbeSJoanne Koong /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg()
149613bbbfbeSJoanne Koong * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to
149713bbbfbeSJoanne Koong * denote type that verifier will determine.
1498*76d16077SKumar Kartikeya Dwivedi */
1499*76d16077SKumar Kartikeya Dwivedi static const struct bpf_func_proto bpf_kptr_xchg_proto = {
1500*76d16077SKumar Kartikeya Dwivedi .func = bpf_kptr_xchg,
1501*76d16077SKumar Kartikeya Dwivedi .gpl_only = false,
1502*76d16077SKumar Kartikeya Dwivedi .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
150313bbbfbeSJoanne Koong .ret_btf_id = BPF_PTR_POISON,
150413bbbfbeSJoanne Koong .arg1_type = ARG_PTR_TO_KPTR,
150513bbbfbeSJoanne Koong .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
150613bbbfbeSJoanne Koong .arg2_btf_id = BPF_PTR_POISON,
1507dc368e1cSJoanne Koong };
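/* Illustrative BPF-side sketch: a map value declares a kptr field and the
 * helper atomically swaps it, returning the previously stored pointer (or
 * NULL). Type and field names are invented; the old pointer must then be
 * released with the matching release kfunc (e.g. bpf_obj_drop() for
 * bpf_obj_new() objects):
 *
 *	struct map_val { struct prog_obj __kptr *obj; };
 *
 *	// new_obj is an owned reference (e.g. from bpf_obj_new()) or NULL
 *	old = bpf_kptr_xchg(&val->obj, new_obj);
 *	if (old)
 *		bpf_obj_drop(old);
 */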
150813bbbfbeSJoanne Koong
150913bbbfbeSJoanne Koong /* Since the upper 8 bits of dynptr->size are reserved, the
151013bbbfbeSJoanne Koong * maximum supported size is 2^24 - 1.
151113bbbfbeSJoanne Koong */
151213bbbfbeSJoanne Koong #define DYNPTR_MAX_SIZE ((1UL << 24) - 1)
151327060531SKumar Kartikeya Dwivedi #define DYNPTR_TYPE_SHIFT 28
151413bbbfbeSJoanne Koong #define DYNPTR_SIZE_MASK 0xFFFFFF
1515f8d3da4eSJoanne Koong #define DYNPTR_RDONLY_BIT BIT(31)
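/* Layout implied by the masks above (a sketch, not uapi): bits 0-23 hold the
 * size, bits 28-30 hold the enum bpf_dynptr_type, bit 31 marks the dynptr
 * read-only and bits 24-27 are currently unused. E.g. a 100 byte read-only
 * ringbuf dynptr carries
 *	100 | (BPF_DYNPTR_TYPE_RINGBUF << DYNPTR_TYPE_SHIFT) | DYNPTR_RDONLY_BIT
 * in ptr->size.
 */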
151613bbbfbeSJoanne Koong
151713bbbfbeSJoanne Koong static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
151827060531SKumar Kartikeya Dwivedi {
1519f8d3da4eSJoanne Koong return ptr->size & DYNPTR_RDONLY_BIT;
152013bbbfbeSJoanne Koong }
152113bbbfbeSJoanne Koong
152213bbbfbeSJoanne Koong void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
1523f8d3da4eSJoanne Koong {
152413bbbfbeSJoanne Koong ptr->size |= DYNPTR_RDONLY_BIT;
152513bbbfbeSJoanne Koong }
152613bbbfbeSJoanne Koong
152713bbbfbeSJoanne Koong static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
152813bbbfbeSJoanne Koong {
152913bbbfbeSJoanne Koong ptr->size |= type << DYNPTR_TYPE_SHIFT;
1530*76d16077SKumar Kartikeya Dwivedi }
1531*76d16077SKumar Kartikeya Dwivedi
1532*76d16077SKumar Kartikeya Dwivedi static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
1533*76d16077SKumar Kartikeya Dwivedi {
1534*76d16077SKumar Kartikeya Dwivedi return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
153513bbbfbeSJoanne Koong }
153613bbbfbeSJoanne Koong
153713bbbfbeSJoanne Koong u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
153813bbbfbeSJoanne Koong {
1539dc368e1cSJoanne Koong return ptr->size & DYNPTR_SIZE_MASK;
154013bbbfbeSJoanne Koong }
154113bbbfbeSJoanne Koong
154213bbbfbeSJoanne Koong static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
154327060531SKumar Kartikeya Dwivedi {
154413bbbfbeSJoanne Koong u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
154513bbbfbeSJoanne Koong
154613bbbfbeSJoanne Koong ptr->size = new_size | metadata;
1547f8d3da4eSJoanne Koong }
154813bbbfbeSJoanne Koong
154913bbbfbeSJoanne Koong int bpf_dynptr_check_size(u32 size)
155027060531SKumar Kartikeya Dwivedi {
155134d4ef57SJoanne Koong return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
155234d4ef57SJoanne Koong }
155334d4ef57SJoanne Koong
155434d4ef57SJoanne Koong void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
155534d4ef57SJoanne Koong enum bpf_dynptr_type type, u32 offset, u32 size)
155634d4ef57SJoanne Koong {
155734d4ef57SJoanne Koong ptr->data = data;
155834d4ef57SJoanne Koong ptr->offset = offset;
155934d4ef57SJoanne Koong ptr->size = size;
156034d4ef57SJoanne Koong bpf_dynptr_set_type(ptr, type);
156134d4ef57SJoanne Koong }
156234d4ef57SJoanne Koong
156334d4ef57SJoanne Koong void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
156434d4ef57SJoanne Koong {
156534d4ef57SJoanne Koong memset(ptr, 0, sizeof(*ptr));
156634d4ef57SJoanne Koong }
1567dc368e1cSJoanne Koong
156834d4ef57SJoanne Koong static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
156934d4ef57SJoanne Koong {
157034d4ef57SJoanne Koong u32 size = __bpf_dynptr_size(ptr);
157127060531SKumar Kartikeya Dwivedi
157234d4ef57SJoanne Koong if (len > size || offset > size - len)
157334d4ef57SJoanne Koong return -E2BIG;
157434d4ef57SJoanne Koong
157534d4ef57SJoanne Koong return 0;
1576f470378cSJohn Fastabend }
1577a396eda5SDaniel Xu
1578f470378cSJohn Fastabend BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
1579f470378cSJohn Fastabend {
1580f470378cSJohn Fastabend int err;
1581f470378cSJohn Fastabend
1582dd6e10fbSDaniel Xu BTF_TYPE_EMIT(struct bpf_dynptr);
1583f470378cSJohn Fastabend
15846890896bSStanislav Fomichev err = bpf_dynptr_check_size(size);
15856890896bSStanislav Fomichev if (err)
15866890896bSStanislav Fomichev goto error;
15876890896bSStanislav Fomichev
15886890896bSStanislav Fomichev /* flags is currently unsupported */
15896890896bSStanislav Fomichev if (flags) {
15906890896bSStanislav Fomichev err = -EINVAL;
15916890896bSStanislav Fomichev goto error;
15926890896bSStanislav Fomichev }
15936890896bSStanislav Fomichev
15946890896bSStanislav Fomichev bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
15956890896bSStanislav Fomichev
15966890896bSStanislav Fomichev return 0;
15976890896bSStanislav Fomichev
15986890896bSStanislav Fomichev error:
15996890896bSStanislav Fomichev bpf_dynptr_set_null(ptr);
160007343110SFeng Zhou return err;
160107343110SFeng Zhou }
16026890896bSStanislav Fomichev
16036890896bSStanislav Fomichev static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
16046890896bSStanislav Fomichev .func = bpf_dynptr_from_mem,
16056890896bSStanislav Fomichev .gpl_only = false,
16066890896bSStanislav Fomichev .ret_type = RET_INTEGER,
16076890896bSStanislav Fomichev .arg1_type = ARG_PTR_TO_UNINIT_MEM,
16086890896bSStanislav Fomichev .arg2_type = ARG_CONST_SIZE_OR_ZERO,
16096890896bSStanislav Fomichev .arg3_type = ARG_ANYTHING,
16106890896bSStanislav Fomichev .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
16116890896bSStanislav Fomichev };
161271d19214SMaciej Żenczykowski
161371d19214SMaciej Żenczykowski BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
1614c8996c98SJesper Dangaard Brouer u32, offset, u64, flags)
1615c8996c98SJesper Dangaard Brouer {
1616457f4436SAndrii Nakryiko enum bpf_dynptr_type type;
1617457f4436SAndrii Nakryiko int err;
1618457f4436SAndrii Nakryiko
1619457f4436SAndrii Nakryiko if (!src->data || flags)
1620457f4436SAndrii Nakryiko return -EINVAL;
1621457f4436SAndrii Nakryiko
1622457f4436SAndrii Nakryiko err = bpf_dynptr_check_off_len(src, offset, len);
1623457f4436SAndrii Nakryiko if (err)
1624457f4436SAndrii Nakryiko return err;
1625457f4436SAndrii Nakryiko
1626c5fb1993SHou Tao type = bpf_dynptr_get_type(src);
1627c5fb1993SHou Tao
16288a67f2deSStanislav Fomichev switch (type) {
16298a67f2deSStanislav Fomichev case BPF_DYNPTR_TYPE_LOCAL:
16308a67f2deSStanislav Fomichev case BPF_DYNPTR_TYPE_RINGBUF:
16318a67f2deSStanislav Fomichev /* Source and destination may possibly overlap, hence use memmove to
16326890896bSStanislav Fomichev 	 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
16336890896bSStanislav Fomichev * pointing to overlapping PTR_TO_MAP_VALUE regions.
16346890896bSStanislav Fomichev */
16356890896bSStanislav Fomichev memmove(dst, src->data + src->offset + offset, len);
16362c78ee89SAlexei Starovoitov return 0;
16376890896bSStanislav Fomichev case BPF_DYNPTR_TYPE_SKB:
16386890896bSStanislav Fomichev return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
16396890896bSStanislav Fomichev case BPF_DYNPTR_TYPE_XDP:
16406890896bSStanislav Fomichev return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
16416890896bSStanislav Fomichev default:
16426890896bSStanislav Fomichev WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
16436890896bSStanislav Fomichev return -EFAULT;
16446890896bSStanislav Fomichev }
16456890896bSStanislav Fomichev }
1646b7906b70SAndrii Nakryiko
1647eaa6bcb7SHao Luo static const struct bpf_func_proto bpf_dynptr_read_proto = {
1648b7906b70SAndrii Nakryiko .func = bpf_dynptr_read,
164963d9b80dSHao Luo .gpl_only = false,
1650b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1651b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_UNINIT_MEM,
1652b00628b1SAlexei Starovoitov .arg2_type = ARG_CONST_SIZE_OR_ZERO,
1653b00628b1SAlexei Starovoitov .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1654b00628b1SAlexei Starovoitov .arg4_type = ARG_ANYTHING,
1655b00628b1SAlexei Starovoitov .arg5_type = ARG_ANYTHING,
1656b00628b1SAlexei Starovoitov };
1657b00628b1SAlexei Starovoitov
1658c0a5a21cSKumar Kartikeya Dwivedi BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
1659c0a5a21cSKumar Kartikeya Dwivedi u32, len, u64, flags)
16605679ff2fSKumar Kartikeya Dwivedi {
16615679ff2fSKumar Kartikeya Dwivedi enum bpf_dynptr_type type;
16625679ff2fSKumar Kartikeya Dwivedi int err;
16635679ff2fSKumar Kartikeya Dwivedi
166420571567SDavid Vernet if (!dst->data || __bpf_dynptr_is_rdonly(dst))
166520571567SDavid Vernet return -EINVAL;
16668addbfc7SKumar Kartikeya Dwivedi
16678addbfc7SKumar Kartikeya Dwivedi err = bpf_dynptr_check_off_len(dst, offset, len);
16688addbfc7SKumar Kartikeya Dwivedi if (err)
16698addbfc7SKumar Kartikeya Dwivedi return err;
16708addbfc7SKumar Kartikeya Dwivedi
16718addbfc7SKumar Kartikeya Dwivedi type = bpf_dynptr_get_type(dst);
16728addbfc7SKumar Kartikeya Dwivedi
16738addbfc7SKumar Kartikeya Dwivedi switch (type) {
16748addbfc7SKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_LOCAL:
16758addbfc7SKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_RINGBUF:
16768addbfc7SKumar Kartikeya Dwivedi if (flags)
16778addbfc7SKumar Kartikeya Dwivedi return -EINVAL;
16788addbfc7SKumar Kartikeya Dwivedi /* Source and destination may possibly overlap, hence use memmove to
16798addbfc7SKumar Kartikeya Dwivedi 	 * copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
1680c4bcfb38SYonghong Song * pointing to overlapping PTR_TO_MAP_VALUE regions.
1681c4bcfb38SYonghong Song */
1682c4bcfb38SYonghong Song memmove(dst->data + dst->offset + offset, src, len);
1683c4bcfb38SYonghong Song return 0;
1684c4bcfb38SYonghong Song case BPF_DYNPTR_TYPE_SKB:
1685c4bcfb38SYonghong Song return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
16866890896bSStanislav Fomichev flags);
1687f470378cSJohn Fastabend case BPF_DYNPTR_TYPE_XDP:
1688f470378cSJohn Fastabend if (flags)
1689f470378cSJohn Fastabend return -EINVAL;
1690f470378cSJohn Fastabend return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
1691f470378cSJohn Fastabend default:
1692f470378cSJohn Fastabend WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
1693f470378cSJohn Fastabend return -EFAULT;
169461ca36c8STobias Klauser }
169561ca36c8STobias Klauser }
1696f470378cSJohn Fastabend
1697f470378cSJohn Fastabend static const struct bpf_func_proto bpf_dynptr_write_proto = {
1698a396eda5SDaniel Xu .func = bpf_dynptr_write,
1699a396eda5SDaniel Xu .gpl_only = false,
1700f470378cSJohn Fastabend .ret_type = RET_INTEGER,
1701f470378cSJohn Fastabend .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1702f470378cSJohn Fastabend .arg2_type = ARG_ANYTHING,
170371330842SDaniel Borkmann .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
1704ff40e510SDaniel Borkmann .arg4_type = ARG_CONST_SIZE_OR_ZERO,
1705f470378cSJohn Fastabend .arg5_type = ARG_ANYTHING,
1706f470378cSJohn Fastabend };
1707f470378cSJohn Fastabend
170871330842SDaniel Borkmann BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
1709ff40e510SDaniel Borkmann {
171061ca36c8STobias Klauser enum bpf_dynptr_type type;
171161ca36c8STobias Klauser int err;
17127b15523aSFlorent Revest
17137b15523aSFlorent Revest if (!ptr->data)
1714dd6e10fbSDaniel Xu return 0;
1715dd6e10fbSDaniel Xu
171610aceb62SDave Marchevsky err = bpf_dynptr_check_off_len(ptr, offset, len);
171710aceb62SDave Marchevsky if (err)
1718f470378cSJohn Fastabend return 0;
17196890896bSStanislav Fomichev
17206890896bSStanislav Fomichev if (__bpf_dynptr_is_rdonly(ptr))
17216890896bSStanislav Fomichev return 0;
172213379059SArtem Savkov
1723f0c5941fSKumar Kartikeya Dwivedi type = bpf_dynptr_get_type(ptr);
1724f0c5941fSKumar Kartikeya Dwivedi
1725f0c5941fSKumar Kartikeya Dwivedi switch (type) {
1726f0c5941fSKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_LOCAL:
1727f0c5941fSKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_RINGBUF:
1728f0c5941fSKumar Kartikeya Dwivedi return (unsigned long)(ptr->data + ptr->offset + offset);
1729f0c5941fSKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_SKB:
1730f0c5941fSKumar Kartikeya Dwivedi case BPF_DYNPTR_TYPE_XDP:
1731f0c5941fSKumar Kartikeya Dwivedi /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
1732f0c5941fSKumar Kartikeya Dwivedi return 0;
1733f0c5941fSKumar Kartikeya Dwivedi default:
1734f0c5941fSKumar Kartikeya Dwivedi WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
1735f0c5941fSKumar Kartikeya Dwivedi return 0;
1736f0c5941fSKumar Kartikeya Dwivedi }
1737f0c5941fSKumar Kartikeya Dwivedi }
1738f0c5941fSKumar Kartikeya Dwivedi
1739f0c5941fSKumar Kartikeya Dwivedi static const struct bpf_func_proto bpf_dynptr_data_proto = {
1740f0c5941fSKumar Kartikeya Dwivedi .func = bpf_dynptr_data,
1741f0c5941fSKumar Kartikeya Dwivedi .gpl_only = false,
1742f0c5941fSKumar Kartikeya Dwivedi .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
1743f0c5941fSKumar Kartikeya Dwivedi .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
1744f0c5941fSKumar Kartikeya Dwivedi .arg2_type = ARG_ANYTHING,
1745f0c5941fSKumar Kartikeya Dwivedi .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
1746f0c5941fSKumar Kartikeya Dwivedi };
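/* Illustrative BPF-side sketch tying the four dynptr helpers above together
 * (blob, blob_len and scratch are invented for the example):
 *
 *	struct bpf_dynptr dptr;
 *	char scratch[16];
 *
 *	if (!bpf_dynptr_from_mem(blob, blob_len, 0, &dptr)) {
 *		bpf_dynptr_read(scratch, sizeof(scratch), &dptr, 0, 0);
 *		bpf_dynptr_write(&dptr, 0, scratch, sizeof(scratch), 0);
 *		void *p = bpf_dynptr_data(&dptr, 0, 8);	// NULL if out of bounds
 *	}
 */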
1747f0c5941fSKumar Kartikeya Dwivedi
1748f0c5941fSKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_get_current_task_proto __weak;
1749f0c5941fSKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
1750958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_user_proto __weak;
1751958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
1752958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
1753958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
1754958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
1755958cf2e2SKumar Kartikeya Dwivedi
1756958cf2e2SKumar Kartikeya Dwivedi const struct bpf_func_proto *
1757958cf2e2SKumar Kartikeya Dwivedi bpf_base_func_proto(enum bpf_func_id func_id)
1758958cf2e2SKumar Kartikeya Dwivedi {
1759958cf2e2SKumar Kartikeya Dwivedi switch (func_id) {
1760958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_lookup_elem:
1761f0c5941fSKumar Kartikeya Dwivedi return &bpf_map_lookup_elem_proto;
1762f0c5941fSKumar Kartikeya Dwivedi case BPF_FUNC_map_update_elem:
1763f0c5941fSKumar Kartikeya Dwivedi return &bpf_map_update_elem_proto;
1764958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_delete_elem:
1765958cf2e2SKumar Kartikeya Dwivedi return &bpf_map_delete_elem_proto;
1766958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_push_elem:
1767958cf2e2SKumar Kartikeya Dwivedi return &bpf_map_push_elem_proto;
1768958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_pop_elem:
1769958cf2e2SKumar Kartikeya Dwivedi return &bpf_map_pop_elem_proto;
1770958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_peek_elem:
1771958cf2e2SKumar Kartikeya Dwivedi return &bpf_map_peek_elem_proto;
1772958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_map_lookup_percpu_elem:
1773958cf2e2SKumar Kartikeya Dwivedi return &bpf_map_lookup_percpu_elem_proto;
1774958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_get_prandom_u32:
1775958cf2e2SKumar Kartikeya Dwivedi return &bpf_get_prandom_u32_proto;
1776958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_get_smp_processor_id:
1777958cf2e2SKumar Kartikeya Dwivedi return &bpf_get_raw_smp_processor_id_proto;
1778958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_get_numa_node_id:
1779958cf2e2SKumar Kartikeya Dwivedi return &bpf_get_numa_node_id_proto;
1780958cf2e2SKumar Kartikeya Dwivedi case BPF_FUNC_tail_call:
1781958cf2e2SKumar Kartikeya Dwivedi return &bpf_tail_call_proto;
1782ac9f0605SKumar Kartikeya Dwivedi case BPF_FUNC_ktime_get_ns:
1783ac9f0605SKumar Kartikeya Dwivedi return &bpf_ktime_get_ns_proto;
1784ac9f0605SKumar Kartikeya Dwivedi case BPF_FUNC_ktime_get_boot_ns:
1785ac9f0605SKumar Kartikeya Dwivedi return &bpf_ktime_get_boot_ns_proto;
1786ac9f0605SKumar Kartikeya Dwivedi case BPF_FUNC_ktime_get_tai_ns:
1787ac9f0605SKumar Kartikeya Dwivedi return &bpf_ktime_get_tai_ns_proto;
1788ac9f0605SKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_output:
1789ac9f0605SKumar Kartikeya Dwivedi return &bpf_ringbuf_output_proto;
1790ac9f0605SKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_reserve:
1791ac9f0605SKumar Kartikeya Dwivedi return &bpf_ringbuf_reserve_proto;
17928cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_submit:
17938cab76ecSKumar Kartikeya Dwivedi return &bpf_ringbuf_submit_proto;
17948cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_discard:
17958cab76ecSKumar Kartikeya Dwivedi return &bpf_ringbuf_discard_proto;
17968cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_query:
17978cab76ecSKumar Kartikeya Dwivedi return &bpf_ringbuf_query_proto;
17988cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_strncmp:
17998cab76ecSKumar Kartikeya Dwivedi return &bpf_strncmp_proto;
18008cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_strtol:
18018cab76ecSKumar Kartikeya Dwivedi return &bpf_strtol_proto;
18028cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_strtoul:
18038cab76ecSKumar Kartikeya Dwivedi return &bpf_strtoul_proto;
18048cab76ecSKumar Kartikeya Dwivedi default:
18058cab76ecSKumar Kartikeya Dwivedi break;
18068cab76ecSKumar Kartikeya Dwivedi }
18078cab76ecSKumar Kartikeya Dwivedi
18088cab76ecSKumar Kartikeya Dwivedi if (!bpf_capable())
18098cab76ecSKumar Kartikeya Dwivedi return NULL;
18108cab76ecSKumar Kartikeya Dwivedi
18118cab76ecSKumar Kartikeya Dwivedi switch (func_id) {
18128cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_spin_lock:
18138cab76ecSKumar Kartikeya Dwivedi return &bpf_spin_lock_proto;
18148cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_spin_unlock:
18158cab76ecSKumar Kartikeya Dwivedi return &bpf_spin_unlock_proto;
18168cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_jiffies64:
18178cab76ecSKumar Kartikeya Dwivedi return &bpf_jiffies64_proto;
18188cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_per_cpu_ptr:
18198cab76ecSKumar Kartikeya Dwivedi return &bpf_per_cpu_ptr_proto;
18208cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_this_cpu_ptr:
18218cab76ecSKumar Kartikeya Dwivedi return &bpf_this_cpu_ptr_proto;
18228cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_timer_init:
18238cab76ecSKumar Kartikeya Dwivedi return &bpf_timer_init_proto;
18248cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_timer_set_callback:
18258cab76ecSKumar Kartikeya Dwivedi return &bpf_timer_set_callback_proto;
18268cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_timer_start:
18278cab76ecSKumar Kartikeya Dwivedi return &bpf_timer_start_proto;
18288cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_timer_cancel:
18298cab76ecSKumar Kartikeya Dwivedi return &bpf_timer_cancel_proto;
18308cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_kptr_xchg:
18318cab76ecSKumar Kartikeya Dwivedi return &bpf_kptr_xchg_proto;
18328cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_for_each_map_elem:
18338cab76ecSKumar Kartikeya Dwivedi return &bpf_for_each_map_elem_proto;
18348cab76ecSKumar Kartikeya Dwivedi case BPF_FUNC_loop:
18358cab76ecSKumar Kartikeya Dwivedi return &bpf_loop_proto;
183690660309SDavid Vernet case BPF_FUNC_user_ringbuf_drain:
183790660309SDavid Vernet return &bpf_user_ringbuf_drain_proto;
183890660309SDavid Vernet case BPF_FUNC_ringbuf_reserve_dynptr:
183990660309SDavid Vernet return &bpf_ringbuf_reserve_dynptr_proto;
184090660309SDavid Vernet case BPF_FUNC_ringbuf_submit_dynptr:
184190660309SDavid Vernet return &bpf_ringbuf_submit_dynptr_proto;
184290660309SDavid Vernet case BPF_FUNC_ringbuf_discard_dynptr:
184390660309SDavid Vernet return &bpf_ringbuf_discard_dynptr_proto;
1844156ed20dSDavid Vernet case BPF_FUNC_dynptr_from_mem:
184590660309SDavid Vernet return &bpf_dynptr_from_mem_proto;
184690660309SDavid Vernet case BPF_FUNC_dynptr_read:
184790660309SDavid Vernet return &bpf_dynptr_read_proto;
1848fca1aa75SYonghong Song case BPF_FUNC_dynptr_write:
1849fca1aa75SYonghong Song return &bpf_dynptr_write_proto;
1850fca1aa75SYonghong Song case BPF_FUNC_dynptr_data:
1851fca1aa75SYonghong Song return &bpf_dynptr_data_proto;
1852fca1aa75SYonghong Song #ifdef CONFIG_CGROUPS
1853fca1aa75SYonghong Song case BPF_FUNC_cgrp_storage_get:
1854fca1aa75SYonghong Song return &bpf_cgrp_storage_get_proto;
1855156ed20dSDavid Vernet case BPF_FUNC_cgrp_storage_delete:
1856156ed20dSDavid Vernet return &bpf_cgrp_storage_delete_proto;
1857156ed20dSDavid Vernet case BPF_FUNC_get_current_cgroup_id:
1858156ed20dSDavid Vernet return &bpf_get_current_cgroup_id_proto;
1859156ed20dSDavid Vernet case BPF_FUNC_get_current_ancestor_cgroup_id:
1860156ed20dSDavid Vernet return &bpf_get_current_ancestor_cgroup_id_proto;
1861156ed20dSDavid Vernet #endif
1862156ed20dSDavid Vernet default:
1863156ed20dSDavid Vernet break;
1864156ed20dSDavid Vernet }
1865156ed20dSDavid Vernet
1866156ed20dSDavid Vernet if (!perfmon_capable())
1867156ed20dSDavid Vernet return NULL;
1868156ed20dSDavid Vernet
1869156ed20dSDavid Vernet switch (func_id) {
1870156ed20dSDavid Vernet case BPF_FUNC_trace_printk:
1871156ed20dSDavid Vernet return bpf_get_trace_printk_proto();
1872156ed20dSDavid Vernet case BPF_FUNC_get_current_task:
1873156ed20dSDavid Vernet return &bpf_get_current_task_proto;
1874156ed20dSDavid Vernet case BPF_FUNC_get_current_task_btf:
1875156ed20dSDavid Vernet return &bpf_get_current_task_btf_proto;
1876156ed20dSDavid Vernet case BPF_FUNC_probe_read_user:
1877156ed20dSDavid Vernet return &bpf_probe_read_user_proto;
1878156ed20dSDavid Vernet case BPF_FUNC_probe_read_kernel:
1879156ed20dSDavid Vernet return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1880156ed20dSDavid Vernet NULL : &bpf_probe_read_kernel_proto;
1881156ed20dSDavid Vernet case BPF_FUNC_probe_read_user_str:
1882156ed20dSDavid Vernet return &bpf_probe_read_user_str_proto;
1883156ed20dSDavid Vernet case BPF_FUNC_probe_read_kernel_str:
1884156ed20dSDavid Vernet return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
1885156ed20dSDavid Vernet NULL : &bpf_probe_read_kernel_str_proto;
1886156ed20dSDavid Vernet case BPF_FUNC_snprintf_btf:
1887156ed20dSDavid Vernet return &bpf_snprintf_btf_proto;
1888156ed20dSDavid Vernet case BPF_FUNC_snprintf:
1889156ed20dSDavid Vernet return &bpf_snprintf_proto;
1890156ed20dSDavid Vernet case BPF_FUNC_task_pt_regs:
1891156ed20dSDavid Vernet return &bpf_task_pt_regs_proto;
1892156ed20dSDavid Vernet case BPF_FUNC_trace_vprintk:
1893156ed20dSDavid Vernet return bpf_get_trace_vprintk_proto();
1894156ed20dSDavid Vernet default:
1895156ed20dSDavid Vernet return NULL;
1896fca1aa75SYonghong Song }
1897fca1aa75SYonghong Song }
1898fca1aa75SYonghong Song
1899fca1aa75SYonghong Song void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
190090660309SDavid Vernet
190190660309SDavid Vernet void bpf_list_head_free(const struct btf_field *field, void *list_head,
190290660309SDavid Vernet struct bpf_spin_lock *spin_lock)
190390660309SDavid Vernet {
190490660309SDavid Vernet struct list_head *head = list_head, *orig_head = list_head;
190590660309SDavid Vernet
190690660309SDavid Vernet BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
1907156ed20dSDavid Vernet BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
1908156ed20dSDavid Vernet
1909156ed20dSDavid Vernet /* Do the actual list draining outside the lock to not hold the lock for
191090660309SDavid Vernet * too long, and also prevent deadlocks if tracing programs end up
1911156ed20dSDavid Vernet * executing on entry/exit of functions called inside the critical
191290660309SDavid Vernet * section, and end up doing map ops that call bpf_list_head_free for
191390660309SDavid Vernet * the same map value again.
191490660309SDavid Vernet */
191525c5e92dSDavid Vernet __bpf_spin_lock_irqsave(spin_lock);
191690660309SDavid Vernet if (!head->next || list_empty(head))
191790660309SDavid Vernet goto unlock;
191890660309SDavid Vernet head = head->next;
191990660309SDavid Vernet unlock:
192090660309SDavid Vernet INIT_LIST_HEAD(orig_head);
192190660309SDavid Vernet __bpf_spin_unlock_irqrestore(spin_lock);
192290660309SDavid Vernet
1923156ed20dSDavid Vernet while (head != orig_head) {
192490660309SDavid Vernet void *obj = head;
192590660309SDavid Vernet
1926fda01efcSDavid Vernet obj -= field->graph_root.node_offset;
1927fda01efcSDavid Vernet head = head->next;
1928fda01efcSDavid Vernet /* The contained type can also have resources, including a
1929fda01efcSDavid Vernet * bpf_list_head which needs to be freed.
1930fda01efcSDavid Vernet */
1931fda01efcSDavid Vernet migrate_disable();
1932fda01efcSDavid Vernet __bpf_obj_drop_impl(obj, field->graph_root.value_rec);
1933fda01efcSDavid Vernet migrate_enable();
1934fda01efcSDavid Vernet }
1935fda01efcSDavid Vernet }
1936fda01efcSDavid Vernet
1937fda01efcSDavid Vernet /* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
1938fda01efcSDavid Vernet * 'rb_node *', so field name of rb_node within containing struct is not
1939fda01efcSDavid Vernet * needed.
1940fda01efcSDavid Vernet *
1941fda01efcSDavid Vernet * Since bpf_rb_tree's node type has a corresponding struct btf_field with
1942fda01efcSDavid Vernet * graph_root.node_offset, it's not necessary to know field name
1943fda01efcSDavid Vernet * or type of node struct
1944fda01efcSDavid Vernet */
1945fda01efcSDavid Vernet #define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
1946fda01efcSDavid Vernet for (pos = rb_first_postorder(root); \
1947fda01efcSDavid Vernet pos && ({ n = rb_next_postorder(pos); 1; }); \
1948fda01efcSDavid Vernet pos = n)
1949fda01efcSDavid Vernet
1950fda01efcSDavid Vernet void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
1951fda01efcSDavid Vernet struct bpf_spin_lock *spin_lock)
1952fda01efcSDavid Vernet {
1953fda01efcSDavid Vernet struct rb_root_cached orig_root, *root = rb_root;
1954fda01efcSDavid Vernet struct rb_node *pos, *n;
1955fda01efcSDavid Vernet void *obj;
1956fda01efcSDavid Vernet
1957fda01efcSDavid Vernet BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
1958fda01efcSDavid Vernet BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
1959fda01efcSDavid Vernet
1960fda01efcSDavid Vernet __bpf_spin_lock_irqsave(spin_lock);
1961fda01efcSDavid Vernet orig_root = *root;
1962fda01efcSDavid Vernet *root = RB_ROOT_CACHED;
1963fda01efcSDavid Vernet __bpf_spin_unlock_irqrestore(spin_lock);
1964fda01efcSDavid Vernet
1965fda01efcSDavid Vernet bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
1966fda01efcSDavid Vernet obj = pos;
1967fda01efcSDavid Vernet obj -= field->graph_root.node_offset;
1968fda01efcSDavid Vernet 
1970fda01efcSDavid Vernet migrate_disable();
197136aa10ffSDavid Vernet __bpf_obj_drop_impl(obj, field->graph_root.value_rec);
1972fda01efcSDavid Vernet migrate_enable();
1973fda01efcSDavid Vernet }
1974fda01efcSDavid Vernet }
1975fda01efcSDavid Vernet
1976fda01efcSDavid Vernet __diag_push();
1977fda01efcSDavid Vernet __diag_ignore_all("-Wmissing-prototypes",
1978fda01efcSDavid Vernet "Global functions as their definitions will be in vmlinux BTF");
1979fda01efcSDavid Vernet
1980fda01efcSDavid Vernet __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
1981fda01efcSDavid Vernet {
1982fda01efcSDavid Vernet struct btf_struct_meta *meta = meta__ign;
1983fda01efcSDavid Vernet u64 size = local_type_id__k;
19845ca78670SDavid Vernet void *p;
19855ca78670SDavid Vernet
19865ca78670SDavid Vernet p = bpf_mem_alloc(&bpf_global_ma, size);
19875ca78670SDavid Vernet if (!p)
19885ca78670SDavid Vernet return NULL;
19895ca78670SDavid Vernet if (meta)
19905ca78670SDavid Vernet bpf_obj_init(meta->record, p);
19915ca78670SDavid Vernet return p;
19925ca78670SDavid Vernet }
19935ca78670SDavid Vernet
19945ca78670SDavid Vernet /* Must be called under migrate_disable(), as required by bpf_mem_free */
19955ca78670SDavid Vernet void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
19965ca78670SDavid Vernet {
19975ca78670SDavid Vernet if (rec && rec->refcount_off >= 0 &&
19985ca78670SDavid Vernet !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
19995ca78670SDavid Vernet /* Object is refcounted and refcount_dec didn't result in 0
20005ca78670SDavid Vernet * refcount. Return without freeing the object
20015ca78670SDavid Vernet */
20025ca78670SDavid Vernet return;
2003fda01efcSDavid Vernet }
2004fda01efcSDavid Vernet
20053f0e6f2bSDavid Vernet if (rec)
20063f0e6f2bSDavid Vernet bpf_obj_free_fields(rec, p);
20073f0e6f2bSDavid Vernet
20083f0e6f2bSDavid Vernet if (rec && rec->refcount_off >= 0)
20093f0e6f2bSDavid Vernet bpf_mem_free_rcu(&bpf_global_ma, p);
20103f0e6f2bSDavid Vernet else
20113f0e6f2bSDavid Vernet bpf_mem_free(&bpf_global_ma, p);
20123f0e6f2bSDavid Vernet }
20133f0e6f2bSDavid Vernet
20143f0e6f2bSDavid Vernet __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
20153f0e6f2bSDavid Vernet {
20163f0e6f2bSDavid Vernet struct btf_struct_meta *meta = meta__ign;
20173f0e6f2bSDavid Vernet void *p = p__alloc;
20183f0e6f2bSDavid Vernet
20193f0e6f2bSDavid Vernet __bpf_obj_drop_impl(p, meta ? meta->record : NULL);
20203f0e6f2bSDavid Vernet }
20213f0e6f2bSDavid Vernet
20223f0e6f2bSDavid Vernet __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
20233f0e6f2bSDavid Vernet {
2024fd264ca0SYonghong Song struct btf_struct_meta *meta = meta__ign;
2025fd264ca0SYonghong Song struct bpf_refcount *ref;
2026fd264ca0SYonghong Song
2027fd264ca0SYonghong Song /* Could just cast directly to refcount_t *, but need some code using
2028fd264ca0SYonghong Song * bpf_refcount type so that it is emitted in vmlinux BTF
2029a35b9af4SYonghong Song */
2030a35b9af4SYonghong Song ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
2031a35b9af4SYonghong Song if (!refcount_inc_not_zero((refcount_t *)ref))
2032a35b9af4SYonghong Song return NULL;
2033a35b9af4SYonghong Song
20349bb00b28SYonghong Song /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
20359bb00b28SYonghong Song * in verifier.c
20369bb00b28SYonghong Song */
20379bb00b28SYonghong Song return (void *)p__refcounted_kptr;
20389bb00b28SYonghong Song }
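/* Illustrative BPF-side sketch: programs reach the kfuncs above through the
 * bpf_obj_new()/bpf_refcount_acquire()/bpf_obj_drop() convenience wrappers
 * (bpf_experimental.h in the selftests); the struct name is invented and the
 * type must carry a struct bpf_refcount field for the acquire to be allowed:
 *
 *	struct node_data { long key; struct bpf_refcount ref; };
 *
 *	struct node_data *n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	struct node_data *m = bpf_refcount_acquire(n);	// second owned reference
 *	bpf_obj_drop(n);
 *	bpf_obj_drop(m);
 */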
20399bb00b28SYonghong Song
20409bb00b28SYonghong Song static int __bpf_list_add(struct bpf_list_node_kern *node,
20419bb00b28SYonghong Song struct bpf_list_head *head,
20429bb00b28SYonghong Song bool tail, struct btf_record *rec, u64 off)
20439bb00b28SYonghong Song {
2044958cf2e2SKumar Kartikeya Dwivedi struct list_head *n = &node->list_head, *h = (void *)head;
2045958cf2e2SKumar Kartikeya Dwivedi
2046958cf2e2SKumar Kartikeya Dwivedi /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
204713379059SArtem Savkov * called on its fields, so init here
204813379059SArtem Savkov */
204913379059SArtem Savkov if (unlikely(!h->next))
2050958cf2e2SKumar Kartikeya Dwivedi INIT_LIST_HEAD(h);
2051ac9f0605SKumar Kartikeya Dwivedi
20528cab76ecSKumar Kartikeya Dwivedi /* node->owner != NULL implies !list_empty(n), no need to separately
20538cab76ecSKumar Kartikeya Dwivedi * check the latter
20548cab76ecSKumar Kartikeya Dwivedi */
20558cab76ecSKumar Kartikeya Dwivedi if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
205690660309SDavid Vernet /* Only called from BPF prog, no need to migrate_disable */
2057fca1aa75SYonghong Song __bpf_obj_drop_impl((void *)n - off, rec);
205890660309SDavid Vernet return -EINVAL;
205990660309SDavid Vernet }
2060fda01efcSDavid Vernet
2061fda01efcSDavid Vernet tail ? list_add_tail(n, h) : list_add(n, h);
2062fda01efcSDavid Vernet WRITE_ONCE(node->owner, head);
2063fda01efcSDavid Vernet
20645ca78670SDavid Vernet return 0;
2065fda01efcSDavid Vernet }
20663f0e6f2bSDavid Vernet
bpf_list_push_front_impl(struct bpf_list_head * head,struct bpf_list_node * node,void * meta__ign,u64 off)2067958cf2e2SKumar Kartikeya Dwivedi __bpf_kfunc int bpf_list_push_front_impl(struct bpf_list_head *head,
206813379059SArtem Savkov struct bpf_list_node *node,
2069958cf2e2SKumar Kartikeya Dwivedi void *meta__ign, u64 off)
207013379059SArtem Savkov {
2071958cf2e2SKumar Kartikeya Dwivedi struct bpf_list_node_kern *n = (void *)node;
207213379059SArtem Savkov struct btf_struct_meta *meta = meta__ign;
207313379059SArtem Savkov
2074cfe14564SYonghong Song return __bpf_list_add(n, head, false, meta ? meta->record : NULL, off);
207590660309SDavid Vernet }
207690660309SDavid Vernet
bpf_list_push_back_impl(struct bpf_list_head * head,struct bpf_list_node * node,void * meta__ign,u64 off)207790660309SDavid Vernet __bpf_kfunc int bpf_list_push_back_impl(struct bpf_list_head *head,
2078fda01efcSDavid Vernet struct bpf_list_node *node,
2079fda01efcSDavid Vernet void *meta__ign, u64 off)
2080fda01efcSDavid Vernet {
2081fda01efcSDavid Vernet struct bpf_list_node_kern *n = (void *)node;
208290660309SDavid Vernet struct btf_struct_meta *meta = meta__ign;
2083cfe14564SYonghong Song
2084fd264ca0SYonghong Song return __bpf_list_add(n, head, true, meta ? meta->record : NULL, off);
2085a35b9af4SYonghong Song }
20869bb00b28SYonghong Song
__bpf_list_del(struct bpf_list_head * head,bool tail)20879bb00b28SYonghong Song static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail)
2088cfe14564SYonghong Song {
2089cfe14564SYonghong Song struct list_head *n, *h = (void *)head;
2090cfe14564SYonghong Song struct bpf_list_node_kern *node;
2091cfe14564SYonghong Song
2092cfe14564SYonghong Song /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
2093cfe14564SYonghong Song * called on its fields, so init here
2094cfe14564SYonghong Song */
209513379059SArtem Savkov if (unlikely(!h->next))
209613379059SArtem Savkov INIT_LIST_HEAD(h);
20972fcc6081SDavid Vernet if (list_empty(h))
209890660309SDavid Vernet return NULL;
209990660309SDavid Vernet
21002fcc6081SDavid Vernet n = tail ? h->prev : h->next;
21012fcc6081SDavid Vernet node = container_of(n, struct bpf_list_node_kern, list_head);
210290660309SDavid Vernet if (WARN_ON_ONCE(READ_ONCE(node->owner) != head))
2103fda01efcSDavid Vernet return NULL;
2104fda01efcSDavid Vernet
21052fcc6081SDavid Vernet list_del_init(n);
21062fcc6081SDavid Vernet WRITE_ONCE(node->owner, NULL);
2107fda01efcSDavid Vernet return (struct bpf_list_node *)n;
2108fda01efcSDavid Vernet }
210990660309SDavid Vernet
bpf_list_pop_front(struct bpf_list_head * head)21108cab76ecSKumar Kartikeya Dwivedi __bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
21118cab76ecSKumar Kartikeya Dwivedi {
211290660309SDavid Vernet return __bpf_list_del(head, false);
211390660309SDavid Vernet }
2114cfe14564SYonghong Song
bpf_list_pop_back(struct bpf_list_head * head)211590660309SDavid Vernet __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
211690660309SDavid Vernet {
2117cfe14564SYonghong Song return __bpf_list_del(head, true);
211813379059SArtem Savkov }
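
/* Usage sketch (BPF program side, hedged): the verifier requires list
 * operations to run under the bpf_spin_lock associated with the list head.
 * "struct foo" with an embedded struct bpf_list_node, the global list head
 * "ghead", and the lock "glock" are hypothetical names; bpf_list_push_front()
 * and bpf_list_pop_back() are the wrappers around the _impl kfuncs.
 *
 *	struct bpf_list_node *n;
 *	struct foo *f = bpf_obj_new(struct foo);
 *
 *	if (!f)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_list_push_front(&ghead, &f->node);
 *	bpf_spin_unlock(&glock);
 *
 *	bpf_spin_lock(&glock);
 *	n = bpf_list_pop_back(&ghead);
 *	bpf_spin_unlock(&glock);
 *	if (n)
 *		bpf_obj_drop(container_of(n, struct foo, node));
 */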

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
                                                  struct bpf_rb_node *node)
{
        struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
        struct rb_root_cached *r = (struct rb_root_cached *)root;
        struct rb_node *n = &node_internal->rb_node;

        /* node_internal->owner != root implies either RB_EMPTY_NODE(n) or
         * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
         */
        if (READ_ONCE(node_internal->owner) != root)
                return NULL;

        rb_erase_cached(n, r);
        RB_CLEAR_NODE(n);
        WRITE_ONCE(node_internal->owner, NULL);
        return (struct bpf_rb_node *)n;
}

/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
 * program
 */
static int __bpf_rbtree_add(struct bpf_rb_root *root,
                            struct bpf_rb_node_kern *node,
                            void *less, struct btf_record *rec, u64 off)
{
        struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
        struct rb_node *parent = NULL, *n = &node->rb_node;
        bpf_callback_t cb = (bpf_callback_t)less;
        bool leftmost = true;

        /* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately
         * check the latter
         */
        if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) {
                /* Only called from BPF prog, no need to migrate_disable */
                __bpf_obj_drop_impl((void *)n - off, rec);
                return -EINVAL;
        }

        while (*link) {
                parent = *link;
                if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
                        leftmost = false;
                }
        }

        rb_link_node(n, parent, link);
        rb_insert_color_cached(n, (struct rb_root_cached *)root, leftmost);
        WRITE_ONCE(node->owner, root);
        return 0;
}

__bpf_kfunc int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
                                    bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
                                    void *meta__ign, u64 off)
{
        struct btf_struct_meta *meta = meta__ign;
        struct bpf_rb_node_kern *n = (void *)node;

        return __bpf_rbtree_add(root, n, (void *)less, meta ? meta->record : NULL, off);
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
{
        struct rb_root_cached *r = (struct rb_root_cached *)root;

        return (struct bpf_rb_node *)rb_first_cached(r);
}
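
/* Usage sketch (BPF program side, hedged): as with lists, rbtree operations
 * must run under the bpf_spin_lock paired with the root. "struct foo" with an
 * embedded struct bpf_rb_node and a u64 key, the root "groot", the lock
 * "glock", and the less() callback below are hypothetical; bpf_rbtree_add()
 * is the wrapper around bpf_rbtree_add_impl().
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct foo *fa = container_of(a, struct foo, node);
 *		struct foo *fb = container_of(b, struct foo, node);
 *
 *		return fa->key < fb->key;
 *	}
 *
 *	struct foo *f = bpf_obj_new(struct foo);
 *
 *	if (!f)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &f->node, less);
 *	bpf_spin_unlock(&glock);
 */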

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc that is not stored in a map as a kptr must be released by calling
 * bpf_task_release().
 * @p: The task on which a reference is being acquired.
 */
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
        if (refcount_inc_not_zero(&p->rcu_users))
                return p;
        return NULL;
}

/**
 * bpf_task_release - Release the reference acquired on a task.
 * @p: The task on which a reference is being released.
 */
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
        put_task_struct_rcu_user(p);
}
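
/* Usage sketch (BPF program side, hedged): acquiring a reference on the
 * current task so it can outlive the program invocation, e.g. to be stashed
 * in a map as a kptr or released explicitly later.
 *
 *	struct task_struct *t;
 *
 *	t = bpf_task_acquire(bpf_get_current_task_btf());
 *	if (!t)
 *		return 0;
 *	...
 *	bpf_task_release(t);
 */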

#ifdef CONFIG_CGROUPS
/**
 * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
 * this kfunc that is not stored in a map as a kptr must be released by
 * calling bpf_cgroup_release().
 * @cgrp: The cgroup on which a reference is being acquired.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
        return cgroup_tryget(cgrp) ? cgrp : NULL;
}

/**
 * bpf_cgroup_release - Release the reference acquired on a cgroup.
 * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
 * not be freed until the current grace period has ended, even if its refcount
 * drops to 0.
 * @cgrp: The cgroup on which a reference is being released.
 */
__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
{
        cgroup_put(cgrp);
}

/**
 * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
 * array. A cgroup returned by this kfunc that is not subsequently stored in a
 * map must be released by calling bpf_cgroup_release().
 * @cgrp: The cgroup for which we're performing a lookup.
 * @level: The level of ancestor to look up.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
{
        struct cgroup *ancestor;

        if (level > cgrp->level || level < 0)
                return NULL;

        /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
        ancestor = cgrp->ancestors[level];
        if (!cgroup_tryget(ancestor))
                return NULL;
        return ancestor;
}

/**
 * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
 * kfunc that is not subsequently stored in a map must be released by calling
 * bpf_cgroup_release().
 * @cgid: cgroup id.
 */
__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
{
        struct cgroup *cgrp;

        cgrp = cgroup_get_from_id(cgid);
        if (IS_ERR(cgrp))
                return NULL;
        return cgrp;
}
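
/* Usage sketch (BPF program side, hedged): looking up a cgroup by ID and
 * releasing the acquired reference; "target_cgid" is a hypothetical value,
 * e.g. read from a map.
 *
 *	struct cgroup *cgrp;
 *
 *	cgrp = bpf_cgroup_from_id(target_cgid);
 *	if (!cgrp)
 *		return 0;
 *	...
 *	bpf_cgroup_release(cgrp);
 */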

/**
 * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc,
 * testing a task's membership of a cgroup's ancestry.
 * @task: the task to be tested
 * @ancestor: possible ancestor of @task's cgroup
 *
 * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
 * It follows all the same rules as cgroup_is_descendant, and only applies
 * to the default hierarchy.
 */
__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
                                       struct cgroup *ancestor)
{
        long ret;

        rcu_read_lock();
        ret = task_under_cgroup_hierarchy(task, ancestor);
        rcu_read_unlock();
        return ret;
}
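
/* Usage sketch (BPF program side, hedged): checking whether the current task
 * sits under a cgroup looked up by ID; "target_cgid" and handle_match() are
 * hypothetical.
 *
 *	struct cgroup *ancestor = bpf_cgroup_from_id(target_cgid);
 *
 *	if (!ancestor)
 *		return 0;
 *	if (bpf_task_under_cgroup(bpf_get_current_task_btf(), ancestor))
 *		handle_match();
 *	bpf_cgroup_release(ancestor);
 */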
#endif /* CONFIG_CGROUPS */

/**
 * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up
 * in the root pid namespace idr. If a task is returned, it must either be
 * stored in a map, or released with bpf_task_release().
 * @pid: The pid of the task being looked up.
 */
__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
{
        struct task_struct *p;

        rcu_read_lock();
        p = find_task_by_pid_ns(pid, &init_pid_ns);
        if (p)
                p = bpf_task_acquire(p);
        rcu_read_unlock();

        return p;
}
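
/* Usage sketch (BPF program side, hedged): "target_pid" is a hypothetical
 * pid, e.g. read from a map or a syscall argument.
 *
 *	struct task_struct *p = bpf_task_from_pid(target_pid);
 *
 *	if (!p)
 *		return 0;
 *	bpf_printk("comm: %s", p->comm);
 *	bpf_task_release(p);
 */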

/**
 * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
 * @ptr: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
 *
 * If the intention is to write to the data slice, please use
 * bpf_dynptr_slice_rdwr.
 *
 * The user must check that the returned pointer is not null before using it.
 *
 * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (e.g. invalid dynptr), pointer to a read-only
 * data slice (can be either a direct pointer to the data or a pointer to the
 * user provided buffer, with its contents containing the data, if unable to
 * obtain a direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
                                   void *buffer__opt, u32 buffer__szk)
{
        enum bpf_dynptr_type type;
        u32 len = buffer__szk;
        int err;

        if (!ptr->data)
                return NULL;

        err = bpf_dynptr_check_off_len(ptr, offset, len);
        if (err)
                return NULL;

        type = bpf_dynptr_get_type(ptr);

        switch (type) {
        case BPF_DYNPTR_TYPE_LOCAL:
        case BPF_DYNPTR_TYPE_RINGBUF:
                return ptr->data + ptr->offset + offset;
        case BPF_DYNPTR_TYPE_SKB:
                if (buffer__opt)
                        return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt);
                else
                        return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len);
        case BPF_DYNPTR_TYPE_XDP:
        {
                void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);

                if (!IS_ERR_OR_NULL(xdp_ptr))
                        return xdp_ptr;

                if (!buffer__opt)
                        return NULL;
                bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
                return buffer__opt;
        }
        default:
                WARN_ONCE(true, "unknown dynptr type %d\n", type);
                return NULL;
        }
}
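
/* Usage sketch (BPF program side, hedged): reading the Ethernet header of an
 * skb dynptr into a stack buffer in case the data is not linear; "dynptr" is
 * assumed to have been initialized earlier with bpf_dynptr_from_skb().
 *
 *	struct ethhdr buf, *eth;
 *
 *	eth = bpf_dynptr_slice(&dynptr, 0, &buf, sizeof(buf));
 *	if (!eth)
 *		return TC_ACT_SHOT;
 *	if (eth->h_proto == bpf_htons(ETH_P_IP))
 *		...;
 */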

/**
 * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
 * @ptr: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
 *
 * The returned pointer is writable and may point either directly to the dynptr
 * data at the requested offset or to the buffer if unable to obtain a direct
 * data pointer (example: the requested slice is to the paged area of an skb
 * packet). In the case where the returned pointer is to the buffer, the user
 * is responsible for persisting writes through calling bpf_dynptr_write(). This
 * usually looks something like this pattern:
 *
 * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
 * if (!eth)
 *	return TC_ACT_SHOT;
 *
 * // mutate eth header //
 *
 * if (eth == buffer)
 *	bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
 *
 * Please note that, as in the example above, the user must check that the
 * returned pointer is not null before using it.
 *
 * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (e.g. invalid dynptr), pointer to a
 * data slice (can be either a direct pointer to the data or a pointer to the
 * user provided buffer, with its contents containing the data, if unable to
 * obtain a direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
                                        void *buffer__opt, u32 buffer__szk)
{
        if (!ptr->data || __bpf_dynptr_is_rdonly(ptr))
                return NULL;

        /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
         *
         * For skb-type dynptrs, it is safe to write into the returned pointer
         * if the bpf program allows skb data writes. There are two possibilities
         * that may occur when calling bpf_dynptr_slice_rdwr:
         *
         * 1) The requested slice is in the head of the skb. In this case, the
         * returned pointer is directly to skb data, and if the skb is cloned, the
         * verifier will have uncloned it (see bpf_unclone_prologue()) already.
         * The pointer can be directly written into.
         *
         * 2) Some portion of the requested slice is in the paged buffer area.
         * In this case, the requested data will be copied out into the buffer
         * and the returned pointer will be a pointer to the buffer. The skb
         * will not be pulled. To persist the write, the user will need to call
         * bpf_dynptr_write(), which will pull the skb and commit the write.
         *
         * Similarly for xdp programs, if the requested slice is not across xdp
         * fragments, then a direct pointer will be returned, otherwise the data
         * will be copied out into the buffer and the user will need to call
         * bpf_dynptr_write() to commit changes.
         */
        return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk);
}

__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end)
{
        u32 size;

        if (!ptr->data || start > end)
                return -EINVAL;

        size = __bpf_dynptr_size(ptr);

        if (start > size || end > size)
                return -ERANGE;

        ptr->offset += start;
        bpf_dynptr_set_size(ptr, end - start);

        return 0;
}
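
/* Usage sketch (BPF program side, hedged): narrowing a dynptr's view to a
 * sub-range, e.g. skipping a 14-byte Ethernet header and keeping the next
 * 20 bytes. Both offsets are relative to the dynptr's current view.
 *
 *	if (bpf_dynptr_adjust(&dynptr, 14, 34))
 *		return 0;
 *	// dynptr now starts 14 bytes in and bpf_dynptr_size() reports 20
 */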

__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr)
{
        return !ptr->data;
}

__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
{
        if (!ptr->data)
                return false;

        return __bpf_dynptr_is_rdonly(ptr);
}

__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
{
        if (!ptr->data)
                return -EINVAL;

        return __bpf_dynptr_size(ptr);
}

__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr,
                                 struct bpf_dynptr_kern *clone__uninit)
{
        if (!ptr->data) {
                bpf_dynptr_set_null(clone__uninit);
                return -EINVAL;
        }

        *clone__uninit = *ptr;

        return 0;
}
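
/* Usage sketch (BPF program side, hedged): cloning a dynptr so the original
 * view is preserved while the clone is adjusted and parsed independently;
 * "hdr_len" is a hypothetical constant header length.
 *
 *	struct bpf_dynptr clone;
 *
 *	if (bpf_dynptr_clone(&dynptr, &clone))
 *		return 0;
 *	if (bpf_dynptr_adjust(&clone, hdr_len, bpf_dynptr_size(&clone)))
 *		return 0;
 *	// "clone" now covers the payload; "dynptr" is unchanged
 */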

__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
        return obj;
}

__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
{
        return obj__ign;
}

__bpf_kfunc void bpf_rcu_read_lock(void)
{
        rcu_read_lock();
}

__bpf_kfunc void bpf_rcu_read_unlock(void)
{
        rcu_read_unlock();
}
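
/* Usage sketch (BPF program side, hedged): bpf_rcu_read_lock()/unlock() bound
 * an RCU read-side section so that RCU-protected pointers can be dereferenced
 * where the verifier would otherwise reject the access (e.g. in sleepable
 * programs); "task" is assumed to be a trusted struct task_struct pointer.
 *
 *	bpf_rcu_read_lock();
 *	parent = task->real_parent;
 *	if (parent)
 *		ppid = parent->pid;
 *	bpf_rcu_read_unlock();
 */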

__diag_pop();

BTF_SET8_START(generic_btf_ids)
#ifdef CONFIG_KEXEC_CORE
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)

#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_SET8_END(generic_btf_ids)

static const struct btf_kfunc_id_set generic_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &generic_btf_ids,
};

BTF_ID_LIST(generic_dtor_ids)
BTF_ID(struct, task_struct)
BTF_ID(func, bpf_task_release)
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
BTF_ID(func, bpf_cgroup_release)
#endif

BTF_SET8_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust)
BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
BTF_ID_FLAGS(func, bpf_dynptr_size)
BTF_ID_FLAGS(func, bpf_dynptr_clone)
BTF_SET8_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &common_btf_ids,
};

static int __init kfunc_init(void)
{
        int ret;
        const struct btf_id_dtor_kfunc generic_dtors[] = {
                {
                        .btf_id       = generic_dtor_ids[0],
                        .kfunc_btf_id = generic_dtor_ids[1]
                },
#ifdef CONFIG_CGROUPS
                {
                        .btf_id       = generic_dtor_ids[2],
                        .kfunc_btf_id = generic_dtor_ids[3]
                },
#endif
        };

        ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
        ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
                                                 ARRAY_SIZE(generic_dtors),
                                                 THIS_MODULE);
        return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}

late_initcall(kfunc_init);