15b497af4SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2d0003ec0SAlexei Starovoitov /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3d0003ec0SAlexei Starovoitov */ 4d0003ec0SAlexei Starovoitov #include <linux/bpf.h> 53bd916eeSYonghong Song #include <linux/btf.h> 6aef2fedaSJakub Kicinski #include <linux/bpf-cgroup.h> 7d0003ec0SAlexei Starovoitov #include <linux/rcupdate.h> 803e69b50SDaniel Borkmann #include <linux/random.h> 9c04167ceSDaniel Borkmann #include <linux/smp.h> 102d0e30c3SDaniel Borkmann #include <linux/topology.h> 1117ca8cbfSDaniel Borkmann #include <linux/ktime.h> 12ffeedafbSAlexei Starovoitov #include <linux/sched.h> 13ffeedafbSAlexei Starovoitov #include <linux/uidgid.h> 14f3694e00SDaniel Borkmann #include <linux/filter.h> 15d7a4cb9bSAndrey Ignatov #include <linux/ctype.h> 165576b991SMartin KaFai Lau #include <linux/jiffies.h> 17b4490c5cSCarlos Neira #include <linux/pid_namespace.h> 1847e34cb7SDave Marchevsky #include <linux/poison.h> 19b4490c5cSCarlos Neira #include <linux/proc_ns.h> 20ff40e510SDaniel Borkmann #include <linux/security.h> 21376040e4SKenny Yu #include <linux/btf_ids.h> 22958cf2e2SKumar Kartikeya Dwivedi #include <linux/bpf_mem_alloc.h> 23d7a4cb9bSAndrey Ignatov 24d7a4cb9bSAndrey Ignatov #include "../../lib/kstrtox.h" 25d0003ec0SAlexei Starovoitov 26d0003ec0SAlexei Starovoitov /* If kernel subsystem is allowing eBPF programs to call this function, 27d0003ec0SAlexei Starovoitov * inside its own verifier_ops->get_func_proto() callback it should return 28d0003ec0SAlexei Starovoitov * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments 29d0003ec0SAlexei Starovoitov * 30d0003ec0SAlexei Starovoitov * Different map implementations will rely on rcu in map methods 31d0003ec0SAlexei Starovoitov * lookup/update/delete, therefore eBPF programs must run under rcu lock 32d0003ec0SAlexei Starovoitov * if program is allowed to access maps, so check rcu_read_lock_held in 33d0003ec0SAlexei Starovoitov * all three functions. 34d0003ec0SAlexei Starovoitov */ 35f3694e00SDaniel Borkmann BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) 36d0003ec0SAlexei Starovoitov { 37694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 38f3694e00SDaniel Borkmann return (unsigned long) map->ops->map_lookup_elem(map, key); 39d0003ec0SAlexei Starovoitov } 40d0003ec0SAlexei Starovoitov 41a2c83fffSDaniel Borkmann const struct bpf_func_proto bpf_map_lookup_elem_proto = { 42d0003ec0SAlexei Starovoitov .func = bpf_map_lookup_elem, 43d0003ec0SAlexei Starovoitov .gpl_only = false, 4436bbef52SDaniel Borkmann .pkt_access = true, 45d0003ec0SAlexei Starovoitov .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 46d0003ec0SAlexei Starovoitov .arg1_type = ARG_CONST_MAP_PTR, 47d0003ec0SAlexei Starovoitov .arg2_type = ARG_PTR_TO_MAP_KEY, 48d0003ec0SAlexei Starovoitov }; 49d0003ec0SAlexei Starovoitov 50f3694e00SDaniel Borkmann BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, 51f3694e00SDaniel Borkmann void *, value, u64, flags) 52d0003ec0SAlexei Starovoitov { 53694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 54f3694e00SDaniel Borkmann return map->ops->map_update_elem(map, key, value, flags); 55d0003ec0SAlexei Starovoitov } 56d0003ec0SAlexei Starovoitov 57a2c83fffSDaniel Borkmann const struct bpf_func_proto bpf_map_update_elem_proto = { 58d0003ec0SAlexei Starovoitov .func = bpf_map_update_elem, 59d0003ec0SAlexei Starovoitov .gpl_only = false, 6036bbef52SDaniel Borkmann .pkt_access = true, 61d0003ec0SAlexei Starovoitov .ret_type = RET_INTEGER, 62d0003ec0SAlexei Starovoitov .arg1_type = ARG_CONST_MAP_PTR, 63d0003ec0SAlexei Starovoitov .arg2_type = ARG_PTR_TO_MAP_KEY, 64d0003ec0SAlexei Starovoitov .arg3_type = ARG_PTR_TO_MAP_VALUE, 65d0003ec0SAlexei Starovoitov .arg4_type = ARG_ANYTHING, 66d0003ec0SAlexei Starovoitov }; 67d0003ec0SAlexei Starovoitov 68f3694e00SDaniel Borkmann BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) 69d0003ec0SAlexei Starovoitov { 70694cea39SToke Høiland-Jørgensen WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 71d0003ec0SAlexei Starovoitov return map->ops->map_delete_elem(map, key); 72d0003ec0SAlexei Starovoitov } 73d0003ec0SAlexei Starovoitov 74a2c83fffSDaniel Borkmann const struct bpf_func_proto bpf_map_delete_elem_proto = { 75d0003ec0SAlexei Starovoitov .func = bpf_map_delete_elem, 76d0003ec0SAlexei Starovoitov .gpl_only = false, 7736bbef52SDaniel Borkmann .pkt_access = true, 78d0003ec0SAlexei Starovoitov .ret_type = RET_INTEGER, 79d0003ec0SAlexei Starovoitov .arg1_type = ARG_CONST_MAP_PTR, 80d0003ec0SAlexei Starovoitov .arg2_type = ARG_PTR_TO_MAP_KEY, 81d0003ec0SAlexei Starovoitov }; 8203e69b50SDaniel Borkmann 83f1a2e44aSMauricio Vasquez B BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) 84f1a2e44aSMauricio Vasquez B { 85f1a2e44aSMauricio Vasquez B return map->ops->map_push_elem(map, value, flags); 86f1a2e44aSMauricio Vasquez B } 87f1a2e44aSMauricio Vasquez B 88f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_push_elem_proto = { 89f1a2e44aSMauricio Vasquez B .func = bpf_map_push_elem, 90f1a2e44aSMauricio Vasquez B .gpl_only = false, 91f1a2e44aSMauricio Vasquez B .pkt_access = true, 92f1a2e44aSMauricio Vasquez B .ret_type = RET_INTEGER, 93f1a2e44aSMauricio Vasquez B .arg1_type = ARG_CONST_MAP_PTR, 94f1a2e44aSMauricio Vasquez B .arg2_type = ARG_PTR_TO_MAP_VALUE, 95f1a2e44aSMauricio Vasquez B .arg3_type = ARG_ANYTHING, 96f1a2e44aSMauricio Vasquez B }; 97f1a2e44aSMauricio Vasquez B 98f1a2e44aSMauricio Vasquez B BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) 99f1a2e44aSMauricio Vasquez B { 100f1a2e44aSMauricio Vasquez B return map->ops->map_pop_elem(map, value); 101f1a2e44aSMauricio Vasquez B } 102f1a2e44aSMauricio Vasquez B 103f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_pop_elem_proto = { 104f1a2e44aSMauricio Vasquez B .func = bpf_map_pop_elem, 105f1a2e44aSMauricio Vasquez B .gpl_only = false, 106f1a2e44aSMauricio Vasquez B .ret_type = RET_INTEGER, 107f1a2e44aSMauricio Vasquez B .arg1_type = ARG_CONST_MAP_PTR, 10816d1e00cSJoanne Koong .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, 109f1a2e44aSMauricio Vasquez B }; 110f1a2e44aSMauricio Vasquez B 111f1a2e44aSMauricio Vasquez B BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) 112f1a2e44aSMauricio Vasquez B { 113f1a2e44aSMauricio Vasquez B return map->ops->map_peek_elem(map, value); 114f1a2e44aSMauricio Vasquez B } 115f1a2e44aSMauricio Vasquez B 116f1a2e44aSMauricio Vasquez B const struct bpf_func_proto bpf_map_peek_elem_proto = { 117301a33d5SMircea Cirjaliu .func = bpf_map_peek_elem, 118f1a2e44aSMauricio Vasquez B .gpl_only = false, 119f1a2e44aSMauricio Vasquez B .ret_type = RET_INTEGER, 120f1a2e44aSMauricio Vasquez B .arg1_type = ARG_CONST_MAP_PTR, 12116d1e00cSJoanne Koong .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, 122f1a2e44aSMauricio Vasquez B }; 123f1a2e44aSMauricio Vasquez B 12407343110SFeng Zhou BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) 12507343110SFeng Zhou { 12607343110SFeng Zhou WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 12707343110SFeng Zhou return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); 12807343110SFeng Zhou } 12907343110SFeng Zhou 13007343110SFeng Zhou const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { 13107343110SFeng Zhou .func = bpf_map_lookup_percpu_elem, 13207343110SFeng Zhou .gpl_only = false, 13307343110SFeng Zhou .pkt_access = true, 13407343110SFeng Zhou .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 13507343110SFeng Zhou .arg1_type = ARG_CONST_MAP_PTR, 13607343110SFeng Zhou .arg2_type = ARG_PTR_TO_MAP_KEY, 13707343110SFeng Zhou .arg3_type = ARG_ANYTHING, 13807343110SFeng Zhou }; 13907343110SFeng Zhou 14003e69b50SDaniel Borkmann const struct bpf_func_proto bpf_get_prandom_u32_proto = { 1413ad00405SDaniel Borkmann .func = bpf_user_rnd_u32, 14203e69b50SDaniel Borkmann .gpl_only = false, 14303e69b50SDaniel Borkmann .ret_type = RET_INTEGER, 14403e69b50SDaniel Borkmann }; 145c04167ceSDaniel Borkmann 146f3694e00SDaniel Borkmann BPF_CALL_0(bpf_get_smp_processor_id) 147c04167ceSDaniel Borkmann { 14880b48c44SDaniel Borkmann return smp_processor_id(); 149c04167ceSDaniel Borkmann } 150c04167ceSDaniel Borkmann 151c04167ceSDaniel Borkmann const struct bpf_func_proto bpf_get_smp_processor_id_proto = { 152c04167ceSDaniel Borkmann .func = bpf_get_smp_processor_id, 153c04167ceSDaniel Borkmann .gpl_only = false, 154c04167ceSDaniel Borkmann .ret_type = RET_INTEGER, 155c04167ceSDaniel Borkmann }; 15617ca8cbfSDaniel Borkmann 1572d0e30c3SDaniel Borkmann BPF_CALL_0(bpf_get_numa_node_id) 1582d0e30c3SDaniel Borkmann { 1592d0e30c3SDaniel Borkmann return numa_node_id(); 1602d0e30c3SDaniel Borkmann } 1612d0e30c3SDaniel Borkmann 1622d0e30c3SDaniel Borkmann const struct bpf_func_proto bpf_get_numa_node_id_proto = { 1632d0e30c3SDaniel Borkmann .func = bpf_get_numa_node_id, 1642d0e30c3SDaniel Borkmann .gpl_only = false, 1652d0e30c3SDaniel Borkmann .ret_type = RET_INTEGER, 1662d0e30c3SDaniel Borkmann }; 1672d0e30c3SDaniel Borkmann 168f3694e00SDaniel Borkmann BPF_CALL_0(bpf_ktime_get_ns) 16917ca8cbfSDaniel Borkmann { 17017ca8cbfSDaniel Borkmann /* NMI safe access to clock monotonic */ 17117ca8cbfSDaniel Borkmann return ktime_get_mono_fast_ns(); 17217ca8cbfSDaniel Borkmann } 17317ca8cbfSDaniel Borkmann 17417ca8cbfSDaniel Borkmann const struct bpf_func_proto bpf_ktime_get_ns_proto = { 17517ca8cbfSDaniel Borkmann .func = bpf_ktime_get_ns, 176082b57e3SMaciej Żenczykowski .gpl_only = false, 17717ca8cbfSDaniel Borkmann .ret_type = RET_INTEGER, 17817ca8cbfSDaniel Borkmann }; 179ffeedafbSAlexei Starovoitov 18071d19214SMaciej Żenczykowski BPF_CALL_0(bpf_ktime_get_boot_ns) 18171d19214SMaciej Żenczykowski { 18271d19214SMaciej Żenczykowski /* NMI safe access to clock boottime */ 18371d19214SMaciej Żenczykowski return ktime_get_boot_fast_ns(); 18471d19214SMaciej Żenczykowski } 18571d19214SMaciej Żenczykowski 18671d19214SMaciej Żenczykowski const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { 18771d19214SMaciej Żenczykowski .func = bpf_ktime_get_boot_ns, 18871d19214SMaciej Żenczykowski .gpl_only = false, 18971d19214SMaciej Żenczykowski .ret_type = RET_INTEGER, 19071d19214SMaciej Żenczykowski }; 19171d19214SMaciej Żenczykowski 192d0551261SDmitrii Banshchikov BPF_CALL_0(bpf_ktime_get_coarse_ns) 193d0551261SDmitrii Banshchikov { 194d0551261SDmitrii Banshchikov return ktime_get_coarse_ns(); 195d0551261SDmitrii Banshchikov } 196d0551261SDmitrii Banshchikov 197d0551261SDmitrii Banshchikov const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { 198d0551261SDmitrii Banshchikov .func = bpf_ktime_get_coarse_ns, 199d0551261SDmitrii Banshchikov .gpl_only = false, 200d0551261SDmitrii Banshchikov .ret_type = RET_INTEGER, 201d0551261SDmitrii Banshchikov }; 202d0551261SDmitrii Banshchikov 203c8996c98SJesper Dangaard Brouer BPF_CALL_0(bpf_ktime_get_tai_ns) 204c8996c98SJesper Dangaard Brouer { 205c8996c98SJesper Dangaard Brouer /* NMI safe access to clock tai */ 206c8996c98SJesper Dangaard Brouer return ktime_get_tai_fast_ns(); 207c8996c98SJesper Dangaard Brouer } 208c8996c98SJesper Dangaard Brouer 209c8996c98SJesper Dangaard Brouer const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = { 210c8996c98SJesper Dangaard Brouer .func = bpf_ktime_get_tai_ns, 211c8996c98SJesper Dangaard Brouer .gpl_only = false, 212c8996c98SJesper Dangaard Brouer .ret_type = RET_INTEGER, 213c8996c98SJesper Dangaard Brouer }; 214c8996c98SJesper Dangaard Brouer 215f3694e00SDaniel Borkmann BPF_CALL_0(bpf_get_current_pid_tgid) 216ffeedafbSAlexei Starovoitov { 217ffeedafbSAlexei Starovoitov struct task_struct *task = current; 218ffeedafbSAlexei Starovoitov 2196088b582SDaniel Borkmann if (unlikely(!task)) 220ffeedafbSAlexei Starovoitov return -EINVAL; 221ffeedafbSAlexei Starovoitov 222ffeedafbSAlexei Starovoitov return (u64) task->tgid << 32 | task->pid; 223ffeedafbSAlexei Starovoitov } 224ffeedafbSAlexei Starovoitov 225ffeedafbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { 226ffeedafbSAlexei Starovoitov .func = bpf_get_current_pid_tgid, 227ffeedafbSAlexei Starovoitov .gpl_only = false, 228ffeedafbSAlexei Starovoitov .ret_type = RET_INTEGER, 229ffeedafbSAlexei Starovoitov }; 230ffeedafbSAlexei Starovoitov 231f3694e00SDaniel Borkmann BPF_CALL_0(bpf_get_current_uid_gid) 232ffeedafbSAlexei Starovoitov { 233ffeedafbSAlexei Starovoitov struct task_struct *task = current; 234ffeedafbSAlexei Starovoitov kuid_t uid; 235ffeedafbSAlexei Starovoitov kgid_t gid; 236ffeedafbSAlexei Starovoitov 2376088b582SDaniel Borkmann if (unlikely(!task)) 238ffeedafbSAlexei Starovoitov return -EINVAL; 239ffeedafbSAlexei Starovoitov 240ffeedafbSAlexei Starovoitov current_uid_gid(&uid, &gid); 241ffeedafbSAlexei Starovoitov return (u64) from_kgid(&init_user_ns, gid) << 32 | 242ffeedafbSAlexei Starovoitov from_kuid(&init_user_ns, uid); 243ffeedafbSAlexei Starovoitov } 244ffeedafbSAlexei Starovoitov 245ffeedafbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_uid_gid_proto = { 246ffeedafbSAlexei Starovoitov .func = bpf_get_current_uid_gid, 247ffeedafbSAlexei Starovoitov .gpl_only = false, 248ffeedafbSAlexei Starovoitov .ret_type = RET_INTEGER, 249ffeedafbSAlexei Starovoitov }; 250ffeedafbSAlexei Starovoitov 251f3694e00SDaniel Borkmann BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) 252ffeedafbSAlexei Starovoitov { 253ffeedafbSAlexei Starovoitov struct task_struct *task = current; 254ffeedafbSAlexei Starovoitov 255074f528eSDaniel Borkmann if (unlikely(!task)) 256074f528eSDaniel Borkmann goto err_clear; 257ffeedafbSAlexei Starovoitov 25803b9c7faSYuntao Wang /* Verifier guarantees that size > 0 */ 25903b9c7faSYuntao Wang strscpy(buf, task->comm, size); 260ffeedafbSAlexei Starovoitov return 0; 261074f528eSDaniel Borkmann err_clear: 262074f528eSDaniel Borkmann memset(buf, 0, size); 263074f528eSDaniel Borkmann return -EINVAL; 264ffeedafbSAlexei Starovoitov } 265ffeedafbSAlexei Starovoitov 266ffeedafbSAlexei Starovoitov const struct bpf_func_proto bpf_get_current_comm_proto = { 267ffeedafbSAlexei Starovoitov .func = bpf_get_current_comm, 268ffeedafbSAlexei Starovoitov .gpl_only = false, 269ffeedafbSAlexei Starovoitov .ret_type = RET_INTEGER, 27039f19ebbSAlexei Starovoitov .arg1_type = ARG_PTR_TO_UNINIT_MEM, 27139f19ebbSAlexei Starovoitov .arg2_type = ARG_CONST_SIZE, 272ffeedafbSAlexei Starovoitov }; 273bf6fa2c8SYonghong Song 274d83525caSAlexei Starovoitov #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) 275d83525caSAlexei Starovoitov 276d83525caSAlexei Starovoitov static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 277d83525caSAlexei Starovoitov { 278d83525caSAlexei Starovoitov arch_spinlock_t *l = (void *)lock; 279d83525caSAlexei Starovoitov union { 280d83525caSAlexei Starovoitov __u32 val; 281d83525caSAlexei Starovoitov arch_spinlock_t lock; 282d83525caSAlexei Starovoitov } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; 283d83525caSAlexei Starovoitov 284d83525caSAlexei Starovoitov compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); 285d83525caSAlexei Starovoitov BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); 286d83525caSAlexei Starovoitov BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); 287d83525caSAlexei Starovoitov arch_spin_lock(l); 288d83525caSAlexei Starovoitov } 289d83525caSAlexei Starovoitov 290d83525caSAlexei Starovoitov static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 291d83525caSAlexei Starovoitov { 292d83525caSAlexei Starovoitov arch_spinlock_t *l = (void *)lock; 293d83525caSAlexei Starovoitov 294d83525caSAlexei Starovoitov arch_spin_unlock(l); 295d83525caSAlexei Starovoitov } 296d83525caSAlexei Starovoitov 297d83525caSAlexei Starovoitov #else 298d83525caSAlexei Starovoitov 299d83525caSAlexei Starovoitov static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 300d83525caSAlexei Starovoitov { 301d83525caSAlexei Starovoitov atomic_t *l = (void *)lock; 302d83525caSAlexei Starovoitov 303d83525caSAlexei Starovoitov BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); 304d83525caSAlexei Starovoitov do { 305d83525caSAlexei Starovoitov atomic_cond_read_relaxed(l, !VAL); 306d83525caSAlexei Starovoitov } while (atomic_xchg(l, 1)); 307d83525caSAlexei Starovoitov } 308d83525caSAlexei Starovoitov 309d83525caSAlexei Starovoitov static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 310d83525caSAlexei Starovoitov { 311d83525caSAlexei Starovoitov atomic_t *l = (void *)lock; 312d83525caSAlexei Starovoitov 313d83525caSAlexei Starovoitov atomic_set_release(l, 0); 314d83525caSAlexei Starovoitov } 315d83525caSAlexei Starovoitov 316d83525caSAlexei Starovoitov #endif 317d83525caSAlexei Starovoitov 318d83525caSAlexei Starovoitov static DEFINE_PER_CPU(unsigned long, irqsave_flags); 319d83525caSAlexei Starovoitov 320c1b3fed3SAlexei Starovoitov static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) 321d83525caSAlexei Starovoitov { 322d83525caSAlexei Starovoitov unsigned long flags; 323d83525caSAlexei Starovoitov 324d83525caSAlexei Starovoitov local_irq_save(flags); 325d83525caSAlexei Starovoitov __bpf_spin_lock(lock); 326d83525caSAlexei Starovoitov __this_cpu_write(irqsave_flags, flags); 327c1b3fed3SAlexei Starovoitov } 328c1b3fed3SAlexei Starovoitov 329c1b3fed3SAlexei Starovoitov notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) 330c1b3fed3SAlexei Starovoitov { 331c1b3fed3SAlexei Starovoitov __bpf_spin_lock_irqsave(lock); 332d83525caSAlexei Starovoitov return 0; 333d83525caSAlexei Starovoitov } 334d83525caSAlexei Starovoitov 335d83525caSAlexei Starovoitov const struct bpf_func_proto bpf_spin_lock_proto = { 336d83525caSAlexei Starovoitov .func = bpf_spin_lock, 337d83525caSAlexei Starovoitov .gpl_only = false, 338d83525caSAlexei Starovoitov .ret_type = RET_VOID, 339d83525caSAlexei Starovoitov .arg1_type = ARG_PTR_TO_SPIN_LOCK, 3404e814da0SKumar Kartikeya Dwivedi .arg1_btf_id = BPF_PTR_POISON, 341d83525caSAlexei Starovoitov }; 342d83525caSAlexei Starovoitov 343c1b3fed3SAlexei Starovoitov static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) 344d83525caSAlexei Starovoitov { 345d83525caSAlexei Starovoitov unsigned long flags; 346d83525caSAlexei Starovoitov 347d83525caSAlexei Starovoitov flags = __this_cpu_read(irqsave_flags); 348d83525caSAlexei Starovoitov __bpf_spin_unlock(lock); 349d83525caSAlexei Starovoitov local_irq_restore(flags); 350c1b3fed3SAlexei Starovoitov } 351c1b3fed3SAlexei Starovoitov 352c1b3fed3SAlexei Starovoitov notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) 353c1b3fed3SAlexei Starovoitov { 354c1b3fed3SAlexei Starovoitov __bpf_spin_unlock_irqrestore(lock); 355d83525caSAlexei Starovoitov return 0; 356d83525caSAlexei Starovoitov } 357d83525caSAlexei Starovoitov 358d83525caSAlexei Starovoitov const struct bpf_func_proto bpf_spin_unlock_proto = { 359d83525caSAlexei Starovoitov .func = bpf_spin_unlock, 360d83525caSAlexei Starovoitov .gpl_only = false, 361d83525caSAlexei Starovoitov .ret_type = RET_VOID, 362d83525caSAlexei Starovoitov .arg1_type = ARG_PTR_TO_SPIN_LOCK, 3634e814da0SKumar Kartikeya Dwivedi .arg1_btf_id = BPF_PTR_POISON, 364d83525caSAlexei Starovoitov }; 365d83525caSAlexei Starovoitov 36696049f3aSAlexei Starovoitov void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, 36796049f3aSAlexei Starovoitov bool lock_src) 36896049f3aSAlexei Starovoitov { 36996049f3aSAlexei Starovoitov struct bpf_spin_lock *lock; 37096049f3aSAlexei Starovoitov 37196049f3aSAlexei Starovoitov if (lock_src) 372db559117SKumar Kartikeya Dwivedi lock = src + map->record->spin_lock_off; 37396049f3aSAlexei Starovoitov else 374db559117SKumar Kartikeya Dwivedi lock = dst + map->record->spin_lock_off; 37596049f3aSAlexei Starovoitov preempt_disable(); 376c1b3fed3SAlexei Starovoitov __bpf_spin_lock_irqsave(lock); 37796049f3aSAlexei Starovoitov copy_map_value(map, dst, src); 378c1b3fed3SAlexei Starovoitov __bpf_spin_unlock_irqrestore(lock); 37996049f3aSAlexei Starovoitov preempt_enable(); 38096049f3aSAlexei Starovoitov } 38196049f3aSAlexei Starovoitov 3825576b991SMartin KaFai Lau BPF_CALL_0(bpf_jiffies64) 3835576b991SMartin KaFai Lau { 3845576b991SMartin KaFai Lau return get_jiffies_64(); 3855576b991SMartin KaFai Lau } 3865576b991SMartin KaFai Lau 3875576b991SMartin KaFai Lau const struct bpf_func_proto bpf_jiffies64_proto = { 3885576b991SMartin KaFai Lau .func = bpf_jiffies64, 3895576b991SMartin KaFai Lau .gpl_only = false, 3905576b991SMartin KaFai Lau .ret_type = RET_INTEGER, 3915576b991SMartin KaFai Lau }; 3925576b991SMartin KaFai Lau 393bf6fa2c8SYonghong Song #ifdef CONFIG_CGROUPS 394bf6fa2c8SYonghong Song BPF_CALL_0(bpf_get_current_cgroup_id) 395bf6fa2c8SYonghong Song { 3962d3a1e36SYonghong Song struct cgroup *cgrp; 3972d3a1e36SYonghong Song u64 cgrp_id; 398bf6fa2c8SYonghong Song 3992d3a1e36SYonghong Song rcu_read_lock(); 4002d3a1e36SYonghong Song cgrp = task_dfl_cgroup(current); 4012d3a1e36SYonghong Song cgrp_id = cgroup_id(cgrp); 4022d3a1e36SYonghong Song rcu_read_unlock(); 4032d3a1e36SYonghong Song 4042d3a1e36SYonghong Song return cgrp_id; 405bf6fa2c8SYonghong Song } 406bf6fa2c8SYonghong Song 407bf6fa2c8SYonghong Song const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { 408bf6fa2c8SYonghong Song .func = bpf_get_current_cgroup_id, 409bf6fa2c8SYonghong Song .gpl_only = false, 410bf6fa2c8SYonghong Song .ret_type = RET_INTEGER, 411bf6fa2c8SYonghong Song }; 412cd339431SRoman Gushchin 4130f09abd1SDaniel Borkmann BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) 4140f09abd1SDaniel Borkmann { 4152d3a1e36SYonghong Song struct cgroup *cgrp; 4160f09abd1SDaniel Borkmann struct cgroup *ancestor; 4172d3a1e36SYonghong Song u64 cgrp_id; 4180f09abd1SDaniel Borkmann 4192d3a1e36SYonghong Song rcu_read_lock(); 4202d3a1e36SYonghong Song cgrp = task_dfl_cgroup(current); 4210f09abd1SDaniel Borkmann ancestor = cgroup_ancestor(cgrp, ancestor_level); 4222d3a1e36SYonghong Song cgrp_id = ancestor ? cgroup_id(ancestor) : 0; 4232d3a1e36SYonghong Song rcu_read_unlock(); 4242d3a1e36SYonghong Song 4252d3a1e36SYonghong Song return cgrp_id; 4260f09abd1SDaniel Borkmann } 4270f09abd1SDaniel Borkmann 4280f09abd1SDaniel Borkmann const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { 4290f09abd1SDaniel Borkmann .func = bpf_get_current_ancestor_cgroup_id, 4300f09abd1SDaniel Borkmann .gpl_only = false, 4310f09abd1SDaniel Borkmann .ret_type = RET_INTEGER, 4320f09abd1SDaniel Borkmann .arg1_type = ARG_ANYTHING, 4330f09abd1SDaniel Borkmann }; 4348a67f2deSStanislav Fomichev #endif /* CONFIG_CGROUPS */ 4350f09abd1SDaniel Borkmann 436d7a4cb9bSAndrey Ignatov #define BPF_STRTOX_BASE_MASK 0x1F 437d7a4cb9bSAndrey Ignatov 438d7a4cb9bSAndrey Ignatov static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, 439d7a4cb9bSAndrey Ignatov unsigned long long *res, bool *is_negative) 440d7a4cb9bSAndrey Ignatov { 441d7a4cb9bSAndrey Ignatov unsigned int base = flags & BPF_STRTOX_BASE_MASK; 442d7a4cb9bSAndrey Ignatov const char *cur_buf = buf; 443d7a4cb9bSAndrey Ignatov size_t cur_len = buf_len; 444d7a4cb9bSAndrey Ignatov unsigned int consumed; 445d7a4cb9bSAndrey Ignatov size_t val_len; 446d7a4cb9bSAndrey Ignatov char str[64]; 447d7a4cb9bSAndrey Ignatov 448d7a4cb9bSAndrey Ignatov if (!buf || !buf_len || !res || !is_negative) 449d7a4cb9bSAndrey Ignatov return -EINVAL; 450d7a4cb9bSAndrey Ignatov 451d7a4cb9bSAndrey Ignatov if (base != 0 && base != 8 && base != 10 && base != 16) 452d7a4cb9bSAndrey Ignatov return -EINVAL; 453d7a4cb9bSAndrey Ignatov 454d7a4cb9bSAndrey Ignatov if (flags & ~BPF_STRTOX_BASE_MASK) 455d7a4cb9bSAndrey Ignatov return -EINVAL; 456d7a4cb9bSAndrey Ignatov 457d7a4cb9bSAndrey Ignatov while (cur_buf < buf + buf_len && isspace(*cur_buf)) 458d7a4cb9bSAndrey Ignatov ++cur_buf; 459d7a4cb9bSAndrey Ignatov 460d7a4cb9bSAndrey Ignatov *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); 461d7a4cb9bSAndrey Ignatov if (*is_negative) 462d7a4cb9bSAndrey Ignatov ++cur_buf; 463d7a4cb9bSAndrey Ignatov 464d7a4cb9bSAndrey Ignatov consumed = cur_buf - buf; 465d7a4cb9bSAndrey Ignatov cur_len -= consumed; 466d7a4cb9bSAndrey Ignatov if (!cur_len) 467d7a4cb9bSAndrey Ignatov return -EINVAL; 468d7a4cb9bSAndrey Ignatov 469d7a4cb9bSAndrey Ignatov cur_len = min(cur_len, sizeof(str) - 1); 470d7a4cb9bSAndrey Ignatov memcpy(str, cur_buf, cur_len); 471d7a4cb9bSAndrey Ignatov str[cur_len] = '\0'; 472d7a4cb9bSAndrey Ignatov cur_buf = str; 473d7a4cb9bSAndrey Ignatov 474d7a4cb9bSAndrey Ignatov cur_buf = _parse_integer_fixup_radix(cur_buf, &base); 475d7a4cb9bSAndrey Ignatov val_len = _parse_integer(cur_buf, base, res); 476d7a4cb9bSAndrey Ignatov 477d7a4cb9bSAndrey Ignatov if (val_len & KSTRTOX_OVERFLOW) 478d7a4cb9bSAndrey Ignatov return -ERANGE; 479d7a4cb9bSAndrey Ignatov 480d7a4cb9bSAndrey Ignatov if (val_len == 0) 481d7a4cb9bSAndrey Ignatov return -EINVAL; 482d7a4cb9bSAndrey Ignatov 483d7a4cb9bSAndrey Ignatov cur_buf += val_len; 484d7a4cb9bSAndrey Ignatov consumed += cur_buf - str; 485d7a4cb9bSAndrey Ignatov 486d7a4cb9bSAndrey Ignatov return consumed; 487d7a4cb9bSAndrey Ignatov } 488d7a4cb9bSAndrey Ignatov 489d7a4cb9bSAndrey Ignatov static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, 490d7a4cb9bSAndrey Ignatov long long *res) 491d7a4cb9bSAndrey Ignatov { 492d7a4cb9bSAndrey Ignatov unsigned long long _res; 493d7a4cb9bSAndrey Ignatov bool is_negative; 494d7a4cb9bSAndrey Ignatov int err; 495d7a4cb9bSAndrey Ignatov 496d7a4cb9bSAndrey Ignatov err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 497d7a4cb9bSAndrey Ignatov if (err < 0) 498d7a4cb9bSAndrey Ignatov return err; 499d7a4cb9bSAndrey Ignatov if (is_negative) { 500d7a4cb9bSAndrey Ignatov if ((long long)-_res > 0) 501d7a4cb9bSAndrey Ignatov return -ERANGE; 502d7a4cb9bSAndrey Ignatov *res = -_res; 503d7a4cb9bSAndrey Ignatov } else { 504d7a4cb9bSAndrey Ignatov if ((long long)_res < 0) 505d7a4cb9bSAndrey Ignatov return -ERANGE; 506d7a4cb9bSAndrey Ignatov *res = _res; 507d7a4cb9bSAndrey Ignatov } 508d7a4cb9bSAndrey Ignatov return err; 509d7a4cb9bSAndrey Ignatov } 510d7a4cb9bSAndrey Ignatov 511d7a4cb9bSAndrey Ignatov BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, 512d7a4cb9bSAndrey Ignatov long *, res) 513d7a4cb9bSAndrey Ignatov { 514d7a4cb9bSAndrey Ignatov long long _res; 515d7a4cb9bSAndrey Ignatov int err; 516d7a4cb9bSAndrey Ignatov 517d7a4cb9bSAndrey Ignatov err = __bpf_strtoll(buf, buf_len, flags, &_res); 518d7a4cb9bSAndrey Ignatov if (err < 0) 519d7a4cb9bSAndrey Ignatov return err; 520d7a4cb9bSAndrey Ignatov if (_res != (long)_res) 521d7a4cb9bSAndrey Ignatov return -ERANGE; 522d7a4cb9bSAndrey Ignatov *res = _res; 523d7a4cb9bSAndrey Ignatov return err; 524d7a4cb9bSAndrey Ignatov } 525d7a4cb9bSAndrey Ignatov 526d7a4cb9bSAndrey Ignatov const struct bpf_func_proto bpf_strtol_proto = { 527d7a4cb9bSAndrey Ignatov .func = bpf_strtol, 528d7a4cb9bSAndrey Ignatov .gpl_only = false, 529d7a4cb9bSAndrey Ignatov .ret_type = RET_INTEGER, 530216e3cd2SHao Luo .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 531d7a4cb9bSAndrey Ignatov .arg2_type = ARG_CONST_SIZE, 532d7a4cb9bSAndrey Ignatov .arg3_type = ARG_ANYTHING, 533d7a4cb9bSAndrey Ignatov .arg4_type = ARG_PTR_TO_LONG, 534d7a4cb9bSAndrey Ignatov }; 535d7a4cb9bSAndrey Ignatov 536d7a4cb9bSAndrey Ignatov BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, 537d7a4cb9bSAndrey Ignatov unsigned long *, res) 538d7a4cb9bSAndrey Ignatov { 539d7a4cb9bSAndrey Ignatov unsigned long long _res; 540d7a4cb9bSAndrey Ignatov bool is_negative; 541d7a4cb9bSAndrey Ignatov int err; 542d7a4cb9bSAndrey Ignatov 543d7a4cb9bSAndrey Ignatov err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 544d7a4cb9bSAndrey Ignatov if (err < 0) 545d7a4cb9bSAndrey Ignatov return err; 546d7a4cb9bSAndrey Ignatov if (is_negative) 547d7a4cb9bSAndrey Ignatov return -EINVAL; 548d7a4cb9bSAndrey Ignatov if (_res != (unsigned long)_res) 549d7a4cb9bSAndrey Ignatov return -ERANGE; 550d7a4cb9bSAndrey Ignatov *res = _res; 551d7a4cb9bSAndrey Ignatov return err; 552d7a4cb9bSAndrey Ignatov } 553d7a4cb9bSAndrey Ignatov 554d7a4cb9bSAndrey Ignatov const struct bpf_func_proto bpf_strtoul_proto = { 555d7a4cb9bSAndrey Ignatov .func = bpf_strtoul, 556d7a4cb9bSAndrey Ignatov .gpl_only = false, 557d7a4cb9bSAndrey Ignatov .ret_type = RET_INTEGER, 558216e3cd2SHao Luo .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, 559d7a4cb9bSAndrey Ignatov .arg2_type = ARG_CONST_SIZE, 560d7a4cb9bSAndrey Ignatov .arg3_type = ARG_ANYTHING, 561d7a4cb9bSAndrey Ignatov .arg4_type = ARG_PTR_TO_LONG, 562d7a4cb9bSAndrey Ignatov }; 563b4490c5cSCarlos Neira 564c5fb1993SHou Tao BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) 565c5fb1993SHou Tao { 566c5fb1993SHou Tao return strncmp(s1, s2, s1_sz); 567c5fb1993SHou Tao } 568c5fb1993SHou Tao 569dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_strncmp_proto = { 570c5fb1993SHou Tao .func = bpf_strncmp, 571c5fb1993SHou Tao .gpl_only = false, 572c5fb1993SHou Tao .ret_type = RET_INTEGER, 573c5fb1993SHou Tao .arg1_type = ARG_PTR_TO_MEM, 574c5fb1993SHou Tao .arg2_type = ARG_CONST_SIZE, 575c5fb1993SHou Tao .arg3_type = ARG_PTR_TO_CONST_STR, 576c5fb1993SHou Tao }; 577c5fb1993SHou Tao 578b4490c5cSCarlos Neira BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, 579b4490c5cSCarlos Neira struct bpf_pidns_info *, nsdata, u32, size) 580b4490c5cSCarlos Neira { 581b4490c5cSCarlos Neira struct task_struct *task = current; 582b4490c5cSCarlos Neira struct pid_namespace *pidns; 583b4490c5cSCarlos Neira int err = -EINVAL; 584b4490c5cSCarlos Neira 585b4490c5cSCarlos Neira if (unlikely(size != sizeof(struct bpf_pidns_info))) 586b4490c5cSCarlos Neira goto clear; 587b4490c5cSCarlos Neira 588b4490c5cSCarlos Neira if (unlikely((u64)(dev_t)dev != dev)) 589b4490c5cSCarlos Neira goto clear; 590b4490c5cSCarlos Neira 591b4490c5cSCarlos Neira if (unlikely(!task)) 592b4490c5cSCarlos Neira goto clear; 593b4490c5cSCarlos Neira 594b4490c5cSCarlos Neira pidns = task_active_pid_ns(task); 595b4490c5cSCarlos Neira if (unlikely(!pidns)) { 596b4490c5cSCarlos Neira err = -ENOENT; 597b4490c5cSCarlos Neira goto clear; 598b4490c5cSCarlos Neira } 599b4490c5cSCarlos Neira 600b4490c5cSCarlos Neira if (!ns_match(&pidns->ns, (dev_t)dev, ino)) 601b4490c5cSCarlos Neira goto clear; 602b4490c5cSCarlos Neira 603b4490c5cSCarlos Neira nsdata->pid = task_pid_nr_ns(task, pidns); 604b4490c5cSCarlos Neira nsdata->tgid = task_tgid_nr_ns(task, pidns); 605b4490c5cSCarlos Neira return 0; 606b4490c5cSCarlos Neira clear: 607b4490c5cSCarlos Neira memset((void *)nsdata, 0, (size_t) size); 608b4490c5cSCarlos Neira return err; 609b4490c5cSCarlos Neira } 610b4490c5cSCarlos Neira 611b4490c5cSCarlos Neira const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { 612b4490c5cSCarlos Neira .func = bpf_get_ns_current_pid_tgid, 613b4490c5cSCarlos Neira .gpl_only = false, 614b4490c5cSCarlos Neira .ret_type = RET_INTEGER, 615b4490c5cSCarlos Neira .arg1_type = ARG_ANYTHING, 616b4490c5cSCarlos Neira .arg2_type = ARG_ANYTHING, 617b4490c5cSCarlos Neira .arg3_type = ARG_PTR_TO_UNINIT_MEM, 618b4490c5cSCarlos Neira .arg4_type = ARG_CONST_SIZE, 619b4490c5cSCarlos Neira }; 6206890896bSStanislav Fomichev 6216890896bSStanislav Fomichev static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { 6226890896bSStanislav Fomichev .func = bpf_get_raw_cpu_id, 6236890896bSStanislav Fomichev .gpl_only = false, 6246890896bSStanislav Fomichev .ret_type = RET_INTEGER, 6256890896bSStanislav Fomichev }; 6266890896bSStanislav Fomichev 6276890896bSStanislav Fomichev BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, 6286890896bSStanislav Fomichev u64, flags, void *, data, u64, size) 6296890896bSStanislav Fomichev { 6306890896bSStanislav Fomichev if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 6316890896bSStanislav Fomichev return -EINVAL; 6326890896bSStanislav Fomichev 6336890896bSStanislav Fomichev return bpf_event_output(map, flags, data, size, NULL, 0, NULL); 6346890896bSStanislav Fomichev } 6356890896bSStanislav Fomichev 6366890896bSStanislav Fomichev const struct bpf_func_proto bpf_event_output_data_proto = { 6376890896bSStanislav Fomichev .func = bpf_event_output_data, 6386890896bSStanislav Fomichev .gpl_only = true, 6396890896bSStanislav Fomichev .ret_type = RET_INTEGER, 6406890896bSStanislav Fomichev .arg1_type = ARG_PTR_TO_CTX, 6416890896bSStanislav Fomichev .arg2_type = ARG_CONST_MAP_PTR, 6426890896bSStanislav Fomichev .arg3_type = ARG_ANYTHING, 643216e3cd2SHao Luo .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6446890896bSStanislav Fomichev .arg5_type = ARG_CONST_SIZE_OR_ZERO, 6456890896bSStanislav Fomichev }; 6466890896bSStanislav Fomichev 64707be4c4aSAlexei Starovoitov BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, 64807be4c4aSAlexei Starovoitov const void __user *, user_ptr) 64907be4c4aSAlexei Starovoitov { 65007be4c4aSAlexei Starovoitov int ret = copy_from_user(dst, user_ptr, size); 65107be4c4aSAlexei Starovoitov 65207be4c4aSAlexei Starovoitov if (unlikely(ret)) { 65307be4c4aSAlexei Starovoitov memset(dst, 0, size); 65407be4c4aSAlexei Starovoitov ret = -EFAULT; 65507be4c4aSAlexei Starovoitov } 65607be4c4aSAlexei Starovoitov 65707be4c4aSAlexei Starovoitov return ret; 65807be4c4aSAlexei Starovoitov } 65907be4c4aSAlexei Starovoitov 66007be4c4aSAlexei Starovoitov const struct bpf_func_proto bpf_copy_from_user_proto = { 66107be4c4aSAlexei Starovoitov .func = bpf_copy_from_user, 66207be4c4aSAlexei Starovoitov .gpl_only = false, 66307be4c4aSAlexei Starovoitov .ret_type = RET_INTEGER, 66407be4c4aSAlexei Starovoitov .arg1_type = ARG_PTR_TO_UNINIT_MEM, 66507be4c4aSAlexei Starovoitov .arg2_type = ARG_CONST_SIZE_OR_ZERO, 66607be4c4aSAlexei Starovoitov .arg3_type = ARG_ANYTHING, 66707be4c4aSAlexei Starovoitov }; 66807be4c4aSAlexei Starovoitov 669376040e4SKenny Yu BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size, 670376040e4SKenny Yu const void __user *, user_ptr, struct task_struct *, tsk, u64, flags) 671376040e4SKenny Yu { 672376040e4SKenny Yu int ret; 673376040e4SKenny Yu 674376040e4SKenny Yu /* flags is not used yet */ 675376040e4SKenny Yu if (unlikely(flags)) 676376040e4SKenny Yu return -EINVAL; 677376040e4SKenny Yu 678376040e4SKenny Yu if (unlikely(!size)) 679376040e4SKenny Yu return 0; 680376040e4SKenny Yu 681376040e4SKenny Yu ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0); 682376040e4SKenny Yu if (ret == size) 683376040e4SKenny Yu return 0; 684376040e4SKenny Yu 685376040e4SKenny Yu memset(dst, 0, size); 686376040e4SKenny Yu /* Return -EFAULT for partial read */ 687376040e4SKenny Yu return ret < 0 ? ret : -EFAULT; 688376040e4SKenny Yu } 689376040e4SKenny Yu 690376040e4SKenny Yu const struct bpf_func_proto bpf_copy_from_user_task_proto = { 691376040e4SKenny Yu .func = bpf_copy_from_user_task, 6920407a65fSKenta Tada .gpl_only = true, 693376040e4SKenny Yu .ret_type = RET_INTEGER, 694376040e4SKenny Yu .arg1_type = ARG_PTR_TO_UNINIT_MEM, 695376040e4SKenny Yu .arg2_type = ARG_CONST_SIZE_OR_ZERO, 696376040e4SKenny Yu .arg3_type = ARG_ANYTHING, 697376040e4SKenny Yu .arg4_type = ARG_PTR_TO_BTF_ID, 698376040e4SKenny Yu .arg4_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], 699376040e4SKenny Yu .arg5_type = ARG_ANYTHING 700376040e4SKenny Yu }; 701376040e4SKenny Yu 702eaa6bcb7SHao Luo BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) 703eaa6bcb7SHao Luo { 704eaa6bcb7SHao Luo if (cpu >= nr_cpu_ids) 705eaa6bcb7SHao Luo return (unsigned long)NULL; 706eaa6bcb7SHao Luo 707eaa6bcb7SHao Luo return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); 708eaa6bcb7SHao Luo } 709eaa6bcb7SHao Luo 710eaa6bcb7SHao Luo const struct bpf_func_proto bpf_per_cpu_ptr_proto = { 711eaa6bcb7SHao Luo .func = bpf_per_cpu_ptr, 712eaa6bcb7SHao Luo .gpl_only = false, 71334d3a78cSHao Luo .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, 714eaa6bcb7SHao Luo .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 715eaa6bcb7SHao Luo .arg2_type = ARG_ANYTHING, 716eaa6bcb7SHao Luo }; 717eaa6bcb7SHao Luo 71863d9b80dSHao Luo BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) 71963d9b80dSHao Luo { 72063d9b80dSHao Luo return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); 72163d9b80dSHao Luo } 72263d9b80dSHao Luo 72363d9b80dSHao Luo const struct bpf_func_proto bpf_this_cpu_ptr_proto = { 72463d9b80dSHao Luo .func = bpf_this_cpu_ptr, 72563d9b80dSHao Luo .gpl_only = false, 72634d3a78cSHao Luo .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, 72763d9b80dSHao Luo .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 72863d9b80dSHao Luo }; 72963d9b80dSHao Luo 730d9c9e4dbSFlorent Revest static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, 731d9c9e4dbSFlorent Revest size_t bufsz) 732d9c9e4dbSFlorent Revest { 733d9c9e4dbSFlorent Revest void __user *user_ptr = (__force void __user *)unsafe_ptr; 734d9c9e4dbSFlorent Revest 735d9c9e4dbSFlorent Revest buf[0] = 0; 736d9c9e4dbSFlorent Revest 737d9c9e4dbSFlorent Revest switch (fmt_ptype) { 738d9c9e4dbSFlorent Revest case 's': 739d9c9e4dbSFlorent Revest #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 740d9c9e4dbSFlorent Revest if ((unsigned long)unsafe_ptr < TASK_SIZE) 741d9c9e4dbSFlorent Revest return strncpy_from_user_nofault(buf, user_ptr, bufsz); 742d9c9e4dbSFlorent Revest fallthrough; 743d9c9e4dbSFlorent Revest #endif 744d9c9e4dbSFlorent Revest case 'k': 745d9c9e4dbSFlorent Revest return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); 746d9c9e4dbSFlorent Revest case 'u': 747d9c9e4dbSFlorent Revest return strncpy_from_user_nofault(buf, user_ptr, bufsz); 748d9c9e4dbSFlorent Revest } 749d9c9e4dbSFlorent Revest 750d9c9e4dbSFlorent Revest return -EINVAL; 751d9c9e4dbSFlorent Revest } 752d9c9e4dbSFlorent Revest 7538afcc19fSFlorent Revest /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary 7548afcc19fSFlorent Revest * arguments representation. 755d9c9e4dbSFlorent Revest */ 7568afcc19fSFlorent Revest #define MAX_BPRINTF_BUF_LEN 512 757d9c9e4dbSFlorent Revest 758e2d5b2bbSFlorent Revest /* Support executing three nested bprintf helper calls on a given CPU */ 7590af02eb2SFlorent Revest #define MAX_BPRINTF_NEST_LEVEL 3 760e2d5b2bbSFlorent Revest struct bpf_bprintf_buffers { 7610af02eb2SFlorent Revest char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; 762d9c9e4dbSFlorent Revest }; 763e2d5b2bbSFlorent Revest static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); 764e2d5b2bbSFlorent Revest static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); 765d9c9e4dbSFlorent Revest 766d9c9e4dbSFlorent Revest static int try_get_fmt_tmp_buf(char **tmp_buf) 767d9c9e4dbSFlorent Revest { 768e2d5b2bbSFlorent Revest struct bpf_bprintf_buffers *bufs; 769e2d5b2bbSFlorent Revest int nest_level; 770d9c9e4dbSFlorent Revest 771d9c9e4dbSFlorent Revest preempt_disable(); 772e2d5b2bbSFlorent Revest nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); 7730af02eb2SFlorent Revest if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) { 774e2d5b2bbSFlorent Revest this_cpu_dec(bpf_bprintf_nest_level); 775d9c9e4dbSFlorent Revest preempt_enable(); 776d9c9e4dbSFlorent Revest return -EBUSY; 777d9c9e4dbSFlorent Revest } 778e2d5b2bbSFlorent Revest bufs = this_cpu_ptr(&bpf_bprintf_bufs); 779e2d5b2bbSFlorent Revest *tmp_buf = bufs->tmp_bufs[nest_level - 1]; 780d9c9e4dbSFlorent Revest 781d9c9e4dbSFlorent Revest return 0; 782d9c9e4dbSFlorent Revest } 783d9c9e4dbSFlorent Revest 78448cac3f4SFlorent Revest void bpf_bprintf_cleanup(void) 785d9c9e4dbSFlorent Revest { 786e2d5b2bbSFlorent Revest if (this_cpu_read(bpf_bprintf_nest_level)) { 787e2d5b2bbSFlorent Revest this_cpu_dec(bpf_bprintf_nest_level); 788d9c9e4dbSFlorent Revest preempt_enable(); 789d9c9e4dbSFlorent Revest } 790d9c9e4dbSFlorent Revest } 791d9c9e4dbSFlorent Revest 792d9c9e4dbSFlorent Revest /* 79348cac3f4SFlorent Revest * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers 794d9c9e4dbSFlorent Revest * 795d9c9e4dbSFlorent Revest * Returns a negative value if fmt is an invalid format string or 0 otherwise. 796d9c9e4dbSFlorent Revest * 797d9c9e4dbSFlorent Revest * This can be used in two ways: 79848cac3f4SFlorent Revest * - Format string verification only: when bin_args is NULL 799d9c9e4dbSFlorent Revest * - Arguments preparation: in addition to the above verification, it writes in 80048cac3f4SFlorent Revest * bin_args a binary representation of arguments usable by bstr_printf where 80148cac3f4SFlorent Revest * pointers from BPF have been sanitized. 802d9c9e4dbSFlorent Revest * 803d9c9e4dbSFlorent Revest * In argument preparation mode, if 0 is returned, safe temporary buffers are 80448cac3f4SFlorent Revest * allocated and bpf_bprintf_cleanup should be called to free them after use. 805d9c9e4dbSFlorent Revest */ 80648cac3f4SFlorent Revest int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, 80748cac3f4SFlorent Revest u32 **bin_args, u32 num_args) 808d9c9e4dbSFlorent Revest { 80948cac3f4SFlorent Revest char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; 81048cac3f4SFlorent Revest size_t sizeof_cur_arg, sizeof_cur_ip; 81148cac3f4SFlorent Revest int err, i, num_spec = 0; 812d9c9e4dbSFlorent Revest u64 cur_arg; 81348cac3f4SFlorent Revest char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; 814d9c9e4dbSFlorent Revest 815d9c9e4dbSFlorent Revest fmt_end = strnchr(fmt, fmt_size, 0); 816d9c9e4dbSFlorent Revest if (!fmt_end) 817d9c9e4dbSFlorent Revest return -EINVAL; 818d9c9e4dbSFlorent Revest fmt_size = fmt_end - fmt; 819d9c9e4dbSFlorent Revest 82048cac3f4SFlorent Revest if (bin_args) { 82148cac3f4SFlorent Revest if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) 82248cac3f4SFlorent Revest return -EBUSY; 82348cac3f4SFlorent Revest 8248afcc19fSFlorent Revest tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; 82548cac3f4SFlorent Revest *bin_args = (u32 *)tmp_buf; 82648cac3f4SFlorent Revest } 82748cac3f4SFlorent Revest 828d9c9e4dbSFlorent Revest for (i = 0; i < fmt_size; i++) { 829d9c9e4dbSFlorent Revest if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { 830d9c9e4dbSFlorent Revest err = -EINVAL; 83148cac3f4SFlorent Revest goto out; 832d9c9e4dbSFlorent Revest } 833d9c9e4dbSFlorent Revest 834d9c9e4dbSFlorent Revest if (fmt[i] != '%') 835d9c9e4dbSFlorent Revest continue; 836d9c9e4dbSFlorent Revest 837d9c9e4dbSFlorent Revest if (fmt[i + 1] == '%') { 838d9c9e4dbSFlorent Revest i++; 839d9c9e4dbSFlorent Revest continue; 840d9c9e4dbSFlorent Revest } 841d9c9e4dbSFlorent Revest 842d9c9e4dbSFlorent Revest if (num_spec >= num_args) { 843d9c9e4dbSFlorent Revest err = -EINVAL; 84448cac3f4SFlorent Revest goto out; 845d9c9e4dbSFlorent Revest } 846d9c9e4dbSFlorent Revest 847d9c9e4dbSFlorent Revest /* The string is zero-terminated so if fmt[i] != 0, we can 848d9c9e4dbSFlorent Revest * always access fmt[i + 1], in the worst case it will be a 0 849d9c9e4dbSFlorent Revest */ 850d9c9e4dbSFlorent Revest i++; 851d9c9e4dbSFlorent Revest 852d9c9e4dbSFlorent Revest /* skip optional "[0 +-][num]" width formatting field */ 853d9c9e4dbSFlorent Revest while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || 854d9c9e4dbSFlorent Revest fmt[i] == ' ') 855d9c9e4dbSFlorent Revest i++; 856d9c9e4dbSFlorent Revest if (fmt[i] >= '1' && fmt[i] <= '9') { 857d9c9e4dbSFlorent Revest i++; 858d9c9e4dbSFlorent Revest while (fmt[i] >= '0' && fmt[i] <= '9') 859d9c9e4dbSFlorent Revest i++; 860d9c9e4dbSFlorent Revest } 861d9c9e4dbSFlorent Revest 862d9c9e4dbSFlorent Revest if (fmt[i] == 'p') { 86348cac3f4SFlorent Revest sizeof_cur_arg = sizeof(long); 864d9c9e4dbSFlorent Revest 865d9c9e4dbSFlorent Revest if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && 866d9c9e4dbSFlorent Revest fmt[i + 2] == 's') { 867d9c9e4dbSFlorent Revest fmt_ptype = fmt[i + 1]; 868d9c9e4dbSFlorent Revest i += 2; 869d9c9e4dbSFlorent Revest goto fmt_str; 870d9c9e4dbSFlorent Revest } 871d9c9e4dbSFlorent Revest 872d9c9e4dbSFlorent Revest if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || 873d9c9e4dbSFlorent Revest ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || 87448cac3f4SFlorent Revest fmt[i + 1] == 'x' || fmt[i + 1] == 's' || 87548cac3f4SFlorent Revest fmt[i + 1] == 'S') { 876d9c9e4dbSFlorent Revest /* just kernel pointers */ 87748cac3f4SFlorent Revest if (tmp_buf) 878d9c9e4dbSFlorent Revest cur_arg = raw_args[num_spec]; 87948cac3f4SFlorent Revest i++; 88048cac3f4SFlorent Revest goto nocopy_fmt; 88148cac3f4SFlorent Revest } 88248cac3f4SFlorent Revest 88348cac3f4SFlorent Revest if (fmt[i + 1] == 'B') { 88448cac3f4SFlorent Revest if (tmp_buf) { 88548cac3f4SFlorent Revest err = snprintf(tmp_buf, 88648cac3f4SFlorent Revest (tmp_buf_end - tmp_buf), 88748cac3f4SFlorent Revest "%pB", 88848cac3f4SFlorent Revest (void *)(long)raw_args[num_spec]); 88948cac3f4SFlorent Revest tmp_buf += (err + 1); 89048cac3f4SFlorent Revest } 89148cac3f4SFlorent Revest 89248cac3f4SFlorent Revest i++; 89348cac3f4SFlorent Revest num_spec++; 89448cac3f4SFlorent Revest continue; 895d9c9e4dbSFlorent Revest } 896d9c9e4dbSFlorent Revest 897d9c9e4dbSFlorent Revest /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ 898d9c9e4dbSFlorent Revest if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || 899d9c9e4dbSFlorent Revest (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { 900d9c9e4dbSFlorent Revest err = -EINVAL; 901d9c9e4dbSFlorent Revest goto out; 902d9c9e4dbSFlorent Revest } 903d9c9e4dbSFlorent Revest 90448cac3f4SFlorent Revest i += 2; 90548cac3f4SFlorent Revest if (!tmp_buf) 90648cac3f4SFlorent Revest goto nocopy_fmt; 90748cac3f4SFlorent Revest 90848cac3f4SFlorent Revest sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; 90948cac3f4SFlorent Revest if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { 910d9c9e4dbSFlorent Revest err = -ENOSPC; 91148cac3f4SFlorent Revest goto out; 912d9c9e4dbSFlorent Revest } 913d9c9e4dbSFlorent Revest 914d9c9e4dbSFlorent Revest unsafe_ptr = (char *)(long)raw_args[num_spec]; 91548cac3f4SFlorent Revest err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, 91648cac3f4SFlorent Revest sizeof_cur_ip); 917d9c9e4dbSFlorent Revest if (err < 0) 91848cac3f4SFlorent Revest memset(cur_ip, 0, sizeof_cur_ip); 919d9c9e4dbSFlorent Revest 92048cac3f4SFlorent Revest /* hack: bstr_printf expects IP addresses to be 92148cac3f4SFlorent Revest * pre-formatted as strings, ironically, the easiest way 92248cac3f4SFlorent Revest * to do that is to call snprintf. 92348cac3f4SFlorent Revest */ 92448cac3f4SFlorent Revest ip_spec[2] = fmt[i - 1]; 92548cac3f4SFlorent Revest ip_spec[3] = fmt[i]; 92648cac3f4SFlorent Revest err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, 92748cac3f4SFlorent Revest ip_spec, &cur_ip); 92848cac3f4SFlorent Revest 92948cac3f4SFlorent Revest tmp_buf += err + 1; 93048cac3f4SFlorent Revest num_spec++; 93148cac3f4SFlorent Revest 93248cac3f4SFlorent Revest continue; 933d9c9e4dbSFlorent Revest } else if (fmt[i] == 's') { 934d9c9e4dbSFlorent Revest fmt_ptype = fmt[i]; 935d9c9e4dbSFlorent Revest fmt_str: 936d9c9e4dbSFlorent Revest if (fmt[i + 1] != 0 && 937d9c9e4dbSFlorent Revest !isspace(fmt[i + 1]) && 938d9c9e4dbSFlorent Revest !ispunct(fmt[i + 1])) { 939d9c9e4dbSFlorent Revest err = -EINVAL; 940d9c9e4dbSFlorent Revest goto out; 941d9c9e4dbSFlorent Revest } 942d9c9e4dbSFlorent Revest 94348cac3f4SFlorent Revest if (!tmp_buf) 94448cac3f4SFlorent Revest goto nocopy_fmt; 94548cac3f4SFlorent Revest 94648cac3f4SFlorent Revest if (tmp_buf_end == tmp_buf) { 947d9c9e4dbSFlorent Revest err = -ENOSPC; 94848cac3f4SFlorent Revest goto out; 949d9c9e4dbSFlorent Revest } 950d9c9e4dbSFlorent Revest 951d9c9e4dbSFlorent Revest unsafe_ptr = (char *)(long)raw_args[num_spec]; 952d9c9e4dbSFlorent Revest err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, 95348cac3f4SFlorent Revest fmt_ptype, 95448cac3f4SFlorent Revest tmp_buf_end - tmp_buf); 955d9c9e4dbSFlorent Revest if (err < 0) { 956d9c9e4dbSFlorent Revest tmp_buf[0] = '\0'; 957d9c9e4dbSFlorent Revest err = 1; 958d9c9e4dbSFlorent Revest } 959d9c9e4dbSFlorent Revest 960d9c9e4dbSFlorent Revest tmp_buf += err; 96148cac3f4SFlorent Revest num_spec++; 962d9c9e4dbSFlorent Revest 96348cac3f4SFlorent Revest continue; 9643478cfcfSKuniyuki Iwashima } else if (fmt[i] == 'c') { 9653478cfcfSKuniyuki Iwashima if (!tmp_buf) 9663478cfcfSKuniyuki Iwashima goto nocopy_fmt; 9673478cfcfSKuniyuki Iwashima 9683478cfcfSKuniyuki Iwashima if (tmp_buf_end == tmp_buf) { 9693478cfcfSKuniyuki Iwashima err = -ENOSPC; 9703478cfcfSKuniyuki Iwashima goto out; 9713478cfcfSKuniyuki Iwashima } 9723478cfcfSKuniyuki Iwashima 9733478cfcfSKuniyuki Iwashima *tmp_buf = raw_args[num_spec]; 9743478cfcfSKuniyuki Iwashima tmp_buf++; 9753478cfcfSKuniyuki Iwashima num_spec++; 9763478cfcfSKuniyuki Iwashima 9773478cfcfSKuniyuki Iwashima continue; 978d9c9e4dbSFlorent Revest } 979d9c9e4dbSFlorent Revest 98048cac3f4SFlorent Revest sizeof_cur_arg = sizeof(int); 981d9c9e4dbSFlorent Revest 982d9c9e4dbSFlorent Revest if (fmt[i] == 'l') { 98348cac3f4SFlorent Revest sizeof_cur_arg = sizeof(long); 984d9c9e4dbSFlorent Revest i++; 985d9c9e4dbSFlorent Revest } 986d9c9e4dbSFlorent Revest if (fmt[i] == 'l') { 98748cac3f4SFlorent Revest sizeof_cur_arg = sizeof(long long); 988d9c9e4dbSFlorent Revest i++; 989d9c9e4dbSFlorent Revest } 990d9c9e4dbSFlorent Revest 991d9c9e4dbSFlorent Revest if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && 992d9c9e4dbSFlorent Revest fmt[i] != 'x' && fmt[i] != 'X') { 993d9c9e4dbSFlorent Revest err = -EINVAL; 99448cac3f4SFlorent Revest goto out; 995d9c9e4dbSFlorent Revest } 996d9c9e4dbSFlorent Revest 99748cac3f4SFlorent Revest if (tmp_buf) 998d9c9e4dbSFlorent Revest cur_arg = raw_args[num_spec]; 99948cac3f4SFlorent Revest nocopy_fmt: 100048cac3f4SFlorent Revest if (tmp_buf) { 100148cac3f4SFlorent Revest tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); 100248cac3f4SFlorent Revest if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { 100348cac3f4SFlorent Revest err = -ENOSPC; 100448cac3f4SFlorent Revest goto out; 100548cac3f4SFlorent Revest } 100648cac3f4SFlorent Revest 100748cac3f4SFlorent Revest if (sizeof_cur_arg == 8) { 100848cac3f4SFlorent Revest *(u32 *)tmp_buf = *(u32 *)&cur_arg; 100948cac3f4SFlorent Revest *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1); 101048cac3f4SFlorent Revest } else { 101148cac3f4SFlorent Revest *(u32 *)tmp_buf = (u32)(long)cur_arg; 101248cac3f4SFlorent Revest } 101348cac3f4SFlorent Revest tmp_buf += sizeof_cur_arg; 1014d9c9e4dbSFlorent Revest } 1015d9c9e4dbSFlorent Revest num_spec++; 1016d9c9e4dbSFlorent Revest } 1017d9c9e4dbSFlorent Revest 1018d9c9e4dbSFlorent Revest err = 0; 1019d9c9e4dbSFlorent Revest out: 102048cac3f4SFlorent Revest if (err) 102148cac3f4SFlorent Revest bpf_bprintf_cleanup(); 1022d9c9e4dbSFlorent Revest return err; 1023d9c9e4dbSFlorent Revest } 1024d9c9e4dbSFlorent Revest 10257b15523aSFlorent Revest BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, 10267b15523aSFlorent Revest const void *, data, u32, data_len) 10277b15523aSFlorent Revest { 10287b15523aSFlorent Revest int err, num_args; 102948cac3f4SFlorent Revest u32 *bin_args; 10307b15523aSFlorent Revest 1031335ff499SDave Marchevsky if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || 10327b15523aSFlorent Revest (data_len && !data)) 10337b15523aSFlorent Revest return -EINVAL; 10347b15523aSFlorent Revest num_args = data_len / 8; 10357b15523aSFlorent Revest 10367b15523aSFlorent Revest /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we 10377b15523aSFlorent Revest * can safely give an unbounded size. 10387b15523aSFlorent Revest */ 103948cac3f4SFlorent Revest err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); 10407b15523aSFlorent Revest if (err < 0) 10417b15523aSFlorent Revest return err; 10427b15523aSFlorent Revest 104348cac3f4SFlorent Revest err = bstr_printf(str, str_size, fmt, bin_args); 10447b15523aSFlorent Revest 104548cac3f4SFlorent Revest bpf_bprintf_cleanup(); 10467b15523aSFlorent Revest 10477b15523aSFlorent Revest return err + 1; 10487b15523aSFlorent Revest } 10497b15523aSFlorent Revest 10507b15523aSFlorent Revest const struct bpf_func_proto bpf_snprintf_proto = { 10517b15523aSFlorent Revest .func = bpf_snprintf, 10527b15523aSFlorent Revest .gpl_only = true, 10537b15523aSFlorent Revest .ret_type = RET_INTEGER, 10547b15523aSFlorent Revest .arg1_type = ARG_PTR_TO_MEM_OR_NULL, 10557b15523aSFlorent Revest .arg2_type = ARG_CONST_SIZE_OR_ZERO, 10567b15523aSFlorent Revest .arg3_type = ARG_PTR_TO_CONST_STR, 1057216e3cd2SHao Luo .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 10587b15523aSFlorent Revest .arg5_type = ARG_CONST_SIZE_OR_ZERO, 10597b15523aSFlorent Revest }; 10607b15523aSFlorent Revest 1061b00628b1SAlexei Starovoitov /* BPF map elements can contain 'struct bpf_timer'. 1062b00628b1SAlexei Starovoitov * Such map owns all of its BPF timers. 1063b00628b1SAlexei Starovoitov * 'struct bpf_timer' is allocated as part of map element allocation 1064b00628b1SAlexei Starovoitov * and it's zero initialized. 1065b00628b1SAlexei Starovoitov * That space is used to keep 'struct bpf_timer_kern'. 1066b00628b1SAlexei Starovoitov * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and 1067b00628b1SAlexei Starovoitov * remembers 'struct bpf_map *' pointer it's part of. 1068b00628b1SAlexei Starovoitov * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn. 1069b00628b1SAlexei Starovoitov * bpf_timer_start() arms the timer. 1070b00628b1SAlexei Starovoitov * If user space reference to a map goes to zero at this point 1071b00628b1SAlexei Starovoitov * ops->map_release_uref callback is responsible for cancelling the timers, 1072b00628b1SAlexei Starovoitov * freeing their memory, and decrementing prog's refcnts. 1073b00628b1SAlexei Starovoitov * bpf_timer_cancel() cancels the timer and decrements prog's refcnt. 1074b00628b1SAlexei Starovoitov * Inner maps can contain bpf timers as well. ops->map_release_uref is 1075b00628b1SAlexei Starovoitov * freeing the timers when inner map is replaced or deleted by user space. 1076b00628b1SAlexei Starovoitov */ 1077b00628b1SAlexei Starovoitov struct bpf_hrtimer { 1078b00628b1SAlexei Starovoitov struct hrtimer timer; 1079b00628b1SAlexei Starovoitov struct bpf_map *map; 1080b00628b1SAlexei Starovoitov struct bpf_prog *prog; 1081b00628b1SAlexei Starovoitov void __rcu *callback_fn; 1082b00628b1SAlexei Starovoitov void *value; 1083b00628b1SAlexei Starovoitov }; 1084b00628b1SAlexei Starovoitov 1085b00628b1SAlexei Starovoitov /* the actual struct hidden inside uapi struct bpf_timer */ 1086b00628b1SAlexei Starovoitov struct bpf_timer_kern { 1087b00628b1SAlexei Starovoitov struct bpf_hrtimer *timer; 1088b00628b1SAlexei Starovoitov /* bpf_spin_lock is used here instead of spinlock_t to make 1089c561d110STom Rix * sure that it always fits into space reserved by struct bpf_timer 1090b00628b1SAlexei Starovoitov * regardless of LOCKDEP and spinlock debug flags. 1091b00628b1SAlexei Starovoitov */ 1092b00628b1SAlexei Starovoitov struct bpf_spin_lock lock; 1093b00628b1SAlexei Starovoitov } __attribute__((aligned(8))); 1094b00628b1SAlexei Starovoitov 1095b00628b1SAlexei Starovoitov static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); 1096b00628b1SAlexei Starovoitov 1097b00628b1SAlexei Starovoitov static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) 1098b00628b1SAlexei Starovoitov { 1099b00628b1SAlexei Starovoitov struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); 1100b00628b1SAlexei Starovoitov struct bpf_map *map = t->map; 1101b00628b1SAlexei Starovoitov void *value = t->value; 1102102acbacSKees Cook bpf_callback_t callback_fn; 1103b00628b1SAlexei Starovoitov void *key; 1104b00628b1SAlexei Starovoitov u32 idx; 1105b00628b1SAlexei Starovoitov 11063bd916eeSYonghong Song BTF_TYPE_EMIT(struct bpf_timer); 1107b00628b1SAlexei Starovoitov callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); 1108b00628b1SAlexei Starovoitov if (!callback_fn) 1109b00628b1SAlexei Starovoitov goto out; 1110b00628b1SAlexei Starovoitov 1111b00628b1SAlexei Starovoitov /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and 1112b00628b1SAlexei Starovoitov * cannot be preempted by another bpf_timer_cb() on the same cpu. 1113b00628b1SAlexei Starovoitov * Remember the timer this callback is servicing to prevent 1114b00628b1SAlexei Starovoitov * deadlock if callback_fn() calls bpf_timer_cancel() or 1115b00628b1SAlexei Starovoitov * bpf_map_delete_elem() on the same timer. 1116b00628b1SAlexei Starovoitov */ 1117b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, t); 1118b00628b1SAlexei Starovoitov if (map->map_type == BPF_MAP_TYPE_ARRAY) { 1119b00628b1SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 1120b00628b1SAlexei Starovoitov 1121b00628b1SAlexei Starovoitov /* compute the key */ 1122b00628b1SAlexei Starovoitov idx = ((char *)value - array->value) / array->elem_size; 1123b00628b1SAlexei Starovoitov key = &idx; 1124b00628b1SAlexei Starovoitov } else { /* hash or lru */ 1125b00628b1SAlexei Starovoitov key = value - round_up(map->key_size, 8); 1126b00628b1SAlexei Starovoitov } 1127b00628b1SAlexei Starovoitov 1128102acbacSKees Cook callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); 1129bfc6bb74SAlexei Starovoitov /* The verifier checked that return value is zero. */ 1130b00628b1SAlexei Starovoitov 1131b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, NULL); 1132b00628b1SAlexei Starovoitov out: 1133b00628b1SAlexei Starovoitov return HRTIMER_NORESTART; 1134b00628b1SAlexei Starovoitov } 1135b00628b1SAlexei Starovoitov 1136b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, 1137b00628b1SAlexei Starovoitov u64, flags) 1138b00628b1SAlexei Starovoitov { 1139b00628b1SAlexei Starovoitov clockid_t clockid = flags & (MAX_CLOCKS - 1); 1140b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1141b00628b1SAlexei Starovoitov int ret = 0; 1142b00628b1SAlexei Starovoitov 1143b00628b1SAlexei Starovoitov BUILD_BUG_ON(MAX_CLOCKS != 16); 1144b00628b1SAlexei Starovoitov BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); 1145b00628b1SAlexei Starovoitov BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); 1146b00628b1SAlexei Starovoitov 1147b00628b1SAlexei Starovoitov if (in_nmi()) 1148b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1149b00628b1SAlexei Starovoitov 1150b00628b1SAlexei Starovoitov if (flags >= MAX_CLOCKS || 1151b00628b1SAlexei Starovoitov /* similar to timerfd except _ALARM variants are not supported */ 1152b00628b1SAlexei Starovoitov (clockid != CLOCK_MONOTONIC && 1153b00628b1SAlexei Starovoitov clockid != CLOCK_REALTIME && 1154b00628b1SAlexei Starovoitov clockid != CLOCK_BOOTTIME)) 1155b00628b1SAlexei Starovoitov return -EINVAL; 1156b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1157b00628b1SAlexei Starovoitov t = timer->timer; 1158b00628b1SAlexei Starovoitov if (t) { 1159b00628b1SAlexei Starovoitov ret = -EBUSY; 1160b00628b1SAlexei Starovoitov goto out; 1161b00628b1SAlexei Starovoitov } 1162b00628b1SAlexei Starovoitov if (!atomic64_read(&map->usercnt)) { 1163b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space 1164b00628b1SAlexei Starovoitov * or pinned in bpffs. 1165b00628b1SAlexei Starovoitov */ 1166b00628b1SAlexei Starovoitov ret = -EPERM; 1167b00628b1SAlexei Starovoitov goto out; 1168b00628b1SAlexei Starovoitov } 1169b00628b1SAlexei Starovoitov /* allocate hrtimer via map_kmalloc to use memcg accounting */ 1170b00628b1SAlexei Starovoitov t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); 1171b00628b1SAlexei Starovoitov if (!t) { 1172b00628b1SAlexei Starovoitov ret = -ENOMEM; 1173b00628b1SAlexei Starovoitov goto out; 1174b00628b1SAlexei Starovoitov } 1175db559117SKumar Kartikeya Dwivedi t->value = (void *)timer - map->record->timer_off; 1176b00628b1SAlexei Starovoitov t->map = map; 1177b00628b1SAlexei Starovoitov t->prog = NULL; 1178b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, NULL); 1179b00628b1SAlexei Starovoitov hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); 1180b00628b1SAlexei Starovoitov t->timer.function = bpf_timer_cb; 1181b00628b1SAlexei Starovoitov timer->timer = t; 1182b00628b1SAlexei Starovoitov out: 1183b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1184b00628b1SAlexei Starovoitov return ret; 1185b00628b1SAlexei Starovoitov } 1186b00628b1SAlexei Starovoitov 1187b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_init_proto = { 1188b00628b1SAlexei Starovoitov .func = bpf_timer_init, 1189b00628b1SAlexei Starovoitov .gpl_only = true, 1190b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1191b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1192b00628b1SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 1193b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 1194b00628b1SAlexei Starovoitov }; 1195b00628b1SAlexei Starovoitov 1196b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, 1197b00628b1SAlexei Starovoitov struct bpf_prog_aux *, aux) 1198b00628b1SAlexei Starovoitov { 1199b00628b1SAlexei Starovoitov struct bpf_prog *prev, *prog = aux->prog; 1200b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1201b00628b1SAlexei Starovoitov int ret = 0; 1202b00628b1SAlexei Starovoitov 1203b00628b1SAlexei Starovoitov if (in_nmi()) 1204b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1205b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1206b00628b1SAlexei Starovoitov t = timer->timer; 1207b00628b1SAlexei Starovoitov if (!t) { 1208b00628b1SAlexei Starovoitov ret = -EINVAL; 1209b00628b1SAlexei Starovoitov goto out; 1210b00628b1SAlexei Starovoitov } 1211b00628b1SAlexei Starovoitov if (!atomic64_read(&t->map->usercnt)) { 1212b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space 1213b00628b1SAlexei Starovoitov * or pinned in bpffs. Otherwise timer might still be 1214b00628b1SAlexei Starovoitov * running even when bpf prog is detached and user space 1215b00628b1SAlexei Starovoitov * is gone, since map_release_uref won't ever be called. 1216b00628b1SAlexei Starovoitov */ 1217b00628b1SAlexei Starovoitov ret = -EPERM; 1218b00628b1SAlexei Starovoitov goto out; 1219b00628b1SAlexei Starovoitov } 1220b00628b1SAlexei Starovoitov prev = t->prog; 1221b00628b1SAlexei Starovoitov if (prev != prog) { 1222b00628b1SAlexei Starovoitov /* Bump prog refcnt once. Every bpf_timer_set_callback() 1223b00628b1SAlexei Starovoitov * can pick different callback_fn-s within the same prog. 1224b00628b1SAlexei Starovoitov */ 1225b00628b1SAlexei Starovoitov prog = bpf_prog_inc_not_zero(prog); 1226b00628b1SAlexei Starovoitov if (IS_ERR(prog)) { 1227b00628b1SAlexei Starovoitov ret = PTR_ERR(prog); 1228b00628b1SAlexei Starovoitov goto out; 1229b00628b1SAlexei Starovoitov } 1230b00628b1SAlexei Starovoitov if (prev) 1231b00628b1SAlexei Starovoitov /* Drop prev prog refcnt when swapping with new prog */ 1232b00628b1SAlexei Starovoitov bpf_prog_put(prev); 1233b00628b1SAlexei Starovoitov t->prog = prog; 1234b00628b1SAlexei Starovoitov } 1235b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, callback_fn); 1236b00628b1SAlexei Starovoitov out: 1237b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1238b00628b1SAlexei Starovoitov return ret; 1239b00628b1SAlexei Starovoitov } 1240b00628b1SAlexei Starovoitov 1241b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_set_callback_proto = { 1242b00628b1SAlexei Starovoitov .func = bpf_timer_set_callback, 1243b00628b1SAlexei Starovoitov .gpl_only = true, 1244b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1245b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1246b00628b1SAlexei Starovoitov .arg2_type = ARG_PTR_TO_FUNC, 1247b00628b1SAlexei Starovoitov }; 1248b00628b1SAlexei Starovoitov 1249b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) 1250b00628b1SAlexei Starovoitov { 1251b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1252b00628b1SAlexei Starovoitov int ret = 0; 1253b00628b1SAlexei Starovoitov 1254b00628b1SAlexei Starovoitov if (in_nmi()) 1255b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1256b00628b1SAlexei Starovoitov if (flags) 1257b00628b1SAlexei Starovoitov return -EINVAL; 1258b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1259b00628b1SAlexei Starovoitov t = timer->timer; 1260b00628b1SAlexei Starovoitov if (!t || !t->prog) { 1261b00628b1SAlexei Starovoitov ret = -EINVAL; 1262b00628b1SAlexei Starovoitov goto out; 1263b00628b1SAlexei Starovoitov } 1264b00628b1SAlexei Starovoitov hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); 1265b00628b1SAlexei Starovoitov out: 1266b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1267b00628b1SAlexei Starovoitov return ret; 1268b00628b1SAlexei Starovoitov } 1269b00628b1SAlexei Starovoitov 1270b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_start_proto = { 1271b00628b1SAlexei Starovoitov .func = bpf_timer_start, 1272b00628b1SAlexei Starovoitov .gpl_only = true, 1273b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1274b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1275b00628b1SAlexei Starovoitov .arg2_type = ARG_ANYTHING, 1276b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 1277b00628b1SAlexei Starovoitov }; 1278b00628b1SAlexei Starovoitov 1279b00628b1SAlexei Starovoitov static void drop_prog_refcnt(struct bpf_hrtimer *t) 1280b00628b1SAlexei Starovoitov { 1281b00628b1SAlexei Starovoitov struct bpf_prog *prog = t->prog; 1282b00628b1SAlexei Starovoitov 1283b00628b1SAlexei Starovoitov if (prog) { 1284b00628b1SAlexei Starovoitov bpf_prog_put(prog); 1285b00628b1SAlexei Starovoitov t->prog = NULL; 1286b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, NULL); 1287b00628b1SAlexei Starovoitov } 1288b00628b1SAlexei Starovoitov } 1289b00628b1SAlexei Starovoitov 1290b00628b1SAlexei Starovoitov BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) 1291b00628b1SAlexei Starovoitov { 1292b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1293b00628b1SAlexei Starovoitov int ret = 0; 1294b00628b1SAlexei Starovoitov 1295b00628b1SAlexei Starovoitov if (in_nmi()) 1296b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1297b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1298b00628b1SAlexei Starovoitov t = timer->timer; 1299b00628b1SAlexei Starovoitov if (!t) { 1300b00628b1SAlexei Starovoitov ret = -EINVAL; 1301b00628b1SAlexei Starovoitov goto out; 1302b00628b1SAlexei Starovoitov } 1303b00628b1SAlexei Starovoitov if (this_cpu_read(hrtimer_running) == t) { 1304b00628b1SAlexei Starovoitov /* If bpf callback_fn is trying to bpf_timer_cancel() 1305b00628b1SAlexei Starovoitov * its own timer the hrtimer_cancel() will deadlock 1306b00628b1SAlexei Starovoitov * since it waits for callback_fn to finish 1307b00628b1SAlexei Starovoitov */ 1308b00628b1SAlexei Starovoitov ret = -EDEADLK; 1309b00628b1SAlexei Starovoitov goto out; 1310b00628b1SAlexei Starovoitov } 1311b00628b1SAlexei Starovoitov drop_prog_refcnt(t); 1312b00628b1SAlexei Starovoitov out: 1313b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1314b00628b1SAlexei Starovoitov /* Cancel the timer and wait for associated callback to finish 1315b00628b1SAlexei Starovoitov * if it was running. 1316b00628b1SAlexei Starovoitov */ 1317b00628b1SAlexei Starovoitov ret = ret ?: hrtimer_cancel(&t->timer); 1318b00628b1SAlexei Starovoitov return ret; 1319b00628b1SAlexei Starovoitov } 1320b00628b1SAlexei Starovoitov 1321b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_cancel_proto = { 1322b00628b1SAlexei Starovoitov .func = bpf_timer_cancel, 1323b00628b1SAlexei Starovoitov .gpl_only = true, 1324b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1325b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1326b00628b1SAlexei Starovoitov }; 1327b00628b1SAlexei Starovoitov 1328b00628b1SAlexei Starovoitov /* This function is called by map_delete/update_elem for individual element and 1329b00628b1SAlexei Starovoitov * by ops->map_release_uref when the user space reference to a map reaches zero. 1330b00628b1SAlexei Starovoitov */ 1331b00628b1SAlexei Starovoitov void bpf_timer_cancel_and_free(void *val) 1332b00628b1SAlexei Starovoitov { 1333b00628b1SAlexei Starovoitov struct bpf_timer_kern *timer = val; 1334b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1335b00628b1SAlexei Starovoitov 1336b00628b1SAlexei Starovoitov /* Performance optimization: read timer->timer without lock first. */ 1337b00628b1SAlexei Starovoitov if (!READ_ONCE(timer->timer)) 1338b00628b1SAlexei Starovoitov return; 1339b00628b1SAlexei Starovoitov 1340b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1341b00628b1SAlexei Starovoitov /* re-read it under lock */ 1342b00628b1SAlexei Starovoitov t = timer->timer; 1343b00628b1SAlexei Starovoitov if (!t) 1344b00628b1SAlexei Starovoitov goto out; 1345b00628b1SAlexei Starovoitov drop_prog_refcnt(t); 1346b00628b1SAlexei Starovoitov /* The subsequent bpf_timer_start/cancel() helpers won't be able to use 1347b00628b1SAlexei Starovoitov * this timer, since it won't be initialized. 1348b00628b1SAlexei Starovoitov */ 1349b00628b1SAlexei Starovoitov timer->timer = NULL; 1350b00628b1SAlexei Starovoitov out: 1351b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1352b00628b1SAlexei Starovoitov if (!t) 1353b00628b1SAlexei Starovoitov return; 1354b00628b1SAlexei Starovoitov /* Cancel the timer and wait for callback to complete if it was running. 1355b00628b1SAlexei Starovoitov * If hrtimer_cancel() can be safely called it's safe to call kfree(t) 1356b00628b1SAlexei Starovoitov * right after for both preallocated and non-preallocated maps. 1357b00628b1SAlexei Starovoitov * The timer->timer = NULL was already done and no code path can 1358b00628b1SAlexei Starovoitov * see address 't' anymore. 1359b00628b1SAlexei Starovoitov * 1360b00628b1SAlexei Starovoitov * Check that bpf_map_delete/update_elem() wasn't called from timer 1361b00628b1SAlexei Starovoitov * callback_fn. In such case don't call hrtimer_cancel() (since it will 1362b00628b1SAlexei Starovoitov * deadlock) and don't call hrtimer_try_to_cancel() (since it will just 1363b00628b1SAlexei Starovoitov * return -1). Though callback_fn is still running on this cpu it's 1364b00628b1SAlexei Starovoitov * safe to do kfree(t) because bpf_timer_cb() read everything it needed 1365b00628b1SAlexei Starovoitov * from 't'. The bpf subprog callback_fn won't be able to access 't', 1366b00628b1SAlexei Starovoitov * since timer->timer = NULL was already done. The timer will be 1367b00628b1SAlexei Starovoitov * effectively cancelled because bpf_timer_cb() will return 1368b00628b1SAlexei Starovoitov * HRTIMER_NORESTART. 1369b00628b1SAlexei Starovoitov */ 1370b00628b1SAlexei Starovoitov if (this_cpu_read(hrtimer_running) != t) 1371b00628b1SAlexei Starovoitov hrtimer_cancel(&t->timer); 1372b00628b1SAlexei Starovoitov kfree(t); 1373b00628b1SAlexei Starovoitov } 1374b00628b1SAlexei Starovoitov 1375c0a5a21cSKumar Kartikeya Dwivedi BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) 1376c0a5a21cSKumar Kartikeya Dwivedi { 1377c0a5a21cSKumar Kartikeya Dwivedi unsigned long *kptr = map_value; 1378c0a5a21cSKumar Kartikeya Dwivedi 1379c0a5a21cSKumar Kartikeya Dwivedi return xchg(kptr, (unsigned long)ptr); 1380c0a5a21cSKumar Kartikeya Dwivedi } 1381c0a5a21cSKumar Kartikeya Dwivedi 1382c0a5a21cSKumar Kartikeya Dwivedi /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() 138347e34cb7SDave Marchevsky * helper is determined dynamically by the verifier. Use BPF_PTR_POISON to 138447e34cb7SDave Marchevsky * denote type that verifier will determine. 1385c0a5a21cSKumar Kartikeya Dwivedi */ 1386dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_kptr_xchg_proto = { 1387c0a5a21cSKumar Kartikeya Dwivedi .func = bpf_kptr_xchg, 1388c0a5a21cSKumar Kartikeya Dwivedi .gpl_only = false, 1389c0a5a21cSKumar Kartikeya Dwivedi .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 1390c0a5a21cSKumar Kartikeya Dwivedi .ret_btf_id = BPF_PTR_POISON, 1391c0a5a21cSKumar Kartikeya Dwivedi .arg1_type = ARG_PTR_TO_KPTR, 1392c0a5a21cSKumar Kartikeya Dwivedi .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, 1393c0a5a21cSKumar Kartikeya Dwivedi .arg2_btf_id = BPF_PTR_POISON, 1394c0a5a21cSKumar Kartikeya Dwivedi }; 1395c0a5a21cSKumar Kartikeya Dwivedi 1396263ae152SJoanne Koong /* Since the upper 8 bits of dynptr->size is reserved, the 1397263ae152SJoanne Koong * maximum supported size is 2^24 - 1. 1398263ae152SJoanne Koong */ 1399263ae152SJoanne Koong #define DYNPTR_MAX_SIZE ((1UL << 24) - 1) 1400263ae152SJoanne Koong #define DYNPTR_TYPE_SHIFT 28 140113bbbfbeSJoanne Koong #define DYNPTR_SIZE_MASK 0xFFFFFF 140213bbbfbeSJoanne Koong #define DYNPTR_RDONLY_BIT BIT(31) 140313bbbfbeSJoanne Koong 140413bbbfbeSJoanne Koong static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) 140513bbbfbeSJoanne Koong { 140613bbbfbeSJoanne Koong return ptr->size & DYNPTR_RDONLY_BIT; 140713bbbfbeSJoanne Koong } 1408263ae152SJoanne Koong 1409263ae152SJoanne Koong static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) 1410263ae152SJoanne Koong { 1411263ae152SJoanne Koong ptr->size |= type << DYNPTR_TYPE_SHIFT; 1412263ae152SJoanne Koong } 1413263ae152SJoanne Koong 141451df4865SRoberto Sassu u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) 141513bbbfbeSJoanne Koong { 141613bbbfbeSJoanne Koong return ptr->size & DYNPTR_SIZE_MASK; 141713bbbfbeSJoanne Koong } 141813bbbfbeSJoanne Koong 1419bc34dee6SJoanne Koong int bpf_dynptr_check_size(u32 size) 1420263ae152SJoanne Koong { 1421263ae152SJoanne Koong return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; 1422263ae152SJoanne Koong } 1423263ae152SJoanne Koong 1424bc34dee6SJoanne Koong void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, 1425263ae152SJoanne Koong enum bpf_dynptr_type type, u32 offset, u32 size) 1426263ae152SJoanne Koong { 1427263ae152SJoanne Koong ptr->data = data; 1428263ae152SJoanne Koong ptr->offset = offset; 1429263ae152SJoanne Koong ptr->size = size; 1430263ae152SJoanne Koong bpf_dynptr_set_type(ptr, type); 1431263ae152SJoanne Koong } 1432263ae152SJoanne Koong 1433bc34dee6SJoanne Koong void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) 1434263ae152SJoanne Koong { 1435263ae152SJoanne Koong memset(ptr, 0, sizeof(*ptr)); 1436263ae152SJoanne Koong } 1437263ae152SJoanne Koong 143813bbbfbeSJoanne Koong static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) 143913bbbfbeSJoanne Koong { 144013bbbfbeSJoanne Koong u32 size = bpf_dynptr_get_size(ptr); 144113bbbfbeSJoanne Koong 144213bbbfbeSJoanne Koong if (len > size || offset > size - len) 144313bbbfbeSJoanne Koong return -E2BIG; 144413bbbfbeSJoanne Koong 144513bbbfbeSJoanne Koong return 0; 144613bbbfbeSJoanne Koong } 144713bbbfbeSJoanne Koong 1448263ae152SJoanne Koong BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) 1449263ae152SJoanne Koong { 1450263ae152SJoanne Koong int err; 1451263ae152SJoanne Koong 145200f14641SRoberto Sassu BTF_TYPE_EMIT(struct bpf_dynptr); 145300f14641SRoberto Sassu 1454263ae152SJoanne Koong err = bpf_dynptr_check_size(size); 1455263ae152SJoanne Koong if (err) 1456263ae152SJoanne Koong goto error; 1457263ae152SJoanne Koong 1458263ae152SJoanne Koong /* flags is currently unsupported */ 1459263ae152SJoanne Koong if (flags) { 1460263ae152SJoanne Koong err = -EINVAL; 1461263ae152SJoanne Koong goto error; 1462263ae152SJoanne Koong } 1463263ae152SJoanne Koong 1464263ae152SJoanne Koong bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); 1465263ae152SJoanne Koong 1466263ae152SJoanne Koong return 0; 1467263ae152SJoanne Koong 1468263ae152SJoanne Koong error: 1469263ae152SJoanne Koong bpf_dynptr_set_null(ptr); 1470263ae152SJoanne Koong return err; 1471263ae152SJoanne Koong } 1472263ae152SJoanne Koong 1473dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { 1474263ae152SJoanne Koong .func = bpf_dynptr_from_mem, 1475263ae152SJoanne Koong .gpl_only = false, 1476263ae152SJoanne Koong .ret_type = RET_INTEGER, 1477263ae152SJoanne Koong .arg1_type = ARG_PTR_TO_UNINIT_MEM, 1478263ae152SJoanne Koong .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1479263ae152SJoanne Koong .arg3_type = ARG_ANYTHING, 1480263ae152SJoanne Koong .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, 1481263ae152SJoanne Koong }; 1482263ae152SJoanne Koong 1483f8d3da4eSJoanne Koong BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, 1484f8d3da4eSJoanne Koong u32, offset, u64, flags) 148513bbbfbeSJoanne Koong { 148613bbbfbeSJoanne Koong int err; 148713bbbfbeSJoanne Koong 1488f8d3da4eSJoanne Koong if (!src->data || flags) 148913bbbfbeSJoanne Koong return -EINVAL; 149013bbbfbeSJoanne Koong 149113bbbfbeSJoanne Koong err = bpf_dynptr_check_off_len(src, offset, len); 149213bbbfbeSJoanne Koong if (err) 149313bbbfbeSJoanne Koong return err; 149413bbbfbeSJoanne Koong 149513bbbfbeSJoanne Koong memcpy(dst, src->data + src->offset + offset, len); 149613bbbfbeSJoanne Koong 149713bbbfbeSJoanne Koong return 0; 149813bbbfbeSJoanne Koong } 149913bbbfbeSJoanne Koong 1500dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_dynptr_read_proto = { 150113bbbfbeSJoanne Koong .func = bpf_dynptr_read, 150213bbbfbeSJoanne Koong .gpl_only = false, 150313bbbfbeSJoanne Koong .ret_type = RET_INTEGER, 150413bbbfbeSJoanne Koong .arg1_type = ARG_PTR_TO_UNINIT_MEM, 150513bbbfbeSJoanne Koong .arg2_type = ARG_CONST_SIZE_OR_ZERO, 150613bbbfbeSJoanne Koong .arg3_type = ARG_PTR_TO_DYNPTR, 150713bbbfbeSJoanne Koong .arg4_type = ARG_ANYTHING, 1508f8d3da4eSJoanne Koong .arg5_type = ARG_ANYTHING, 150913bbbfbeSJoanne Koong }; 151013bbbfbeSJoanne Koong 1511f8d3da4eSJoanne Koong BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, 1512f8d3da4eSJoanne Koong u32, len, u64, flags) 151313bbbfbeSJoanne Koong { 151413bbbfbeSJoanne Koong int err; 151513bbbfbeSJoanne Koong 1516f8d3da4eSJoanne Koong if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) 151713bbbfbeSJoanne Koong return -EINVAL; 151813bbbfbeSJoanne Koong 151913bbbfbeSJoanne Koong err = bpf_dynptr_check_off_len(dst, offset, len); 152013bbbfbeSJoanne Koong if (err) 152113bbbfbeSJoanne Koong return err; 152213bbbfbeSJoanne Koong 152313bbbfbeSJoanne Koong memcpy(dst->data + dst->offset + offset, src, len); 152413bbbfbeSJoanne Koong 152513bbbfbeSJoanne Koong return 0; 152613bbbfbeSJoanne Koong } 152713bbbfbeSJoanne Koong 1528dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_dynptr_write_proto = { 152913bbbfbeSJoanne Koong .func = bpf_dynptr_write, 153013bbbfbeSJoanne Koong .gpl_only = false, 153113bbbfbeSJoanne Koong .ret_type = RET_INTEGER, 153213bbbfbeSJoanne Koong .arg1_type = ARG_PTR_TO_DYNPTR, 153313bbbfbeSJoanne Koong .arg2_type = ARG_ANYTHING, 153413bbbfbeSJoanne Koong .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 153513bbbfbeSJoanne Koong .arg4_type = ARG_CONST_SIZE_OR_ZERO, 1536f8d3da4eSJoanne Koong .arg5_type = ARG_ANYTHING, 153713bbbfbeSJoanne Koong }; 153813bbbfbeSJoanne Koong 153934d4ef57SJoanne Koong BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) 154034d4ef57SJoanne Koong { 154134d4ef57SJoanne Koong int err; 154234d4ef57SJoanne Koong 154334d4ef57SJoanne Koong if (!ptr->data) 154434d4ef57SJoanne Koong return 0; 154534d4ef57SJoanne Koong 154634d4ef57SJoanne Koong err = bpf_dynptr_check_off_len(ptr, offset, len); 154734d4ef57SJoanne Koong if (err) 154834d4ef57SJoanne Koong return 0; 154934d4ef57SJoanne Koong 155034d4ef57SJoanne Koong if (bpf_dynptr_is_rdonly(ptr)) 155134d4ef57SJoanne Koong return 0; 155234d4ef57SJoanne Koong 155334d4ef57SJoanne Koong return (unsigned long)(ptr->data + ptr->offset + offset); 155434d4ef57SJoanne Koong } 155534d4ef57SJoanne Koong 1556dc368e1cSJoanne Koong static const struct bpf_func_proto bpf_dynptr_data_proto = { 155734d4ef57SJoanne Koong .func = bpf_dynptr_data, 155834d4ef57SJoanne Koong .gpl_only = false, 155934d4ef57SJoanne Koong .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, 156034d4ef57SJoanne Koong .arg1_type = ARG_PTR_TO_DYNPTR, 156134d4ef57SJoanne Koong .arg2_type = ARG_ANYTHING, 156234d4ef57SJoanne Koong .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, 156334d4ef57SJoanne Koong }; 156434d4ef57SJoanne Koong 1565f470378cSJohn Fastabend const struct bpf_func_proto bpf_get_current_task_proto __weak; 1566a396eda5SDaniel Xu const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; 1567f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_user_proto __weak; 1568f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; 1569f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; 1570f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; 1571dd6e10fbSDaniel Xu const struct bpf_func_proto bpf_task_pt_regs_proto __weak; 1572f470378cSJohn Fastabend 15736890896bSStanislav Fomichev const struct bpf_func_proto * 15746890896bSStanislav Fomichev bpf_base_func_proto(enum bpf_func_id func_id) 15756890896bSStanislav Fomichev { 15766890896bSStanislav Fomichev switch (func_id) { 15776890896bSStanislav Fomichev case BPF_FUNC_map_lookup_elem: 15786890896bSStanislav Fomichev return &bpf_map_lookup_elem_proto; 15796890896bSStanislav Fomichev case BPF_FUNC_map_update_elem: 15806890896bSStanislav Fomichev return &bpf_map_update_elem_proto; 15816890896bSStanislav Fomichev case BPF_FUNC_map_delete_elem: 15826890896bSStanislav Fomichev return &bpf_map_delete_elem_proto; 15836890896bSStanislav Fomichev case BPF_FUNC_map_push_elem: 15846890896bSStanislav Fomichev return &bpf_map_push_elem_proto; 15856890896bSStanislav Fomichev case BPF_FUNC_map_pop_elem: 15866890896bSStanislav Fomichev return &bpf_map_pop_elem_proto; 15876890896bSStanislav Fomichev case BPF_FUNC_map_peek_elem: 15886890896bSStanislav Fomichev return &bpf_map_peek_elem_proto; 158907343110SFeng Zhou case BPF_FUNC_map_lookup_percpu_elem: 159007343110SFeng Zhou return &bpf_map_lookup_percpu_elem_proto; 15916890896bSStanislav Fomichev case BPF_FUNC_get_prandom_u32: 15926890896bSStanislav Fomichev return &bpf_get_prandom_u32_proto; 15936890896bSStanislav Fomichev case BPF_FUNC_get_smp_processor_id: 15946890896bSStanislav Fomichev return &bpf_get_raw_smp_processor_id_proto; 15956890896bSStanislav Fomichev case BPF_FUNC_get_numa_node_id: 15966890896bSStanislav Fomichev return &bpf_get_numa_node_id_proto; 15976890896bSStanislav Fomichev case BPF_FUNC_tail_call: 15986890896bSStanislav Fomichev return &bpf_tail_call_proto; 15996890896bSStanislav Fomichev case BPF_FUNC_ktime_get_ns: 16006890896bSStanislav Fomichev return &bpf_ktime_get_ns_proto; 160171d19214SMaciej Żenczykowski case BPF_FUNC_ktime_get_boot_ns: 160271d19214SMaciej Żenczykowski return &bpf_ktime_get_boot_ns_proto; 1603c8996c98SJesper Dangaard Brouer case BPF_FUNC_ktime_get_tai_ns: 1604c8996c98SJesper Dangaard Brouer return &bpf_ktime_get_tai_ns_proto; 1605457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_output: 1606457f4436SAndrii Nakryiko return &bpf_ringbuf_output_proto; 1607457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_reserve: 1608457f4436SAndrii Nakryiko return &bpf_ringbuf_reserve_proto; 1609457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_submit: 1610457f4436SAndrii Nakryiko return &bpf_ringbuf_submit_proto; 1611457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_discard: 1612457f4436SAndrii Nakryiko return &bpf_ringbuf_discard_proto; 1613457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_query: 1614457f4436SAndrii Nakryiko return &bpf_ringbuf_query_proto; 1615c5fb1993SHou Tao case BPF_FUNC_strncmp: 1616c5fb1993SHou Tao return &bpf_strncmp_proto; 16178a67f2deSStanislav Fomichev case BPF_FUNC_strtol: 16188a67f2deSStanislav Fomichev return &bpf_strtol_proto; 16198a67f2deSStanislav Fomichev case BPF_FUNC_strtoul: 16208a67f2deSStanislav Fomichev return &bpf_strtoul_proto; 16216890896bSStanislav Fomichev default: 16226890896bSStanislav Fomichev break; 16236890896bSStanislav Fomichev } 16246890896bSStanislav Fomichev 16252c78ee89SAlexei Starovoitov if (!bpf_capable()) 16266890896bSStanislav Fomichev return NULL; 16276890896bSStanislav Fomichev 16286890896bSStanislav Fomichev switch (func_id) { 16296890896bSStanislav Fomichev case BPF_FUNC_spin_lock: 16306890896bSStanislav Fomichev return &bpf_spin_lock_proto; 16316890896bSStanislav Fomichev case BPF_FUNC_spin_unlock: 16326890896bSStanislav Fomichev return &bpf_spin_unlock_proto; 16336890896bSStanislav Fomichev case BPF_FUNC_jiffies64: 16346890896bSStanislav Fomichev return &bpf_jiffies64_proto; 1635b7906b70SAndrii Nakryiko case BPF_FUNC_per_cpu_ptr: 1636eaa6bcb7SHao Luo return &bpf_per_cpu_ptr_proto; 1637b7906b70SAndrii Nakryiko case BPF_FUNC_this_cpu_ptr: 163863d9b80dSHao Luo return &bpf_this_cpu_ptr_proto; 1639b00628b1SAlexei Starovoitov case BPF_FUNC_timer_init: 1640b00628b1SAlexei Starovoitov return &bpf_timer_init_proto; 1641b00628b1SAlexei Starovoitov case BPF_FUNC_timer_set_callback: 1642b00628b1SAlexei Starovoitov return &bpf_timer_set_callback_proto; 1643b00628b1SAlexei Starovoitov case BPF_FUNC_timer_start: 1644b00628b1SAlexei Starovoitov return &bpf_timer_start_proto; 1645b00628b1SAlexei Starovoitov case BPF_FUNC_timer_cancel: 1646b00628b1SAlexei Starovoitov return &bpf_timer_cancel_proto; 1647c0a5a21cSKumar Kartikeya Dwivedi case BPF_FUNC_kptr_xchg: 1648c0a5a21cSKumar Kartikeya Dwivedi return &bpf_kptr_xchg_proto; 16495679ff2fSKumar Kartikeya Dwivedi case BPF_FUNC_for_each_map_elem: 16505679ff2fSKumar Kartikeya Dwivedi return &bpf_for_each_map_elem_proto; 16515679ff2fSKumar Kartikeya Dwivedi case BPF_FUNC_loop: 16525679ff2fSKumar Kartikeya Dwivedi return &bpf_loop_proto; 165320571567SDavid Vernet case BPF_FUNC_user_ringbuf_drain: 165420571567SDavid Vernet return &bpf_user_ringbuf_drain_proto; 16558addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_reserve_dynptr: 16568addbfc7SKumar Kartikeya Dwivedi return &bpf_ringbuf_reserve_dynptr_proto; 16578addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_submit_dynptr: 16588addbfc7SKumar Kartikeya Dwivedi return &bpf_ringbuf_submit_dynptr_proto; 16598addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_ringbuf_discard_dynptr: 16608addbfc7SKumar Kartikeya Dwivedi return &bpf_ringbuf_discard_dynptr_proto; 16618addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_dynptr_from_mem: 16628addbfc7SKumar Kartikeya Dwivedi return &bpf_dynptr_from_mem_proto; 16638addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_dynptr_read: 16648addbfc7SKumar Kartikeya Dwivedi return &bpf_dynptr_read_proto; 16658addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_dynptr_write: 16668addbfc7SKumar Kartikeya Dwivedi return &bpf_dynptr_write_proto; 16678addbfc7SKumar Kartikeya Dwivedi case BPF_FUNC_dynptr_data: 16688addbfc7SKumar Kartikeya Dwivedi return &bpf_dynptr_data_proto; 1669c4bcfb38SYonghong Song #ifdef CONFIG_CGROUPS 1670c4bcfb38SYonghong Song case BPF_FUNC_cgrp_storage_get: 1671c4bcfb38SYonghong Song return &bpf_cgrp_storage_get_proto; 1672c4bcfb38SYonghong Song case BPF_FUNC_cgrp_storage_delete: 1673c4bcfb38SYonghong Song return &bpf_cgrp_storage_delete_proto; 1674c4bcfb38SYonghong Song #endif 16756890896bSStanislav Fomichev default: 1676f470378cSJohn Fastabend break; 1677f470378cSJohn Fastabend } 1678f470378cSJohn Fastabend 1679f470378cSJohn Fastabend if (!perfmon_capable()) 1680f470378cSJohn Fastabend return NULL; 1681f470378cSJohn Fastabend 1682f470378cSJohn Fastabend switch (func_id) { 168361ca36c8STobias Klauser case BPF_FUNC_trace_printk: 168461ca36c8STobias Klauser return bpf_get_trace_printk_proto(); 1685f470378cSJohn Fastabend case BPF_FUNC_get_current_task: 1686f470378cSJohn Fastabend return &bpf_get_current_task_proto; 1687a396eda5SDaniel Xu case BPF_FUNC_get_current_task_btf: 1688a396eda5SDaniel Xu return &bpf_get_current_task_btf_proto; 1689f470378cSJohn Fastabend case BPF_FUNC_probe_read_user: 1690f470378cSJohn Fastabend return &bpf_probe_read_user_proto; 1691f470378cSJohn Fastabend case BPF_FUNC_probe_read_kernel: 169271330842SDaniel Borkmann return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1693ff40e510SDaniel Borkmann NULL : &bpf_probe_read_kernel_proto; 1694f470378cSJohn Fastabend case BPF_FUNC_probe_read_user_str: 1695f470378cSJohn Fastabend return &bpf_probe_read_user_str_proto; 1696f470378cSJohn Fastabend case BPF_FUNC_probe_read_kernel_str: 169771330842SDaniel Borkmann return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1698ff40e510SDaniel Borkmann NULL : &bpf_probe_read_kernel_str_proto; 169961ca36c8STobias Klauser case BPF_FUNC_snprintf_btf: 170061ca36c8STobias Klauser return &bpf_snprintf_btf_proto; 17017b15523aSFlorent Revest case BPF_FUNC_snprintf: 17027b15523aSFlorent Revest return &bpf_snprintf_proto; 1703dd6e10fbSDaniel Xu case BPF_FUNC_task_pt_regs: 1704dd6e10fbSDaniel Xu return &bpf_task_pt_regs_proto; 170510aceb62SDave Marchevsky case BPF_FUNC_trace_vprintk: 170610aceb62SDave Marchevsky return bpf_get_trace_vprintk_proto(); 1707f470378cSJohn Fastabend default: 17086890896bSStanislav Fomichev return NULL; 17096890896bSStanislav Fomichev } 17106890896bSStanislav Fomichev } 171113379059SArtem Savkov 1712f0c5941fSKumar Kartikeya Dwivedi void bpf_list_head_free(const struct btf_field *field, void *list_head, 1713f0c5941fSKumar Kartikeya Dwivedi struct bpf_spin_lock *spin_lock) 1714f0c5941fSKumar Kartikeya Dwivedi { 1715f0c5941fSKumar Kartikeya Dwivedi struct list_head *head = list_head, *orig_head = list_head; 1716f0c5941fSKumar Kartikeya Dwivedi 1717f0c5941fSKumar Kartikeya Dwivedi BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head)); 1718f0c5941fSKumar Kartikeya Dwivedi BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head)); 1719f0c5941fSKumar Kartikeya Dwivedi 1720f0c5941fSKumar Kartikeya Dwivedi /* Do the actual list draining outside the lock to not hold the lock for 1721f0c5941fSKumar Kartikeya Dwivedi * too long, and also prevent deadlocks if tracing programs end up 1722f0c5941fSKumar Kartikeya Dwivedi * executing on entry/exit of functions called inside the critical 1723f0c5941fSKumar Kartikeya Dwivedi * section, and end up doing map ops that call bpf_list_head_free for 1724f0c5941fSKumar Kartikeya Dwivedi * the same map value again. 1725f0c5941fSKumar Kartikeya Dwivedi */ 1726f0c5941fSKumar Kartikeya Dwivedi __bpf_spin_lock_irqsave(spin_lock); 1727f0c5941fSKumar Kartikeya Dwivedi if (!head->next || list_empty(head)) 1728f0c5941fSKumar Kartikeya Dwivedi goto unlock; 1729f0c5941fSKumar Kartikeya Dwivedi head = head->next; 1730f0c5941fSKumar Kartikeya Dwivedi unlock: 1731f0c5941fSKumar Kartikeya Dwivedi INIT_LIST_HEAD(orig_head); 1732f0c5941fSKumar Kartikeya Dwivedi __bpf_spin_unlock_irqrestore(spin_lock); 1733f0c5941fSKumar Kartikeya Dwivedi 1734f0c5941fSKumar Kartikeya Dwivedi while (head != orig_head) { 1735f0c5941fSKumar Kartikeya Dwivedi void *obj = head; 1736f0c5941fSKumar Kartikeya Dwivedi 1737f0c5941fSKumar Kartikeya Dwivedi obj -= field->list_head.node_offset; 1738f0c5941fSKumar Kartikeya Dwivedi head = head->next; 1739958cf2e2SKumar Kartikeya Dwivedi /* The contained type can also have resources, including a 1740958cf2e2SKumar Kartikeya Dwivedi * bpf_list_head which needs to be freed. 1741958cf2e2SKumar Kartikeya Dwivedi */ 1742958cf2e2SKumar Kartikeya Dwivedi bpf_obj_free_fields(field->list_head.value_rec, obj); 1743958cf2e2SKumar Kartikeya Dwivedi /* bpf_mem_free requires migrate_disable(), since we can be 1744958cf2e2SKumar Kartikeya Dwivedi * called from map free path as well apart from BPF program (as 1745958cf2e2SKumar Kartikeya Dwivedi * part of map ops doing bpf_obj_free_fields). 1746958cf2e2SKumar Kartikeya Dwivedi */ 1747958cf2e2SKumar Kartikeya Dwivedi migrate_disable(); 1748958cf2e2SKumar Kartikeya Dwivedi bpf_mem_free(&bpf_global_ma, obj); 1749958cf2e2SKumar Kartikeya Dwivedi migrate_enable(); 1750f0c5941fSKumar Kartikeya Dwivedi } 1751f0c5941fSKumar Kartikeya Dwivedi } 1752f0c5941fSKumar Kartikeya Dwivedi 1753958cf2e2SKumar Kartikeya Dwivedi __diag_push(); 1754958cf2e2SKumar Kartikeya Dwivedi __diag_ignore_all("-Wmissing-prototypes", 1755958cf2e2SKumar Kartikeya Dwivedi "Global functions as their definitions will be in vmlinux BTF"); 1756958cf2e2SKumar Kartikeya Dwivedi 1757958cf2e2SKumar Kartikeya Dwivedi void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) 1758958cf2e2SKumar Kartikeya Dwivedi { 1759958cf2e2SKumar Kartikeya Dwivedi struct btf_struct_meta *meta = meta__ign; 1760958cf2e2SKumar Kartikeya Dwivedi u64 size = local_type_id__k; 1761958cf2e2SKumar Kartikeya Dwivedi void *p; 1762958cf2e2SKumar Kartikeya Dwivedi 1763958cf2e2SKumar Kartikeya Dwivedi p = bpf_mem_alloc(&bpf_global_ma, size); 1764958cf2e2SKumar Kartikeya Dwivedi if (!p) 1765958cf2e2SKumar Kartikeya Dwivedi return NULL; 1766958cf2e2SKumar Kartikeya Dwivedi if (meta) 1767958cf2e2SKumar Kartikeya Dwivedi bpf_obj_init(meta->field_offs, p); 1768958cf2e2SKumar Kartikeya Dwivedi return p; 1769958cf2e2SKumar Kartikeya Dwivedi } 1770958cf2e2SKumar Kartikeya Dwivedi 1771ac9f0605SKumar Kartikeya Dwivedi void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) 1772ac9f0605SKumar Kartikeya Dwivedi { 1773ac9f0605SKumar Kartikeya Dwivedi struct btf_struct_meta *meta = meta__ign; 1774ac9f0605SKumar Kartikeya Dwivedi void *p = p__alloc; 1775ac9f0605SKumar Kartikeya Dwivedi 1776ac9f0605SKumar Kartikeya Dwivedi if (meta) 1777ac9f0605SKumar Kartikeya Dwivedi bpf_obj_free_fields(meta->record, p); 1778ac9f0605SKumar Kartikeya Dwivedi bpf_mem_free(&bpf_global_ma, p); 1779ac9f0605SKumar Kartikeya Dwivedi } 1780ac9f0605SKumar Kartikeya Dwivedi 17818cab76ecSKumar Kartikeya Dwivedi static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail) 17828cab76ecSKumar Kartikeya Dwivedi { 17838cab76ecSKumar Kartikeya Dwivedi struct list_head *n = (void *)node, *h = (void *)head; 17848cab76ecSKumar Kartikeya Dwivedi 17858cab76ecSKumar Kartikeya Dwivedi if (unlikely(!h->next)) 17868cab76ecSKumar Kartikeya Dwivedi INIT_LIST_HEAD(h); 17878cab76ecSKumar Kartikeya Dwivedi if (unlikely(!n->next)) 17888cab76ecSKumar Kartikeya Dwivedi INIT_LIST_HEAD(n); 17898cab76ecSKumar Kartikeya Dwivedi tail ? list_add_tail(n, h) : list_add(n, h); 17908cab76ecSKumar Kartikeya Dwivedi } 17918cab76ecSKumar Kartikeya Dwivedi 17928cab76ecSKumar Kartikeya Dwivedi void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) 17938cab76ecSKumar Kartikeya Dwivedi { 17948cab76ecSKumar Kartikeya Dwivedi return __bpf_list_add(node, head, false); 17958cab76ecSKumar Kartikeya Dwivedi } 17968cab76ecSKumar Kartikeya Dwivedi 17978cab76ecSKumar Kartikeya Dwivedi void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) 17988cab76ecSKumar Kartikeya Dwivedi { 17998cab76ecSKumar Kartikeya Dwivedi return __bpf_list_add(node, head, true); 18008cab76ecSKumar Kartikeya Dwivedi } 18018cab76ecSKumar Kartikeya Dwivedi 18028cab76ecSKumar Kartikeya Dwivedi static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tail) 18038cab76ecSKumar Kartikeya Dwivedi { 18048cab76ecSKumar Kartikeya Dwivedi struct list_head *n, *h = (void *)head; 18058cab76ecSKumar Kartikeya Dwivedi 18068cab76ecSKumar Kartikeya Dwivedi if (unlikely(!h->next)) 18078cab76ecSKumar Kartikeya Dwivedi INIT_LIST_HEAD(h); 18088cab76ecSKumar Kartikeya Dwivedi if (list_empty(h)) 18098cab76ecSKumar Kartikeya Dwivedi return NULL; 18108cab76ecSKumar Kartikeya Dwivedi n = tail ? h->prev : h->next; 18118cab76ecSKumar Kartikeya Dwivedi list_del_init(n); 18128cab76ecSKumar Kartikeya Dwivedi return (struct bpf_list_node *)n; 18138cab76ecSKumar Kartikeya Dwivedi } 18148cab76ecSKumar Kartikeya Dwivedi 18158cab76ecSKumar Kartikeya Dwivedi struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) 18168cab76ecSKumar Kartikeya Dwivedi { 18178cab76ecSKumar Kartikeya Dwivedi return __bpf_list_del(head, false); 18188cab76ecSKumar Kartikeya Dwivedi } 18198cab76ecSKumar Kartikeya Dwivedi 18208cab76ecSKumar Kartikeya Dwivedi struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) 18218cab76ecSKumar Kartikeya Dwivedi { 18228cab76ecSKumar Kartikeya Dwivedi return __bpf_list_del(head, true); 18238cab76ecSKumar Kartikeya Dwivedi } 18248cab76ecSKumar Kartikeya Dwivedi 182590660309SDavid Vernet /** 182690660309SDavid Vernet * bpf_task_acquire - Acquire a reference to a task. A task acquired by this 182790660309SDavid Vernet * kfunc which is not stored in a map as a kptr, must be released by calling 182890660309SDavid Vernet * bpf_task_release(). 182990660309SDavid Vernet * @p: The task on which a reference is being acquired. 183090660309SDavid Vernet */ 183190660309SDavid Vernet struct task_struct *bpf_task_acquire(struct task_struct *p) 183290660309SDavid Vernet { 183390660309SDavid Vernet refcount_inc(&p->rcu_users); 183490660309SDavid Vernet return p; 183590660309SDavid Vernet } 183690660309SDavid Vernet 183790660309SDavid Vernet /** 183890660309SDavid Vernet * bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task 183990660309SDavid Vernet * kptr acquired by this kfunc which is not subsequently stored in a map, must 184090660309SDavid Vernet * be released by calling bpf_task_release(). 184190660309SDavid Vernet * @pp: A pointer to a task kptr on which a reference is being acquired. 184290660309SDavid Vernet */ 184390660309SDavid Vernet struct task_struct *bpf_task_kptr_get(struct task_struct **pp) 184490660309SDavid Vernet { 184590660309SDavid Vernet struct task_struct *p; 184690660309SDavid Vernet 184790660309SDavid Vernet rcu_read_lock(); 184890660309SDavid Vernet p = READ_ONCE(*pp); 184990660309SDavid Vernet 185090660309SDavid Vernet /* Another context could remove the task from the map and release it at 185190660309SDavid Vernet * any time, including after we've done the lookup above. This is safe 185290660309SDavid Vernet * because we're in an RCU read region, so the task is guaranteed to 185390660309SDavid Vernet * remain valid until at least the rcu_read_unlock() below. 185490660309SDavid Vernet */ 185590660309SDavid Vernet if (p && !refcount_inc_not_zero(&p->rcu_users)) 185690660309SDavid Vernet /* If the task had been removed from the map and freed as 185790660309SDavid Vernet * described above, refcount_inc_not_zero() will return false. 185890660309SDavid Vernet * The task will be freed at some point after the current RCU 185990660309SDavid Vernet * gp has ended, so just return NULL to the user. 186090660309SDavid Vernet */ 186190660309SDavid Vernet p = NULL; 186290660309SDavid Vernet rcu_read_unlock(); 186390660309SDavid Vernet 186490660309SDavid Vernet return p; 186590660309SDavid Vernet } 186690660309SDavid Vernet 186790660309SDavid Vernet /** 186890660309SDavid Vernet * bpf_task_release - Release the reference acquired on a struct task_struct *. 186990660309SDavid Vernet * If this kfunc is invoked in an RCU read region, the task_struct is 187090660309SDavid Vernet * guaranteed to not be freed until the current grace period has ended, even if 187190660309SDavid Vernet * its refcount drops to 0. 187290660309SDavid Vernet * @p: The task on which a reference is being released. 187390660309SDavid Vernet */ 187490660309SDavid Vernet void bpf_task_release(struct task_struct *p) 187590660309SDavid Vernet { 187690660309SDavid Vernet if (!p) 187790660309SDavid Vernet return; 187890660309SDavid Vernet 187990660309SDavid Vernet put_task_struct_rcu_user(p); 188090660309SDavid Vernet } 188190660309SDavid Vernet 1882fd264ca0SYonghong Song void *bpf_cast_to_kern_ctx(void *obj) 1883fd264ca0SYonghong Song { 1884fd264ca0SYonghong Song return obj; 1885fd264ca0SYonghong Song } 1886fd264ca0SYonghong Song 1887*a35b9af4SYonghong Song void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) 1888*a35b9af4SYonghong Song { 1889*a35b9af4SYonghong Song return obj__ign; 1890*a35b9af4SYonghong Song } 1891*a35b9af4SYonghong Song 1892958cf2e2SKumar Kartikeya Dwivedi __diag_pop(); 1893958cf2e2SKumar Kartikeya Dwivedi 1894958cf2e2SKumar Kartikeya Dwivedi BTF_SET8_START(generic_btf_ids) 189513379059SArtem Savkov #ifdef CONFIG_KEXEC_CORE 189613379059SArtem Savkov BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) 189713379059SArtem Savkov #endif 1898958cf2e2SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) 1899ac9f0605SKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) 19008cab76ecSKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_list_push_front) 19018cab76ecSKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_list_push_back) 19028cab76ecSKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) 19038cab76ecSKumar Kartikeya Dwivedi BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL) 190490660309SDavid Vernet BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) 190590660309SDavid Vernet BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) 190690660309SDavid Vernet BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) 1907958cf2e2SKumar Kartikeya Dwivedi BTF_SET8_END(generic_btf_ids) 190813379059SArtem Savkov 1909958cf2e2SKumar Kartikeya Dwivedi static const struct btf_kfunc_id_set generic_kfunc_set = { 191013379059SArtem Savkov .owner = THIS_MODULE, 1911958cf2e2SKumar Kartikeya Dwivedi .set = &generic_btf_ids, 191213379059SArtem Savkov }; 191313379059SArtem Savkov 1914cfe14564SYonghong Song 191590660309SDavid Vernet BTF_ID_LIST(generic_dtor_ids) 191690660309SDavid Vernet BTF_ID(struct, task_struct) 191790660309SDavid Vernet BTF_ID(func, bpf_task_release) 191890660309SDavid Vernet 1919cfe14564SYonghong Song BTF_SET8_START(common_btf_ids) 1920fd264ca0SYonghong Song BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) 1921*a35b9af4SYonghong Song BTF_ID_FLAGS(func, bpf_rdonly_cast) 1922cfe14564SYonghong Song BTF_SET8_END(common_btf_ids) 1923cfe14564SYonghong Song 1924cfe14564SYonghong Song static const struct btf_kfunc_id_set common_kfunc_set = { 1925cfe14564SYonghong Song .owner = THIS_MODULE, 1926cfe14564SYonghong Song .set = &common_btf_ids, 1927cfe14564SYonghong Song }; 1928cfe14564SYonghong Song 192913379059SArtem Savkov static int __init kfunc_init(void) 193013379059SArtem Savkov { 19318cab76ecSKumar Kartikeya Dwivedi int ret; 193290660309SDavid Vernet const struct btf_id_dtor_kfunc generic_dtors[] = { 193390660309SDavid Vernet { 193490660309SDavid Vernet .btf_id = generic_dtor_ids[0], 193590660309SDavid Vernet .kfunc_btf_id = generic_dtor_ids[1] 193690660309SDavid Vernet }, 193790660309SDavid Vernet }; 19388cab76ecSKumar Kartikeya Dwivedi 19398cab76ecSKumar Kartikeya Dwivedi ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); 194090660309SDavid Vernet ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); 194190660309SDavid Vernet ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); 1942cfe14564SYonghong Song ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, 194390660309SDavid Vernet ARRAY_SIZE(generic_dtors), 194490660309SDavid Vernet THIS_MODULE); 1945cfe14564SYonghong Song return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set); 194613379059SArtem Savkov } 194713379059SArtem Savkov 194813379059SArtem Savkov late_initcall(kfunc_init); 1949