// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/rcupdate.h>
#include <linux/random.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include <linux/jiffies.h>
#include <linux/pid_namespace.h>
#include <linux/proc_ns.h>
#include <linux/security.h>
#include <linux/btf_ids.h>

#include "../../lib/kstrtox.h"

/* If kernel subsystem is allowing eBPF programs to call this function,
 * inside its own verifier_ops->get_func_proto() callback it should return
 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
 *
 * Different map implementations will rely on rcu in map methods
 * lookup/update/delete, therefore eBPF programs must run under rcu lock
 * if program is allowed to access maps, so check rcu_read_lock_held in
 * all three functions.
 */
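/* Illustrative sketch (not part of the original file): a subsystem's
 * get_func_proto callback would typically expose these protos like so;
 * the callback name below is hypothetical.
 *
 *	static const struct bpf_func_proto *
 *	example_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_map_lookup_elem:
 *			return &bpf_map_lookup_elem_proto;
 *		case BPF_FUNC_map_update_elem:
 *			return &bpf_map_update_elem_proto;
 *		case BPF_FUNC_map_delete_elem:
 *			return &bpf_map_delete_elem_proto;
 *		default:
 *			return NULL;
 *		}
 *	}
 */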
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}

const struct bpf_func_proto bpf_map_lookup_elem_proto = {
	.func		= bpf_map_lookup_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
	   void *, value, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return map->ops->map_update_elem(map, key, value, flags);
}

const struct bpf_func_proto bpf_map_update_elem_proto = {
	.func		= bpf_map_update_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_PTR_TO_MAP_VALUE,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return map->ops->map_delete_elem(map, key);
}

const struct bpf_func_proto bpf_map_delete_elem_proto = {
	.func		= bpf_map_delete_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
};

BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
{
	return map->ops->map_push_elem(map, value, flags);
}

const struct bpf_func_proto bpf_map_push_elem_proto = {
	.func		= bpf_map_push_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
	.arg3_type	= ARG_ANYTHING,
};
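/* bpf_map_push_elem() above and bpf_map_pop_elem()/bpf_map_peek_elem() below
 * back the keyless map types (BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK): the
 * value and flags (e.g. BPF_EXIST to overwrite the oldest element when full)
 * fully describe the operation, so no key argument exists.
 */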
BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_pop_elem(map, value);
}

const struct bpf_func_proto bpf_map_pop_elem_proto = {
	.func		= bpf_map_pop_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
};

BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
{
	return map->ops->map_peek_elem(map, value);
}

const struct bpf_func_proto bpf_map_peek_elem_proto = {
	.func		= bpf_map_peek_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
};

BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
	return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}

const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = {
	.func		= bpf_map_lookup_percpu_elem,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_MAP_KEY,
	.arg3_type	= ARG_ANYTHING,
};

const struct bpf_func_proto bpf_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
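/* Note (an assumption about the implementation elsewhere in the tree):
 * bpf_user_rnd_u32() is defined in kernel/bpf/core.c with its own per-cpu
 * prandom state, so BPF programs do not disturb the sequence of the kernel's
 * shared PRNG.
 */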
BPF_CALL_0(bpf_get_smp_processor_id)
{
	return smp_processor_id();
}

const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
	.func		= bpf_get_smp_processor_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_numa_node_id)
{
	return numa_node_id();
}

const struct bpf_func_proto bpf_get_numa_node_id_proto = {
	.func		= bpf_get_numa_node_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_ns)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func		= bpf_ktime_get_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_boot_ns)
{
	/* NMI safe access to clock boottime */
	return ktime_get_boot_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
	.func		= bpf_ktime_get_boot_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_ktime_get_coarse_ns)
{
	return ktime_get_coarse_ns();
}

const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
	.func		= bpf_ktime_get_coarse_ns,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_pid_tgid)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		return -EINVAL;

	return (u64) task->tgid << 32 | task->pid;
}

const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
	.func		= bpf_get_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};
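/* Illustrative decomposition of the packed return value:
 *
 *	u64 v = bpf_get_current_pid_tgid();
 *	u32 tgid = v >> 32;	// user space's notion of process id
 *	u32 pid = (u32)v;	// user space's notion of thread id
 */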
BPF_CALL_0(bpf_get_current_uid_gid)
{
	struct task_struct *task = current;
	kuid_t uid;
	kgid_t gid;

	if (unlikely(!task))
		return -EINVAL;

	current_uid_gid(&uid, &gid);
	return (u64) from_kgid(&init_user_ns, gid) << 32 |
		     from_kuid(&init_user_ns, uid);
}

const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
	.func		= bpf_get_current_uid_gid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
	struct task_struct *task = current;

	if (unlikely(!task))
		goto err_clear;

	/* Verifier guarantees that size > 0 */
	strscpy(buf, task->comm, size);
	return 0;
err_clear:
	memset(buf, 0, size);
	return -EINVAL;
}

const struct bpf_func_proto bpf_get_current_comm_proto = {
	.func		= bpf_get_current_comm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	arch_spin_lock(l);
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;

	arch_spin_unlock(l);
}

#else

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
	do {
		atomic_cond_read_relaxed(l, !VAL);
	} while (atomic_xchg(l, 1));
}

static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
{
	atomic_t *l = (void *)lock;

	atomic_set_release(l, 0);
}

#endif
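/* The saved IRQ flags must survive between the bpf_spin_lock() and
 * bpf_spin_unlock() helper calls, and a helper cannot hand them back to the
 * BPF program, so they are parked in a per-cpu variable. With IRQs disabled
 * for the whole critical section, the slot cannot be clobbered on that CPU.
 */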
static DEFINE_PER_CPU(unsigned long, irqsave_flags);

static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	__bpf_spin_lock(lock);
	__this_cpu_write(irqsave_flags, flags);
}

notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_lock_irqsave(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_lock_proto = {
	.func		= bpf_spin_lock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};

static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
	unsigned long flags;

	flags = __this_cpu_read(irqsave_flags);
	__bpf_spin_unlock(lock);
	local_irq_restore(flags);
}

notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
{
	__bpf_spin_unlock_irqrestore(lock);
	return 0;
}

const struct bpf_func_proto bpf_spin_unlock_proto = {
	.func		= bpf_spin_unlock,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
};
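/* Used by map update/lookup paths operating under BPF_F_LOCK: the value is
 * copied while the element's embedded bpf_spin_lock is held, so readers
 * never observe a torn map value.
 */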
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
			   bool lock_src)
{
	struct bpf_spin_lock *lock;

	if (lock_src)
		lock = src + map->spin_lock_off;
	else
		lock = dst + map->spin_lock_off;
	preempt_disable();
	__bpf_spin_lock_irqsave(lock);
	copy_map_value(map, dst, src);
	__bpf_spin_unlock_irqrestore(lock);
	preempt_enable();
}

BPF_CALL_0(bpf_jiffies64)
{
	return get_jiffies_64();
}

const struct bpf_func_proto bpf_jiffies64_proto = {
	.func		= bpf_jiffies64,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
	struct cgroup *cgrp;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	cgrp_id = cgroup_id(cgrp);
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
	.func		= bpf_get_current_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
	struct cgroup *cgrp;
	struct cgroup *ancestor;
	u64 cgrp_id;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ancestor = cgroup_ancestor(cgrp, ancestor_level);
	cgrp_id = ancestor ? cgroup_id(ancestor) : 0;
	rcu_read_unlock();

	return cgrp_id;
}

const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
	.func		= bpf_get_current_ancestor_cgroup_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};
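/* Illustrative usage (not from this file): both helpers walk the default
 * (cgroup v2) hierarchy, so
 *
 *	u64 self = bpf_get_current_cgroup_id();
 *	u64 root = bpf_get_current_ancestor_cgroup_id(0);
 *
 * fetch the task's own cgroup id and the id of its root ancestor;
 * out-of-range ancestor levels yield 0.
 */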
#ifdef CONFIG_CGROUP_BPF

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	struct bpf_cg_run_ctx *ctx;
	void *ptr;

	/* get current cgroup storage from BPF run context */
	ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
	storage = ctx->prog_item->cgroup_storage[stype];

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func		= bpf_get_local_storage,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MAP_VALUE,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};
#endif

#define BPF_STRTOX_BASE_MASK 0x1F

static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *cur_buf = buf;
	size_t cur_len = buf_len;
	unsigned int consumed;
	size_t val_len;
	char str[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	if (base != 0 && base != 8 && base != 10 && base != 16)
		return -EINVAL;

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	while (cur_buf < buf + buf_len && isspace(*cur_buf))
		++cur_buf;

	*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
	if (*is_negative)
		++cur_buf;

	consumed = cur_buf - buf;
	cur_len -= consumed;
	if (!cur_len)
		return -EINVAL;

	cur_len = min(cur_len, sizeof(str) - 1);
	memcpy(str, cur_buf, cur_len);
	str[cur_len] = '\0';
	cur_buf = str;

	cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
	val_len = _parse_integer(cur_buf, base, res);

	if (val_len & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (val_len == 0)
		return -EINVAL;

	cur_buf += val_len;
	consumed += cur_buf - str;

	return consumed;
}
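/* The low five bits of the strtol/strtoul flags select the numeric base
 * (BPF_STRTOX_BASE_MASK): 0 auto-detects from a "0x"/"0" prefix, otherwise
 * only 8, 10 and 16 are accepted; all other flag bits must be zero.
 */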
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative) {
		if ((long long)-_res > 0)
			return -ERANGE;
		*res = -_res;
	} else {
		if ((long long)_res < 0)
			return -ERANGE;
		*res = _res;
	}
	return err;
}

BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long _res;
	int err;

	err = __bpf_strtoll(buf, buf_len, flags, &_res);
	if (err < 0)
		return err;
	if (_res != (long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
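/* Note: like __bpf_strtoull(), bpf_strtol() and bpf_strtoul() return the
 * number of bytes consumed from buf on success, not the parsed value; the
 * value itself is written through *res.
 */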
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long _res;
	bool is_negative;
	int err;

	err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
	if (err < 0)
		return err;
	if (is_negative)
		return -EINVAL;
	if (_res != (unsigned long)_res)
		return -ERANGE;
	*res = _res;
	return err;
}

const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
#endif

BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
{
	return strncmp(s1, s2, s1_sz);
}

const struct bpf_func_proto bpf_strncmp_proto = {
	.func		= bpf_strncmp,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
};

BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
	   struct bpf_pidns_info *, nsdata, u32, size)
{
	struct task_struct *task = current;
	struct pid_namespace *pidns;
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_pidns_info)))
		goto clear;

	if (unlikely((u64)(dev_t)dev != dev))
		goto clear;

	if (unlikely(!task))
		goto clear;

	pidns = task_active_pid_ns(task);
	if (unlikely(!pidns)) {
		err = -ENOENT;
		goto clear;
	}

	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
		goto clear;

	nsdata->pid = task_pid_nr_ns(task, pidns);
	nsdata->tgid = task_tgid_nr_ns(task, pidns);
	return 0;
clear:
	memset((void *)nsdata, 0, (size_t) size);
	return err;
}

const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
	.func		= bpf_get_ns_current_pid_tgid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
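/* Typical usage of bpf_get_ns_current_pid_tgid() (illustrative): user space
 * stat()s /proc/self/ns/pid and passes st_dev/st_ino as dev/ino, so the
 * helper fills nsdata only when the task runs in the expected pid namespace.
 */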
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}

const struct bpf_func_proto bpf_event_output_data_proto = {
	.func		= bpf_event_output_data,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
	   const void __user *, user_ptr)
{
	int ret = copy_from_user(dst, user_ptr, size);

	if (unlikely(ret)) {
		memset(dst, 0, size);
		ret = -EFAULT;
	}

	return ret;
}

const struct bpf_func_proto bpf_copy_from_user_proto = {
	.func		= bpf_copy_from_user,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
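/* copy_from_user() may fault and sleep, so bpf_copy_from_user() above and
 * bpf_copy_from_user_task() below are only offered to sleepable BPF
 * programs.
 */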
BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
	   const void __user *, user_ptr, struct task_struct *, tsk, u64, flags)
{
	int ret;

	/* flags is not used yet */
	if (unlikely(flags))
		return -EINVAL;

	if (unlikely(!size))
		return 0;

	ret = access_process_vm(tsk, (unsigned long)user_ptr, dst, size, 0);
	if (ret == size)
		return 0;

	memset(dst, 0, size);
	/* Return -EFAULT for partial read */
	return ret < 0 ? ret : -EFAULT;
}

const struct bpf_func_proto bpf_copy_from_user_task_proto = {
	.func		= bpf_copy_from_user_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_BTF_ID,
	.arg4_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg5_type	= ARG_ANYTHING
};

BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
{
	if (cpu >= nr_cpu_ids)
		return (unsigned long)NULL;

	return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
}

const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
	.func		= bpf_per_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
	return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
}

const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
	.func		= bpf_this_cpu_ptr,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
};

static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
		size_t bufsz)
{
	void __user *user_ptr = (__force void __user *)unsafe_ptr;

	buf[0] = 0;

	switch (fmt_ptype) {
	case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
		if ((unsigned long)unsafe_ptr < TASK_SIZE)
			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
		fallthrough;
#endif
	case 'k':
		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
	case 'u':
		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
	}

	return -EINVAL;
}
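/* fmt_ptype in bpf_trace_copy_string() distinguishes the string conversions:
 * 'k' (%pks) always reads kernel memory, 'u' (%pus) always reads user
 * memory, and plain 's' guesses based on TASK_SIZE on architectures with
 * non-overlapping address spaces.
 */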
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
 * arguments representation.
 */
#define MAX_BPRINTF_BUF_LEN	512

/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL	3
struct bpf_bprintf_buffers {
	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
};
static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);

static int try_get_fmt_tmp_buf(char **tmp_buf)
{
	struct bpf_bprintf_buffers *bufs;
	int nest_level;

	preempt_disable();
	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
		this_cpu_dec(bpf_bprintf_nest_level);
		preempt_enable();
		return -EBUSY;
	}
	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
	*tmp_buf = bufs->tmp_bufs[nest_level - 1];

	return 0;
}

void bpf_bprintf_cleanup(void)
{
	if (this_cpu_read(bpf_bprintf_nest_level)) {
		this_cpu_dec(bpf_bprintf_nest_level);
		preempt_enable();
	}
}
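/* The MAX_BPRINTF_NEST_LEVEL slots above are presumably sized for the
 * execution contexts that can stack on one CPU once preemption is disabled:
 * process context, interrupt context and NMI.
 */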
/*
 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
 *
 * Returns a negative value if fmt is an invalid format string or 0 otherwise.
 *
 * This can be used in two ways:
 * - Format string verification only: when bin_args is NULL
 * - Arguments preparation: in addition to the above verification, it writes in
 *   bin_args a binary representation of arguments usable by bstr_printf where
 *   pointers from BPF have been sanitized.
 *
 * In argument preparation mode, if 0 is returned, safe temporary buffers are
 * allocated and bpf_bprintf_cleanup should be called to free them after use.
 */
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
			u32 **bin_args, u32 num_args)
{
	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
	size_t sizeof_cur_arg, sizeof_cur_ip;
	int err, i, num_spec = 0;
	u64 cur_arg;
	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";

	fmt_end = strnchr(fmt, fmt_size, 0);
	if (!fmt_end)
		return -EINVAL;
	fmt_size = fmt_end - fmt;

	if (bin_args) {
		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
			return -EBUSY;

		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
		*bin_args = (u32 *)tmp_buf;
	}

	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
			err = -EINVAL;
			goto out;
		}

		if (fmt[i] != '%')
			continue;

		if (fmt[i + 1] == '%') {
			i++;
			continue;
		}

		if (num_spec >= num_args) {
			err = -EINVAL;
			goto out;
		}

		/* The string is zero-terminated so if fmt[i] != 0, we can
		 * always access fmt[i + 1], in the worst case it will be a 0
		 */
		i++;

		/* skip optional "[0 +-][num]" width formatting field */
		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
		       fmt[i] == ' ')
			i++;
		if (fmt[i] >= '1' && fmt[i] <= '9') {
			i++;
			while (fmt[i] >= '0' && fmt[i] <= '9')
				i++;
		}

		if (fmt[i] == 'p') {
			sizeof_cur_arg = sizeof(long);

			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
			    fmt[i + 2] == 's') {
				fmt_ptype = fmt[i + 1];
				i += 2;
				goto fmt_str;
			}

			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
			    fmt[i + 1] == 'S') {
				/* just kernel pointers */
				if (tmp_buf)
					cur_arg = raw_args[num_spec];
				i++;
				goto nocopy_fmt;
			}

			if (fmt[i + 1] == 'B') {
				if (tmp_buf)  {
					err = snprintf(tmp_buf,
						       (tmp_buf_end - tmp_buf),
						       "%pB",
						       (void *)(long)raw_args[num_spec]);
					tmp_buf += (err + 1);
				}

				i++;
				num_spec++;
				continue;
			}

			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
				err = -EINVAL;
				goto out;
			}

			i += 2;
			if (!tmp_buf)
				goto nocopy_fmt;

			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
						       sizeof_cur_ip);
			if (err < 0)
				memset(cur_ip, 0, sizeof_cur_ip);

			/* hack: bstr_printf expects IP addresses to be
			 * pre-formatted as strings, ironically, the easiest way
			 * to do that is to call snprintf.
			 */
			ip_spec[2] = fmt[i - 1];
			ip_spec[3] = fmt[i];
			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
				       ip_spec, &cur_ip);

			tmp_buf += err + 1;
			num_spec++;

			continue;
		} else if (fmt[i] == 's') {
			fmt_ptype = fmt[i];
fmt_str:
			if (fmt[i + 1] != 0 &&
			    !isspace(fmt[i + 1]) &&
			    !ispunct(fmt[i + 1])) {
				err = -EINVAL;
				goto out;
			}

			if (!tmp_buf)
				goto nocopy_fmt;

			if (tmp_buf_end == tmp_buf) {
				err = -ENOSPC;
				goto out;
			}

			unsafe_ptr = (char *)(long)raw_args[num_spec];
			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
						    fmt_ptype,
						    tmp_buf_end - tmp_buf);
			if (err < 0) {
				tmp_buf[0] = '\0';
				err = 1;
			}

			tmp_buf += err;
			num_spec++;

			continue;
		} else if (fmt[i] == 'c') {
			if (!tmp_buf)
				goto nocopy_fmt;

			if (tmp_buf_end == tmp_buf) {
				err = -ENOSPC;
				goto out;
			}

			*tmp_buf = raw_args[num_spec];
			tmp_buf++;
			num_spec++;

			continue;
		}

		sizeof_cur_arg = sizeof(int);

		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long);
			i++;
		}
		if (fmt[i] == 'l') {
			sizeof_cur_arg = sizeof(long long);
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
		    fmt[i] != 'x' && fmt[i] != 'X') {
			err = -EINVAL;
			goto out;
		}

		if (tmp_buf)
			cur_arg = raw_args[num_spec];
nocopy_fmt:
		if (tmp_buf) {
			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
				err = -ENOSPC;
				goto out;
			}

			if (sizeof_cur_arg == 8) {
				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
			} else {
				*(u32 *)tmp_buf = (u32)(long)cur_arg;
			}
			tmp_buf += sizeof_cur_arg;
		}
		num_spec++;
	}

	err = 0;
out:
	if (err)
		bpf_bprintf_cleanup();
	return err;
}

BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
	   const void *, data, u32, data_len)
{
	int err, num_args;
	u32 *bin_args;

	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
	    (data_len && !data))
		return -EINVAL;
	num_args = data_len / 8;

	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
	 * can safely give an unbounded size.
	 */
	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
	if (err < 0)
		return err;

	err = bstr_printf(str, str_size, fmt, bin_args);

	bpf_bprintf_cleanup();

	return err + 1;
}

const struct bpf_func_proto bpf_snprintf_proto = {
	.func		= bpf_snprintf,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_CONST_STR,
	.arg4_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
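/* Illustrative BPF-side call (sketch, not from this file): data must point
 * to an array of u64 cells, one per conversion specifier, hence the
 * data_len % 8 check above. pid, comm and buf are assumed names.
 *
 *	static const char fmt[] = "pid %d comm %s";
 *	u64 args[] = { pid, (long)comm };
 *	bpf_snprintf(buf, sizeof(buf), fmt, args, sizeof(args));
 */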
/* BPF map elements can contain 'struct bpf_timer'.
 * Such map owns all of its BPF timers.
 * 'struct bpf_timer' is allocated as part of map element allocation
 * and it's zero initialized.
 * That space is used to keep 'struct bpf_timer_kern'.
 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
 * remembers 'struct bpf_map *' pointer it's part of.
 * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn.
 * bpf_timer_start() arms the timer.
 * If user space reference to a map goes to zero at this point
 * ops->map_release_uref callback is responsible for cancelling the timers,
 * freeing their memory, and decrementing prog's refcnts.
 * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
 * Inner maps can contain bpf timers as well. ops->map_release_uref is
 * freeing the timers when inner map is replaced or deleted by user space.
 */
struct bpf_hrtimer {
	struct hrtimer timer;
	struct bpf_map *map;
	struct bpf_prog *prog;
	void __rcu *callback_fn;
	void *value;
};

/* the actual struct hidden inside uapi struct bpf_timer */
struct bpf_timer_kern {
	struct bpf_hrtimer *timer;
	/* bpf_spin_lock is used here instead of spinlock_t to make
	 * sure that it always fits into space reserved by struct bpf_timer
	 * regardless of LOCKDEP and spinlock debug flags.
	 */
	struct bpf_spin_lock lock;
} __attribute__((aligned(8)));

static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
1134b00628b1SAlexei Starovoitov */ 1135b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, t); 1136b00628b1SAlexei Starovoitov if (map->map_type == BPF_MAP_TYPE_ARRAY) { 1137b00628b1SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 1138b00628b1SAlexei Starovoitov 1139b00628b1SAlexei Starovoitov /* compute the key */ 1140b00628b1SAlexei Starovoitov idx = ((char *)value - array->value) / array->elem_size; 1141b00628b1SAlexei Starovoitov key = &idx; 1142b00628b1SAlexei Starovoitov } else { /* hash or lru */ 1143b00628b1SAlexei Starovoitov key = value - round_up(map->key_size, 8); 1144b00628b1SAlexei Starovoitov } 1145b00628b1SAlexei Starovoitov 1146102acbacSKees Cook callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); 1147bfc6bb74SAlexei Starovoitov /* The verifier checked that return value is zero. */ 1148b00628b1SAlexei Starovoitov 1149b00628b1SAlexei Starovoitov this_cpu_write(hrtimer_running, NULL); 1150b00628b1SAlexei Starovoitov out: 1151b00628b1SAlexei Starovoitov return HRTIMER_NORESTART; 1152b00628b1SAlexei Starovoitov } 1153b00628b1SAlexei Starovoitov 1154b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, 1155b00628b1SAlexei Starovoitov u64, flags) 1156b00628b1SAlexei Starovoitov { 1157b00628b1SAlexei Starovoitov clockid_t clockid = flags & (MAX_CLOCKS - 1); 1158b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1159b00628b1SAlexei Starovoitov int ret = 0; 1160b00628b1SAlexei Starovoitov 1161b00628b1SAlexei Starovoitov BUILD_BUG_ON(MAX_CLOCKS != 16); 1162b00628b1SAlexei Starovoitov BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); 1163b00628b1SAlexei Starovoitov BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); 1164b00628b1SAlexei Starovoitov 1165b00628b1SAlexei Starovoitov if (in_nmi()) 1166b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1167b00628b1SAlexei Starovoitov 1168b00628b1SAlexei Starovoitov if (flags >= MAX_CLOCKS || 1169b00628b1SAlexei Starovoitov /* similar to timerfd except _ALARM variants are not supported */ 1170b00628b1SAlexei Starovoitov (clockid != CLOCK_MONOTONIC && 1171b00628b1SAlexei Starovoitov clockid != CLOCK_REALTIME && 1172b00628b1SAlexei Starovoitov clockid != CLOCK_BOOTTIME)) 1173b00628b1SAlexei Starovoitov return -EINVAL; 1174b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1175b00628b1SAlexei Starovoitov t = timer->timer; 1176b00628b1SAlexei Starovoitov if (t) { 1177b00628b1SAlexei Starovoitov ret = -EBUSY; 1178b00628b1SAlexei Starovoitov goto out; 1179b00628b1SAlexei Starovoitov } 1180b00628b1SAlexei Starovoitov if (!atomic64_read(&map->usercnt)) { 1181b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space 1182b00628b1SAlexei Starovoitov * or pinned in bpffs. 
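 * Otherwise map_release_uref has either run already or will never
 * run, so a timer created now could never be cancelled or freed.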
1183b00628b1SAlexei Starovoitov */ 1184b00628b1SAlexei Starovoitov ret = -EPERM; 1185b00628b1SAlexei Starovoitov goto out; 1186b00628b1SAlexei Starovoitov } 1187b00628b1SAlexei Starovoitov /* allocate hrtimer via map_kmalloc to use memcg accounting */ 1188b00628b1SAlexei Starovoitov t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); 1189b00628b1SAlexei Starovoitov if (!t) { 1190b00628b1SAlexei Starovoitov ret = -ENOMEM; 1191b00628b1SAlexei Starovoitov goto out; 1192b00628b1SAlexei Starovoitov } 1193b00628b1SAlexei Starovoitov t->value = (void *)timer - map->timer_off; 1194b00628b1SAlexei Starovoitov t->map = map; 1195b00628b1SAlexei Starovoitov t->prog = NULL; 1196b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, NULL); 1197b00628b1SAlexei Starovoitov hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); 1198b00628b1SAlexei Starovoitov t->timer.function = bpf_timer_cb; 1199b00628b1SAlexei Starovoitov timer->timer = t; 1200b00628b1SAlexei Starovoitov out: 1201b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1202b00628b1SAlexei Starovoitov return ret; 1203b00628b1SAlexei Starovoitov } 1204b00628b1SAlexei Starovoitov 1205b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_init_proto = { 1206b00628b1SAlexei Starovoitov .func = bpf_timer_init, 1207b00628b1SAlexei Starovoitov .gpl_only = true, 1208b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1209b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1210b00628b1SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 1211b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 1212b00628b1SAlexei Starovoitov }; 1213b00628b1SAlexei Starovoitov 1214b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, 1215b00628b1SAlexei Starovoitov struct bpf_prog_aux *, aux) 1216b00628b1SAlexei Starovoitov { 1217b00628b1SAlexei Starovoitov struct bpf_prog *prev, *prog = aux->prog; 1218b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1219b00628b1SAlexei Starovoitov int ret = 0; 1220b00628b1SAlexei Starovoitov 1221b00628b1SAlexei Starovoitov if (in_nmi()) 1222b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1223b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1224b00628b1SAlexei Starovoitov t = timer->timer; 1225b00628b1SAlexei Starovoitov if (!t) { 1226b00628b1SAlexei Starovoitov ret = -EINVAL; 1227b00628b1SAlexei Starovoitov goto out; 1228b00628b1SAlexei Starovoitov } 1229b00628b1SAlexei Starovoitov if (!atomic64_read(&t->map->usercnt)) { 1230b00628b1SAlexei Starovoitov /* maps with timers must be either held by user space 1231b00628b1SAlexei Starovoitov * or pinned in bpffs. Otherwise timer might still be 1232b00628b1SAlexei Starovoitov * running even when bpf prog is detached and user space 1233b00628b1SAlexei Starovoitov * is gone, since map_release_uref won't ever be called. 1234b00628b1SAlexei Starovoitov */ 1235b00628b1SAlexei Starovoitov ret = -EPERM; 1236b00628b1SAlexei Starovoitov goto out; 1237b00628b1SAlexei Starovoitov } 1238b00628b1SAlexei Starovoitov prev = t->prog; 1239b00628b1SAlexei Starovoitov if (prev != prog) { 1240b00628b1SAlexei Starovoitov /* Bump prog refcnt once. Every bpf_timer_set_callback() 1241b00628b1SAlexei Starovoitov * can pick different callback_fn-s within the same prog. 
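 * The refcnt is dropped in drop_prog_refcnt() when the timer is
 * cancelled or freed, or right below when another prog's callback
 * replaces this one.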
1242b00628b1SAlexei Starovoitov */ 1243b00628b1SAlexei Starovoitov prog = bpf_prog_inc_not_zero(prog); 1244b00628b1SAlexei Starovoitov if (IS_ERR(prog)) { 1245b00628b1SAlexei Starovoitov ret = PTR_ERR(prog); 1246b00628b1SAlexei Starovoitov goto out; 1247b00628b1SAlexei Starovoitov } 1248b00628b1SAlexei Starovoitov if (prev) 1249b00628b1SAlexei Starovoitov /* Drop prev prog refcnt when swapping with new prog */ 1250b00628b1SAlexei Starovoitov bpf_prog_put(prev); 1251b00628b1SAlexei Starovoitov t->prog = prog; 1252b00628b1SAlexei Starovoitov } 1253b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, callback_fn); 1254b00628b1SAlexei Starovoitov out: 1255b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1256b00628b1SAlexei Starovoitov return ret; 1257b00628b1SAlexei Starovoitov } 1258b00628b1SAlexei Starovoitov 1259b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_set_callback_proto = { 1260b00628b1SAlexei Starovoitov .func = bpf_timer_set_callback, 1261b00628b1SAlexei Starovoitov .gpl_only = true, 1262b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1263b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1264b00628b1SAlexei Starovoitov .arg2_type = ARG_PTR_TO_FUNC, 1265b00628b1SAlexei Starovoitov }; 1266b00628b1SAlexei Starovoitov 1267b00628b1SAlexei Starovoitov BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) 1268b00628b1SAlexei Starovoitov { 1269b00628b1SAlexei Starovoitov struct bpf_hrtimer *t; 1270b00628b1SAlexei Starovoitov int ret = 0; 1271b00628b1SAlexei Starovoitov 1272b00628b1SAlexei Starovoitov if (in_nmi()) 1273b00628b1SAlexei Starovoitov return -EOPNOTSUPP; 1274b00628b1SAlexei Starovoitov if (flags) 1275b00628b1SAlexei Starovoitov return -EINVAL; 1276b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock); 1277b00628b1SAlexei Starovoitov t = timer->timer; 1278b00628b1SAlexei Starovoitov if (!t || !t->prog) { 1279b00628b1SAlexei Starovoitov ret = -EINVAL; 1280b00628b1SAlexei Starovoitov goto out; 1281b00628b1SAlexei Starovoitov } 1282b00628b1SAlexei Starovoitov hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT); 1283b00628b1SAlexei Starovoitov out: 1284b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock); 1285b00628b1SAlexei Starovoitov return ret; 1286b00628b1SAlexei Starovoitov } 1287b00628b1SAlexei Starovoitov 1288b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_start_proto = { 1289b00628b1SAlexei Starovoitov .func = bpf_timer_start, 1290b00628b1SAlexei Starovoitov .gpl_only = true, 1291b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER, 1292b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER, 1293b00628b1SAlexei Starovoitov .arg2_type = ARG_ANYTHING, 1294b00628b1SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 1295b00628b1SAlexei Starovoitov }; 1296b00628b1SAlexei Starovoitov 1297b00628b1SAlexei Starovoitov static void drop_prog_refcnt(struct bpf_hrtimer *t) 1298b00628b1SAlexei Starovoitov { 1299b00628b1SAlexei Starovoitov struct bpf_prog *prog = t->prog; 1300b00628b1SAlexei Starovoitov 1301b00628b1SAlexei Starovoitov if (prog) { 1302b00628b1SAlexei Starovoitov bpf_prog_put(prog); 1303b00628b1SAlexei Starovoitov t->prog = NULL; 1304b00628b1SAlexei Starovoitov rcu_assign_pointer(t->callback_fn, NULL); 1305b00628b1SAlexei Starovoitov } 1306b00628b1SAlexei Starovoitov } 1307b00628b1SAlexei Starovoitov 1308b00628b1SAlexei Starovoitov BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) 
1309b00628b1SAlexei Starovoitov {
1310b00628b1SAlexei Starovoitov struct bpf_hrtimer *t;
1311b00628b1SAlexei Starovoitov int ret = 0;
1312b00628b1SAlexei Starovoitov 
1313b00628b1SAlexei Starovoitov if (in_nmi())
1314b00628b1SAlexei Starovoitov return -EOPNOTSUPP;
1315b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock);
1316b00628b1SAlexei Starovoitov t = timer->timer;
1317b00628b1SAlexei Starovoitov if (!t) {
1318b00628b1SAlexei Starovoitov ret = -EINVAL;
1319b00628b1SAlexei Starovoitov goto out;
1320b00628b1SAlexei Starovoitov }
1321b00628b1SAlexei Starovoitov if (this_cpu_read(hrtimer_running) == t) {
1322b00628b1SAlexei Starovoitov /* If bpf callback_fn is trying to bpf_timer_cancel()
1323b00628b1SAlexei Starovoitov * its own timer, hrtimer_cancel() will deadlock
1324b00628b1SAlexei Starovoitov * since it waits for callback_fn to finish.
1325b00628b1SAlexei Starovoitov */
1326b00628b1SAlexei Starovoitov ret = -EDEADLK;
1327b00628b1SAlexei Starovoitov goto out;
1328b00628b1SAlexei Starovoitov }
1329b00628b1SAlexei Starovoitov drop_prog_refcnt(t);
1330b00628b1SAlexei Starovoitov out:
1331b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock);
1332b00628b1SAlexei Starovoitov /* Cancel the timer and wait for associated callback to finish
1333b00628b1SAlexei Starovoitov * if it was running.
1334b00628b1SAlexei Starovoitov */
1335b00628b1SAlexei Starovoitov ret = ret ?: hrtimer_cancel(&t->timer);
1336b00628b1SAlexei Starovoitov return ret;
1337b00628b1SAlexei Starovoitov }
1338b00628b1SAlexei Starovoitov 
1339b00628b1SAlexei Starovoitov static const struct bpf_func_proto bpf_timer_cancel_proto = {
1340b00628b1SAlexei Starovoitov .func = bpf_timer_cancel,
1341b00628b1SAlexei Starovoitov .gpl_only = true,
1342b00628b1SAlexei Starovoitov .ret_type = RET_INTEGER,
1343b00628b1SAlexei Starovoitov .arg1_type = ARG_PTR_TO_TIMER,
1344b00628b1SAlexei Starovoitov };
1345b00628b1SAlexei Starovoitov 
1346b00628b1SAlexei Starovoitov /* This function is called by map_delete/update_elem for an individual element and
1347b00628b1SAlexei Starovoitov * by ops->map_release_uref when the user space reference to a map reaches zero.
1348b00628b1SAlexei Starovoitov */
1349b00628b1SAlexei Starovoitov void bpf_timer_cancel_and_free(void *val)
1350b00628b1SAlexei Starovoitov {
1351b00628b1SAlexei Starovoitov struct bpf_timer_kern *timer = val;
1352b00628b1SAlexei Starovoitov struct bpf_hrtimer *t;
1353b00628b1SAlexei Starovoitov 
1354b00628b1SAlexei Starovoitov /* Performance optimization: read timer->timer without lock first. */
1355b00628b1SAlexei Starovoitov if (!READ_ONCE(timer->timer))
1356b00628b1SAlexei Starovoitov return;
1357b00628b1SAlexei Starovoitov 
1358b00628b1SAlexei Starovoitov __bpf_spin_lock_irqsave(&timer->lock);
1359b00628b1SAlexei Starovoitov /* re-read it under lock */
1360b00628b1SAlexei Starovoitov t = timer->timer;
1361b00628b1SAlexei Starovoitov if (!t)
1362b00628b1SAlexei Starovoitov goto out;
1363b00628b1SAlexei Starovoitov drop_prog_refcnt(t);
1364b00628b1SAlexei Starovoitov /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
1365b00628b1SAlexei Starovoitov * this timer, since it won't be initialized.
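 * (They will see a NULL timer->timer and return -EINVAL.)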
1366b00628b1SAlexei Starovoitov */
1367b00628b1SAlexei Starovoitov timer->timer = NULL;
1368b00628b1SAlexei Starovoitov out:
1369b00628b1SAlexei Starovoitov __bpf_spin_unlock_irqrestore(&timer->lock);
1370b00628b1SAlexei Starovoitov if (!t)
1371b00628b1SAlexei Starovoitov return;
1372b00628b1SAlexei Starovoitov /* Cancel the timer and wait for callback to complete if it was running.
1373b00628b1SAlexei Starovoitov * If hrtimer_cancel() can be safely called, it's safe to call kfree(t)
1374b00628b1SAlexei Starovoitov * right after for both preallocated and non-preallocated maps.
1375b00628b1SAlexei Starovoitov * The timer->timer = NULL was already done, and no code path can
1376b00628b1SAlexei Starovoitov * see the address 't' anymore.
1377b00628b1SAlexei Starovoitov *
1378b00628b1SAlexei Starovoitov * Check that bpf_map_delete/update_elem() wasn't called from timer
1379b00628b1SAlexei Starovoitov * callback_fn. In such a case don't call hrtimer_cancel() (since it will
1380b00628b1SAlexei Starovoitov * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
1381b00628b1SAlexei Starovoitov * return -1). Though callback_fn is still running on this cpu, it's
1382b00628b1SAlexei Starovoitov * safe to do kfree(t) because bpf_timer_cb() read everything it needed
1383b00628b1SAlexei Starovoitov * from 't'. The bpf subprog callback_fn won't be able to access 't',
1384b00628b1SAlexei Starovoitov * since timer->timer = NULL was already done. The timer will be
1385b00628b1SAlexei Starovoitov * effectively cancelled because bpf_timer_cb() will return
1386b00628b1SAlexei Starovoitov * HRTIMER_NORESTART.
1387b00628b1SAlexei Starovoitov */
1388b00628b1SAlexei Starovoitov if (this_cpu_read(hrtimer_running) != t)
1389b00628b1SAlexei Starovoitov hrtimer_cancel(&t->timer);
1390b00628b1SAlexei Starovoitov kfree(t);
1391b00628b1SAlexei Starovoitov }
1392b00628b1SAlexei Starovoitov 
1393c0a5a21cSKumar Kartikeya Dwivedi BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
1394c0a5a21cSKumar Kartikeya Dwivedi {
1395c0a5a21cSKumar Kartikeya Dwivedi unsigned long *kptr = map_value;
1396c0a5a21cSKumar Kartikeya Dwivedi 
1397c0a5a21cSKumar Kartikeya Dwivedi return xchg(kptr, (unsigned long)ptr);
1398c0a5a21cSKumar Kartikeya Dwivedi }
1399c0a5a21cSKumar Kartikeya Dwivedi 
1400c0a5a21cSKumar Kartikeya Dwivedi /* Unlike other PTR_TO_BTF_ID helpers, the btf_id of the bpf_kptr_xchg()
1401c0a5a21cSKumar Kartikeya Dwivedi * helper is determined dynamically by the verifier.
1402c0a5a21cSKumar Kartikeya Dwivedi */
1403c0a5a21cSKumar Kartikeya Dwivedi #define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
1404c0a5a21cSKumar Kartikeya Dwivedi 
1405c0a5a21cSKumar Kartikeya Dwivedi const struct bpf_func_proto bpf_kptr_xchg_proto = {
1406c0a5a21cSKumar Kartikeya Dwivedi .func = bpf_kptr_xchg,
1407c0a5a21cSKumar Kartikeya Dwivedi .gpl_only = false,
1408c0a5a21cSKumar Kartikeya Dwivedi .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1409c0a5a21cSKumar Kartikeya Dwivedi .ret_btf_id = BPF_PTR_POISON,
1410c0a5a21cSKumar Kartikeya Dwivedi .arg1_type = ARG_PTR_TO_KPTR,
1411c0a5a21cSKumar Kartikeya Dwivedi .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
1412c0a5a21cSKumar Kartikeya Dwivedi .arg2_btf_id = BPF_PTR_POISON,
1413c0a5a21cSKumar Kartikeya Dwivedi };
1414c0a5a21cSKumar Kartikeya Dwivedi 
1415*263ae152SJoanne Koong /* Since the upper 8 bits of dynptr->size are reserved, the
1416*263ae152SJoanne Koong * maximum supported size is 2^24 - 1.
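 * The dynptr type is stored in those reserved upper bits (see
 * DYNPTR_TYPE_SHIFT and bpf_dynptr_set_type() below).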
1417*263ae152SJoanne Koong */ 1418*263ae152SJoanne Koong #define DYNPTR_MAX_SIZE ((1UL << 24) - 1) 1419*263ae152SJoanne Koong #define DYNPTR_TYPE_SHIFT 28 1420*263ae152SJoanne Koong 1421*263ae152SJoanne Koong static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) 1422*263ae152SJoanne Koong { 1423*263ae152SJoanne Koong ptr->size |= type << DYNPTR_TYPE_SHIFT; 1424*263ae152SJoanne Koong } 1425*263ae152SJoanne Koong 1426*263ae152SJoanne Koong static int bpf_dynptr_check_size(u32 size) 1427*263ae152SJoanne Koong { 1428*263ae152SJoanne Koong return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; 1429*263ae152SJoanne Koong } 1430*263ae152SJoanne Koong 1431*263ae152SJoanne Koong static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, 1432*263ae152SJoanne Koong enum bpf_dynptr_type type, u32 offset, u32 size) 1433*263ae152SJoanne Koong { 1434*263ae152SJoanne Koong ptr->data = data; 1435*263ae152SJoanne Koong ptr->offset = offset; 1436*263ae152SJoanne Koong ptr->size = size; 1437*263ae152SJoanne Koong bpf_dynptr_set_type(ptr, type); 1438*263ae152SJoanne Koong } 1439*263ae152SJoanne Koong 1440*263ae152SJoanne Koong static void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) 1441*263ae152SJoanne Koong { 1442*263ae152SJoanne Koong memset(ptr, 0, sizeof(*ptr)); 1443*263ae152SJoanne Koong } 1444*263ae152SJoanne Koong 1445*263ae152SJoanne Koong BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) 1446*263ae152SJoanne Koong { 1447*263ae152SJoanne Koong int err; 1448*263ae152SJoanne Koong 1449*263ae152SJoanne Koong err = bpf_dynptr_check_size(size); 1450*263ae152SJoanne Koong if (err) 1451*263ae152SJoanne Koong goto error; 1452*263ae152SJoanne Koong 1453*263ae152SJoanne Koong /* flags is currently unsupported */ 1454*263ae152SJoanne Koong if (flags) { 1455*263ae152SJoanne Koong err = -EINVAL; 1456*263ae152SJoanne Koong goto error; 1457*263ae152SJoanne Koong } 1458*263ae152SJoanne Koong 1459*263ae152SJoanne Koong bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); 1460*263ae152SJoanne Koong 1461*263ae152SJoanne Koong return 0; 1462*263ae152SJoanne Koong 1463*263ae152SJoanne Koong error: 1464*263ae152SJoanne Koong bpf_dynptr_set_null(ptr); 1465*263ae152SJoanne Koong return err; 1466*263ae152SJoanne Koong } 1467*263ae152SJoanne Koong 1468*263ae152SJoanne Koong const struct bpf_func_proto bpf_dynptr_from_mem_proto = { 1469*263ae152SJoanne Koong .func = bpf_dynptr_from_mem, 1470*263ae152SJoanne Koong .gpl_only = false, 1471*263ae152SJoanne Koong .ret_type = RET_INTEGER, 1472*263ae152SJoanne Koong .arg1_type = ARG_PTR_TO_UNINIT_MEM, 1473*263ae152SJoanne Koong .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1474*263ae152SJoanne Koong .arg3_type = ARG_ANYTHING, 1475*263ae152SJoanne Koong .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, 1476*263ae152SJoanne Koong }; 1477*263ae152SJoanne Koong 1478f470378cSJohn Fastabend const struct bpf_func_proto bpf_get_current_task_proto __weak; 1479a396eda5SDaniel Xu const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; 1480f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_user_proto __weak; 1481f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; 1482f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; 1483f470378cSJohn Fastabend const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; 1484dd6e10fbSDaniel Xu const struct bpf_func_proto bpf_task_pt_regs_proto __weak; 
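/* Example: a minimal sketch of how a BPF program might use
 * bpf_dynptr_from_mem() above to wrap a stack buffer in a local dynptr
 * (assumes libbpf's <bpf/bpf_helpers.h>; the 64-byte buffer is purely
 * illustrative). A size above DYNPTR_MAX_SIZE or a non-zero flags value
 * fails and zeroes the dynptr via bpf_dynptr_set_null(), so a failed
 * dynptr cannot be used by accident:
 *
 *	char buf[64] = {};
 *	struct bpf_dynptr ptr;
 *
 *	if (bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr) < 0)
 *		return 0;
 */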
1485f470378cSJohn Fastabend 14866890896bSStanislav Fomichev const struct bpf_func_proto * 14876890896bSStanislav Fomichev bpf_base_func_proto(enum bpf_func_id func_id) 14886890896bSStanislav Fomichev { 14896890896bSStanislav Fomichev switch (func_id) { 14906890896bSStanislav Fomichev case BPF_FUNC_map_lookup_elem: 14916890896bSStanislav Fomichev return &bpf_map_lookup_elem_proto; 14926890896bSStanislav Fomichev case BPF_FUNC_map_update_elem: 14936890896bSStanislav Fomichev return &bpf_map_update_elem_proto; 14946890896bSStanislav Fomichev case BPF_FUNC_map_delete_elem: 14956890896bSStanislav Fomichev return &bpf_map_delete_elem_proto; 14966890896bSStanislav Fomichev case BPF_FUNC_map_push_elem: 14976890896bSStanislav Fomichev return &bpf_map_push_elem_proto; 14986890896bSStanislav Fomichev case BPF_FUNC_map_pop_elem: 14996890896bSStanislav Fomichev return &bpf_map_pop_elem_proto; 15006890896bSStanislav Fomichev case BPF_FUNC_map_peek_elem: 15016890896bSStanislav Fomichev return &bpf_map_peek_elem_proto; 150207343110SFeng Zhou case BPF_FUNC_map_lookup_percpu_elem: 150307343110SFeng Zhou return &bpf_map_lookup_percpu_elem_proto; 15046890896bSStanislav Fomichev case BPF_FUNC_get_prandom_u32: 15056890896bSStanislav Fomichev return &bpf_get_prandom_u32_proto; 15066890896bSStanislav Fomichev case BPF_FUNC_get_smp_processor_id: 15076890896bSStanislav Fomichev return &bpf_get_raw_smp_processor_id_proto; 15086890896bSStanislav Fomichev case BPF_FUNC_get_numa_node_id: 15096890896bSStanislav Fomichev return &bpf_get_numa_node_id_proto; 15106890896bSStanislav Fomichev case BPF_FUNC_tail_call: 15116890896bSStanislav Fomichev return &bpf_tail_call_proto; 15126890896bSStanislav Fomichev case BPF_FUNC_ktime_get_ns: 15136890896bSStanislav Fomichev return &bpf_ktime_get_ns_proto; 151471d19214SMaciej Żenczykowski case BPF_FUNC_ktime_get_boot_ns: 151571d19214SMaciej Żenczykowski return &bpf_ktime_get_boot_ns_proto; 1516457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_output: 1517457f4436SAndrii Nakryiko return &bpf_ringbuf_output_proto; 1518457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_reserve: 1519457f4436SAndrii Nakryiko return &bpf_ringbuf_reserve_proto; 1520457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_submit: 1521457f4436SAndrii Nakryiko return &bpf_ringbuf_submit_proto; 1522457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_discard: 1523457f4436SAndrii Nakryiko return &bpf_ringbuf_discard_proto; 1524457f4436SAndrii Nakryiko case BPF_FUNC_ringbuf_query: 1525457f4436SAndrii Nakryiko return &bpf_ringbuf_query_proto; 152669c087baSYonghong Song case BPF_FUNC_for_each_map_elem: 152769c087baSYonghong Song return &bpf_for_each_map_elem_proto; 1528e6f2dd0fSJoanne Koong case BPF_FUNC_loop: 1529e6f2dd0fSJoanne Koong return &bpf_loop_proto; 1530c5fb1993SHou Tao case BPF_FUNC_strncmp: 1531c5fb1993SHou Tao return &bpf_strncmp_proto; 1532*263ae152SJoanne Koong case BPF_FUNC_dynptr_from_mem: 1533*263ae152SJoanne Koong return &bpf_dynptr_from_mem_proto; 15346890896bSStanislav Fomichev default: 15356890896bSStanislav Fomichev break; 15366890896bSStanislav Fomichev } 15376890896bSStanislav Fomichev 15382c78ee89SAlexei Starovoitov if (!bpf_capable()) 15396890896bSStanislav Fomichev return NULL; 15406890896bSStanislav Fomichev 15416890896bSStanislav Fomichev switch (func_id) { 15426890896bSStanislav Fomichev case BPF_FUNC_spin_lock: 15436890896bSStanislav Fomichev return &bpf_spin_lock_proto; 15446890896bSStanislav Fomichev case BPF_FUNC_spin_unlock: 15456890896bSStanislav Fomichev return &bpf_spin_unlock_proto; 
15466890896bSStanislav Fomichev case BPF_FUNC_jiffies64: 15476890896bSStanislav Fomichev return &bpf_jiffies64_proto; 1548b7906b70SAndrii Nakryiko case BPF_FUNC_per_cpu_ptr: 1549eaa6bcb7SHao Luo return &bpf_per_cpu_ptr_proto; 1550b7906b70SAndrii Nakryiko case BPF_FUNC_this_cpu_ptr: 155163d9b80dSHao Luo return &bpf_this_cpu_ptr_proto; 1552b00628b1SAlexei Starovoitov case BPF_FUNC_timer_init: 1553b00628b1SAlexei Starovoitov return &bpf_timer_init_proto; 1554b00628b1SAlexei Starovoitov case BPF_FUNC_timer_set_callback: 1555b00628b1SAlexei Starovoitov return &bpf_timer_set_callback_proto; 1556b00628b1SAlexei Starovoitov case BPF_FUNC_timer_start: 1557b00628b1SAlexei Starovoitov return &bpf_timer_start_proto; 1558b00628b1SAlexei Starovoitov case BPF_FUNC_timer_cancel: 1559b00628b1SAlexei Starovoitov return &bpf_timer_cancel_proto; 1560c0a5a21cSKumar Kartikeya Dwivedi case BPF_FUNC_kptr_xchg: 1561c0a5a21cSKumar Kartikeya Dwivedi return &bpf_kptr_xchg_proto; 15626890896bSStanislav Fomichev default: 1563f470378cSJohn Fastabend break; 1564f470378cSJohn Fastabend } 1565f470378cSJohn Fastabend 1566f470378cSJohn Fastabend if (!perfmon_capable()) 1567f470378cSJohn Fastabend return NULL; 1568f470378cSJohn Fastabend 1569f470378cSJohn Fastabend switch (func_id) { 157061ca36c8STobias Klauser case BPF_FUNC_trace_printk: 157161ca36c8STobias Klauser return bpf_get_trace_printk_proto(); 1572f470378cSJohn Fastabend case BPF_FUNC_get_current_task: 1573f470378cSJohn Fastabend return &bpf_get_current_task_proto; 1574a396eda5SDaniel Xu case BPF_FUNC_get_current_task_btf: 1575a396eda5SDaniel Xu return &bpf_get_current_task_btf_proto; 1576f470378cSJohn Fastabend case BPF_FUNC_probe_read_user: 1577f470378cSJohn Fastabend return &bpf_probe_read_user_proto; 1578f470378cSJohn Fastabend case BPF_FUNC_probe_read_kernel: 157971330842SDaniel Borkmann return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1580ff40e510SDaniel Borkmann NULL : &bpf_probe_read_kernel_proto; 1581f470378cSJohn Fastabend case BPF_FUNC_probe_read_user_str: 1582f470378cSJohn Fastabend return &bpf_probe_read_user_str_proto; 1583f470378cSJohn Fastabend case BPF_FUNC_probe_read_kernel_str: 158471330842SDaniel Borkmann return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? 1585ff40e510SDaniel Borkmann NULL : &bpf_probe_read_kernel_str_proto; 158661ca36c8STobias Klauser case BPF_FUNC_snprintf_btf: 158761ca36c8STobias Klauser return &bpf_snprintf_btf_proto; 15887b15523aSFlorent Revest case BPF_FUNC_snprintf: 15897b15523aSFlorent Revest return &bpf_snprintf_proto; 1590dd6e10fbSDaniel Xu case BPF_FUNC_task_pt_regs: 1591dd6e10fbSDaniel Xu return &bpf_task_pt_regs_proto; 159210aceb62SDave Marchevsky case BPF_FUNC_trace_vprintk: 159310aceb62SDave Marchevsky return bpf_get_trace_vprintk_proto(); 1594f470378cSJohn Fastabend default: 15956890896bSStanislav Fomichev return NULL; 15966890896bSStanislav Fomichev } 15976890896bSStanislav Fomichev } 1598
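/* Example: how a program type's get_func_proto() callback typically
 * uses the table above (a sketch; "my_func_proto", "BPF_FUNC_my_helper"
 * and "bpf_my_helper_proto" are illustrative). Type-specific helpers
 * are handled first; everything else falls back to
 * bpf_base_func_proto(), which applies the bpf_capable() and
 * perfmon_capable() gating seen above:
 *
 *	static const struct bpf_func_proto *
 *	my_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 *	{
 *		switch (func_id) {
 *		case BPF_FUNC_my_helper:
 *			return &bpf_my_helper_proto;
 *		default:
 *			return bpf_base_func_proto(func_id);
 *		}
 *	}
 */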