12541517cSAlexei Starovoitov /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com 20515e599SAlexei Starovoitov * Copyright (c) 2016 Facebook 32541517cSAlexei Starovoitov * 42541517cSAlexei Starovoitov * This program is free software; you can redistribute it and/or 52541517cSAlexei Starovoitov * modify it under the terms of version 2 of the GNU General Public 62541517cSAlexei Starovoitov * License as published by the Free Software Foundation. 72541517cSAlexei Starovoitov */ 82541517cSAlexei Starovoitov #include <linux/kernel.h> 92541517cSAlexei Starovoitov #include <linux/types.h> 102541517cSAlexei Starovoitov #include <linux/slab.h> 112541517cSAlexei Starovoitov #include <linux/bpf.h> 120515e599SAlexei Starovoitov #include <linux/bpf_perf_event.h> 132541517cSAlexei Starovoitov #include <linux/filter.h> 142541517cSAlexei Starovoitov #include <linux/uaccess.h> 159c959c86SAlexei Starovoitov #include <linux/ctype.h> 169802d865SJosef Bacik #include <linux/kprobes.h> 17540adea3SMasami Hiramatsu #include <linux/error-injection.h> 189802d865SJosef Bacik 199802d865SJosef Bacik #include "trace_probe.h" 202541517cSAlexei Starovoitov #include "trace.h" 212541517cSAlexei Starovoitov 22035226b9SGianluca Borello u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); 23035226b9SGianluca Borello 242541517cSAlexei Starovoitov /** 252541517cSAlexei Starovoitov * trace_call_bpf - invoke BPF program 26e87c6bc3SYonghong Song * @call: tracepoint event 272541517cSAlexei Starovoitov * @ctx: opaque context pointer 282541517cSAlexei Starovoitov * 292541517cSAlexei Starovoitov * kprobe handlers execute BPF programs via this helper. 302541517cSAlexei Starovoitov * Can be used from static tracepoints in the future. 
312541517cSAlexei Starovoitov * 322541517cSAlexei Starovoitov * Return: BPF programs always return an integer which is interpreted by 332541517cSAlexei Starovoitov * kprobe handler as: 342541517cSAlexei Starovoitov * 0 - return from kprobe (event is filtered out) 352541517cSAlexei Starovoitov * 1 - store kprobe event into ring buffer 362541517cSAlexei Starovoitov * Other values are reserved and currently alias to 1 372541517cSAlexei Starovoitov */ 38e87c6bc3SYonghong Song unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 392541517cSAlexei Starovoitov { 402541517cSAlexei Starovoitov unsigned int ret; 412541517cSAlexei Starovoitov 422541517cSAlexei Starovoitov if (in_nmi()) /* not supported yet */ 432541517cSAlexei Starovoitov return 1; 442541517cSAlexei Starovoitov 452541517cSAlexei Starovoitov preempt_disable(); 462541517cSAlexei Starovoitov 472541517cSAlexei Starovoitov if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 482541517cSAlexei Starovoitov /* 492541517cSAlexei Starovoitov * since some bpf program is already running on this cpu, 502541517cSAlexei Starovoitov * don't call into another bpf program (same or different) 512541517cSAlexei Starovoitov * and don't send kprobe event into ring-buffer, 522541517cSAlexei Starovoitov * so return zero here 532541517cSAlexei Starovoitov */ 542541517cSAlexei Starovoitov ret = 0; 552541517cSAlexei Starovoitov goto out; 562541517cSAlexei Starovoitov } 572541517cSAlexei Starovoitov 58e87c6bc3SYonghong Song /* 59e87c6bc3SYonghong Song * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock 60e87c6bc3SYonghong Song * to all call sites, we did a bpf_prog_array_valid() there to check 61e87c6bc3SYonghong Song * whether call->prog_array is empty or not, which is 62e87c6bc3SYonghong Song * a heurisitc to speed up execution. 
63e87c6bc3SYonghong Song * 64e87c6bc3SYonghong Song * If bpf_prog_array_valid() fetched prog_array was 65e87c6bc3SYonghong Song * non-NULL, we go into trace_call_bpf() and do the actual 66e87c6bc3SYonghong Song * proper rcu_dereference() under RCU lock. 67e87c6bc3SYonghong Song * If it turns out that prog_array is NULL then, we bail out. 68e87c6bc3SYonghong Song * For the opposite, if the bpf_prog_array_valid() fetched pointer 69e87c6bc3SYonghong Song * was NULL, you'll skip the prog_array with the risk of missing 70e87c6bc3SYonghong Song * out of events when it was updated in between this and the 71e87c6bc3SYonghong Song * rcu_dereference() which is accepted risk. 72e87c6bc3SYonghong Song */ 73e87c6bc3SYonghong Song ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); 742541517cSAlexei Starovoitov 752541517cSAlexei Starovoitov out: 762541517cSAlexei Starovoitov __this_cpu_dec(bpf_prog_active); 772541517cSAlexei Starovoitov preempt_enable(); 782541517cSAlexei Starovoitov 792541517cSAlexei Starovoitov return ret; 802541517cSAlexei Starovoitov } 812541517cSAlexei Starovoitov EXPORT_SYMBOL_GPL(trace_call_bpf); 822541517cSAlexei Starovoitov 839802d865SJosef Bacik #ifdef CONFIG_BPF_KPROBE_OVERRIDE 849802d865SJosef Bacik BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) 859802d865SJosef Bacik { 869802d865SJosef Bacik regs_set_return_value(regs, rc); 87540adea3SMasami Hiramatsu override_function_with_return(regs); 889802d865SJosef Bacik return 0; 899802d865SJosef Bacik } 909802d865SJosef Bacik 919802d865SJosef Bacik static const struct bpf_func_proto bpf_override_return_proto = { 929802d865SJosef Bacik .func = bpf_override_return, 939802d865SJosef Bacik .gpl_only = true, 949802d865SJosef Bacik .ret_type = RET_INTEGER, 959802d865SJosef Bacik .arg1_type = ARG_PTR_TO_CTX, 969802d865SJosef Bacik .arg2_type = ARG_ANYTHING, 979802d865SJosef Bacik }; 989802d865SJosef Bacik #endif 999802d865SJosef Bacik 100f3694e00SDaniel Borkmann 
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) 1012541517cSAlexei Starovoitov { 102eb33f2ccSGianluca Borello int ret; 1032541517cSAlexei Starovoitov 104074f528eSDaniel Borkmann ret = probe_kernel_read(dst, unsafe_ptr, size); 105074f528eSDaniel Borkmann if (unlikely(ret < 0)) 106074f528eSDaniel Borkmann memset(dst, 0, size); 107074f528eSDaniel Borkmann 108074f528eSDaniel Borkmann return ret; 1092541517cSAlexei Starovoitov } 1102541517cSAlexei Starovoitov 1112541517cSAlexei Starovoitov static const struct bpf_func_proto bpf_probe_read_proto = { 1122541517cSAlexei Starovoitov .func = bpf_probe_read, 1132541517cSAlexei Starovoitov .gpl_only = true, 1142541517cSAlexei Starovoitov .ret_type = RET_INTEGER, 11539f19ebbSAlexei Starovoitov .arg1_type = ARG_PTR_TO_UNINIT_MEM, 1169c019e2bSYonghong Song .arg2_type = ARG_CONST_SIZE_OR_ZERO, 1172541517cSAlexei Starovoitov .arg3_type = ARG_ANYTHING, 1182541517cSAlexei Starovoitov }; 1192541517cSAlexei Starovoitov 120f3694e00SDaniel Borkmann BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, 121f3694e00SDaniel Borkmann u32, size) 12296ae5227SSargun Dhillon { 12396ae5227SSargun Dhillon /* 12496ae5227SSargun Dhillon * Ensure we're in user context which is safe for the helper to 12596ae5227SSargun Dhillon * run. This helper has no business in a kthread. 12696ae5227SSargun Dhillon * 12796ae5227SSargun Dhillon * access_ok() should prevent writing to non-user memory, but in 12896ae5227SSargun Dhillon * some situations (nommu, temporary switch, etc) access_ok() does 12996ae5227SSargun Dhillon * not provide enough validation, hence the check on KERNEL_DS. 
13096ae5227SSargun Dhillon */ 13196ae5227SSargun Dhillon 13296ae5227SSargun Dhillon if (unlikely(in_interrupt() || 13396ae5227SSargun Dhillon current->flags & (PF_KTHREAD | PF_EXITING))) 13496ae5227SSargun Dhillon return -EPERM; 135db68ce10SAl Viro if (unlikely(uaccess_kernel())) 13696ae5227SSargun Dhillon return -EPERM; 13796ae5227SSargun Dhillon if (!access_ok(VERIFY_WRITE, unsafe_ptr, size)) 13896ae5227SSargun Dhillon return -EPERM; 13996ae5227SSargun Dhillon 14096ae5227SSargun Dhillon return probe_kernel_write(unsafe_ptr, src, size); 14196ae5227SSargun Dhillon } 14296ae5227SSargun Dhillon 14396ae5227SSargun Dhillon static const struct bpf_func_proto bpf_probe_write_user_proto = { 14496ae5227SSargun Dhillon .func = bpf_probe_write_user, 14596ae5227SSargun Dhillon .gpl_only = true, 14696ae5227SSargun Dhillon .ret_type = RET_INTEGER, 14796ae5227SSargun Dhillon .arg1_type = ARG_ANYTHING, 14839f19ebbSAlexei Starovoitov .arg2_type = ARG_PTR_TO_MEM, 14939f19ebbSAlexei Starovoitov .arg3_type = ARG_CONST_SIZE, 15096ae5227SSargun Dhillon }; 15196ae5227SSargun Dhillon 15296ae5227SSargun Dhillon static const struct bpf_func_proto *bpf_get_probe_write_proto(void) 15396ae5227SSargun Dhillon { 15496ae5227SSargun Dhillon pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!", 15596ae5227SSargun Dhillon current->comm, task_pid_nr(current)); 15696ae5227SSargun Dhillon 15796ae5227SSargun Dhillon return &bpf_probe_write_user_proto; 15896ae5227SSargun Dhillon } 15996ae5227SSargun Dhillon 1609c959c86SAlexei Starovoitov /* 1617bda4b40SJohn Fastabend * Only limited trace_printk() conversion specifiers allowed: 1627bda4b40SJohn Fastabend * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s 1639c959c86SAlexei Starovoitov */ 164f3694e00SDaniel Borkmann BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, 165f3694e00SDaniel Borkmann u64, arg2, u64, arg3) 1669c959c86SAlexei Starovoitov { 1678d3b7dceSAlexei 
Starovoitov bool str_seen = false; 1689c959c86SAlexei Starovoitov int mod[3] = {}; 1699c959c86SAlexei Starovoitov int fmt_cnt = 0; 1708d3b7dceSAlexei Starovoitov u64 unsafe_addr; 1718d3b7dceSAlexei Starovoitov char buf[64]; 1729c959c86SAlexei Starovoitov int i; 1739c959c86SAlexei Starovoitov 1749c959c86SAlexei Starovoitov /* 1759c959c86SAlexei Starovoitov * bpf_check()->check_func_arg()->check_stack_boundary() 1769c959c86SAlexei Starovoitov * guarantees that fmt points to bpf program stack, 1779c959c86SAlexei Starovoitov * fmt_size bytes of it were initialized and fmt_size > 0 1789c959c86SAlexei Starovoitov */ 1799c959c86SAlexei Starovoitov if (fmt[--fmt_size] != 0) 1809c959c86SAlexei Starovoitov return -EINVAL; 1819c959c86SAlexei Starovoitov 1829c959c86SAlexei Starovoitov /* check format string for allowed specifiers */ 1839c959c86SAlexei Starovoitov for (i = 0; i < fmt_size; i++) { 1849c959c86SAlexei Starovoitov if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) 1859c959c86SAlexei Starovoitov return -EINVAL; 1869c959c86SAlexei Starovoitov 1879c959c86SAlexei Starovoitov if (fmt[i] != '%') 1889c959c86SAlexei Starovoitov continue; 1899c959c86SAlexei Starovoitov 1909c959c86SAlexei Starovoitov if (fmt_cnt >= 3) 1919c959c86SAlexei Starovoitov return -EINVAL; 1929c959c86SAlexei Starovoitov 1939c959c86SAlexei Starovoitov /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ 1949c959c86SAlexei Starovoitov i++; 1959c959c86SAlexei Starovoitov if (fmt[i] == 'l') { 1969c959c86SAlexei Starovoitov mod[fmt_cnt]++; 1979c959c86SAlexei Starovoitov i++; 1988d3b7dceSAlexei Starovoitov } else if (fmt[i] == 'p' || fmt[i] == 's') { 1999c959c86SAlexei Starovoitov mod[fmt_cnt]++; 2009c959c86SAlexei Starovoitov i++; 2019c959c86SAlexei Starovoitov if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) 2029c959c86SAlexei Starovoitov return -EINVAL; 2039c959c86SAlexei Starovoitov fmt_cnt++; 2048d3b7dceSAlexei Starovoitov if (fmt[i - 1] == 's') { 2058d3b7dceSAlexei 
Starovoitov if (str_seen) 2068d3b7dceSAlexei Starovoitov /* allow only one '%s' per fmt string */ 2078d3b7dceSAlexei Starovoitov return -EINVAL; 2088d3b7dceSAlexei Starovoitov str_seen = true; 2098d3b7dceSAlexei Starovoitov 2108d3b7dceSAlexei Starovoitov switch (fmt_cnt) { 2118d3b7dceSAlexei Starovoitov case 1: 212f3694e00SDaniel Borkmann unsafe_addr = arg1; 213f3694e00SDaniel Borkmann arg1 = (long) buf; 2148d3b7dceSAlexei Starovoitov break; 2158d3b7dceSAlexei Starovoitov case 2: 216f3694e00SDaniel Borkmann unsafe_addr = arg2; 217f3694e00SDaniel Borkmann arg2 = (long) buf; 2188d3b7dceSAlexei Starovoitov break; 2198d3b7dceSAlexei Starovoitov case 3: 220f3694e00SDaniel Borkmann unsafe_addr = arg3; 221f3694e00SDaniel Borkmann arg3 = (long) buf; 2228d3b7dceSAlexei Starovoitov break; 2238d3b7dceSAlexei Starovoitov } 2248d3b7dceSAlexei Starovoitov buf[0] = 0; 2258d3b7dceSAlexei Starovoitov strncpy_from_unsafe(buf, 2268d3b7dceSAlexei Starovoitov (void *) (long) unsafe_addr, 2278d3b7dceSAlexei Starovoitov sizeof(buf)); 2288d3b7dceSAlexei Starovoitov } 2299c959c86SAlexei Starovoitov continue; 2309c959c86SAlexei Starovoitov } 2319c959c86SAlexei Starovoitov 2329c959c86SAlexei Starovoitov if (fmt[i] == 'l') { 2339c959c86SAlexei Starovoitov mod[fmt_cnt]++; 2349c959c86SAlexei Starovoitov i++; 2359c959c86SAlexei Starovoitov } 2369c959c86SAlexei Starovoitov 2377bda4b40SJohn Fastabend if (fmt[i] != 'i' && fmt[i] != 'd' && 2387bda4b40SJohn Fastabend fmt[i] != 'u' && fmt[i] != 'x') 2399c959c86SAlexei Starovoitov return -EINVAL; 2409c959c86SAlexei Starovoitov fmt_cnt++; 2419c959c86SAlexei Starovoitov } 2429c959c86SAlexei Starovoitov 24388a5c690SDaniel Borkmann /* Horrid workaround for getting va_list handling working with different 24488a5c690SDaniel Borkmann * argument type combinations generically for 32 and 64 bit archs. 24588a5c690SDaniel Borkmann */ 24688a5c690SDaniel Borkmann #define __BPF_TP_EMIT() __BPF_ARG3_TP() 24788a5c690SDaniel Borkmann #define __BPF_TP(...) 
\ 24888a5c690SDaniel Borkmann __trace_printk(1 /* Fake ip will not be printed. */, \ 24988a5c690SDaniel Borkmann fmt, ##__VA_ARGS__) 25088a5c690SDaniel Borkmann 25188a5c690SDaniel Borkmann #define __BPF_ARG1_TP(...) \ 25288a5c690SDaniel Borkmann ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ 25388a5c690SDaniel Borkmann ? __BPF_TP(arg1, ##__VA_ARGS__) \ 25488a5c690SDaniel Borkmann : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ 25588a5c690SDaniel Borkmann ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ 25688a5c690SDaniel Borkmann : __BPF_TP((u32)arg1, ##__VA_ARGS__))) 25788a5c690SDaniel Borkmann 25888a5c690SDaniel Borkmann #define __BPF_ARG2_TP(...) \ 25988a5c690SDaniel Borkmann ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ 26088a5c690SDaniel Borkmann ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ 26188a5c690SDaniel Borkmann : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ 26288a5c690SDaniel Borkmann ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ 26388a5c690SDaniel Borkmann : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) 26488a5c690SDaniel Borkmann 26588a5c690SDaniel Borkmann #define __BPF_ARG3_TP(...) \ 26688a5c690SDaniel Borkmann ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ 26788a5c690SDaniel Borkmann ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ 26888a5c690SDaniel Borkmann : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ 26988a5c690SDaniel Borkmann ? 
__BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ 27088a5c690SDaniel Borkmann : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) 27188a5c690SDaniel Borkmann 27288a5c690SDaniel Borkmann return __BPF_TP_EMIT(); 2739c959c86SAlexei Starovoitov } 2749c959c86SAlexei Starovoitov 2759c959c86SAlexei Starovoitov static const struct bpf_func_proto bpf_trace_printk_proto = { 2769c959c86SAlexei Starovoitov .func = bpf_trace_printk, 2779c959c86SAlexei Starovoitov .gpl_only = true, 2789c959c86SAlexei Starovoitov .ret_type = RET_INTEGER, 27939f19ebbSAlexei Starovoitov .arg1_type = ARG_PTR_TO_MEM, 28039f19ebbSAlexei Starovoitov .arg2_type = ARG_CONST_SIZE, 2819c959c86SAlexei Starovoitov }; 2829c959c86SAlexei Starovoitov 2830756ea3eSAlexei Starovoitov const struct bpf_func_proto *bpf_get_trace_printk_proto(void) 2840756ea3eSAlexei Starovoitov { 2850756ea3eSAlexei Starovoitov /* 2860756ea3eSAlexei Starovoitov * this program might be calling bpf_trace_printk, 2870756ea3eSAlexei Starovoitov * so allocate per-cpu printk buffers 2880756ea3eSAlexei Starovoitov */ 2890756ea3eSAlexei Starovoitov trace_printk_init_buffers(); 2900756ea3eSAlexei Starovoitov 2910756ea3eSAlexei Starovoitov return &bpf_trace_printk_proto; 2920756ea3eSAlexei Starovoitov } 2930756ea3eSAlexei Starovoitov 294908432caSYonghong Song static __always_inline int 295908432caSYonghong Song get_map_perf_counter(struct bpf_map *map, u64 flags, 296908432caSYonghong Song u64 *value, u64 *enabled, u64 *running) 29735578d79SKaixu Xia { 29835578d79SKaixu Xia struct bpf_array *array = container_of(map, struct bpf_array, map); 2996816a7ffSDaniel Borkmann unsigned int cpu = smp_processor_id(); 3006816a7ffSDaniel Borkmann u64 index = flags & BPF_F_INDEX_MASK; 3013b1efb19SDaniel Borkmann struct bpf_event_entry *ee; 30235578d79SKaixu Xia 3036816a7ffSDaniel Borkmann if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 3046816a7ffSDaniel Borkmann return -EINVAL; 3056816a7ffSDaniel Borkmann if (index == BPF_F_CURRENT_CPU) 3066816a7ffSDaniel Borkmann index = cpu; 
30735578d79SKaixu Xia if (unlikely(index >= array->map.max_entries)) 30835578d79SKaixu Xia return -E2BIG; 30935578d79SKaixu Xia 3103b1efb19SDaniel Borkmann ee = READ_ONCE(array->ptrs[index]); 3111ca1cc98SDaniel Borkmann if (!ee) 31235578d79SKaixu Xia return -ENOENT; 31335578d79SKaixu Xia 314908432caSYonghong Song return perf_event_read_local(ee->event, value, enabled, running); 315908432caSYonghong Song } 316908432caSYonghong Song 317908432caSYonghong Song BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) 318908432caSYonghong Song { 319908432caSYonghong Song u64 value = 0; 320908432caSYonghong Song int err; 321908432caSYonghong Song 322908432caSYonghong Song err = get_map_perf_counter(map, flags, &value, NULL, NULL); 32335578d79SKaixu Xia /* 324f91840a3SAlexei Starovoitov * this api is ugly since we miss [-22..-2] range of valid 325f91840a3SAlexei Starovoitov * counter values, but that's uapi 32635578d79SKaixu Xia */ 327f91840a3SAlexei Starovoitov if (err) 328f91840a3SAlexei Starovoitov return err; 329f91840a3SAlexei Starovoitov return value; 33035578d79SKaixu Xia } 33135578d79SKaixu Xia 33262544ce8SAlexei Starovoitov static const struct bpf_func_proto bpf_perf_event_read_proto = { 33335578d79SKaixu Xia .func = bpf_perf_event_read, 3341075ef59SAlexei Starovoitov .gpl_only = true, 33535578d79SKaixu Xia .ret_type = RET_INTEGER, 33635578d79SKaixu Xia .arg1_type = ARG_CONST_MAP_PTR, 33735578d79SKaixu Xia .arg2_type = ARG_ANYTHING, 33835578d79SKaixu Xia }; 33935578d79SKaixu Xia 340908432caSYonghong Song BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags, 341908432caSYonghong Song struct bpf_perf_event_value *, buf, u32, size) 342908432caSYonghong Song { 343908432caSYonghong Song int err = -EINVAL; 344908432caSYonghong Song 345908432caSYonghong Song if (unlikely(size != sizeof(struct bpf_perf_event_value))) 346908432caSYonghong Song goto clear; 347908432caSYonghong Song err = get_map_perf_counter(map, flags, &buf->counter, 
&buf->enabled, 348908432caSYonghong Song &buf->running); 349908432caSYonghong Song if (unlikely(err)) 350908432caSYonghong Song goto clear; 351908432caSYonghong Song return 0; 352908432caSYonghong Song clear: 353908432caSYonghong Song memset(buf, 0, size); 354908432caSYonghong Song return err; 355908432caSYonghong Song } 356908432caSYonghong Song 357908432caSYonghong Song static const struct bpf_func_proto bpf_perf_event_read_value_proto = { 358908432caSYonghong Song .func = bpf_perf_event_read_value, 359908432caSYonghong Song .gpl_only = true, 360908432caSYonghong Song .ret_type = RET_INTEGER, 361908432caSYonghong Song .arg1_type = ARG_CONST_MAP_PTR, 362908432caSYonghong Song .arg2_type = ARG_ANYTHING, 363908432caSYonghong Song .arg3_type = ARG_PTR_TO_UNINIT_MEM, 364908432caSYonghong Song .arg4_type = ARG_CONST_SIZE, 365908432caSYonghong Song }; 366908432caSYonghong Song 367283ca526SDaniel Borkmann static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); 36820b9d7acSDaniel Borkmann 3698e7a3920SDaniel Borkmann static __always_inline u64 3708e7a3920SDaniel Borkmann __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 371283ca526SDaniel Borkmann u64 flags, struct perf_sample_data *sd) 372a43eec30SAlexei Starovoitov { 373a43eec30SAlexei Starovoitov struct bpf_array *array = container_of(map, struct bpf_array, map); 374d7931330SDaniel Borkmann unsigned int cpu = smp_processor_id(); 3751e33759cSDaniel Borkmann u64 index = flags & BPF_F_INDEX_MASK; 3763b1efb19SDaniel Borkmann struct bpf_event_entry *ee; 377a43eec30SAlexei Starovoitov struct perf_event *event; 378a43eec30SAlexei Starovoitov 3791e33759cSDaniel Borkmann if (index == BPF_F_CURRENT_CPU) 380d7931330SDaniel Borkmann index = cpu; 381a43eec30SAlexei Starovoitov if (unlikely(index >= array->map.max_entries)) 382a43eec30SAlexei Starovoitov return -E2BIG; 383a43eec30SAlexei Starovoitov 3843b1efb19SDaniel Borkmann ee = READ_ONCE(array->ptrs[index]); 3851ca1cc98SDaniel Borkmann if (!ee) 
386a43eec30SAlexei Starovoitov return -ENOENT; 387a43eec30SAlexei Starovoitov 3883b1efb19SDaniel Borkmann event = ee->event; 389a43eec30SAlexei Starovoitov if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || 390a43eec30SAlexei Starovoitov event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) 391a43eec30SAlexei Starovoitov return -EINVAL; 392a43eec30SAlexei Starovoitov 393d7931330SDaniel Borkmann if (unlikely(event->oncpu != cpu)) 394a43eec30SAlexei Starovoitov return -EOPNOTSUPP; 395a43eec30SAlexei Starovoitov 39620b9d7acSDaniel Borkmann perf_event_output(event, sd, regs); 397a43eec30SAlexei Starovoitov return 0; 398a43eec30SAlexei Starovoitov } 399a43eec30SAlexei Starovoitov 400f3694e00SDaniel Borkmann BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 401f3694e00SDaniel Borkmann u64, flags, void *, data, u64, size) 4028e7a3920SDaniel Borkmann { 403283ca526SDaniel Borkmann struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); 4048e7a3920SDaniel Borkmann struct perf_raw_record raw = { 4058e7a3920SDaniel Borkmann .frag = { 4068e7a3920SDaniel Borkmann .size = size, 4078e7a3920SDaniel Borkmann .data = data, 4088e7a3920SDaniel Borkmann }, 4098e7a3920SDaniel Borkmann }; 4108e7a3920SDaniel Borkmann 4118e7a3920SDaniel Borkmann if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 4128e7a3920SDaniel Borkmann return -EINVAL; 4138e7a3920SDaniel Borkmann 414283ca526SDaniel Borkmann perf_sample_data_init(sd, 0, 0); 415283ca526SDaniel Borkmann sd->raw = &raw; 416283ca526SDaniel Borkmann 417283ca526SDaniel Borkmann return __bpf_perf_event_output(regs, map, flags, sd); 4188e7a3920SDaniel Borkmann } 4198e7a3920SDaniel Borkmann 420a43eec30SAlexei Starovoitov static const struct bpf_func_proto bpf_perf_event_output_proto = { 421a43eec30SAlexei Starovoitov .func = bpf_perf_event_output, 4221075ef59SAlexei Starovoitov .gpl_only = true, 423a43eec30SAlexei Starovoitov .ret_type = RET_INTEGER, 424a43eec30SAlexei Starovoitov .arg1_type = ARG_PTR_TO_CTX, 
425a43eec30SAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 426a43eec30SAlexei Starovoitov .arg3_type = ARG_ANYTHING, 42739f19ebbSAlexei Starovoitov .arg4_type = ARG_PTR_TO_MEM, 428a60dd35dSGianluca Borello .arg5_type = ARG_CONST_SIZE_OR_ZERO, 429a43eec30SAlexei Starovoitov }; 430a43eec30SAlexei Starovoitov 431bd570ff9SDaniel Borkmann static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); 432283ca526SDaniel Borkmann static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); 433bd570ff9SDaniel Borkmann 434555c8a86SDaniel Borkmann u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 435555c8a86SDaniel Borkmann void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 436bd570ff9SDaniel Borkmann { 437283ca526SDaniel Borkmann struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); 438bd570ff9SDaniel Borkmann struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); 439555c8a86SDaniel Borkmann struct perf_raw_frag frag = { 440555c8a86SDaniel Borkmann .copy = ctx_copy, 441555c8a86SDaniel Borkmann .size = ctx_size, 442555c8a86SDaniel Borkmann .data = ctx, 443555c8a86SDaniel Borkmann }; 444555c8a86SDaniel Borkmann struct perf_raw_record raw = { 445555c8a86SDaniel Borkmann .frag = { 446183fc153SAndrew Morton { 447555c8a86SDaniel Borkmann .next = ctx_size ? 
&frag : NULL, 448183fc153SAndrew Morton }, 449555c8a86SDaniel Borkmann .size = meta_size, 450555c8a86SDaniel Borkmann .data = meta, 451555c8a86SDaniel Borkmann }, 452555c8a86SDaniel Borkmann }; 453bd570ff9SDaniel Borkmann 454bd570ff9SDaniel Borkmann perf_fetch_caller_regs(regs); 455283ca526SDaniel Borkmann perf_sample_data_init(sd, 0, 0); 456283ca526SDaniel Borkmann sd->raw = &raw; 457bd570ff9SDaniel Borkmann 458283ca526SDaniel Borkmann return __bpf_perf_event_output(regs, map, flags, sd); 459bd570ff9SDaniel Borkmann } 460bd570ff9SDaniel Borkmann 461f3694e00SDaniel Borkmann BPF_CALL_0(bpf_get_current_task) 462606274c5SAlexei Starovoitov { 463606274c5SAlexei Starovoitov return (long) current; 464606274c5SAlexei Starovoitov } 465606274c5SAlexei Starovoitov 466606274c5SAlexei Starovoitov static const struct bpf_func_proto bpf_get_current_task_proto = { 467606274c5SAlexei Starovoitov .func = bpf_get_current_task, 468606274c5SAlexei Starovoitov .gpl_only = true, 469606274c5SAlexei Starovoitov .ret_type = RET_INTEGER, 470606274c5SAlexei Starovoitov }; 471606274c5SAlexei Starovoitov 472f3694e00SDaniel Borkmann BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) 47360d20f91SSargun Dhillon { 47460d20f91SSargun Dhillon struct bpf_array *array = container_of(map, struct bpf_array, map); 47560d20f91SSargun Dhillon struct cgroup *cgrp; 47660d20f91SSargun Dhillon 47760d20f91SSargun Dhillon if (unlikely(in_interrupt())) 47860d20f91SSargun Dhillon return -EINVAL; 47960d20f91SSargun Dhillon if (unlikely(idx >= array->map.max_entries)) 48060d20f91SSargun Dhillon return -E2BIG; 48160d20f91SSargun Dhillon 48260d20f91SSargun Dhillon cgrp = READ_ONCE(array->ptrs[idx]); 48360d20f91SSargun Dhillon if (unlikely(!cgrp)) 48460d20f91SSargun Dhillon return -EAGAIN; 48560d20f91SSargun Dhillon 48660d20f91SSargun Dhillon return task_under_cgroup_hierarchy(current, cgrp); 48760d20f91SSargun Dhillon } 48860d20f91SSargun Dhillon 48960d20f91SSargun Dhillon static const struct 
bpf_func_proto bpf_current_task_under_cgroup_proto = { 49060d20f91SSargun Dhillon .func = bpf_current_task_under_cgroup, 49160d20f91SSargun Dhillon .gpl_only = false, 49260d20f91SSargun Dhillon .ret_type = RET_INTEGER, 49360d20f91SSargun Dhillon .arg1_type = ARG_CONST_MAP_PTR, 49460d20f91SSargun Dhillon .arg2_type = ARG_ANYTHING, 49560d20f91SSargun Dhillon }; 49660d20f91SSargun Dhillon 497a5e8c070SGianluca Borello BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size, 498a5e8c070SGianluca Borello const void *, unsafe_ptr) 499a5e8c070SGianluca Borello { 500a5e8c070SGianluca Borello int ret; 501a5e8c070SGianluca Borello 502a5e8c070SGianluca Borello /* 503a5e8c070SGianluca Borello * The strncpy_from_unsafe() call will likely not fill the entire 504a5e8c070SGianluca Borello * buffer, but that's okay in this circumstance as we're probing 505a5e8c070SGianluca Borello * arbitrary memory anyway similar to bpf_probe_read() and might 506a5e8c070SGianluca Borello * as well probe the stack. Thus, memory is explicitly cleared 507a5e8c070SGianluca Borello * only in error case, so that improper users ignoring return 508a5e8c070SGianluca Borello * code altogether don't copy garbage; otherwise length of string 509a5e8c070SGianluca Borello * is returned that can be used for bpf_perf_event_output() et al. 
510a5e8c070SGianluca Borello */ 511a5e8c070SGianluca Borello ret = strncpy_from_unsafe(dst, unsafe_ptr, size); 512a5e8c070SGianluca Borello if (unlikely(ret < 0)) 513a5e8c070SGianluca Borello memset(dst, 0, size); 514a5e8c070SGianluca Borello 515a5e8c070SGianluca Borello return ret; 516a5e8c070SGianluca Borello } 517a5e8c070SGianluca Borello 518a5e8c070SGianluca Borello static const struct bpf_func_proto bpf_probe_read_str_proto = { 519a5e8c070SGianluca Borello .func = bpf_probe_read_str, 520a5e8c070SGianluca Borello .gpl_only = true, 521a5e8c070SGianluca Borello .ret_type = RET_INTEGER, 522a5e8c070SGianluca Borello .arg1_type = ARG_PTR_TO_UNINIT_MEM, 5235c4e1201SGianluca Borello .arg2_type = ARG_CONST_SIZE_OR_ZERO, 524a5e8c070SGianluca Borello .arg3_type = ARG_ANYTHING, 525a5e8c070SGianluca Borello }; 526a5e8c070SGianluca Borello 5279fd82b61SAlexei Starovoitov static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) 5282541517cSAlexei Starovoitov { 5292541517cSAlexei Starovoitov switch (func_id) { 5302541517cSAlexei Starovoitov case BPF_FUNC_map_lookup_elem: 5312541517cSAlexei Starovoitov return &bpf_map_lookup_elem_proto; 5322541517cSAlexei Starovoitov case BPF_FUNC_map_update_elem: 5332541517cSAlexei Starovoitov return &bpf_map_update_elem_proto; 5342541517cSAlexei Starovoitov case BPF_FUNC_map_delete_elem: 5352541517cSAlexei Starovoitov return &bpf_map_delete_elem_proto; 5362541517cSAlexei Starovoitov case BPF_FUNC_probe_read: 5372541517cSAlexei Starovoitov return &bpf_probe_read_proto; 538d9847d31SAlexei Starovoitov case BPF_FUNC_ktime_get_ns: 539d9847d31SAlexei Starovoitov return &bpf_ktime_get_ns_proto; 54004fd61abSAlexei Starovoitov case BPF_FUNC_tail_call: 54104fd61abSAlexei Starovoitov return &bpf_tail_call_proto; 542ffeedafbSAlexei Starovoitov case BPF_FUNC_get_current_pid_tgid: 543ffeedafbSAlexei Starovoitov return &bpf_get_current_pid_tgid_proto; 544606274c5SAlexei Starovoitov case BPF_FUNC_get_current_task: 545606274c5SAlexei 
/* Tail of tracing_func_proto(): helper protos shared by all tracing program
 * types (the function head is above this chunk). Both kprobe_prog_func_proto()
 * and tp_prog_func_proto() below fall back to it for anything they do not
 * handle themselves; unknown func_ids yield NULL. */
Starovoitov return &bpf_get_current_task_proto; 546ffeedafbSAlexei Starovoitov case BPF_FUNC_get_current_uid_gid: 547ffeedafbSAlexei Starovoitov return &bpf_get_current_uid_gid_proto; 548ffeedafbSAlexei Starovoitov case BPF_FUNC_get_current_comm: 549ffeedafbSAlexei Starovoitov return &bpf_get_current_comm_proto; 5509c959c86SAlexei Starovoitov case BPF_FUNC_trace_printk: 5510756ea3eSAlexei Starovoitov return bpf_get_trace_printk_proto(); 552ab1973d3SAlexei Starovoitov case BPF_FUNC_get_smp_processor_id: 553ab1973d3SAlexei Starovoitov return &bpf_get_smp_processor_id_proto; 5542d0e30c3SDaniel Borkmann case BPF_FUNC_get_numa_node_id: 5552d0e30c3SDaniel Borkmann return &bpf_get_numa_node_id_proto; 55635578d79SKaixu Xia case BPF_FUNC_perf_event_read: 55735578d79SKaixu Xia return &bpf_perf_event_read_proto; 55896ae5227SSargun Dhillon case BPF_FUNC_probe_write_user: 55996ae5227SSargun Dhillon return bpf_get_probe_write_proto(); 56060d20f91SSargun Dhillon case BPF_FUNC_current_task_under_cgroup: 56160d20f91SSargun Dhillon return &bpf_current_task_under_cgroup_proto; 5628937bd80SAlexei Starovoitov case BPF_FUNC_get_prandom_u32: 5638937bd80SAlexei Starovoitov return &bpf_get_prandom_u32_proto; 564a5e8c070SGianluca Borello case BPF_FUNC_probe_read_str: 565a5e8c070SGianluca Borello return &bpf_probe_read_str_proto; 5669fd82b61SAlexei Starovoitov default: 5679fd82b61SAlexei Starovoitov return NULL; 5689fd82b61SAlexei Starovoitov } 5699fd82b61SAlexei Starovoitov } 5709fd82b61SAlexei Starovoitov 5719fd82b61SAlexei Starovoitov static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) 5729fd82b61SAlexei Starovoitov { 5739fd82b61SAlexei Starovoitov switch (func_id) { 574a43eec30SAlexei Starovoitov case BPF_FUNC_perf_event_output: 575a43eec30SAlexei Starovoitov return &bpf_perf_event_output_proto; 576d5a3b1f6SAlexei Starovoitov case BPF_FUNC_get_stackid: 577d5a3b1f6SAlexei Starovoitov return &bpf_get_stackid_proto; 578908432caSYonghong Song case
/* kprobe_prog_func_proto() continues: kprobe-specific helpers
 * (perf_event_read_value, and override_return only when
 * CONFIG_BPF_KPROBE_OVERRIDE is set); everything else defers to
 * tracing_func_proto(). kprobe_prog_is_valid_access() then restricts
 * kprobe program context access to aligned BPF_READs fully inside
 * struct pt_regs (the off+size bound also rejects an 8-byte BPF_DW
 * load of a trailing 4-byte member on 32-bit). */
BPF_FUNC_perf_event_read_value: 579908432caSYonghong Song return &bpf_perf_event_read_value_proto; 5809802d865SJosef Bacik #ifdef CONFIG_BPF_KPROBE_OVERRIDE 5819802d865SJosef Bacik case BPF_FUNC_override_return: 5829802d865SJosef Bacik return &bpf_override_return_proto; 5839802d865SJosef Bacik #endif 5842541517cSAlexei Starovoitov default: 5859fd82b61SAlexei Starovoitov return tracing_func_proto(func_id); 5862541517cSAlexei Starovoitov } 5872541517cSAlexei Starovoitov } 5882541517cSAlexei Starovoitov 5892541517cSAlexei Starovoitov /* bpf+kprobe programs can access fields of 'struct pt_regs' */ 59019de99f7SAlexei Starovoitov static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 59123994631SYonghong Song struct bpf_insn_access_aux *info) 5922541517cSAlexei Starovoitov { 5932541517cSAlexei Starovoitov if (off < 0 || off >= sizeof(struct pt_regs)) 5942541517cSAlexei Starovoitov return false; 5952541517cSAlexei Starovoitov if (type != BPF_READ) 5962541517cSAlexei Starovoitov return false; 5972541517cSAlexei Starovoitov if (off % size != 0) 5982541517cSAlexei Starovoitov return false; 5992d071c64SDaniel Borkmann /* 6002d071c64SDaniel Borkmann * Assertion for 32 bit to make sure last 8 byte access 6012d071c64SDaniel Borkmann * (BPF_DW) to the last 4 byte member is disallowed.
6022d071c64SDaniel Borkmann */ 6032d071c64SDaniel Borkmann if (off + size > sizeof(struct pt_regs)) 6042d071c64SDaniel Borkmann return false; 6052d071c64SDaniel Borkmann 6062541517cSAlexei Starovoitov return true; 6072541517cSAlexei Starovoitov } 6082541517cSAlexei Starovoitov 6097de16e3aSJakub Kicinski const struct bpf_verifier_ops kprobe_verifier_ops = { 6102541517cSAlexei Starovoitov .get_func_proto = kprobe_prog_func_proto, 6112541517cSAlexei Starovoitov .is_valid_access = kprobe_prog_is_valid_access, 6122541517cSAlexei Starovoitov }; 6132541517cSAlexei Starovoitov 6147de16e3aSJakub Kicinski const struct bpf_prog_ops kprobe_prog_ops = { 6157de16e3aSJakub Kicinski }; 6167de16e3aSJakub Kicinski 617f3694e00SDaniel Borkmann BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, 618f3694e00SDaniel Borkmann u64, flags, void *, data, u64, size) 6199940d67cSAlexei Starovoitov { 620f3694e00SDaniel Borkmann struct pt_regs *regs = *(struct pt_regs **)tp_buff; 621f3694e00SDaniel Borkmann 6229940d67cSAlexei Starovoitov /* 6239940d67cSAlexei Starovoitov * r1 points to perf tracepoint buffer where first 8 bytes are hidden 6249940d67cSAlexei Starovoitov * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it 625f3694e00SDaniel Borkmann * from there and call the same bpf_perf_event_output() helper inline.
6269940d67cSAlexei Starovoitov */ 627f3694e00SDaniel Borkmann return ____bpf_perf_event_output(regs, map, flags, data, size); 6289940d67cSAlexei Starovoitov } 6299940d67cSAlexei Starovoitov 6309940d67cSAlexei Starovoitov static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { 6319940d67cSAlexei Starovoitov .func = bpf_perf_event_output_tp, 6329940d67cSAlexei Starovoitov .gpl_only = true, 6339940d67cSAlexei Starovoitov .ret_type = RET_INTEGER, 6349940d67cSAlexei Starovoitov .arg1_type = ARG_PTR_TO_CTX, 6359940d67cSAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 6369940d67cSAlexei Starovoitov .arg3_type = ARG_ANYTHING, 63739f19ebbSAlexei Starovoitov .arg4_type = ARG_PTR_TO_MEM, 638a60dd35dSGianluca Borello .arg5_type = ARG_CONST_SIZE_OR_ZERO, 6399940d67cSAlexei Starovoitov }; 6409940d67cSAlexei Starovoitov 641f3694e00SDaniel Borkmann BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, 642f3694e00SDaniel Borkmann u64, flags) 6439940d67cSAlexei Starovoitov { 644f3694e00SDaniel Borkmann struct pt_regs *regs = *(struct pt_regs **)tp_buff; 6459940d67cSAlexei Starovoitov 646f3694e00SDaniel Borkmann /* 647f3694e00SDaniel Borkmann * Same comment as in bpf_perf_event_output_tp(), only that this time 648f3694e00SDaniel Borkmann * the other helper's function body cannot be inlined due to being 649f3694e00SDaniel Borkmann * external, thus we need to call raw helper function.
650f3694e00SDaniel Borkmann */ 651f3694e00SDaniel Borkmann return bpf_get_stackid((unsigned long) regs, (unsigned long) map, 652f3694e00SDaniel Borkmann flags, 0, 0); 6539940d67cSAlexei Starovoitov } 6549940d67cSAlexei Starovoitov 6559940d67cSAlexei Starovoitov static const struct bpf_func_proto bpf_get_stackid_proto_tp = { 6569940d67cSAlexei Starovoitov .func = bpf_get_stackid_tp, 6579940d67cSAlexei Starovoitov .gpl_only = true, 6589940d67cSAlexei Starovoitov .ret_type = RET_INTEGER, 6599940d67cSAlexei Starovoitov .arg1_type = ARG_PTR_TO_CTX, 6609940d67cSAlexei Starovoitov .arg2_type = ARG_CONST_MAP_PTR, 6619940d67cSAlexei Starovoitov .arg3_type = ARG_ANYTHING, 6629940d67cSAlexei Starovoitov }; 6639940d67cSAlexei Starovoitov 6644bebdc7aSYonghong Song BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx, 6654bebdc7aSYonghong Song struct bpf_perf_event_value *, buf, u32, size) 6664bebdc7aSYonghong Song { 6674bebdc7aSYonghong Song int err = -EINVAL; 6684bebdc7aSYonghong Song 6694bebdc7aSYonghong Song if (unlikely(size != sizeof(struct bpf_perf_event_value))) 6704bebdc7aSYonghong Song goto clear; 6714bebdc7aSYonghong Song err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled, 6724bebdc7aSYonghong Song &buf->running); 6734bebdc7aSYonghong Song if (unlikely(err)) 6744bebdc7aSYonghong Song goto clear; 6754bebdc7aSYonghong Song return 0; 6764bebdc7aSYonghong Song clear: 6774bebdc7aSYonghong Song memset(buf, 0, size); 6784bebdc7aSYonghong Song return err; 6794bebdc7aSYonghong Song } 6804bebdc7aSYonghong Song 6814bebdc7aSYonghong Song static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = { 6824bebdc7aSYonghong Song .func = bpf_perf_prog_read_value_tp, 6834bebdc7aSYonghong Song .gpl_only = true, 6844bebdc7aSYonghong Song .ret_type = RET_INTEGER, 6854bebdc7aSYonghong Song .arg1_type = ARG_PTR_TO_CTX, 6864bebdc7aSYonghong Song .arg2_type = ARG_PTR_TO_UNINIT_MEM, 6874bebdc7aSYonghong Song .arg3_type = ARG_CONST_SIZE,
/* bpf_perf_prog_read_value_tp() zeroes *buf on any failure path so the BPF
 * program never reads uninitialized memory; size must equal
 * sizeof(struct bpf_perf_event_value) or it fails with -EINVAL.
 * Below: tp_prog_func_proto() dispatch for tracepoint programs, and
 * tp_prog_is_valid_access(), which permits only aligned BPF_READs in
 * [sizeof(void *), PERF_MAX_TRACE_SIZE) of the tracepoint buffer — the
 * first 8 bytes are the hidden pt_regs pointer fetched by the _tp helpers
 * above. */
6884bebdc7aSYonghong Song }; 6894bebdc7aSYonghong Song 6909fd82b61SAlexei Starovoitov static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) 6919fd82b61SAlexei Starovoitov { 6929fd82b61SAlexei Starovoitov switch (func_id) { 6939fd82b61SAlexei Starovoitov case BPF_FUNC_perf_event_output: 6949940d67cSAlexei Starovoitov return &bpf_perf_event_output_proto_tp; 6959fd82b61SAlexei Starovoitov case BPF_FUNC_get_stackid: 6969940d67cSAlexei Starovoitov return &bpf_get_stackid_proto_tp; 6974bebdc7aSYonghong Song case BPF_FUNC_perf_prog_read_value: 6984bebdc7aSYonghong Song return &bpf_perf_prog_read_value_proto_tp; 6999fd82b61SAlexei Starovoitov default: 7009fd82b61SAlexei Starovoitov return tracing_func_proto(func_id); 7019fd82b61SAlexei Starovoitov } 7029fd82b61SAlexei Starovoitov } 7039fd82b61SAlexei Starovoitov 70419de99f7SAlexei Starovoitov static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, 70523994631SYonghong Song struct bpf_insn_access_aux *info) 7069fd82b61SAlexei Starovoitov { 7079fd82b61SAlexei Starovoitov if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) 7089fd82b61SAlexei Starovoitov return false; 7099fd82b61SAlexei Starovoitov if (type != BPF_READ) 7109fd82b61SAlexei Starovoitov return false; 7119fd82b61SAlexei Starovoitov if (off % size != 0) 7129fd82b61SAlexei Starovoitov return false; 7132d071c64SDaniel Borkmann 7142d071c64SDaniel Borkmann BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64)); 7159fd82b61SAlexei Starovoitov return true; 7169fd82b61SAlexei Starovoitov } 7179fd82b61SAlexei Starovoitov 7187de16e3aSJakub Kicinski const struct bpf_verifier_ops tracepoint_verifier_ops = { 7199fd82b61SAlexei Starovoitov .get_func_proto = tp_prog_func_proto, 7209fd82b61SAlexei Starovoitov .is_valid_access = tp_prog_is_valid_access, 7219fd82b61SAlexei Starovoitov }; 7229fd82b61SAlexei Starovoitov 7237de16e3aSJakub Kicinski const struct bpf_prog_ops tracepoint_prog_ops = { 7247de16e3aSJakub Kicinski };
7257de16e3aSJakub Kicinski 7260515e599SAlexei Starovoitov static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, 72723994631SYonghong Song struct bpf_insn_access_aux *info) 7280515e599SAlexei Starovoitov { 729f96da094SDaniel Borkmann const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data, 730f96da094SDaniel Borkmann sample_period); 73131fd8581SYonghong Song 7320515e599SAlexei Starovoitov if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) 7330515e599SAlexei Starovoitov return false; 7340515e599SAlexei Starovoitov if (type != BPF_READ) 7350515e599SAlexei Starovoitov return false; 7360515e599SAlexei Starovoitov if (off % size != 0) 7370515e599SAlexei Starovoitov return false; 73831fd8581SYonghong Song 739f96da094SDaniel Borkmann switch (off) { 740f96da094SDaniel Borkmann case bpf_ctx_range(struct bpf_perf_event_data, sample_period): 741f96da094SDaniel Borkmann bpf_ctx_record_field_size(info, size_sp); 742f96da094SDaniel Borkmann if (!bpf_ctx_narrow_access_ok(off, size, size_sp)) 74323994631SYonghong Song return false; 744f96da094SDaniel Borkmann break; 745f96da094SDaniel Borkmann default: 7460515e599SAlexei Starovoitov if (size != sizeof(long)) 7470515e599SAlexei Starovoitov return false; 7480515e599SAlexei Starovoitov } 749f96da094SDaniel Borkmann 7500515e599SAlexei Starovoitov return true; 7510515e599SAlexei Starovoitov } 7520515e599SAlexei Starovoitov 7536b8cc1d1SDaniel Borkmann static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, 7546b8cc1d1SDaniel Borkmann const struct bpf_insn *si, 7550515e599SAlexei Starovoitov struct bpf_insn *insn_buf, 756f96da094SDaniel Borkmann struct bpf_prog *prog, u32 *target_size) 7570515e599SAlexei Starovoitov { 7580515e599SAlexei Starovoitov struct bpf_insn *insn = insn_buf; 7590515e599SAlexei Starovoitov 7606b8cc1d1SDaniel Borkmann switch (si->off) { 7610515e599SAlexei Starovoitov case offsetof(struct bpf_perf_event_data, sample_period): 762f035a515SDaniel Borkmann *insn++ = 
BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 7636b8cc1d1SDaniel Borkmann data), si->dst_reg, si->src_reg, 7640515e599SAlexei Starovoitov offsetof(struct bpf_perf_event_data_kern, data)); 7656b8cc1d1SDaniel Borkmann *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, 766f96da094SDaniel Borkmann bpf_target_off(struct perf_sample_data, period, 8, 767f96da094SDaniel Borkmann target_size)); 7680515e599SAlexei Starovoitov break; 7690515e599SAlexei Starovoitov default: 770f035a515SDaniel Borkmann *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, 7716b8cc1d1SDaniel Borkmann regs), si->dst_reg, si->src_reg, 7720515e599SAlexei Starovoitov offsetof(struct bpf_perf_event_data_kern, regs)); 7736b8cc1d1SDaniel Borkmann *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg, 7746b8cc1d1SDaniel Borkmann si->off); 7750515e599SAlexei Starovoitov break; 7760515e599SAlexei Starovoitov } 7770515e599SAlexei Starovoitov 7780515e599SAlexei Starovoitov return insn - insn_buf; 7790515e599SAlexei Starovoitov } 7800515e599SAlexei Starovoitov 7817de16e3aSJakub Kicinski const struct bpf_verifier_ops perf_event_verifier_ops = { 7820515e599SAlexei Starovoitov .get_func_proto = tp_prog_func_proto, 7830515e599SAlexei Starovoitov .is_valid_access = pe_prog_is_valid_access, 7840515e599SAlexei Starovoitov .convert_ctx_access = pe_prog_convert_ctx_access, 7850515e599SAlexei Starovoitov }; 7867de16e3aSJakub Kicinski 7877de16e3aSJakub Kicinski const struct bpf_prog_ops perf_event_prog_ops = { 7887de16e3aSJakub Kicinski }; 789e87c6bc3SYonghong Song 790e87c6bc3SYonghong Song static DEFINE_MUTEX(bpf_event_mutex); 791e87c6bc3SYonghong Song 792c8c088baSYonghong Song #define BPF_TRACE_MAX_PROGS 64 793c8c088baSYonghong Song 794e87c6bc3SYonghong Song int perf_event_attach_bpf_prog(struct perf_event *event, 795e87c6bc3SYonghong Song struct bpf_prog *prog) 796e87c6bc3SYonghong Song { 797e87c6bc3SYonghong Song struct bpf_prog_array __rcu *old_array; 
798e87c6bc3SYonghong Song struct bpf_prog_array *new_array; 799e87c6bc3SYonghong Song int ret = -EEXIST; 800e87c6bc3SYonghong Song 8019802d865SJosef Bacik /* 802b4da3340SMasami Hiramatsu * Kprobe override only works if they are on the function entry, 803b4da3340SMasami Hiramatsu * and only if they are on the opt-in list. 8049802d865SJosef Bacik */ 8059802d865SJosef Bacik if (prog->kprobe_override && 806b4da3340SMasami Hiramatsu (!trace_kprobe_on_func_entry(event->tp_event) || 8079802d865SJosef Bacik !trace_kprobe_error_injectable(event->tp_event))) 8089802d865SJosef Bacik return -EINVAL; 8099802d865SJosef Bacik 810e87c6bc3SYonghong Song mutex_lock(&bpf_event_mutex); 811e87c6bc3SYonghong Song 812e87c6bc3SYonghong Song if (event->prog) 81307c41a29SYonghong Song goto unlock; 814e87c6bc3SYonghong Song 81507c41a29SYonghong Song old_array = event->tp_event->prog_array; 816c8c088baSYonghong Song if (old_array && 817c8c088baSYonghong Song bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { 818c8c088baSYonghong Song ret = -E2BIG; 819c8c088baSYonghong Song goto unlock; 820c8c088baSYonghong Song } 821c8c088baSYonghong Song 822e87c6bc3SYonghong Song ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); 823e87c6bc3SYonghong Song if (ret < 0) 82407c41a29SYonghong Song goto unlock; 825e87c6bc3SYonghong Song 826e87c6bc3SYonghong Song /* set the new array to event->tp_event and set event->prog */ 827e87c6bc3SYonghong Song event->prog = prog; 828e87c6bc3SYonghong Song rcu_assign_pointer(event->tp_event->prog_array, new_array); 829e87c6bc3SYonghong Song bpf_prog_array_free(old_array); 830e87c6bc3SYonghong Song 83107c41a29SYonghong Song unlock: 832e87c6bc3SYonghong Song mutex_unlock(&bpf_event_mutex); 833e87c6bc3SYonghong Song return ret; 834e87c6bc3SYonghong Song } 835e87c6bc3SYonghong Song 836e87c6bc3SYonghong Song void perf_event_detach_bpf_prog(struct perf_event *event) 837e87c6bc3SYonghong Song { 838e87c6bc3SYonghong Song struct bpf_prog_array __rcu *old_array; 
839e87c6bc3SYonghong Song struct bpf_prog_array *new_array; 840e87c6bc3SYonghong Song int ret; 841e87c6bc3SYonghong Song 842e87c6bc3SYonghong Song mutex_lock(&bpf_event_mutex); 843e87c6bc3SYonghong Song 844e87c6bc3SYonghong Song if (!event->prog) 84507c41a29SYonghong Song goto unlock; 846e87c6bc3SYonghong Song 84707c41a29SYonghong Song old_array = event->tp_event->prog_array; 848e87c6bc3SYonghong Song ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); 849e87c6bc3SYonghong Song if (ret < 0) { 850e87c6bc3SYonghong Song bpf_prog_array_delete_safe(old_array, event->prog); 851e87c6bc3SYonghong Song } else { 852e87c6bc3SYonghong Song rcu_assign_pointer(event->tp_event->prog_array, new_array); 853e87c6bc3SYonghong Song bpf_prog_array_free(old_array); 854e87c6bc3SYonghong Song } 855e87c6bc3SYonghong Song 856e87c6bc3SYonghong Song bpf_prog_put(event->prog); 857e87c6bc3SYonghong Song event->prog = NULL; 858e87c6bc3SYonghong Song 85907c41a29SYonghong Song unlock: 860e87c6bc3SYonghong Song mutex_unlock(&bpf_event_mutex); 861e87c6bc3SYonghong Song } 862f371b304SYonghong Song 863f4e2298eSYonghong Song int perf_event_query_prog_array(struct perf_event *event, void __user *info) 864f371b304SYonghong Song { 865f371b304SYonghong Song struct perf_event_query_bpf __user *uquery = info; 866f371b304SYonghong Song struct perf_event_query_bpf query = {}; 867f371b304SYonghong Song int ret; 868f371b304SYonghong Song 869f371b304SYonghong Song if (!capable(CAP_SYS_ADMIN)) 870f371b304SYonghong Song return -EPERM; 871f371b304SYonghong Song if (event->attr.type != PERF_TYPE_TRACEPOINT) 872f371b304SYonghong Song return -EINVAL; 873f371b304SYonghong Song if (copy_from_user(&query, uquery, sizeof(query))) 874f371b304SYonghong Song return -EFAULT; 875f371b304SYonghong Song 876f371b304SYonghong Song mutex_lock(&bpf_event_mutex); 877f371b304SYonghong Song ret = bpf_prog_array_copy_info(event->tp_event->prog_array, 878f371b304SYonghong Song uquery->ids, 879f371b304SYonghong Song 
query.ids_len, 880f371b304SYonghong Song &uquery->prog_cnt); 881f371b304SYonghong Song mutex_unlock(&bpf_event_mutex); 882f371b304SYonghong Song 883f371b304SYonghong Song return ret; 884f371b304SYonghong Song } 885