// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>

#include <asm/tlb.h>

#include "trace_probe.h"
#include "trace.h"

#define bpf_event_rcu_dereference(p)					\
	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

#ifdef CONFIG_MODULES
struct bpf_trace_module {
	struct module *module;
	struct list_head list;
};

static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);

static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	struct bpf_raw_event_map *btp, *ret = NULL;
	struct bpf_trace_module *btm;
	unsigned int i;

	mutex_lock(&bpf_module_mutex);
	list_for_each_entry(btm, &bpf_trace_modules, list) {
		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
			btp = &btm->module->bpf_raw_events[i];
			if (!strcmp(btp->tp->name, name)) {
				if (try_module_get(btm->module))
					ret = btp;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&bpf_module_mutex);
	return ret;
}
#else
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	return NULL;
}
#endif /* CONFIG_MODULES */

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * the kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we did a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the prog_array fetched by bpf_prog_array_valid() was
	 * non-NULL, we go into trace_call_bpf() and do the actual
	 * proper rcu_dereference() under the RCU lock.
	 * If it turns out that prog_array is NULL, we bail out.
	 * For the opposite, if the bpf_prog_array_valid() fetched pointer
	 * was NULL, we skip the prog_array with the risk of missing
	 * out on events when it was updated in between this and the
	 * rcu_dereference(), which is an accepted risk.
	 */
	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);

 out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif

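/*
 * On any failure the destination buffer is zeroed so the BPF program never
 * sees stale data: the "out" label sits inside the if body below, so a
 * lockdown denial takes the same memset() path before the negative error
 * code is returned.
 */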
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_BPF_READ);
	if (ret < 0)
		goto out;

	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
out:
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 *
	 * nmi_uaccess_okay() ensures the probe is not run in an interim
	 * state, when the task or mm are switched. This is specifically
	 * required to prevent the use of temporary mm.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(uaccess_kernel()))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;
	if (!access_ok(unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

/*
 * Only limited trace_printk() conversion specifiers allowed:
 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
 */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	bool str_seen = false;
	int mod[3] = {};
	int fmt_cnt = 0;
	u64 unsafe_addr;
	char buf[64];
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
			mod[fmt_cnt]++;
			/* disallow any further format extensions */
			if (fmt[i + 1] != 0 &&
			    !isspace(fmt[i + 1]) &&
			    !ispunct(fmt[i + 1]))
				return -EINVAL;
			fmt_cnt++;
			if (fmt[i] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
					unsafe_addr = arg1;
					arg1 = (long) buf;
					break;
				case 2:
					unsafe_addr = arg2;
					arg2 = (long) buf;
					break;
				case 3:
					unsafe_addr = arg3;
					arg3 = (long) buf;
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' &&
		    fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

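/*
 * mod[i] records the width modifier seen for argument i: 0 for a plain
 * %d/%i/%u/%x, 1 for a single 'l' (and for %p/%s, which are pointer sized),
 * 2 for 'll'. The macros below use it to pass each argument to
 * __trace_printk() as u32, long or u64, so the va_list layout matches what
 * the format string promises on both 32 and 64 bit architectures.
 */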
/* Horrid workaround for getting va_list handling working with different
 * argument type combinations generically for 32 and 64 bit archs.
 */
#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
#define __BPF_TP(...)							\
	__trace_printk(0 /* Fake ip */,					\
		       fmt, ##__VA_ARGS__)

#define __BPF_ARG1_TP(...)						\
	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))

#define __BPF_ARG2_TP(...)						\
	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))

#define __BPF_ARG3_TP(...)						\
	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))

	return __BPF_TP_EMIT();
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}

static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	return perf_event_output(event, sd, regs);
}

/*
 * Support executing tracepoints in normal, irq, and nmi context that each call
 * bpf_perf_event_output
 */
struct bpf_trace_sample_data {
	struct perf_sample_data sds[3];
};

static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};
	struct perf_sample_data *sd;
	int err;

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
		err = -EBUSY;
		goto out;
	}

	sd = &sds->sds[nest_level - 1];

	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
		err = -EINVAL;
		goto out;
	}

	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	err = __bpf_perf_event_output(regs, map, flags, sd);

out:
	this_cpu_dec(bpf_trace_nest_level);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
struct bpf_nested_pt_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};
	struct perf_sample_data *sd;
	struct pt_regs *regs;
	u64 ret;

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
		ret = -EBUSY;
		goto out;
	}
	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
	this_cpu_dec(bpf_event_output_nest_level);
	return ret;
}

BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_BPF_READ);
	if (ret < 0)
		goto out;

	/*
	 * The strncpy_from_unsafe() call will likely not fill the entire
	 * buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
out:
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_str_proto = {
	.func		= bpf_probe_read_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

struct send_signal_irq_work {
	struct irq_work irq_work;
	struct task_struct *task;
	u32 sig;
};

static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);

static void do_bpf_send_signal(struct irq_work *entry)
{
	struct send_signal_irq_work *work;

	work = container_of(entry, struct send_signal_irq_work, irq_work);
	group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
}

BPF_CALL_1(bpf_send_signal, u32, sig)
{
	struct send_signal_irq_work *work = NULL;

	/* Similar to bpf_probe_write_user, task needs to be
	 * in a sound condition and kernel memory access be
	 * permitted in order to send signal to the current
	 * task.
	 */
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(uaccess_kernel()))
		return -EPERM;
	if (unlikely(!nmi_uaccess_okay()))
		return -EPERM;

	if (in_nmi()) {
		/* Do an early check on signal validity. Otherwise,
		 * the error is lost in deferred irq_work.
		 */
		if (unlikely(!valid_signal(sig)))
			return -EINVAL;

		work = this_cpu_ptr(&send_signal_work);
		if (work->irq_work.flags & IRQ_WORK_BUSY)
			return -EBUSY;

		/* Add the current task, which is the target of sending signal,
		 * to the irq_work. The current task may change when queued
		 * irq works get executed.
		 */
		work->task = current;
		work->sig = sig;
		irq_work_queue(&work->irq_work);
		return 0;
	}

	return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
}

static const struct bpf_func_proto bpf_send_signal_proto = {
	.func		= bpf_send_signal,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

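/*
 * Base set of helpers available to all tracing program types; the
 * kprobe/tracepoint/raw tracepoint/perf event specific *_func_proto()
 * callbacks below fall back to this function for any helper they do not
 * handle themselves.
 */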
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_read_str:
		return &bpf_probe_read_str_proto;
#ifdef CONFIG_CGROUPS
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
#endif
	case BPF_FUNC_send_signal:
		return &bpf_send_signal_proto;
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	default:
		return tracing_func_proto(func_id, prog);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto		= kprobe_prog_func_proto,
	.is_valid_access	= kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};

BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};

BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
	.func		= bpf_perf_prog_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
 *
 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
 * in normal, irq, and nmi context.
 */
struct bpf_raw_tp_regs {
	struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
static struct pt_regs *get_bpf_raw_tp_regs(void)
{
	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);

	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
		this_cpu_dec(bpf_raw_tp_nest_level);
		return ERR_PTR(-EBUSY);
	}

	return &tp_regs->regs[nest_level - 1];
}

static void put_bpf_raw_tp_regs(void)
{
	this_cpu_dec(bpf_raw_tp_nest_level);
}

BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			      flags, 0, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);

	perf_fetch_caller_regs(regs);
	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			    (unsigned long) size, flags, 0);
	put_bpf_raw_tp_regs();
	return ret;
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_raw_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_raw_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_raw_tp;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	/* largest tracepoint in the kernel has 12 args */
	if (off < 0 || off >= sizeof(__u64) * 12)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto		= raw_tp_prog_func_proto,
	.is_valid_access	= raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};

static bool raw_tp_writable_prog_is_valid_access(int off, int size,
						 enum bpf_access_type type,
						 const struct bpf_prog *prog,
						 struct bpf_insn_access_aux *info)
{
	if (off == 0) {
		if (size != sizeof(u64) || type != BPF_READ)
			return false;
		info->reg_type = PTR_TO_TP_BUFFER;
	}
	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
}

const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	.get_func_proto		= raw_tp_prog_func_proto,
	.is_valid_access	= raw_tp_writable_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
};

static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0) {
		if (sizeof(unsigned long) != 4)
			return false;
		if (size != 8)
			return false;
		if (off % size != 4)
			return false;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}

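/*
 * Rewrite loads from the user-visible bpf_perf_event_data layout onto the
 * kernel-side bpf_perf_event_data_kern: sample_period and addr are read
 * through ctx->data (struct perf_sample_data), every other offset is read
 * through ctx->regs, so each context access expands into two loads.
 */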
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64

int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if they are on the function entry,
	 * and only if they are on the opt-in list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
	if (ret == -ENOENT)
		goto unlock;
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}

int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}

	return bpf_get_raw_tracepoint_module(name);
}

void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
	struct module *mod = __module_address((unsigned long)btp);

	if (mod)
		module_put(mod);
}

static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
	rcu_read_lock();
	preempt_disable();
	(void) BPF_PROG_RUN(prog, args);
	preempt_enable();
	rcu_read_unlock();
}

#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
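/*
 * REPEAT(X, FN, DL, 0, 1, ...) applies FN() to the first X indices of the
 * trailing list, separated by the delimiter carried in DL ((,) or (;)).
 * For example, REPEAT(2, SARG, __DL_COM, __SEQ_0_11) expands to
 * "u64 arg0 , u64 arg1", which BPF_TRACE_DEFN_x() uses to stamp out the
 * bpf_trace_run1() ... bpf_trace_run12() thunks below.
 */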
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);

static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	struct tracepoint *tp = btp->tp;

	/*
	 * check that program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}

int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return __bpf_probe_register(btp, prog);
}

int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}

int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}

static int __init send_signal_irq_work_init(void)
{
	int cpu;
	struct send_signal_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&send_signal_work, cpu);
		init_irq_work(&work->irq_work, do_bpf_send_signal);
	}
	return 0;
}

subsys_initcall(send_signal_irq_work_init);

#ifdef CONFIG_MODULES
static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
			    void *module)
{
	struct bpf_trace_module *btm, *tmp;
	struct module *mod = module;

	if (mod->num_bpf_raw_events == 0 ||
	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
		return 0;

	mutex_lock(&bpf_module_mutex);

	switch (op) {
	case MODULE_STATE_COMING:
		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
		if (btm) {
			btm->module = module;
			list_add(&btm->list, &bpf_trace_modules);
		}
		break;
	case MODULE_STATE_GOING:
		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
			if (btm->module == module) {
				list_del(&btm->list);
				kfree(btm);
				break;
			}
		}
		break;
	}

	mutex_unlock(&bpf_module_mutex);

	return 0;
}

static struct notifier_block bpf_module_nb = {
	.notifier_call = bpf_event_notify,
};

static int __init bpf_event_init(void)
{
	register_module_notifier(&bpf_module_nb);
	return 0;
}

fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */