/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * the kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we did a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the prog_array fetched by bpf_prog_array_valid() was non-NULL,
	 * we enter trace_call_bpf() and do the actual proper
	 * rcu_dereference() under the RCU lock; if it turns out that
	 * prog_array is NULL there, we bail out.
	 * Conversely, if the fetched pointer was NULL, the call is skipped
	 * entirely, at the accepted risk of missing events if the array was
	 * updated between that check and the rcu_dereference().
	 */
	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);

 out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
	int ret;

	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
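
/*
 * Illustrative sketch of how a tracing program might call bpf_probe_read()
 * from the BPF side (not built as part of this file). It assumes a
 * clang-compiled program using the SEC() and PT_REGS_PARM1() macros from
 * samples/bpf-style bpf_helpers.h; the probed function and the field read
 * are only examples:
 *
 *	SEC("kprobe/ip_rcv")
 *	int probe_ip_rcv(struct pt_regs *ctx)
 *	{
 *		struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM1(ctx);
 *		unsigned int len = 0;
 *
 *		// copy skb->len; on failure the destination stays zeroed
 *		bpf_probe_read(&len, sizeof(len), &skb->len);
 *		// ... e.g. aggregate len in a map ...
 *		return 0;
 *	}
 */
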
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(uaccess_kernel()))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

/*
 * Only limited trace_printk() conversion specifiers allowed:
 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
 */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	bool str_seen = false;
	int mod[3] = {};
	int fmt_cnt = 0;
	u64 unsafe_addr;
	char buf[64];
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
			if (fmt[i - 1] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
					unsafe_addr = arg1;
					arg1 = (long) buf;
					break;
				case 2:
					unsafe_addr = arg2;
					arg2 = (long) buf;
					break;
				case 3:
					unsafe_addr = arg3;
					arg3 = (long) buf;
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' &&
		    fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

/* Horrid workaround for getting va_list handling working with different
 * argument type combinations generically for 32 and 64 bit archs.
 */
#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
#define __BPF_TP(...)							\
	__trace_printk(1 /* Fake ip will not be printed. */,		\
		       fmt, ##__VA_ARGS__)

#define __BPF_ARG1_TP(...)						\
	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))

#define __BPF_ARG2_TP(...)						\
	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))

#define __BPF_ARG3_TP(...)						\
	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))

	return __BPF_TP_EMIT();
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}
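
/*
 * Illustrative sketch of calling bpf_trace_printk() from a BPF program
 * (not built as part of this file). Only the specifiers listed above are
 * accepted, at most three arguments and at most one '%s'; the format
 * string must live on the BPF stack and include its terminating NUL.
 * 'filename' stands for a kernel string pointer obtained elsewhere:
 *
 *	char fmt[] = "pid %d opened %s\n";
 *
 *	bpf_trace_printk(fmt, sizeof(fmt),
 *			 bpf_get_current_pid_tgid() >> 32, filename);
 *
 * The output shows up in /sys/kernel/debug/tracing/trace_pipe.
 */
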
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};
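
/*
 * Illustrative sketch of reading a perf counter from a BPF program through
 * a BPF_MAP_TYPE_PERF_EVENT_ARRAY map (not built as part of this file). It
 * assumes the samples/bpf-style struct bpf_map_def and SEC() helpers and
 * that user space has opened one perf event per CPU and stored the fds in
 * the map; the map name is only an example:
 *
 *	struct bpf_map_def SEC("maps") cycles = {
 *		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size = sizeof(int),
 *		.value_size = sizeof(u32),
 *		.max_entries = 64,
 *	};
 *
 *	u64 count = bpf_perf_event_read(&cycles, BPF_F_CURRENT_CPU);
 *
 * bpf_perf_event_read_value() additionally reports the enabled/running
 * times into a struct bpf_perf_event_value supplied by the program.
 */
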
static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);

static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_raw_record *raw)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	perf_sample_data_init(sd, 0, 0);
	sd->raw = raw;
	perf_event_output(event, sd, regs);
	return 0;
}

BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	return __bpf_perf_event_output(regs, map, flags, &raw);
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
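
/*
 * Illustrative sketch of streaming data to user space with
 * bpf_perf_event_output() (not built as part of this file). The map name
 * and event struct are examples; user space reads the records through the
 * perf ring buffer of per-CPU PERF_COUNT_SW_BPF_OUTPUT events, as done by
 * samples/bpf/trace_output_user.c:
 *
 *	struct bpf_map_def SEC("maps") events = {
 *		.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size = sizeof(int),
 *		.value_size = sizeof(u32),
 *		.max_entries = 64,
 *	};
 *
 *	struct event {
 *		u32 pid;
 *		u64 ts;
 *	} e = {
 *		.pid = bpf_get_current_pid_tgid() >> 32,
 *		.ts = bpf_ktime_get_ns(),
 *	};
 *
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *			      &e, sizeof(e));
 */
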
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};

	perf_fetch_caller_regs(regs);

	return __bpf_perf_event_output(regs, map, flags, &raw);
}

BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(in_interrupt()))
		return -EINVAL;
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_unsafe() call will likely not fill the entire
	 * buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_str_proto = {
	.func		= bpf_probe_read_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
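
/*
 * Illustrative sketch of capturing a NUL-terminated string with
 * bpf_probe_read_str() (not built as part of this file); the probed
 * function is only an example and PT_REGS_PARM2() comes from
 * samples/bpf-style bpf_helpers.h:
 *
 *	SEC("kprobe/do_sys_open")
 *	int probe_open(struct pt_regs *ctx)
 *	{
 *		const char *name = (const char *)PT_REGS_PARM2(ctx);
 *		char buf[64];
 *		int len;
 *
 *		len = bpf_probe_read_str(buf, sizeof(buf), name);
 *		// ... e.g. emit buf/len via bpf_perf_event_output() ...
 *		return 0;
 *	}
 *
 * On success, len includes the trailing NUL and can be used directly as
 * the size argument of bpf_perf_event_output(); on error the buffer is
 * zeroed and a negative value is returned.
 */
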
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_read_str:
		return &bpf_probe_read_str_proto;
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	default:
		return tracing_func_proto(func_id);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto		= kprobe_prog_func_proto,
	.is_valid_access	= kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};

BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to the perf tracepoint buffer, whose first 8 bytes are
	 * hidden from the bpf program and contain a pointer to
	 * 'struct pt_regs'. Fetch it from there and call the same
	 * bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call the raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
	.func		= bpf_perf_prog_read_value_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto_tp;
	default:
		return tracing_func_proto(func_id);
	}
}

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};
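
/*
 * Illustrative sketch of a tracepoint program using the context layout
 * enforced by tp_prog_is_valid_access() (not built as part of this file).
 * The struct mirrors the tracepoint's format file, with the first 8 bytes
 * (the hidden 'struct pt_regs *' described above) left as padding; the
 * tracepoint, field names and the 'events' map are examples only:
 *
 *	struct sched_wakeup_args {
 *		unsigned long long pad;		// hidden pt_regs pointer
 *		char comm[16];
 *		int pid;
 *		int prio;
 *		int success;
 *		int target_cpu;
 *	};
 *
 *	SEC("tracepoint/sched/sched_wakeup")
 *	int tp_prog(struct sched_wakeup_args *ctx)
 *	{
 *		int pid = ctx->pid;
 *
 *		return bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *					     &pid, sizeof(pid));
 *	}
 */
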
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    struct bpf_insn_access_aux *info)
{
	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
					 sample_period);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_sp);
		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}

static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}
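
/*
 * Illustrative sketch of the perf_event program type handled above (not
 * built as part of this file). Reads of sample_period are rewritten by
 * pe_prog_convert_ctx_access(); other offsets map onto the saved regs.
 * PT_REGS_IP() is assumed to come from samples/bpf-style bpf_helpers.h:
 *
 *	SEC("perf_event")
 *	int pe_prog(struct bpf_perf_event_data *ctx)
 *	{
 *		u64 ip;
 *
 *		if (ctx->sample_period < 10000)
 *			return 0;
 *		ip = PT_REGS_IP(&ctx->regs);
 *		// ... e.g. aggregate ip in a map ...
 *		return 0;
 *	}
 *
 * For kprobe and tracepoint perf events, user space attaches a loaded
 * program to the perf event fd with
 *
 *	ioctl(perf_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
 *
 * which reaches perf_event_attach_bpf_prog() above; tearing the event down
 * eventually leads to perf_event_detach_bpf_prog().
 */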