/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we do a bpf_prog_array_valid() there to check
	 * whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the prog_array fetched by bpf_prog_array_valid() was
	 * non-NULL, we go into trace_call_bpf() and do the actual
	 * proper rcu_dereference() under RCU lock.
	 * If it turns out that prog_array is NULL, we bail out.
	 * Conversely, if the pointer fetched by bpf_prog_array_valid()
	 * was NULL, the prog_array is skipped, with the accepted risk of
	 * missing out on events that were added in between that check
	 * and the rcu_dereference().
	 */
	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);

out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
	int ret;

	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context, which is safe for the helper to
	 * run in. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(uaccess_kernel()))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

/*
 * Only limited trace_printk() conversion specifiers allowed:
 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
 */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	bool str_seen = false;
	int mod[3] = {};
	int fmt_cnt = 0;
	u64 unsafe_addr;
	char buf[64];
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
			if (fmt[i - 1] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
					unsafe_addr = arg1;
					arg1 = (long) buf;
					break;
				case 2:
					unsafe_addr = arg2;
					arg2 = (long) buf;
					break;
				case 3:
					unsafe_addr = arg3;
					arg3 = (long) buf;
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' &&
		    fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

/* Horrid workaround for getting va_list handling working with different
 * argument type combinations generically for 32 and 64 bit archs.
 */
#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
#define __BPF_TP(...)						\
	__trace_printk(1 /* Fake ip will not be printed. */,	\
		       fmt, ##__VA_ARGS__)

#define __BPF_ARG1_TP(...)						\
	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))

#define __BPF_ARG2_TP(...)						\
	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))

#define __BPF_ARG3_TP(...)						\
	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))

	return __BPF_TP_EMIT();
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}
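
/*
 * For reference, a rough sketch of the BPF-program side (samples/bpf
 * style; the helper declarations, SEC() and PT_REGS_PARM* macros from
 * bpf_helpers.h are assumed, and the probed symbol is only an example).
 * It stays within the limits enforced above: at most three arguments
 * and only the conversion specifiers listed before bpf_trace_printk():
 *
 *	SEC("kprobe/sys_write")
 *	int trace_write(struct pt_regs *ctx)
 *	{
 *		char fmt[] = "write by pid %d, count %lu\n";
 *		u32 pid = bpf_get_current_pid_tgid();
 *
 *		bpf_trace_printk(fmt, sizeof(fmt), pid,
 *				 (unsigned long)PT_REGS_PARM3(ctx));
 *		return 0;
 *	}
 *
 * The output shows up in /sys/kernel/debug/tracing/trace_pipe.
 */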

static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);

static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	perf_event_output(event, sd, regs);
	return 0;
}

BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	return __bpf_perf_event_output(regs, map, flags, sd);
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};
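
/*
 * For reference, a rough sketch of the BPF-program side (samples/bpf
 * style; map, struct and section names are only illustrative). The map
 * must be a BPF_MAP_TYPE_PERF_EVENT_ARRAY whose slots user space filled
 * with PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT events, matching the
 * checks in __bpf_perf_event_output():
 *
 *	struct bpf_map_def SEC("maps") events = {
 *		.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size	= sizeof(int),
 *		.value_size	= sizeof(u32),
 *		.max_entries	= 64,
 *	};
 *
 *	SEC("kprobe/sys_open")
 *	int trace_open(struct pt_regs *ctx)
 *	{
 *		struct { u32 pid; u64 ts; } data = {
 *			.pid = bpf_get_current_pid_tgid(),
 *			.ts  = bpf_ktime_get_ns(),
 *		};
 *
 *		bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *				      &data, sizeof(data));
 *		return 0;
 *	}
 *
 * BPF_F_CURRENT_CPU selects the perf event of the CPU the program runs
 * on; alternatively an explicit index within BPF_F_INDEX_MASK can be
 * passed in the flags argument.
 */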

static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	return __bpf_perf_event_output(regs, map, flags, sd);
}

BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(in_interrupt()))
		return -EINVAL;
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_unsafe() call will likely not fill the entire
	 * buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_str_proto = {
	.func		= bpf_probe_read_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_read_str:
		return &bpf_probe_read_str_proto;
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
	default:
		return tracing_func_proto(func_id);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto		= kprobe_prog_func_proto,
	.is_valid_access	= kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};
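
/*
 * For reference, a rough sketch of a kprobe program's view of the above
 * (samples/bpf style; the PT_REGS_PARM* macros and helper declarations
 * from bpf_helpers.h are assumed, and the probed symbol is only an
 * example). The context is the raw 'struct pt_regs', which
 * kprobe_prog_is_valid_access() only allows to be read with naturally
 * aligned loads; pointers fetched from it must be dereferenced via
 * bpf_probe_read()/bpf_probe_read_str():
 *
 *	SEC("kprobe/do_sys_open")
 *	int trace_do_sys_open(struct pt_regs *ctx)
 *	{
 *		const char *filename = (const char *)PT_REGS_PARM2(ctx);
 *		char fname[64] = {};
 *
 *		bpf_probe_read_str(fname, sizeof(fname), filename);
 *		// fname can now be printed via bpf_trace_printk() or
 *		// pushed to user space with bpf_perf_event_output()
 *		return 0;
 *	}
 */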

BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
	.func		= bpf_perf_prog_read_value_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto_tp;
	default:
		return tracing_func_proto(func_id);
	}
}

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};

static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    struct bpf_insn_access_aux *info)
{
	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
					 sample_period);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_sp);
		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}

static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= tp_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64

int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}
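
/*
 * For reference, a rough sketch of how user space reaches
 * perf_event_attach_bpf_prog() above (error handling omitted; the
 * program is loaded with bpf(BPF_PROG_LOAD) and attached to an already
 * opened kprobe/tracepoint perf event, roughly as samples/bpf does):
 *
 *	int efd = sys_perf_event_open(&attr, -1, 0, -1, 0);
 *	int pfd = bpf_prog_load(...);	// bpf(BPF_PROG_LOAD, ...) wrapper
 *
 *	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
 *	ioctl(efd, PERF_EVENT_IOC_SET_BPF, pfd);
 *
 * perf_event_detach_bpf_prog() runs when the perf event is released,
 * i.e. the attachment lasts for the lifetime of the event fd.
 */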