1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2016 Facebook 3 */ 4 #include <linux/bpf.h> 5 #include <linux/jhash.h> 6 #include <linux/filter.h> 7 #include <linux/kernel.h> 8 #include <linux/stacktrace.h> 9 #include <linux/perf_event.h> 10 #include <linux/irq_work.h> 11 #include <linux/btf_ids.h> 12 #include <linux/buildid.h> 13 #include "percpu_freelist.h" 14 15 #define STACK_CREATE_FLAG_MASK \ 16 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ 17 BPF_F_STACK_BUILD_ID) 18 19 struct stack_map_bucket { 20 struct pcpu_freelist_node fnode; 21 u32 hash; 22 u32 nr; 23 u64 data[]; 24 }; 25 26 struct bpf_stack_map { 27 struct bpf_map map; 28 void *elems; 29 struct pcpu_freelist freelist; 30 u32 n_buckets; 31 struct stack_map_bucket *buckets[]; 32 }; 33 34 /* irq_work to run up_read() for build_id lookup in nmi context */ 35 struct stack_map_irq_work { 36 struct irq_work irq_work; 37 struct mm_struct *mm; 38 }; 39 40 static void do_up_read(struct irq_work *entry) 41 { 42 struct stack_map_irq_work *work; 43 44 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) 45 return; 46 47 work = container_of(entry, struct stack_map_irq_work, irq_work); 48 mmap_read_unlock_non_owner(work->mm); 49 } 50 51 static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); 52 53 static inline bool stack_map_use_build_id(struct bpf_map *map) 54 { 55 return (map->map_flags & BPF_F_STACK_BUILD_ID); 56 } 57 58 static inline int stack_map_data_size(struct bpf_map *map) 59 { 60 return stack_map_use_build_id(map) ? 61 sizeof(struct bpf_stack_build_id) : sizeof(u64); 62 } 63 64 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) 65 { 66 u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; 67 int err; 68 69 smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, 70 smap->map.numa_node); 71 if (!smap->elems) 72 return -ENOMEM; 73 74 err = pcpu_freelist_init(&smap->freelist); 75 if (err) 76 goto free_elems; 77 78 pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, 79 smap->map.max_entries); 80 return 0; 81 82 free_elems: 83 bpf_map_area_free(smap->elems); 84 return err; 85 } 86 87 /* Called from syscall */ 88 static struct bpf_map *stack_map_alloc(union bpf_attr *attr) 89 { 90 u32 value_size = attr->value_size; 91 struct bpf_stack_map *smap; 92 u64 cost, n_buckets; 93 int err; 94 95 if (!bpf_capable()) 96 return ERR_PTR(-EPERM); 97 98 if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) 99 return ERR_PTR(-EINVAL); 100 101 /* check sanity of attributes */ 102 if (attr->max_entries == 0 || attr->key_size != 4 || 103 value_size < 8 || value_size % 8) 104 return ERR_PTR(-EINVAL); 105 106 BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); 107 if (attr->map_flags & BPF_F_STACK_BUILD_ID) { 108 if (value_size % sizeof(struct bpf_stack_build_id) || 109 value_size / sizeof(struct bpf_stack_build_id) 110 > sysctl_perf_event_max_stack) 111 return ERR_PTR(-EINVAL); 112 } else if (value_size / 8 > sysctl_perf_event_max_stack) 113 return ERR_PTR(-EINVAL); 114 115 /* hash table size must be power of 2 */ 116 n_buckets = roundup_pow_of_two(attr->max_entries); 117 if (!n_buckets) 118 return ERR_PTR(-E2BIG); 119 120 cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); 121 cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); 122 smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); 123 if (!smap) 124 return ERR_PTR(-ENOMEM); 125 126 bpf_map_init_from_attr(&smap->map, attr); 127 smap->map.value_size = value_size; 128 smap->n_buckets = n_buckets; 129 130 err = get_callchain_buffers(sysctl_perf_event_max_stack); 131 if (err) 132 goto free_smap; 133 134 err = prealloc_elems_and_freelist(smap); 135 if (err) 136 goto put_buffers; 137 138 return &smap->map; 139 140 put_buffers: 141 put_callchain_buffers(); 142 free_smap: 143 bpf_map_area_free(smap); 144 return ERR_PTR(err); 145 } 146 147 static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, 148 u64 *ips, u32 trace_nr, bool user) 149 { 150 int i; 151 struct vm_area_struct *vma; 152 bool irq_work_busy = false; 153 struct stack_map_irq_work *work = NULL; 154 155 if (irqs_disabled()) { 156 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 157 work = this_cpu_ptr(&up_read_work); 158 if (irq_work_is_busy(&work->irq_work)) { 159 /* cannot queue more up_read, fallback */ 160 irq_work_busy = true; 161 } 162 } else { 163 /* 164 * PREEMPT_RT does not allow to trylock mmap sem in 165 * interrupt disabled context. Force the fallback code. 166 */ 167 irq_work_busy = true; 168 } 169 } 170 171 /* 172 * We cannot do up_read() when the irq is disabled, because of 173 * risk to deadlock with rq_lock. To do build_id lookup when the 174 * irqs are disabled, we need to run up_read() in irq_work. We use 175 * a percpu variable to do the irq_work. If the irq_work is 176 * already used by another lookup, we fall back to report ips. 177 * 178 * Same fallback is used for kernel stack (!user) on a stackmap 179 * with build_id. 180 */ 181 if (!user || !current || !current->mm || irq_work_busy || 182 !mmap_read_trylock_non_owner(current->mm)) { 183 /* cannot access current->mm, fall back to ips */ 184 for (i = 0; i < trace_nr; i++) { 185 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 186 id_offs[i].ip = ips[i]; 187 memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 188 } 189 return; 190 } 191 192 for (i = 0; i < trace_nr; i++) { 193 vma = find_vma(current->mm, ips[i]); 194 if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) { 195 /* per entry fall back to ips */ 196 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 197 id_offs[i].ip = ips[i]; 198 memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 199 continue; 200 } 201 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] 202 - vma->vm_start; 203 id_offs[i].status = BPF_STACK_BUILD_ID_VALID; 204 } 205 206 if (!work) { 207 mmap_read_unlock_non_owner(current->mm); 208 } else { 209 work->mm = current->mm; 210 irq_work_queue(&work->irq_work); 211 } 212 } 213 214 static struct perf_callchain_entry * 215 get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) 216 { 217 #ifdef CONFIG_STACKTRACE 218 struct perf_callchain_entry *entry; 219 int rctx; 220 221 entry = get_callchain_entry(&rctx); 222 223 if (!entry) 224 return NULL; 225 226 entry->nr = init_nr + 227 stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), 228 sysctl_perf_event_max_stack - init_nr, 0); 229 230 /* stack_trace_save_tsk() works on unsigned long array, while 231 * perf_callchain_entry uses u64 array. For 32-bit systems, it is 232 * necessary to fix this mismatch. 233 */ 234 if (__BITS_PER_LONG != 64) { 235 unsigned long *from = (unsigned long *) entry->ip; 236 u64 *to = entry->ip; 237 int i; 238 239 /* copy data from the end to avoid using extra buffer */ 240 for (i = entry->nr - 1; i >= (int)init_nr; i--) 241 to[i] = (u64)(from[i]); 242 } 243 244 put_callchain_entry(rctx); 245 246 return entry; 247 #else /* CONFIG_STACKTRACE */ 248 return NULL; 249 #endif 250 } 251 252 static long __bpf_get_stackid(struct bpf_map *map, 253 struct perf_callchain_entry *trace, u64 flags) 254 { 255 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 256 struct stack_map_bucket *bucket, *new_bucket, *old_bucket; 257 u32 max_depth = map->value_size / stack_map_data_size(map); 258 /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 259 u32 init_nr = sysctl_perf_event_max_stack - max_depth; 260 u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 261 u32 hash, id, trace_nr, trace_len; 262 bool user = flags & BPF_F_USER_STACK; 263 u64 *ips; 264 bool hash_matches; 265 266 /* get_perf_callchain() guarantees that trace->nr >= init_nr 267 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth 268 */ 269 trace_nr = trace->nr - init_nr; 270 271 if (trace_nr <= skip) 272 /* skipping more than usable stack trace */ 273 return -EFAULT; 274 275 trace_nr -= skip; 276 trace_len = trace_nr * sizeof(u64); 277 ips = trace->ip + skip + init_nr; 278 hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); 279 id = hash & (smap->n_buckets - 1); 280 bucket = READ_ONCE(smap->buckets[id]); 281 282 hash_matches = bucket && bucket->hash == hash; 283 /* fast cmp */ 284 if (hash_matches && flags & BPF_F_FAST_STACK_CMP) 285 return id; 286 287 if (stack_map_use_build_id(map)) { 288 /* for build_id+offset, pop a bucket before slow cmp */ 289 new_bucket = (struct stack_map_bucket *) 290 pcpu_freelist_pop(&smap->freelist); 291 if (unlikely(!new_bucket)) 292 return -ENOMEM; 293 new_bucket->nr = trace_nr; 294 stack_map_get_build_id_offset( 295 (struct bpf_stack_build_id *)new_bucket->data, 296 ips, trace_nr, user); 297 trace_len = trace_nr * sizeof(struct bpf_stack_build_id); 298 if (hash_matches && bucket->nr == trace_nr && 299 memcmp(bucket->data, new_bucket->data, trace_len) == 0) { 300 pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 301 return id; 302 } 303 if (bucket && !(flags & BPF_F_REUSE_STACKID)) { 304 pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 305 return -EEXIST; 306 } 307 } else { 308 if (hash_matches && bucket->nr == trace_nr && 309 memcmp(bucket->data, ips, trace_len) == 0) 310 return id; 311 if (bucket && !(flags & BPF_F_REUSE_STACKID)) 312 return -EEXIST; 313 314 new_bucket = (struct stack_map_bucket *) 315 pcpu_freelist_pop(&smap->freelist); 316 if (unlikely(!new_bucket)) 317 return -ENOMEM; 318 memcpy(new_bucket->data, ips, trace_len); 319 } 320 321 new_bucket->hash = hash; 322 new_bucket->nr = trace_nr; 323 324 old_bucket = xchg(&smap->buckets[id], new_bucket); 325 if (old_bucket) 326 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 327 return id; 328 } 329 330 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, 331 u64, flags) 332 { 333 u32 max_depth = map->value_size / stack_map_data_size(map); 334 /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 335 u32 init_nr = sysctl_perf_event_max_stack - max_depth; 336 bool user = flags & BPF_F_USER_STACK; 337 struct perf_callchain_entry *trace; 338 bool kernel = !user; 339 340 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 341 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 342 return -EINVAL; 343 344 trace = get_perf_callchain(regs, init_nr, kernel, user, 345 sysctl_perf_event_max_stack, false, false); 346 347 if (unlikely(!trace)) 348 /* couldn't fetch the stack trace */ 349 return -EFAULT; 350 351 return __bpf_get_stackid(map, trace, flags); 352 } 353 354 const struct bpf_func_proto bpf_get_stackid_proto = { 355 .func = bpf_get_stackid, 356 .gpl_only = true, 357 .ret_type = RET_INTEGER, 358 .arg1_type = ARG_PTR_TO_CTX, 359 .arg2_type = ARG_CONST_MAP_PTR, 360 .arg3_type = ARG_ANYTHING, 361 }; 362 363 static __u64 count_kernel_ip(struct perf_callchain_entry *trace) 364 { 365 __u64 nr_kernel = 0; 366 367 while (nr_kernel < trace->nr) { 368 if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) 369 break; 370 nr_kernel++; 371 } 372 return nr_kernel; 373 } 374 375 BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, 376 struct bpf_map *, map, u64, flags) 377 { 378 struct perf_event *event = ctx->event; 379 struct perf_callchain_entry *trace; 380 bool kernel, user; 381 __u64 nr_kernel; 382 int ret; 383 384 /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ 385 if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 386 return bpf_get_stackid((unsigned long)(ctx->regs), 387 (unsigned long) map, flags, 0, 0); 388 389 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 390 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 391 return -EINVAL; 392 393 user = flags & BPF_F_USER_STACK; 394 kernel = !user; 395 396 trace = ctx->data->callchain; 397 if (unlikely(!trace)) 398 return -EFAULT; 399 400 nr_kernel = count_kernel_ip(trace); 401 402 if (kernel) { 403 __u64 nr = trace->nr; 404 405 trace->nr = nr_kernel; 406 ret = __bpf_get_stackid(map, trace, flags); 407 408 /* restore nr */ 409 trace->nr = nr; 410 } else { /* user */ 411 u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 412 413 skip += nr_kernel; 414 if (skip > BPF_F_SKIP_FIELD_MASK) 415 return -EFAULT; 416 417 flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 418 ret = __bpf_get_stackid(map, trace, flags); 419 } 420 return ret; 421 } 422 423 const struct bpf_func_proto bpf_get_stackid_proto_pe = { 424 .func = bpf_get_stackid_pe, 425 .gpl_only = false, 426 .ret_type = RET_INTEGER, 427 .arg1_type = ARG_PTR_TO_CTX, 428 .arg2_type = ARG_CONST_MAP_PTR, 429 .arg3_type = ARG_ANYTHING, 430 }; 431 432 static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, 433 struct perf_callchain_entry *trace_in, 434 void *buf, u32 size, u64 flags) 435 { 436 u32 init_nr, trace_nr, copy_len, elem_size, num_elem; 437 bool user_build_id = flags & BPF_F_USER_BUILD_ID; 438 u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 439 bool user = flags & BPF_F_USER_STACK; 440 struct perf_callchain_entry *trace; 441 bool kernel = !user; 442 int err = -EINVAL; 443 u64 *ips; 444 445 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 446 BPF_F_USER_BUILD_ID))) 447 goto clear; 448 if (kernel && user_build_id) 449 goto clear; 450 451 elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) 452 : sizeof(u64); 453 if (unlikely(size % elem_size)) 454 goto clear; 455 456 /* cannot get valid user stack for task without user_mode regs */ 457 if (task && user && !user_mode(regs)) 458 goto err_fault; 459 460 num_elem = size / elem_size; 461 if (sysctl_perf_event_max_stack < num_elem) 462 init_nr = 0; 463 else 464 init_nr = sysctl_perf_event_max_stack - num_elem; 465 466 if (trace_in) 467 trace = trace_in; 468 else if (kernel && task) 469 trace = get_callchain_entry_for_task(task, init_nr); 470 else 471 trace = get_perf_callchain(regs, init_nr, kernel, user, 472 sysctl_perf_event_max_stack, 473 false, false); 474 if (unlikely(!trace)) 475 goto err_fault; 476 477 trace_nr = trace->nr - init_nr; 478 if (trace_nr < skip) 479 goto err_fault; 480 481 trace_nr -= skip; 482 trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; 483 copy_len = trace_nr * elem_size; 484 ips = trace->ip + skip + init_nr; 485 if (user && user_build_id) 486 stack_map_get_build_id_offset(buf, ips, trace_nr, user); 487 else 488 memcpy(buf, ips, copy_len); 489 490 if (size > copy_len) 491 memset(buf + copy_len, 0, size - copy_len); 492 return copy_len; 493 494 err_fault: 495 err = -EFAULT; 496 clear: 497 memset(buf, 0, size); 498 return err; 499 } 500 501 BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, 502 u64, flags) 503 { 504 return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 505 } 506 507 const struct bpf_func_proto bpf_get_stack_proto = { 508 .func = bpf_get_stack, 509 .gpl_only = true, 510 .ret_type = RET_INTEGER, 511 .arg1_type = ARG_PTR_TO_CTX, 512 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 513 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 514 .arg4_type = ARG_ANYTHING, 515 }; 516 517 BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, 518 u32, size, u64, flags) 519 { 520 struct pt_regs *regs; 521 long res; 522 523 if (!try_get_task_stack(task)) 524 return -EFAULT; 525 526 regs = task_pt_regs(task); 527 res = __bpf_get_stack(regs, task, NULL, buf, size, flags); 528 put_task_stack(task); 529 530 return res; 531 } 532 533 BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct) 534 535 const struct bpf_func_proto bpf_get_task_stack_proto = { 536 .func = bpf_get_task_stack, 537 .gpl_only = false, 538 .ret_type = RET_INTEGER, 539 .arg1_type = ARG_PTR_TO_BTF_ID, 540 .arg1_btf_id = &bpf_get_task_stack_btf_ids[0], 541 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 542 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 543 .arg4_type = ARG_ANYTHING, 544 }; 545 546 BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, 547 void *, buf, u32, size, u64, flags) 548 { 549 struct pt_regs *regs = (struct pt_regs *)(ctx->regs); 550 struct perf_event *event = ctx->event; 551 struct perf_callchain_entry *trace; 552 bool kernel, user; 553 int err = -EINVAL; 554 __u64 nr_kernel; 555 556 if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 557 return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 558 559 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 560 BPF_F_USER_BUILD_ID))) 561 goto clear; 562 563 user = flags & BPF_F_USER_STACK; 564 kernel = !user; 565 566 err = -EFAULT; 567 trace = ctx->data->callchain; 568 if (unlikely(!trace)) 569 goto clear; 570 571 nr_kernel = count_kernel_ip(trace); 572 573 if (kernel) { 574 __u64 nr = trace->nr; 575 576 trace->nr = nr_kernel; 577 err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 578 579 /* restore nr */ 580 trace->nr = nr; 581 } else { /* user */ 582 u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 583 584 skip += nr_kernel; 585 if (skip > BPF_F_SKIP_FIELD_MASK) 586 goto clear; 587 588 flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 589 err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 590 } 591 return err; 592 593 clear: 594 memset(buf, 0, size); 595 return err; 596 597 } 598 599 const struct bpf_func_proto bpf_get_stack_proto_pe = { 600 .func = bpf_get_stack_pe, 601 .gpl_only = true, 602 .ret_type = RET_INTEGER, 603 .arg1_type = ARG_PTR_TO_CTX, 604 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 605 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 606 .arg4_type = ARG_ANYTHING, 607 }; 608 609 /* Called from eBPF program */ 610 static void *stack_map_lookup_elem(struct bpf_map *map, void *key) 611 { 612 return ERR_PTR(-EOPNOTSUPP); 613 } 614 615 /* Called from syscall */ 616 int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 617 { 618 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 619 struct stack_map_bucket *bucket, *old_bucket; 620 u32 id = *(u32 *)key, trace_len; 621 622 if (unlikely(id >= smap->n_buckets)) 623 return -ENOENT; 624 625 bucket = xchg(&smap->buckets[id], NULL); 626 if (!bucket) 627 return -ENOENT; 628 629 trace_len = bucket->nr * stack_map_data_size(map); 630 memcpy(value, bucket->data, trace_len); 631 memset(value + trace_len, 0, map->value_size - trace_len); 632 633 old_bucket = xchg(&smap->buckets[id], bucket); 634 if (old_bucket) 635 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 636 return 0; 637 } 638 639 static int stack_map_get_next_key(struct bpf_map *map, void *key, 640 void *next_key) 641 { 642 struct bpf_stack_map *smap = container_of(map, 643 struct bpf_stack_map, map); 644 u32 id; 645 646 WARN_ON_ONCE(!rcu_read_lock_held()); 647 648 if (!key) { 649 id = 0; 650 } else { 651 id = *(u32 *)key; 652 if (id >= smap->n_buckets || !smap->buckets[id]) 653 id = 0; 654 else 655 id++; 656 } 657 658 while (id < smap->n_buckets && !smap->buckets[id]) 659 id++; 660 661 if (id >= smap->n_buckets) 662 return -ENOENT; 663 664 *(u32 *)next_key = id; 665 return 0; 666 } 667 668 static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, 669 u64 map_flags) 670 { 671 return -EINVAL; 672 } 673 674 /* Called from syscall or from eBPF program */ 675 static int stack_map_delete_elem(struct bpf_map *map, void *key) 676 { 677 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 678 struct stack_map_bucket *old_bucket; 679 u32 id = *(u32 *)key; 680 681 if (unlikely(id >= smap->n_buckets)) 682 return -E2BIG; 683 684 old_bucket = xchg(&smap->buckets[id], NULL); 685 if (old_bucket) { 686 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 687 return 0; 688 } else { 689 return -ENOENT; 690 } 691 } 692 693 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 694 static void stack_map_free(struct bpf_map *map) 695 { 696 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 697 698 bpf_map_area_free(smap->elems); 699 pcpu_freelist_destroy(&smap->freelist); 700 bpf_map_area_free(smap); 701 put_callchain_buffers(); 702 } 703 704 static int stack_trace_map_btf_id; 705 const struct bpf_map_ops stack_trace_map_ops = { 706 .map_meta_equal = bpf_map_meta_equal, 707 .map_alloc = stack_map_alloc, 708 .map_free = stack_map_free, 709 .map_get_next_key = stack_map_get_next_key, 710 .map_lookup_elem = stack_map_lookup_elem, 711 .map_update_elem = stack_map_update_elem, 712 .map_delete_elem = stack_map_delete_elem, 713 .map_check_btf = map_check_no_btf, 714 .map_btf_name = "bpf_stack_map", 715 .map_btf_id = &stack_trace_map_btf_id, 716 }; 717 718 static int __init stack_map_init(void) 719 { 720 int cpu; 721 struct stack_map_irq_work *work; 722 723 for_each_possible_cpu(cpu) { 724 work = per_cpu_ptr(&up_read_work, cpu); 725 init_irq_work(&work->irq_work, do_up_read); 726 } 727 return 0; 728 } 729 subsys_initcall(stack_map_init); 730