1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2016 Facebook 3 */ 4 #include <linux/bpf.h> 5 #include <linux/jhash.h> 6 #include <linux/filter.h> 7 #include <linux/kernel.h> 8 #include <linux/stacktrace.h> 9 #include <linux/perf_event.h> 10 #include <linux/irq_work.h> 11 #include <linux/btf_ids.h> 12 #include <linux/buildid.h> 13 #include "percpu_freelist.h" 14 15 #define STACK_CREATE_FLAG_MASK \ 16 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ 17 BPF_F_STACK_BUILD_ID) 18 19 struct stack_map_bucket { 20 struct pcpu_freelist_node fnode; 21 u32 hash; 22 u32 nr; 23 u64 data[]; 24 }; 25 26 struct bpf_stack_map { 27 struct bpf_map map; 28 void *elems; 29 struct pcpu_freelist freelist; 30 u32 n_buckets; 31 struct stack_map_bucket *buckets[]; 32 }; 33 34 /* irq_work to run up_read() for build_id lookup in nmi context */ 35 struct stack_map_irq_work { 36 struct irq_work irq_work; 37 struct mm_struct *mm; 38 }; 39 40 static void do_up_read(struct irq_work *entry) 41 { 42 struct stack_map_irq_work *work; 43 44 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) 45 return; 46 47 work = container_of(entry, struct stack_map_irq_work, irq_work); 48 mmap_read_unlock_non_owner(work->mm); 49 } 50 51 static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); 52 53 static inline bool stack_map_use_build_id(struct bpf_map *map) 54 { 55 return (map->map_flags & BPF_F_STACK_BUILD_ID); 56 } 57 58 static inline int stack_map_data_size(struct bpf_map *map) 59 { 60 return stack_map_use_build_id(map) ? 61 sizeof(struct bpf_stack_build_id) : sizeof(u64); 62 } 63 64 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) 65 { 66 u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; 67 int err; 68 69 smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, 70 smap->map.numa_node); 71 if (!smap->elems) 72 return -ENOMEM; 73 74 err = pcpu_freelist_init(&smap->freelist); 75 if (err) 76 goto free_elems; 77 78 pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, 79 smap->map.max_entries); 80 return 0; 81 82 free_elems: 83 bpf_map_area_free(smap->elems); 84 return err; 85 } 86 87 /* Called from syscall */ 88 static struct bpf_map *stack_map_alloc(union bpf_attr *attr) 89 { 90 u32 value_size = attr->value_size; 91 struct bpf_stack_map *smap; 92 u64 cost, n_buckets; 93 int err; 94 95 if (!bpf_capable()) 96 return ERR_PTR(-EPERM); 97 98 if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) 99 return ERR_PTR(-EINVAL); 100 101 /* check sanity of attributes */ 102 if (attr->max_entries == 0 || attr->key_size != 4 || 103 value_size < 8 || value_size % 8) 104 return ERR_PTR(-EINVAL); 105 106 BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); 107 if (attr->map_flags & BPF_F_STACK_BUILD_ID) { 108 if (value_size % sizeof(struct bpf_stack_build_id) || 109 value_size / sizeof(struct bpf_stack_build_id) 110 > sysctl_perf_event_max_stack) 111 return ERR_PTR(-EINVAL); 112 } else if (value_size / 8 > sysctl_perf_event_max_stack) 113 return ERR_PTR(-EINVAL); 114 115 /* hash table size must be power of 2 */ 116 n_buckets = roundup_pow_of_two(attr->max_entries); 117 if (!n_buckets) 118 return ERR_PTR(-E2BIG); 119 120 cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); 121 cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); 122 smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); 123 if (!smap) 124 return ERR_PTR(-ENOMEM); 125 126 bpf_map_init_from_attr(&smap->map, attr); 127 smap->map.value_size = value_size; 128 smap->n_buckets = n_buckets; 129 130 err = get_callchain_buffers(sysctl_perf_event_max_stack); 131 if (err) 132 goto free_smap; 133 134 err = prealloc_elems_and_freelist(smap); 135 if (err) 136 goto put_buffers; 137 138 return &smap->map; 139 140 put_buffers: 141 put_callchain_buffers(); 142 free_smap: 143 bpf_map_area_free(smap); 144 return ERR_PTR(err); 145 } 146 147 static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, 148 u64 *ips, u32 trace_nr, bool user) 149 { 150 int i; 151 struct vm_area_struct *vma; 152 bool irq_work_busy = false; 153 struct stack_map_irq_work *work = NULL; 154 155 if (irqs_disabled()) { 156 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 157 work = this_cpu_ptr(&up_read_work); 158 if (irq_work_is_busy(&work->irq_work)) { 159 /* cannot queue more up_read, fallback */ 160 irq_work_busy = true; 161 } 162 } else { 163 /* 164 * PREEMPT_RT does not allow to trylock mmap sem in 165 * interrupt disabled context. Force the fallback code. 166 */ 167 irq_work_busy = true; 168 } 169 } 170 171 /* 172 * We cannot do up_read() when the irq is disabled, because of 173 * risk to deadlock with rq_lock. To do build_id lookup when the 174 * irqs are disabled, we need to run up_read() in irq_work. We use 175 * a percpu variable to do the irq_work. If the irq_work is 176 * already used by another lookup, we fall back to report ips. 177 * 178 * Same fallback is used for kernel stack (!user) on a stackmap 179 * with build_id. 180 */ 181 if (!user || !current || !current->mm || irq_work_busy || 182 !mmap_read_trylock_non_owner(current->mm)) { 183 /* cannot access current->mm, fall back to ips */ 184 for (i = 0; i < trace_nr; i++) { 185 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 186 id_offs[i].ip = ips[i]; 187 memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 188 } 189 return; 190 } 191 192 for (i = 0; i < trace_nr; i++) { 193 vma = find_vma(current->mm, ips[i]); 194 if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) { 195 /* per entry fall back to ips */ 196 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 197 id_offs[i].ip = ips[i]; 198 memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); 199 continue; 200 } 201 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] 202 - vma->vm_start; 203 id_offs[i].status = BPF_STACK_BUILD_ID_VALID; 204 } 205 206 if (!work) { 207 mmap_read_unlock_non_owner(current->mm); 208 } else { 209 work->mm = current->mm; 210 irq_work_queue(&work->irq_work); 211 } 212 } 213 214 static struct perf_callchain_entry * 215 get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) 216 { 217 #ifdef CONFIG_STACKTRACE 218 struct perf_callchain_entry *entry; 219 int rctx; 220 221 entry = get_callchain_entry(&rctx); 222 223 if (!entry) 224 return NULL; 225 226 entry->nr = init_nr + 227 stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), 228 sysctl_perf_event_max_stack - init_nr, 0); 229 230 /* stack_trace_save_tsk() works on unsigned long array, while 231 * perf_callchain_entry uses u64 array. For 32-bit systems, it is 232 * necessary to fix this mismatch. 233 */ 234 if (__BITS_PER_LONG != 64) { 235 unsigned long *from = (unsigned long *) entry->ip; 236 u64 *to = entry->ip; 237 int i; 238 239 /* copy data from the end to avoid using extra buffer */ 240 for (i = entry->nr - 1; i >= (int)init_nr; i--) 241 to[i] = (u64)(from[i]); 242 } 243 244 put_callchain_entry(rctx); 245 246 return entry; 247 #else /* CONFIG_STACKTRACE */ 248 return NULL; 249 #endif 250 } 251 252 static long __bpf_get_stackid(struct bpf_map *map, 253 struct perf_callchain_entry *trace, u64 flags) 254 { 255 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 256 struct stack_map_bucket *bucket, *new_bucket, *old_bucket; 257 u32 max_depth = map->value_size / stack_map_data_size(map); 258 /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 259 u32 init_nr = sysctl_perf_event_max_stack - max_depth; 260 u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 261 u32 hash, id, trace_nr, trace_len; 262 bool user = flags & BPF_F_USER_STACK; 263 u64 *ips; 264 bool hash_matches; 265 266 /* get_perf_callchain() guarantees that trace->nr >= init_nr 267 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth 268 */ 269 trace_nr = trace->nr - init_nr; 270 271 if (trace_nr <= skip) 272 /* skipping more than usable stack trace */ 273 return -EFAULT; 274 275 trace_nr -= skip; 276 trace_len = trace_nr * sizeof(u64); 277 ips = trace->ip + skip + init_nr; 278 hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); 279 id = hash & (smap->n_buckets - 1); 280 bucket = READ_ONCE(smap->buckets[id]); 281 282 hash_matches = bucket && bucket->hash == hash; 283 /* fast cmp */ 284 if (hash_matches && flags & BPF_F_FAST_STACK_CMP) 285 return id; 286 287 if (stack_map_use_build_id(map)) { 288 /* for build_id+offset, pop a bucket before slow cmp */ 289 new_bucket = (struct stack_map_bucket *) 290 pcpu_freelist_pop(&smap->freelist); 291 if (unlikely(!new_bucket)) 292 return -ENOMEM; 293 new_bucket->nr = trace_nr; 294 stack_map_get_build_id_offset( 295 (struct bpf_stack_build_id *)new_bucket->data, 296 ips, trace_nr, user); 297 trace_len = trace_nr * sizeof(struct bpf_stack_build_id); 298 if (hash_matches && bucket->nr == trace_nr && 299 memcmp(bucket->data, new_bucket->data, trace_len) == 0) { 300 pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 301 return id; 302 } 303 if (bucket && !(flags & BPF_F_REUSE_STACKID)) { 304 pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); 305 return -EEXIST; 306 } 307 } else { 308 if (hash_matches && bucket->nr == trace_nr && 309 memcmp(bucket->data, ips, trace_len) == 0) 310 return id; 311 if (bucket && !(flags & BPF_F_REUSE_STACKID)) 312 return -EEXIST; 313 314 new_bucket = (struct stack_map_bucket *) 315 pcpu_freelist_pop(&smap->freelist); 316 if (unlikely(!new_bucket)) 317 return -ENOMEM; 318 memcpy(new_bucket->data, ips, trace_len); 319 } 320 321 new_bucket->hash = hash; 322 new_bucket->nr = trace_nr; 323 324 old_bucket = xchg(&smap->buckets[id], new_bucket); 325 if (old_bucket) 326 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 327 return id; 328 } 329 330 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, 331 u64, flags) 332 { 333 u32 max_depth = map->value_size / stack_map_data_size(map); 334 /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ 335 u32 init_nr = sysctl_perf_event_max_stack - max_depth; 336 bool user = flags & BPF_F_USER_STACK; 337 struct perf_callchain_entry *trace; 338 bool kernel = !user; 339 340 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 341 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 342 return -EINVAL; 343 344 trace = get_perf_callchain(regs, init_nr, kernel, user, 345 sysctl_perf_event_max_stack, false, false); 346 347 if (unlikely(!trace)) 348 /* couldn't fetch the stack trace */ 349 return -EFAULT; 350 351 return __bpf_get_stackid(map, trace, flags); 352 } 353 354 const struct bpf_func_proto bpf_get_stackid_proto = { 355 .func = bpf_get_stackid, 356 .gpl_only = true, 357 .ret_type = RET_INTEGER, 358 .arg1_type = ARG_PTR_TO_CTX, 359 .arg2_type = ARG_CONST_MAP_PTR, 360 .arg3_type = ARG_ANYTHING, 361 }; 362 363 static __u64 count_kernel_ip(struct perf_callchain_entry *trace) 364 { 365 __u64 nr_kernel = 0; 366 367 while (nr_kernel < trace->nr) { 368 if (trace->ip[nr_kernel] == PERF_CONTEXT_USER) 369 break; 370 nr_kernel++; 371 } 372 return nr_kernel; 373 } 374 375 BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx, 376 struct bpf_map *, map, u64, flags) 377 { 378 struct perf_event *event = ctx->event; 379 struct perf_callchain_entry *trace; 380 bool kernel, user; 381 __u64 nr_kernel; 382 int ret; 383 384 /* perf_sample_data doesn't have callchain, use bpf_get_stackid */ 385 if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 386 return bpf_get_stackid((unsigned long)(ctx->regs), 387 (unsigned long) map, flags, 0, 0); 388 389 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 390 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) 391 return -EINVAL; 392 393 user = flags & BPF_F_USER_STACK; 394 kernel = !user; 395 396 trace = ctx->data->callchain; 397 if (unlikely(!trace)) 398 return -EFAULT; 399 400 nr_kernel = count_kernel_ip(trace); 401 402 if (kernel) { 403 __u64 nr = trace->nr; 404 405 trace->nr = nr_kernel; 406 ret = __bpf_get_stackid(map, trace, flags); 407 408 /* restore nr */ 409 trace->nr = nr; 410 } else { /* user */ 411 u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 412 413 skip += nr_kernel; 414 if (skip > BPF_F_SKIP_FIELD_MASK) 415 return -EFAULT; 416 417 flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 418 ret = __bpf_get_stackid(map, trace, flags); 419 } 420 return ret; 421 } 422 423 const struct bpf_func_proto bpf_get_stackid_proto_pe = { 424 .func = bpf_get_stackid_pe, 425 .gpl_only = false, 426 .ret_type = RET_INTEGER, 427 .arg1_type = ARG_PTR_TO_CTX, 428 .arg2_type = ARG_CONST_MAP_PTR, 429 .arg3_type = ARG_ANYTHING, 430 }; 431 432 static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, 433 struct perf_callchain_entry *trace_in, 434 void *buf, u32 size, u64 flags) 435 { 436 u32 init_nr, trace_nr, copy_len, elem_size, num_elem; 437 bool user_build_id = flags & BPF_F_USER_BUILD_ID; 438 u32 skip = flags & BPF_F_SKIP_FIELD_MASK; 439 bool user = flags & BPF_F_USER_STACK; 440 struct perf_callchain_entry *trace; 441 bool kernel = !user; 442 int err = -EINVAL; 443 u64 *ips; 444 445 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 446 BPF_F_USER_BUILD_ID))) 447 goto clear; 448 if (kernel && user_build_id) 449 goto clear; 450 451 elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) 452 : sizeof(u64); 453 if (unlikely(size % elem_size)) 454 goto clear; 455 456 /* cannot get valid user stack for task without user_mode regs */ 457 if (task && user && !user_mode(regs)) 458 goto err_fault; 459 460 num_elem = size / elem_size; 461 if (sysctl_perf_event_max_stack < num_elem) 462 init_nr = 0; 463 else 464 init_nr = sysctl_perf_event_max_stack - num_elem; 465 466 if (trace_in) 467 trace = trace_in; 468 else if (kernel && task) 469 trace = get_callchain_entry_for_task(task, init_nr); 470 else 471 trace = get_perf_callchain(regs, init_nr, kernel, user, 472 sysctl_perf_event_max_stack, 473 false, false); 474 if (unlikely(!trace)) 475 goto err_fault; 476 477 trace_nr = trace->nr - init_nr; 478 if (trace_nr < skip) 479 goto err_fault; 480 481 trace_nr -= skip; 482 trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; 483 copy_len = trace_nr * elem_size; 484 ips = trace->ip + skip + init_nr; 485 if (user && user_build_id) 486 stack_map_get_build_id_offset(buf, ips, trace_nr, user); 487 else 488 memcpy(buf, ips, copy_len); 489 490 if (size > copy_len) 491 memset(buf + copy_len, 0, size - copy_len); 492 return copy_len; 493 494 err_fault: 495 err = -EFAULT; 496 clear: 497 memset(buf, 0, size); 498 return err; 499 } 500 501 BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, 502 u64, flags) 503 { 504 return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 505 } 506 507 const struct bpf_func_proto bpf_get_stack_proto = { 508 .func = bpf_get_stack, 509 .gpl_only = true, 510 .ret_type = RET_INTEGER, 511 .arg1_type = ARG_PTR_TO_CTX, 512 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 513 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 514 .arg4_type = ARG_ANYTHING, 515 }; 516 517 BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, 518 u32, size, u64, flags) 519 { 520 struct pt_regs *regs = task_pt_regs(task); 521 522 return __bpf_get_stack(regs, task, NULL, buf, size, flags); 523 } 524 525 BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct) 526 527 const struct bpf_func_proto bpf_get_task_stack_proto = { 528 .func = bpf_get_task_stack, 529 .gpl_only = false, 530 .ret_type = RET_INTEGER, 531 .arg1_type = ARG_PTR_TO_BTF_ID, 532 .arg1_btf_id = &bpf_get_task_stack_btf_ids[0], 533 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 534 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 535 .arg4_type = ARG_ANYTHING, 536 }; 537 538 BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx, 539 void *, buf, u32, size, u64, flags) 540 { 541 struct pt_regs *regs = (struct pt_regs *)(ctx->regs); 542 struct perf_event *event = ctx->event; 543 struct perf_callchain_entry *trace; 544 bool kernel, user; 545 int err = -EINVAL; 546 __u64 nr_kernel; 547 548 if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) 549 return __bpf_get_stack(regs, NULL, NULL, buf, size, flags); 550 551 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | 552 BPF_F_USER_BUILD_ID))) 553 goto clear; 554 555 user = flags & BPF_F_USER_STACK; 556 kernel = !user; 557 558 err = -EFAULT; 559 trace = ctx->data->callchain; 560 if (unlikely(!trace)) 561 goto clear; 562 563 nr_kernel = count_kernel_ip(trace); 564 565 if (kernel) { 566 __u64 nr = trace->nr; 567 568 trace->nr = nr_kernel; 569 err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 570 571 /* restore nr */ 572 trace->nr = nr; 573 } else { /* user */ 574 u64 skip = flags & BPF_F_SKIP_FIELD_MASK; 575 576 skip += nr_kernel; 577 if (skip > BPF_F_SKIP_FIELD_MASK) 578 goto clear; 579 580 flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip; 581 err = __bpf_get_stack(regs, NULL, trace, buf, size, flags); 582 } 583 return err; 584 585 clear: 586 memset(buf, 0, size); 587 return err; 588 589 } 590 591 const struct bpf_func_proto bpf_get_stack_proto_pe = { 592 .func = bpf_get_stack_pe, 593 .gpl_only = true, 594 .ret_type = RET_INTEGER, 595 .arg1_type = ARG_PTR_TO_CTX, 596 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 597 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 598 .arg4_type = ARG_ANYTHING, 599 }; 600 601 /* Called from eBPF program */ 602 static void *stack_map_lookup_elem(struct bpf_map *map, void *key) 603 { 604 return ERR_PTR(-EOPNOTSUPP); 605 } 606 607 /* Called from syscall */ 608 int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 609 { 610 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 611 struct stack_map_bucket *bucket, *old_bucket; 612 u32 id = *(u32 *)key, trace_len; 613 614 if (unlikely(id >= smap->n_buckets)) 615 return -ENOENT; 616 617 bucket = xchg(&smap->buckets[id], NULL); 618 if (!bucket) 619 return -ENOENT; 620 621 trace_len = bucket->nr * stack_map_data_size(map); 622 memcpy(value, bucket->data, trace_len); 623 memset(value + trace_len, 0, map->value_size - trace_len); 624 625 old_bucket = xchg(&smap->buckets[id], bucket); 626 if (old_bucket) 627 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 628 return 0; 629 } 630 631 static int stack_map_get_next_key(struct bpf_map *map, void *key, 632 void *next_key) 633 { 634 struct bpf_stack_map *smap = container_of(map, 635 struct bpf_stack_map, map); 636 u32 id; 637 638 WARN_ON_ONCE(!rcu_read_lock_held()); 639 640 if (!key) { 641 id = 0; 642 } else { 643 id = *(u32 *)key; 644 if (id >= smap->n_buckets || !smap->buckets[id]) 645 id = 0; 646 else 647 id++; 648 } 649 650 while (id < smap->n_buckets && !smap->buckets[id]) 651 id++; 652 653 if (id >= smap->n_buckets) 654 return -ENOENT; 655 656 *(u32 *)next_key = id; 657 return 0; 658 } 659 660 static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, 661 u64 map_flags) 662 { 663 return -EINVAL; 664 } 665 666 /* Called from syscall or from eBPF program */ 667 static int stack_map_delete_elem(struct bpf_map *map, void *key) 668 { 669 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 670 struct stack_map_bucket *old_bucket; 671 u32 id = *(u32 *)key; 672 673 if (unlikely(id >= smap->n_buckets)) 674 return -E2BIG; 675 676 old_bucket = xchg(&smap->buckets[id], NULL); 677 if (old_bucket) { 678 pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); 679 return 0; 680 } else { 681 return -ENOENT; 682 } 683 } 684 685 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 686 static void stack_map_free(struct bpf_map *map) 687 { 688 struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); 689 690 bpf_map_area_free(smap->elems); 691 pcpu_freelist_destroy(&smap->freelist); 692 bpf_map_area_free(smap); 693 put_callchain_buffers(); 694 } 695 696 static int stack_trace_map_btf_id; 697 const struct bpf_map_ops stack_trace_map_ops = { 698 .map_meta_equal = bpf_map_meta_equal, 699 .map_alloc = stack_map_alloc, 700 .map_free = stack_map_free, 701 .map_get_next_key = stack_map_get_next_key, 702 .map_lookup_elem = stack_map_lookup_elem, 703 .map_update_elem = stack_map_update_elem, 704 .map_delete_elem = stack_map_delete_elem, 705 .map_check_btf = map_check_no_btf, 706 .map_btf_name = "bpf_stack_map", 707 .map_btf_id = &stack_trace_map_btf_id, 708 }; 709 710 static int __init stack_map_init(void) 711 { 712 int cpu; 713 struct stack_map_irq_work *work; 714 715 for_each_possible_cpu(cpu) { 716 work = per_cpu_ptr(&up_read_work, cpu); 717 init_irq_work(&work->irq_work, do_up_read); 718 } 719 return 0; 720 } 721 subsys_initcall(stack_map_init); 722