1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4 #include <linux/bpf.h> 5 #include <linux/rcupdate.h> 6 #include <linux/random.h> 7 #include <linux/smp.h> 8 #include <linux/topology.h> 9 #include <linux/ktime.h> 10 #include <linux/sched.h> 11 #include <linux/uidgid.h> 12 #include <linux/filter.h> 13 #include <linux/ctype.h> 14 #include <linux/jiffies.h> 15 #include <linux/pid_namespace.h> 16 #include <linux/proc_ns.h> 17 18 #include "../../lib/kstrtox.h" 19 20 /* If kernel subsystem is allowing eBPF programs to call this function, 21 * inside its own verifier_ops->get_func_proto() callback it should return 22 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments 23 * 24 * Different map implementations will rely on rcu in map methods 25 * lookup/update/delete, therefore eBPF programs must run under rcu lock 26 * if program is allowed to access maps, so check rcu_read_lock_held in 27 * all three functions. 28 */ 29 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) 30 { 31 WARN_ON_ONCE(!rcu_read_lock_held()); 32 return (unsigned long) map->ops->map_lookup_elem(map, key); 33 } 34 35 const struct bpf_func_proto bpf_map_lookup_elem_proto = { 36 .func = bpf_map_lookup_elem, 37 .gpl_only = false, 38 .pkt_access = true, 39 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 40 .arg1_type = ARG_CONST_MAP_PTR, 41 .arg2_type = ARG_PTR_TO_MAP_KEY, 42 }; 43 44 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, 45 void *, value, u64, flags) 46 { 47 WARN_ON_ONCE(!rcu_read_lock_held()); 48 return map->ops->map_update_elem(map, key, value, flags); 49 } 50 51 const struct bpf_func_proto bpf_map_update_elem_proto = { 52 .func = bpf_map_update_elem, 53 .gpl_only = false, 54 .pkt_access = true, 55 .ret_type = RET_INTEGER, 56 .arg1_type = ARG_CONST_MAP_PTR, 57 .arg2_type = ARG_PTR_TO_MAP_KEY, 58 .arg3_type = ARG_PTR_TO_MAP_VALUE, 59 .arg4_type = ARG_ANYTHING, 60 }; 61 62 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) 63 { 64 WARN_ON_ONCE(!rcu_read_lock_held()); 65 return map->ops->map_delete_elem(map, key); 66 } 67 68 const struct bpf_func_proto bpf_map_delete_elem_proto = { 69 .func = bpf_map_delete_elem, 70 .gpl_only = false, 71 .pkt_access = true, 72 .ret_type = RET_INTEGER, 73 .arg1_type = ARG_CONST_MAP_PTR, 74 .arg2_type = ARG_PTR_TO_MAP_KEY, 75 }; 76 77 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) 78 { 79 return map->ops->map_push_elem(map, value, flags); 80 } 81 82 const struct bpf_func_proto bpf_map_push_elem_proto = { 83 .func = bpf_map_push_elem, 84 .gpl_only = false, 85 .pkt_access = true, 86 .ret_type = RET_INTEGER, 87 .arg1_type = ARG_CONST_MAP_PTR, 88 .arg2_type = ARG_PTR_TO_MAP_VALUE, 89 .arg3_type = ARG_ANYTHING, 90 }; 91 92 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) 93 { 94 return map->ops->map_pop_elem(map, value); 95 } 96 97 const struct bpf_func_proto bpf_map_pop_elem_proto = { 98 .func = bpf_map_pop_elem, 99 .gpl_only = false, 100 .ret_type = RET_INTEGER, 101 .arg1_type = ARG_CONST_MAP_PTR, 102 .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 103 }; 104 105 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) 106 { 107 return map->ops->map_peek_elem(map, value); 108 } 109 110 const struct bpf_func_proto bpf_map_peek_elem_proto = { 111 .func = bpf_map_pop_elem, 112 .gpl_only = false, 113 .ret_type = RET_INTEGER, 114 .arg1_type = ARG_CONST_MAP_PTR, 115 .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 116 }; 117 118 const struct bpf_func_proto bpf_get_prandom_u32_proto = { 119 .func = bpf_user_rnd_u32, 120 .gpl_only = false, 121 .ret_type = RET_INTEGER, 122 }; 123 124 BPF_CALL_0(bpf_get_smp_processor_id) 125 { 126 return smp_processor_id(); 127 } 128 129 const struct bpf_func_proto bpf_get_smp_processor_id_proto = { 130 .func = bpf_get_smp_processor_id, 131 .gpl_only = false, 132 .ret_type = RET_INTEGER, 133 }; 134 135 BPF_CALL_0(bpf_get_numa_node_id) 136 { 137 return numa_node_id(); 138 } 139 140 const struct bpf_func_proto bpf_get_numa_node_id_proto = { 141 .func = bpf_get_numa_node_id, 142 .gpl_only = false, 143 .ret_type = RET_INTEGER, 144 }; 145 146 BPF_CALL_0(bpf_ktime_get_ns) 147 { 148 /* NMI safe access to clock monotonic */ 149 return ktime_get_mono_fast_ns(); 150 } 151 152 const struct bpf_func_proto bpf_ktime_get_ns_proto = { 153 .func = bpf_ktime_get_ns, 154 .gpl_only = false, 155 .ret_type = RET_INTEGER, 156 }; 157 158 BPF_CALL_0(bpf_ktime_get_boot_ns) 159 { 160 /* NMI safe access to clock boottime */ 161 return ktime_get_boot_fast_ns(); 162 } 163 164 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { 165 .func = bpf_ktime_get_boot_ns, 166 .gpl_only = false, 167 .ret_type = RET_INTEGER, 168 }; 169 170 BPF_CALL_0(bpf_get_current_pid_tgid) 171 { 172 struct task_struct *task = current; 173 174 if (unlikely(!task)) 175 return -EINVAL; 176 177 return (u64) task->tgid << 32 | task->pid; 178 } 179 180 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { 181 .func = bpf_get_current_pid_tgid, 182 .gpl_only = false, 183 .ret_type = RET_INTEGER, 184 }; 185 186 BPF_CALL_0(bpf_get_current_uid_gid) 187 { 188 struct task_struct *task = current; 189 kuid_t uid; 190 kgid_t gid; 191 192 if (unlikely(!task)) 193 return -EINVAL; 194 195 current_uid_gid(&uid, &gid); 196 return (u64) from_kgid(&init_user_ns, gid) << 32 | 197 from_kuid(&init_user_ns, uid); 198 } 199 200 const struct bpf_func_proto bpf_get_current_uid_gid_proto = { 201 .func = bpf_get_current_uid_gid, 202 .gpl_only = false, 203 .ret_type = RET_INTEGER, 204 }; 205 206 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) 207 { 208 struct task_struct *task = current; 209 210 if (unlikely(!task)) 211 goto err_clear; 212 213 strncpy(buf, task->comm, size); 214 215 /* Verifier guarantees that size > 0. For task->comm exceeding 216 * size, guarantee that buf is %NUL-terminated. Unconditionally 217 * done here to save the size test. 218 */ 219 buf[size - 1] = 0; 220 return 0; 221 err_clear: 222 memset(buf, 0, size); 223 return -EINVAL; 224 } 225 226 const struct bpf_func_proto bpf_get_current_comm_proto = { 227 .func = bpf_get_current_comm, 228 .gpl_only = false, 229 .ret_type = RET_INTEGER, 230 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 231 .arg2_type = ARG_CONST_SIZE, 232 }; 233 234 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) 235 236 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 237 { 238 arch_spinlock_t *l = (void *)lock; 239 union { 240 __u32 val; 241 arch_spinlock_t lock; 242 } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; 243 244 compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); 245 BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); 246 BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); 247 arch_spin_lock(l); 248 } 249 250 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 251 { 252 arch_spinlock_t *l = (void *)lock; 253 254 arch_spin_unlock(l); 255 } 256 257 #else 258 259 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 260 { 261 atomic_t *l = (void *)lock; 262 263 BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); 264 do { 265 atomic_cond_read_relaxed(l, !VAL); 266 } while (atomic_xchg(l, 1)); 267 } 268 269 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 270 { 271 atomic_t *l = (void *)lock; 272 273 atomic_set_release(l, 0); 274 } 275 276 #endif 277 278 static DEFINE_PER_CPU(unsigned long, irqsave_flags); 279 280 notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) 281 { 282 unsigned long flags; 283 284 local_irq_save(flags); 285 __bpf_spin_lock(lock); 286 __this_cpu_write(irqsave_flags, flags); 287 return 0; 288 } 289 290 const struct bpf_func_proto bpf_spin_lock_proto = { 291 .func = bpf_spin_lock, 292 .gpl_only = false, 293 .ret_type = RET_VOID, 294 .arg1_type = ARG_PTR_TO_SPIN_LOCK, 295 }; 296 297 notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) 298 { 299 unsigned long flags; 300 301 flags = __this_cpu_read(irqsave_flags); 302 __bpf_spin_unlock(lock); 303 local_irq_restore(flags); 304 return 0; 305 } 306 307 const struct bpf_func_proto bpf_spin_unlock_proto = { 308 .func = bpf_spin_unlock, 309 .gpl_only = false, 310 .ret_type = RET_VOID, 311 .arg1_type = ARG_PTR_TO_SPIN_LOCK, 312 }; 313 314 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, 315 bool lock_src) 316 { 317 struct bpf_spin_lock *lock; 318 319 if (lock_src) 320 lock = src + map->spin_lock_off; 321 else 322 lock = dst + map->spin_lock_off; 323 preempt_disable(); 324 ____bpf_spin_lock(lock); 325 copy_map_value(map, dst, src); 326 ____bpf_spin_unlock(lock); 327 preempt_enable(); 328 } 329 330 BPF_CALL_0(bpf_jiffies64) 331 { 332 return get_jiffies_64(); 333 } 334 335 const struct bpf_func_proto bpf_jiffies64_proto = { 336 .func = bpf_jiffies64, 337 .gpl_only = false, 338 .ret_type = RET_INTEGER, 339 }; 340 341 #ifdef CONFIG_CGROUPS 342 BPF_CALL_0(bpf_get_current_cgroup_id) 343 { 344 struct cgroup *cgrp = task_dfl_cgroup(current); 345 346 return cgroup_id(cgrp); 347 } 348 349 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { 350 .func = bpf_get_current_cgroup_id, 351 .gpl_only = false, 352 .ret_type = RET_INTEGER, 353 }; 354 355 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) 356 { 357 struct cgroup *cgrp = task_dfl_cgroup(current); 358 struct cgroup *ancestor; 359 360 ancestor = cgroup_ancestor(cgrp, ancestor_level); 361 if (!ancestor) 362 return 0; 363 return cgroup_id(ancestor); 364 } 365 366 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { 367 .func = bpf_get_current_ancestor_cgroup_id, 368 .gpl_only = false, 369 .ret_type = RET_INTEGER, 370 .arg1_type = ARG_ANYTHING, 371 }; 372 373 #ifdef CONFIG_CGROUP_BPF 374 DECLARE_PER_CPU(struct bpf_cgroup_storage*, 375 bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 376 377 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 378 { 379 /* flags argument is not used now, 380 * but provides an ability to extend the API. 381 * verifier checks that its value is correct. 382 */ 383 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 384 struct bpf_cgroup_storage *storage; 385 void *ptr; 386 387 storage = this_cpu_read(bpf_cgroup_storage[stype]); 388 389 if (stype == BPF_CGROUP_STORAGE_SHARED) 390 ptr = &READ_ONCE(storage->buf)->data[0]; 391 else 392 ptr = this_cpu_ptr(storage->percpu_buf); 393 394 return (unsigned long)ptr; 395 } 396 397 const struct bpf_func_proto bpf_get_local_storage_proto = { 398 .func = bpf_get_local_storage, 399 .gpl_only = false, 400 .ret_type = RET_PTR_TO_MAP_VALUE, 401 .arg1_type = ARG_CONST_MAP_PTR, 402 .arg2_type = ARG_ANYTHING, 403 }; 404 #endif 405 406 #define BPF_STRTOX_BASE_MASK 0x1F 407 408 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, 409 unsigned long long *res, bool *is_negative) 410 { 411 unsigned int base = flags & BPF_STRTOX_BASE_MASK; 412 const char *cur_buf = buf; 413 size_t cur_len = buf_len; 414 unsigned int consumed; 415 size_t val_len; 416 char str[64]; 417 418 if (!buf || !buf_len || !res || !is_negative) 419 return -EINVAL; 420 421 if (base != 0 && base != 8 && base != 10 && base != 16) 422 return -EINVAL; 423 424 if (flags & ~BPF_STRTOX_BASE_MASK) 425 return -EINVAL; 426 427 while (cur_buf < buf + buf_len && isspace(*cur_buf)) 428 ++cur_buf; 429 430 *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); 431 if (*is_negative) 432 ++cur_buf; 433 434 consumed = cur_buf - buf; 435 cur_len -= consumed; 436 if (!cur_len) 437 return -EINVAL; 438 439 cur_len = min(cur_len, sizeof(str) - 1); 440 memcpy(str, cur_buf, cur_len); 441 str[cur_len] = '\0'; 442 cur_buf = str; 443 444 cur_buf = _parse_integer_fixup_radix(cur_buf, &base); 445 val_len = _parse_integer(cur_buf, base, res); 446 447 if (val_len & KSTRTOX_OVERFLOW) 448 return -ERANGE; 449 450 if (val_len == 0) 451 return -EINVAL; 452 453 cur_buf += val_len; 454 consumed += cur_buf - str; 455 456 return consumed; 457 } 458 459 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, 460 long long *res) 461 { 462 unsigned long long _res; 463 bool is_negative; 464 int err; 465 466 err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 467 if (err < 0) 468 return err; 469 if (is_negative) { 470 if ((long long)-_res > 0) 471 return -ERANGE; 472 *res = -_res; 473 } else { 474 if ((long long)_res < 0) 475 return -ERANGE; 476 *res = _res; 477 } 478 return err; 479 } 480 481 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, 482 long *, res) 483 { 484 long long _res; 485 int err; 486 487 err = __bpf_strtoll(buf, buf_len, flags, &_res); 488 if (err < 0) 489 return err; 490 if (_res != (long)_res) 491 return -ERANGE; 492 *res = _res; 493 return err; 494 } 495 496 const struct bpf_func_proto bpf_strtol_proto = { 497 .func = bpf_strtol, 498 .gpl_only = false, 499 .ret_type = RET_INTEGER, 500 .arg1_type = ARG_PTR_TO_MEM, 501 .arg2_type = ARG_CONST_SIZE, 502 .arg3_type = ARG_ANYTHING, 503 .arg4_type = ARG_PTR_TO_LONG, 504 }; 505 506 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, 507 unsigned long *, res) 508 { 509 unsigned long long _res; 510 bool is_negative; 511 int err; 512 513 err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 514 if (err < 0) 515 return err; 516 if (is_negative) 517 return -EINVAL; 518 if (_res != (unsigned long)_res) 519 return -ERANGE; 520 *res = _res; 521 return err; 522 } 523 524 const struct bpf_func_proto bpf_strtoul_proto = { 525 .func = bpf_strtoul, 526 .gpl_only = false, 527 .ret_type = RET_INTEGER, 528 .arg1_type = ARG_PTR_TO_MEM, 529 .arg2_type = ARG_CONST_SIZE, 530 .arg3_type = ARG_ANYTHING, 531 .arg4_type = ARG_PTR_TO_LONG, 532 }; 533 #endif 534 535 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, 536 struct bpf_pidns_info *, nsdata, u32, size) 537 { 538 struct task_struct *task = current; 539 struct pid_namespace *pidns; 540 int err = -EINVAL; 541 542 if (unlikely(size != sizeof(struct bpf_pidns_info))) 543 goto clear; 544 545 if (unlikely((u64)(dev_t)dev != dev)) 546 goto clear; 547 548 if (unlikely(!task)) 549 goto clear; 550 551 pidns = task_active_pid_ns(task); 552 if (unlikely(!pidns)) { 553 err = -ENOENT; 554 goto clear; 555 } 556 557 if (!ns_match(&pidns->ns, (dev_t)dev, ino)) 558 goto clear; 559 560 nsdata->pid = task_pid_nr_ns(task, pidns); 561 nsdata->tgid = task_tgid_nr_ns(task, pidns); 562 return 0; 563 clear: 564 memset((void *)nsdata, 0, (size_t) size); 565 return err; 566 } 567 568 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { 569 .func = bpf_get_ns_current_pid_tgid, 570 .gpl_only = false, 571 .ret_type = RET_INTEGER, 572 .arg1_type = ARG_ANYTHING, 573 .arg2_type = ARG_ANYTHING, 574 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 575 .arg4_type = ARG_CONST_SIZE, 576 }; 577 578 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { 579 .func = bpf_get_raw_cpu_id, 580 .gpl_only = false, 581 .ret_type = RET_INTEGER, 582 }; 583 584 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, 585 u64, flags, void *, data, u64, size) 586 { 587 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 588 return -EINVAL; 589 590 return bpf_event_output(map, flags, data, size, NULL, 0, NULL); 591 } 592 593 const struct bpf_func_proto bpf_event_output_data_proto = { 594 .func = bpf_event_output_data, 595 .gpl_only = true, 596 .ret_type = RET_INTEGER, 597 .arg1_type = ARG_PTR_TO_CTX, 598 .arg2_type = ARG_CONST_MAP_PTR, 599 .arg3_type = ARG_ANYTHING, 600 .arg4_type = ARG_PTR_TO_MEM, 601 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 602 }; 603 604 const struct bpf_func_proto bpf_get_current_task_proto __weak; 605 const struct bpf_func_proto bpf_probe_read_user_proto __weak; 606 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; 607 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; 608 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; 609 610 const struct bpf_func_proto * 611 bpf_base_func_proto(enum bpf_func_id func_id) 612 { 613 switch (func_id) { 614 case BPF_FUNC_map_lookup_elem: 615 return &bpf_map_lookup_elem_proto; 616 case BPF_FUNC_map_update_elem: 617 return &bpf_map_update_elem_proto; 618 case BPF_FUNC_map_delete_elem: 619 return &bpf_map_delete_elem_proto; 620 case BPF_FUNC_map_push_elem: 621 return &bpf_map_push_elem_proto; 622 case BPF_FUNC_map_pop_elem: 623 return &bpf_map_pop_elem_proto; 624 case BPF_FUNC_map_peek_elem: 625 return &bpf_map_peek_elem_proto; 626 case BPF_FUNC_get_prandom_u32: 627 return &bpf_get_prandom_u32_proto; 628 case BPF_FUNC_get_smp_processor_id: 629 return &bpf_get_raw_smp_processor_id_proto; 630 case BPF_FUNC_get_numa_node_id: 631 return &bpf_get_numa_node_id_proto; 632 case BPF_FUNC_tail_call: 633 return &bpf_tail_call_proto; 634 case BPF_FUNC_ktime_get_ns: 635 return &bpf_ktime_get_ns_proto; 636 case BPF_FUNC_ktime_get_boot_ns: 637 return &bpf_ktime_get_boot_ns_proto; 638 case BPF_FUNC_ringbuf_output: 639 return &bpf_ringbuf_output_proto; 640 case BPF_FUNC_ringbuf_reserve: 641 return &bpf_ringbuf_reserve_proto; 642 case BPF_FUNC_ringbuf_submit: 643 return &bpf_ringbuf_submit_proto; 644 case BPF_FUNC_ringbuf_discard: 645 return &bpf_ringbuf_discard_proto; 646 case BPF_FUNC_ringbuf_query: 647 return &bpf_ringbuf_query_proto; 648 default: 649 break; 650 } 651 652 if (!bpf_capable()) 653 return NULL; 654 655 switch (func_id) { 656 case BPF_FUNC_spin_lock: 657 return &bpf_spin_lock_proto; 658 case BPF_FUNC_spin_unlock: 659 return &bpf_spin_unlock_proto; 660 case BPF_FUNC_trace_printk: 661 if (!perfmon_capable()) 662 return NULL; 663 return bpf_get_trace_printk_proto(); 664 case BPF_FUNC_jiffies64: 665 return &bpf_jiffies64_proto; 666 default: 667 break; 668 } 669 670 if (!perfmon_capable()) 671 return NULL; 672 673 switch (func_id) { 674 case BPF_FUNC_get_current_task: 675 return &bpf_get_current_task_proto; 676 case BPF_FUNC_probe_read_user: 677 return &bpf_probe_read_user_proto; 678 case BPF_FUNC_probe_read_kernel: 679 return &bpf_probe_read_kernel_proto; 680 case BPF_FUNC_probe_read_user_str: 681 return &bpf_probe_read_user_str_proto; 682 case BPF_FUNC_probe_read_kernel_str: 683 return &bpf_probe_read_kernel_str_proto; 684 default: 685 return NULL; 686 } 687 } 688