1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4 #include <linux/bpf.h> 5 #include <linux/rcupdate.h> 6 #include <linux/random.h> 7 #include <linux/smp.h> 8 #include <linux/topology.h> 9 #include <linux/ktime.h> 10 #include <linux/sched.h> 11 #include <linux/uidgid.h> 12 #include <linux/filter.h> 13 #include <linux/ctype.h> 14 #include <linux/jiffies.h> 15 #include <linux/pid_namespace.h> 16 #include <linux/proc_ns.h> 17 18 #include "../../lib/kstrtox.h" 19 20 /* If kernel subsystem is allowing eBPF programs to call this function, 21 * inside its own verifier_ops->get_func_proto() callback it should return 22 * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments 23 * 24 * Different map implementations will rely on rcu in map methods 25 * lookup/update/delete, therefore eBPF programs must run under rcu lock 26 * if program is allowed to access maps, so check rcu_read_lock_held in 27 * all three functions. 28 */ 29 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) 30 { 31 WARN_ON_ONCE(!rcu_read_lock_held()); 32 return (unsigned long) map->ops->map_lookup_elem(map, key); 33 } 34 35 const struct bpf_func_proto bpf_map_lookup_elem_proto = { 36 .func = bpf_map_lookup_elem, 37 .gpl_only = false, 38 .pkt_access = true, 39 .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, 40 .arg1_type = ARG_CONST_MAP_PTR, 41 .arg2_type = ARG_PTR_TO_MAP_KEY, 42 }; 43 44 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, 45 void *, value, u64, flags) 46 { 47 WARN_ON_ONCE(!rcu_read_lock_held()); 48 return map->ops->map_update_elem(map, key, value, flags); 49 } 50 51 const struct bpf_func_proto bpf_map_update_elem_proto = { 52 .func = bpf_map_update_elem, 53 .gpl_only = false, 54 .pkt_access = true, 55 .ret_type = RET_INTEGER, 56 .arg1_type = ARG_CONST_MAP_PTR, 57 .arg2_type = ARG_PTR_TO_MAP_KEY, 58 .arg3_type = ARG_PTR_TO_MAP_VALUE, 59 .arg4_type = ARG_ANYTHING, 60 }; 61 62 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) 63 { 64 WARN_ON_ONCE(!rcu_read_lock_held()); 65 return map->ops->map_delete_elem(map, key); 66 } 67 68 const struct bpf_func_proto bpf_map_delete_elem_proto = { 69 .func = bpf_map_delete_elem, 70 .gpl_only = false, 71 .pkt_access = true, 72 .ret_type = RET_INTEGER, 73 .arg1_type = ARG_CONST_MAP_PTR, 74 .arg2_type = ARG_PTR_TO_MAP_KEY, 75 }; 76 77 BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) 78 { 79 return map->ops->map_push_elem(map, value, flags); 80 } 81 82 const struct bpf_func_proto bpf_map_push_elem_proto = { 83 .func = bpf_map_push_elem, 84 .gpl_only = false, 85 .pkt_access = true, 86 .ret_type = RET_INTEGER, 87 .arg1_type = ARG_CONST_MAP_PTR, 88 .arg2_type = ARG_PTR_TO_MAP_VALUE, 89 .arg3_type = ARG_ANYTHING, 90 }; 91 92 BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) 93 { 94 return map->ops->map_pop_elem(map, value); 95 } 96 97 const struct bpf_func_proto bpf_map_pop_elem_proto = { 98 .func = bpf_map_pop_elem, 99 .gpl_only = false, 100 .ret_type = RET_INTEGER, 101 .arg1_type = ARG_CONST_MAP_PTR, 102 .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 103 }; 104 105 BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) 106 { 107 return map->ops->map_peek_elem(map, value); 108 } 109 110 const struct bpf_func_proto bpf_map_peek_elem_proto = { 111 .func = bpf_map_peek_elem, 112 .gpl_only = false, 113 .ret_type = RET_INTEGER, 114 .arg1_type = ARG_CONST_MAP_PTR, 115 .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 116 }; 117 118 const struct bpf_func_proto bpf_get_prandom_u32_proto = { 119 .func = bpf_user_rnd_u32, 120 .gpl_only = false, 121 .ret_type = RET_INTEGER, 122 }; 123 124 BPF_CALL_0(bpf_get_smp_processor_id) 125 { 126 return smp_processor_id(); 127 } 128 129 const struct bpf_func_proto bpf_get_smp_processor_id_proto = { 130 .func = bpf_get_smp_processor_id, 131 .gpl_only = false, 132 .ret_type = RET_INTEGER, 133 }; 134 135 BPF_CALL_0(bpf_get_numa_node_id) 136 { 137 return numa_node_id(); 138 } 139 140 const struct bpf_func_proto bpf_get_numa_node_id_proto = { 141 .func = bpf_get_numa_node_id, 142 .gpl_only = false, 143 .ret_type = RET_INTEGER, 144 }; 145 146 BPF_CALL_0(bpf_ktime_get_ns) 147 { 148 /* NMI safe access to clock monotonic */ 149 return ktime_get_mono_fast_ns(); 150 } 151 152 const struct bpf_func_proto bpf_ktime_get_ns_proto = { 153 .func = bpf_ktime_get_ns, 154 .gpl_only = false, 155 .ret_type = RET_INTEGER, 156 }; 157 158 BPF_CALL_0(bpf_ktime_get_boot_ns) 159 { 160 /* NMI safe access to clock boottime */ 161 return ktime_get_boot_fast_ns(); 162 } 163 164 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { 165 .func = bpf_ktime_get_boot_ns, 166 .gpl_only = false, 167 .ret_type = RET_INTEGER, 168 }; 169 170 BPF_CALL_0(bpf_ktime_get_coarse_ns) 171 { 172 return ktime_get_coarse_ns(); 173 } 174 175 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { 176 .func = bpf_ktime_get_coarse_ns, 177 .gpl_only = false, 178 .ret_type = RET_INTEGER, 179 }; 180 181 BPF_CALL_0(bpf_get_current_pid_tgid) 182 { 183 struct task_struct *task = current; 184 185 if (unlikely(!task)) 186 return -EINVAL; 187 188 return (u64) task->tgid << 32 | task->pid; 189 } 190 191 const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { 192 .func = bpf_get_current_pid_tgid, 193 .gpl_only = false, 194 .ret_type = RET_INTEGER, 195 }; 196 197 BPF_CALL_0(bpf_get_current_uid_gid) 198 { 199 struct task_struct *task = current; 200 kuid_t uid; 201 kgid_t gid; 202 203 if (unlikely(!task)) 204 return -EINVAL; 205 206 current_uid_gid(&uid, &gid); 207 return (u64) from_kgid(&init_user_ns, gid) << 32 | 208 from_kuid(&init_user_ns, uid); 209 } 210 211 const struct bpf_func_proto bpf_get_current_uid_gid_proto = { 212 .func = bpf_get_current_uid_gid, 213 .gpl_only = false, 214 .ret_type = RET_INTEGER, 215 }; 216 217 BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) 218 { 219 struct task_struct *task = current; 220 221 if (unlikely(!task)) 222 goto err_clear; 223 224 strncpy(buf, task->comm, size); 225 226 /* Verifier guarantees that size > 0. For task->comm exceeding 227 * size, guarantee that buf is %NUL-terminated. Unconditionally 228 * done here to save the size test. 229 */ 230 buf[size - 1] = 0; 231 return 0; 232 err_clear: 233 memset(buf, 0, size); 234 return -EINVAL; 235 } 236 237 const struct bpf_func_proto bpf_get_current_comm_proto = { 238 .func = bpf_get_current_comm, 239 .gpl_only = false, 240 .ret_type = RET_INTEGER, 241 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 242 .arg2_type = ARG_CONST_SIZE, 243 }; 244 245 #if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) 246 247 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 248 { 249 arch_spinlock_t *l = (void *)lock; 250 union { 251 __u32 val; 252 arch_spinlock_t lock; 253 } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; 254 255 compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); 256 BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); 257 BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); 258 arch_spin_lock(l); 259 } 260 261 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 262 { 263 arch_spinlock_t *l = (void *)lock; 264 265 arch_spin_unlock(l); 266 } 267 268 #else 269 270 static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) 271 { 272 atomic_t *l = (void *)lock; 273 274 BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); 275 do { 276 atomic_cond_read_relaxed(l, !VAL); 277 } while (atomic_xchg(l, 1)); 278 } 279 280 static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) 281 { 282 atomic_t *l = (void *)lock; 283 284 atomic_set_release(l, 0); 285 } 286 287 #endif 288 289 static DEFINE_PER_CPU(unsigned long, irqsave_flags); 290 291 notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) 292 { 293 unsigned long flags; 294 295 local_irq_save(flags); 296 __bpf_spin_lock(lock); 297 __this_cpu_write(irqsave_flags, flags); 298 return 0; 299 } 300 301 const struct bpf_func_proto bpf_spin_lock_proto = { 302 .func = bpf_spin_lock, 303 .gpl_only = false, 304 .ret_type = RET_VOID, 305 .arg1_type = ARG_PTR_TO_SPIN_LOCK, 306 }; 307 308 notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) 309 { 310 unsigned long flags; 311 312 flags = __this_cpu_read(irqsave_flags); 313 __bpf_spin_unlock(lock); 314 local_irq_restore(flags); 315 return 0; 316 } 317 318 const struct bpf_func_proto bpf_spin_unlock_proto = { 319 .func = bpf_spin_unlock, 320 .gpl_only = false, 321 .ret_type = RET_VOID, 322 .arg1_type = ARG_PTR_TO_SPIN_LOCK, 323 }; 324 325 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, 326 bool lock_src) 327 { 328 struct bpf_spin_lock *lock; 329 330 if (lock_src) 331 lock = src + map->spin_lock_off; 332 else 333 lock = dst + map->spin_lock_off; 334 preempt_disable(); 335 ____bpf_spin_lock(lock); 336 copy_map_value(map, dst, src); 337 ____bpf_spin_unlock(lock); 338 preempt_enable(); 339 } 340 341 BPF_CALL_0(bpf_jiffies64) 342 { 343 return get_jiffies_64(); 344 } 345 346 const struct bpf_func_proto bpf_jiffies64_proto = { 347 .func = bpf_jiffies64, 348 .gpl_only = false, 349 .ret_type = RET_INTEGER, 350 }; 351 352 #ifdef CONFIG_CGROUPS 353 BPF_CALL_0(bpf_get_current_cgroup_id) 354 { 355 struct cgroup *cgrp = task_dfl_cgroup(current); 356 357 return cgroup_id(cgrp); 358 } 359 360 const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { 361 .func = bpf_get_current_cgroup_id, 362 .gpl_only = false, 363 .ret_type = RET_INTEGER, 364 }; 365 366 BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) 367 { 368 struct cgroup *cgrp = task_dfl_cgroup(current); 369 struct cgroup *ancestor; 370 371 ancestor = cgroup_ancestor(cgrp, ancestor_level); 372 if (!ancestor) 373 return 0; 374 return cgroup_id(ancestor); 375 } 376 377 const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { 378 .func = bpf_get_current_ancestor_cgroup_id, 379 .gpl_only = false, 380 .ret_type = RET_INTEGER, 381 .arg1_type = ARG_ANYTHING, 382 }; 383 384 #ifdef CONFIG_CGROUP_BPF 385 DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 386 bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 387 388 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 389 { 390 /* flags argument is not used now, 391 * but provides an ability to extend the API. 392 * verifier checks that its value is correct. 393 */ 394 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 395 struct bpf_cgroup_storage *storage = NULL; 396 void *ptr; 397 int i; 398 399 for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 400 if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 401 continue; 402 403 storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); 404 break; 405 } 406 407 if (stype == BPF_CGROUP_STORAGE_SHARED) 408 ptr = &READ_ONCE(storage->buf)->data[0]; 409 else 410 ptr = this_cpu_ptr(storage->percpu_buf); 411 412 return (unsigned long)ptr; 413 } 414 415 const struct bpf_func_proto bpf_get_local_storage_proto = { 416 .func = bpf_get_local_storage, 417 .gpl_only = false, 418 .ret_type = RET_PTR_TO_MAP_VALUE, 419 .arg1_type = ARG_CONST_MAP_PTR, 420 .arg2_type = ARG_ANYTHING, 421 }; 422 #endif 423 424 #define BPF_STRTOX_BASE_MASK 0x1F 425 426 static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, 427 unsigned long long *res, bool *is_negative) 428 { 429 unsigned int base = flags & BPF_STRTOX_BASE_MASK; 430 const char *cur_buf = buf; 431 size_t cur_len = buf_len; 432 unsigned int consumed; 433 size_t val_len; 434 char str[64]; 435 436 if (!buf || !buf_len || !res || !is_negative) 437 return -EINVAL; 438 439 if (base != 0 && base != 8 && base != 10 && base != 16) 440 return -EINVAL; 441 442 if (flags & ~BPF_STRTOX_BASE_MASK) 443 return -EINVAL; 444 445 while (cur_buf < buf + buf_len && isspace(*cur_buf)) 446 ++cur_buf; 447 448 *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); 449 if (*is_negative) 450 ++cur_buf; 451 452 consumed = cur_buf - buf; 453 cur_len -= consumed; 454 if (!cur_len) 455 return -EINVAL; 456 457 cur_len = min(cur_len, sizeof(str) - 1); 458 memcpy(str, cur_buf, cur_len); 459 str[cur_len] = '\0'; 460 cur_buf = str; 461 462 cur_buf = _parse_integer_fixup_radix(cur_buf, &base); 463 val_len = _parse_integer(cur_buf, base, res); 464 465 if (val_len & KSTRTOX_OVERFLOW) 466 return -ERANGE; 467 468 if (val_len == 0) 469 return -EINVAL; 470 471 cur_buf += val_len; 472 consumed += cur_buf - str; 473 474 return consumed; 475 } 476 477 static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, 478 long long *res) 479 { 480 unsigned long long _res; 481 bool is_negative; 482 int err; 483 484 err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 485 if (err < 0) 486 return err; 487 if (is_negative) { 488 if ((long long)-_res > 0) 489 return -ERANGE; 490 *res = -_res; 491 } else { 492 if ((long long)_res < 0) 493 return -ERANGE; 494 *res = _res; 495 } 496 return err; 497 } 498 499 BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, 500 long *, res) 501 { 502 long long _res; 503 int err; 504 505 err = __bpf_strtoll(buf, buf_len, flags, &_res); 506 if (err < 0) 507 return err; 508 if (_res != (long)_res) 509 return -ERANGE; 510 *res = _res; 511 return err; 512 } 513 514 const struct bpf_func_proto bpf_strtol_proto = { 515 .func = bpf_strtol, 516 .gpl_only = false, 517 .ret_type = RET_INTEGER, 518 .arg1_type = ARG_PTR_TO_MEM, 519 .arg2_type = ARG_CONST_SIZE, 520 .arg3_type = ARG_ANYTHING, 521 .arg4_type = ARG_PTR_TO_LONG, 522 }; 523 524 BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, 525 unsigned long *, res) 526 { 527 unsigned long long _res; 528 bool is_negative; 529 int err; 530 531 err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); 532 if (err < 0) 533 return err; 534 if (is_negative) 535 return -EINVAL; 536 if (_res != (unsigned long)_res) 537 return -ERANGE; 538 *res = _res; 539 return err; 540 } 541 542 const struct bpf_func_proto bpf_strtoul_proto = { 543 .func = bpf_strtoul, 544 .gpl_only = false, 545 .ret_type = RET_INTEGER, 546 .arg1_type = ARG_PTR_TO_MEM, 547 .arg2_type = ARG_CONST_SIZE, 548 .arg3_type = ARG_ANYTHING, 549 .arg4_type = ARG_PTR_TO_LONG, 550 }; 551 #endif 552 553 BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, 554 struct bpf_pidns_info *, nsdata, u32, size) 555 { 556 struct task_struct *task = current; 557 struct pid_namespace *pidns; 558 int err = -EINVAL; 559 560 if (unlikely(size != sizeof(struct bpf_pidns_info))) 561 goto clear; 562 563 if (unlikely((u64)(dev_t)dev != dev)) 564 goto clear; 565 566 if (unlikely(!task)) 567 goto clear; 568 569 pidns = task_active_pid_ns(task); 570 if (unlikely(!pidns)) { 571 err = -ENOENT; 572 goto clear; 573 } 574 575 if (!ns_match(&pidns->ns, (dev_t)dev, ino)) 576 goto clear; 577 578 nsdata->pid = task_pid_nr_ns(task, pidns); 579 nsdata->tgid = task_tgid_nr_ns(task, pidns); 580 return 0; 581 clear: 582 memset((void *)nsdata, 0, (size_t) size); 583 return err; 584 } 585 586 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { 587 .func = bpf_get_ns_current_pid_tgid, 588 .gpl_only = false, 589 .ret_type = RET_INTEGER, 590 .arg1_type = ARG_ANYTHING, 591 .arg2_type = ARG_ANYTHING, 592 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 593 .arg4_type = ARG_CONST_SIZE, 594 }; 595 596 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { 597 .func = bpf_get_raw_cpu_id, 598 .gpl_only = false, 599 .ret_type = RET_INTEGER, 600 }; 601 602 BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, 603 u64, flags, void *, data, u64, size) 604 { 605 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 606 return -EINVAL; 607 608 return bpf_event_output(map, flags, data, size, NULL, 0, NULL); 609 } 610 611 const struct bpf_func_proto bpf_event_output_data_proto = { 612 .func = bpf_event_output_data, 613 .gpl_only = true, 614 .ret_type = RET_INTEGER, 615 .arg1_type = ARG_PTR_TO_CTX, 616 .arg2_type = ARG_CONST_MAP_PTR, 617 .arg3_type = ARG_ANYTHING, 618 .arg4_type = ARG_PTR_TO_MEM, 619 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 620 }; 621 622 BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size, 623 const void __user *, user_ptr) 624 { 625 int ret = copy_from_user(dst, user_ptr, size); 626 627 if (unlikely(ret)) { 628 memset(dst, 0, size); 629 ret = -EFAULT; 630 } 631 632 return ret; 633 } 634 635 const struct bpf_func_proto bpf_copy_from_user_proto = { 636 .func = bpf_copy_from_user, 637 .gpl_only = false, 638 .ret_type = RET_INTEGER, 639 .arg1_type = ARG_PTR_TO_UNINIT_MEM, 640 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 641 .arg3_type = ARG_ANYTHING, 642 }; 643 644 BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) 645 { 646 if (cpu >= nr_cpu_ids) 647 return (unsigned long)NULL; 648 649 return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); 650 } 651 652 const struct bpf_func_proto bpf_per_cpu_ptr_proto = { 653 .func = bpf_per_cpu_ptr, 654 .gpl_only = false, 655 .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, 656 .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 657 .arg2_type = ARG_ANYTHING, 658 }; 659 660 BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) 661 { 662 return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); 663 } 664 665 const struct bpf_func_proto bpf_this_cpu_ptr_proto = { 666 .func = bpf_this_cpu_ptr, 667 .gpl_only = false, 668 .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, 669 .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 670 }; 671 672 static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, 673 size_t bufsz) 674 { 675 void __user *user_ptr = (__force void __user *)unsafe_ptr; 676 677 buf[0] = 0; 678 679 switch (fmt_ptype) { 680 case 's': 681 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 682 if ((unsigned long)unsafe_ptr < TASK_SIZE) 683 return strncpy_from_user_nofault(buf, user_ptr, bufsz); 684 fallthrough; 685 #endif 686 case 'k': 687 return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz); 688 case 'u': 689 return strncpy_from_user_nofault(buf, user_ptr, bufsz); 690 } 691 692 return -EINVAL; 693 } 694 695 /* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p 696 */ 697 #define MAX_PRINTF_BUF_LEN 512 698 699 struct bpf_printf_buf { 700 char tmp_buf[MAX_PRINTF_BUF_LEN]; 701 }; 702 static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf); 703 static DEFINE_PER_CPU(int, bpf_printf_buf_used); 704 705 static int try_get_fmt_tmp_buf(char **tmp_buf) 706 { 707 struct bpf_printf_buf *bufs; 708 int used; 709 710 preempt_disable(); 711 used = this_cpu_inc_return(bpf_printf_buf_used); 712 if (WARN_ON_ONCE(used > 1)) { 713 this_cpu_dec(bpf_printf_buf_used); 714 preempt_enable(); 715 return -EBUSY; 716 } 717 bufs = this_cpu_ptr(&bpf_printf_buf); 718 *tmp_buf = bufs->tmp_buf; 719 720 return 0; 721 } 722 723 void bpf_bprintf_cleanup(void) 724 { 725 if (this_cpu_read(bpf_printf_buf_used)) { 726 this_cpu_dec(bpf_printf_buf_used); 727 preempt_enable(); 728 } 729 } 730 731 /* 732 * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers 733 * 734 * Returns a negative value if fmt is an invalid format string or 0 otherwise. 735 * 736 * This can be used in two ways: 737 * - Format string verification only: when bin_args is NULL 738 * - Arguments preparation: in addition to the above verification, it writes in 739 * bin_args a binary representation of arguments usable by bstr_printf where 740 * pointers from BPF have been sanitized. 741 * 742 * In argument preparation mode, if 0 is returned, safe temporary buffers are 743 * allocated and bpf_bprintf_cleanup should be called to free them after use. 744 */ 745 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, 746 u32 **bin_args, u32 num_args) 747 { 748 char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; 749 size_t sizeof_cur_arg, sizeof_cur_ip; 750 int err, i, num_spec = 0; 751 u64 cur_arg; 752 char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX"; 753 754 fmt_end = strnchr(fmt, fmt_size, 0); 755 if (!fmt_end) 756 return -EINVAL; 757 fmt_size = fmt_end - fmt; 758 759 if (bin_args) { 760 if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) 761 return -EBUSY; 762 763 tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN; 764 *bin_args = (u32 *)tmp_buf; 765 } 766 767 for (i = 0; i < fmt_size; i++) { 768 if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { 769 err = -EINVAL; 770 goto out; 771 } 772 773 if (fmt[i] != '%') 774 continue; 775 776 if (fmt[i + 1] == '%') { 777 i++; 778 continue; 779 } 780 781 if (num_spec >= num_args) { 782 err = -EINVAL; 783 goto out; 784 } 785 786 /* The string is zero-terminated so if fmt[i] != 0, we can 787 * always access fmt[i + 1], in the worst case it will be a 0 788 */ 789 i++; 790 791 /* skip optional "[0 +-][num]" width formatting field */ 792 while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || 793 fmt[i] == ' ') 794 i++; 795 if (fmt[i] >= '1' && fmt[i] <= '9') { 796 i++; 797 while (fmt[i] >= '0' && fmt[i] <= '9') 798 i++; 799 } 800 801 if (fmt[i] == 'p') { 802 sizeof_cur_arg = sizeof(long); 803 804 if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') && 805 fmt[i + 2] == 's') { 806 fmt_ptype = fmt[i + 1]; 807 i += 2; 808 goto fmt_str; 809 } 810 811 if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) || 812 ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' || 813 fmt[i + 1] == 'x' || fmt[i + 1] == 's' || 814 fmt[i + 1] == 'S') { 815 /* just kernel pointers */ 816 if (tmp_buf) 817 cur_arg = raw_args[num_spec]; 818 i++; 819 goto nocopy_fmt; 820 } 821 822 if (fmt[i + 1] == 'B') { 823 if (tmp_buf) { 824 err = snprintf(tmp_buf, 825 (tmp_buf_end - tmp_buf), 826 "%pB", 827 (void *)(long)raw_args[num_spec]); 828 tmp_buf += (err + 1); 829 } 830 831 i++; 832 num_spec++; 833 continue; 834 } 835 836 /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ 837 if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') || 838 (fmt[i + 2] != '4' && fmt[i + 2] != '6')) { 839 err = -EINVAL; 840 goto out; 841 } 842 843 i += 2; 844 if (!tmp_buf) 845 goto nocopy_fmt; 846 847 sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16; 848 if (tmp_buf_end - tmp_buf < sizeof_cur_ip) { 849 err = -ENOSPC; 850 goto out; 851 } 852 853 unsafe_ptr = (char *)(long)raw_args[num_spec]; 854 err = copy_from_kernel_nofault(cur_ip, unsafe_ptr, 855 sizeof_cur_ip); 856 if (err < 0) 857 memset(cur_ip, 0, sizeof_cur_ip); 858 859 /* hack: bstr_printf expects IP addresses to be 860 * pre-formatted as strings, ironically, the easiest way 861 * to do that is to call snprintf. 862 */ 863 ip_spec[2] = fmt[i - 1]; 864 ip_spec[3] = fmt[i]; 865 err = snprintf(tmp_buf, tmp_buf_end - tmp_buf, 866 ip_spec, &cur_ip); 867 868 tmp_buf += err + 1; 869 num_spec++; 870 871 continue; 872 } else if (fmt[i] == 's') { 873 fmt_ptype = fmt[i]; 874 fmt_str: 875 if (fmt[i + 1] != 0 && 876 !isspace(fmt[i + 1]) && 877 !ispunct(fmt[i + 1])) { 878 err = -EINVAL; 879 goto out; 880 } 881 882 if (!tmp_buf) 883 goto nocopy_fmt; 884 885 if (tmp_buf_end == tmp_buf) { 886 err = -ENOSPC; 887 goto out; 888 } 889 890 unsafe_ptr = (char *)(long)raw_args[num_spec]; 891 err = bpf_trace_copy_string(tmp_buf, unsafe_ptr, 892 fmt_ptype, 893 tmp_buf_end - tmp_buf); 894 if (err < 0) { 895 tmp_buf[0] = '\0'; 896 err = 1; 897 } 898 899 tmp_buf += err; 900 num_spec++; 901 902 continue; 903 } 904 905 sizeof_cur_arg = sizeof(int); 906 907 if (fmt[i] == 'l') { 908 sizeof_cur_arg = sizeof(long); 909 i++; 910 } 911 if (fmt[i] == 'l') { 912 sizeof_cur_arg = sizeof(long long); 913 i++; 914 } 915 916 if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && 917 fmt[i] != 'x' && fmt[i] != 'X') { 918 err = -EINVAL; 919 goto out; 920 } 921 922 if (tmp_buf) 923 cur_arg = raw_args[num_spec]; 924 nocopy_fmt: 925 if (tmp_buf) { 926 tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32)); 927 if (tmp_buf_end - tmp_buf < sizeof_cur_arg) { 928 err = -ENOSPC; 929 goto out; 930 } 931 932 if (sizeof_cur_arg == 8) { 933 *(u32 *)tmp_buf = *(u32 *)&cur_arg; 934 *(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1); 935 } else { 936 *(u32 *)tmp_buf = (u32)(long)cur_arg; 937 } 938 tmp_buf += sizeof_cur_arg; 939 } 940 num_spec++; 941 } 942 943 err = 0; 944 out: 945 if (err) 946 bpf_bprintf_cleanup(); 947 return err; 948 } 949 950 #define MAX_SNPRINTF_VARARGS 12 951 952 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, 953 const void *, data, u32, data_len) 954 { 955 int err, num_args; 956 u32 *bin_args; 957 958 if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 || 959 (data_len && !data)) 960 return -EINVAL; 961 num_args = data_len / 8; 962 963 /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we 964 * can safely give an unbounded size. 965 */ 966 err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); 967 if (err < 0) 968 return err; 969 970 err = bstr_printf(str, str_size, fmt, bin_args); 971 972 bpf_bprintf_cleanup(); 973 974 return err + 1; 975 } 976 977 const struct bpf_func_proto bpf_snprintf_proto = { 978 .func = bpf_snprintf, 979 .gpl_only = true, 980 .ret_type = RET_INTEGER, 981 .arg1_type = ARG_PTR_TO_MEM_OR_NULL, 982 .arg2_type = ARG_CONST_SIZE_OR_ZERO, 983 .arg3_type = ARG_PTR_TO_CONST_STR, 984 .arg4_type = ARG_PTR_TO_MEM_OR_NULL, 985 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 986 }; 987 988 const struct bpf_func_proto bpf_get_current_task_proto __weak; 989 const struct bpf_func_proto bpf_probe_read_user_proto __weak; 990 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; 991 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; 992 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; 993 994 const struct bpf_func_proto * 995 bpf_base_func_proto(enum bpf_func_id func_id) 996 { 997 switch (func_id) { 998 case BPF_FUNC_map_lookup_elem: 999 return &bpf_map_lookup_elem_proto; 1000 case BPF_FUNC_map_update_elem: 1001 return &bpf_map_update_elem_proto; 1002 case BPF_FUNC_map_delete_elem: 1003 return &bpf_map_delete_elem_proto; 1004 case BPF_FUNC_map_push_elem: 1005 return &bpf_map_push_elem_proto; 1006 case BPF_FUNC_map_pop_elem: 1007 return &bpf_map_pop_elem_proto; 1008 case BPF_FUNC_map_peek_elem: 1009 return &bpf_map_peek_elem_proto; 1010 case BPF_FUNC_get_prandom_u32: 1011 return &bpf_get_prandom_u32_proto; 1012 case BPF_FUNC_get_smp_processor_id: 1013 return &bpf_get_raw_smp_processor_id_proto; 1014 case BPF_FUNC_get_numa_node_id: 1015 return &bpf_get_numa_node_id_proto; 1016 case BPF_FUNC_tail_call: 1017 return &bpf_tail_call_proto; 1018 case BPF_FUNC_ktime_get_ns: 1019 return &bpf_ktime_get_ns_proto; 1020 case BPF_FUNC_ktime_get_boot_ns: 1021 return &bpf_ktime_get_boot_ns_proto; 1022 case BPF_FUNC_ktime_get_coarse_ns: 1023 return &bpf_ktime_get_coarse_ns_proto; 1024 case BPF_FUNC_ringbuf_output: 1025 return &bpf_ringbuf_output_proto; 1026 case BPF_FUNC_ringbuf_reserve: 1027 return &bpf_ringbuf_reserve_proto; 1028 case BPF_FUNC_ringbuf_submit: 1029 return &bpf_ringbuf_submit_proto; 1030 case BPF_FUNC_ringbuf_discard: 1031 return &bpf_ringbuf_discard_proto; 1032 case BPF_FUNC_ringbuf_query: 1033 return &bpf_ringbuf_query_proto; 1034 case BPF_FUNC_for_each_map_elem: 1035 return &bpf_for_each_map_elem_proto; 1036 default: 1037 break; 1038 } 1039 1040 if (!bpf_capable()) 1041 return NULL; 1042 1043 switch (func_id) { 1044 case BPF_FUNC_spin_lock: 1045 return &bpf_spin_lock_proto; 1046 case BPF_FUNC_spin_unlock: 1047 return &bpf_spin_unlock_proto; 1048 case BPF_FUNC_jiffies64: 1049 return &bpf_jiffies64_proto; 1050 case BPF_FUNC_per_cpu_ptr: 1051 return &bpf_per_cpu_ptr_proto; 1052 case BPF_FUNC_this_cpu_ptr: 1053 return &bpf_this_cpu_ptr_proto; 1054 default: 1055 break; 1056 } 1057 1058 if (!perfmon_capable()) 1059 return NULL; 1060 1061 switch (func_id) { 1062 case BPF_FUNC_trace_printk: 1063 return bpf_get_trace_printk_proto(); 1064 case BPF_FUNC_get_current_task: 1065 return &bpf_get_current_task_proto; 1066 case BPF_FUNC_probe_read_user: 1067 return &bpf_probe_read_user_proto; 1068 case BPF_FUNC_probe_read_kernel: 1069 return &bpf_probe_read_kernel_proto; 1070 case BPF_FUNC_probe_read_user_str: 1071 return &bpf_probe_read_user_str_proto; 1072 case BPF_FUNC_probe_read_kernel_str: 1073 return &bpf_probe_read_kernel_str_proto; 1074 case BPF_FUNC_snprintf_btf: 1075 return &bpf_snprintf_btf_proto; 1076 case BPF_FUNC_snprintf: 1077 return &bpf_snprintf_proto; 1078 default: 1079 return NULL; 1080 } 1081 } 1082