// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running BPF programs to complete so that
	 * userspace, when we return to it, knows that all programs
	 * that could be running use the new map value.
170 */ 171 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 172 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 173 synchronize_rcu(); 174 } 175 176 static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, 177 void *value, __u64 flags) 178 { 179 int err; 180 181 /* Need to create a kthread, thus must support schedule */ 182 if (bpf_map_is_dev_bound(map)) { 183 return bpf_map_offload_update_elem(map, key, value, flags); 184 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 185 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 186 return map->ops->map_update_elem(map, key, value, flags); 187 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || 188 map->map_type == BPF_MAP_TYPE_SOCKMAP) { 189 return sock_map_update_elem_sys(map, key, value, flags); 190 } else if (IS_FD_PROG_ARRAY(map)) { 191 return bpf_fd_array_map_update_elem(map, f.file, key, value, 192 flags); 193 } 194 195 bpf_disable_instrumentation(); 196 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 197 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 198 err = bpf_percpu_hash_update(map, key, value, flags); 199 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 200 err = bpf_percpu_array_update(map, key, value, flags); 201 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 202 err = bpf_percpu_cgroup_storage_update(map, key, value, 203 flags); 204 } else if (IS_FD_ARRAY(map)) { 205 rcu_read_lock(); 206 err = bpf_fd_array_map_update_elem(map, f.file, key, value, 207 flags); 208 rcu_read_unlock(); 209 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 210 rcu_read_lock(); 211 err = bpf_fd_htab_map_update_elem(map, f.file, key, value, 212 flags); 213 rcu_read_unlock(); 214 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 215 /* rcu_read_lock() is not needed */ 216 err = bpf_fd_reuseport_array_update_elem(map, key, value, 217 flags); 218 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 219 map->map_type == BPF_MAP_TYPE_STACK || 220 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 221 err = map->ops->map_push_elem(map, value, flags); 222 } else { 223 rcu_read_lock(); 224 err = map->ops->map_update_elem(map, key, value, flags); 225 rcu_read_unlock(); 226 } 227 bpf_enable_instrumentation(); 228 maybe_wait_bpf_programs(map); 229 230 return err; 231 } 232 233 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 234 __u64 flags) 235 { 236 void *ptr; 237 int err; 238 239 if (bpf_map_is_dev_bound(map)) 240 return bpf_map_offload_lookup_elem(map, key, value); 241 242 bpf_disable_instrumentation(); 243 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 244 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 245 err = bpf_percpu_hash_copy(map, key, value); 246 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 247 err = bpf_percpu_array_copy(map, key, value); 248 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 249 err = bpf_percpu_cgroup_storage_copy(map, key, value); 250 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 251 err = bpf_stackmap_copy(map, key, value); 252 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 253 err = bpf_fd_array_map_lookup_elem(map, key, value); 254 } else if (IS_FD_HASH(map)) { 255 err = bpf_fd_htab_map_lookup_elem(map, key, value); 256 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 257 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 258 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 259 map->map_type == BPF_MAP_TYPE_STACK || 260 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 261 
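	/*
	 * [Editorial aside, not part of the original file.]
	 * For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK the syscall-side
	 * lookup has no key (key_size is 0) and simply peeks the head
	 * element via ops->map_peek_elem().  A minimal user-space sketch,
	 * assuming "queue_fd" is a hypothetical fd of a queue map holding
	 * __u64 values:
	 *
	 *	union bpf_attr attr = {};
	 *	__u64 head;
	 *	int err;
	 *
	 *	attr.map_fd = queue_fd;
	 *	attr.key = 0;		key must stay NULL for queue/stack
	 *	attr.value = (__u64)(unsigned long)&head;
	 *	err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	 *
	 * BPF_MAP_LOOKUP_AND_DELETE_ELEM pops instead of peeking; for
	 * BPF_MAP_TYPE_BLOOM_FILTER the "value" is copied in first and the
	 * peek tests membership (see map_lookup_elem() below).
	 */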
err = map->ops->map_peek_elem(map, value); 262 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 263 /* struct_ops map requires directly updating "value" */ 264 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 265 } else { 266 rcu_read_lock(); 267 if (map->ops->map_lookup_elem_sys_only) 268 ptr = map->ops->map_lookup_elem_sys_only(map, key); 269 else 270 ptr = map->ops->map_lookup_elem(map, key); 271 if (IS_ERR(ptr)) { 272 err = PTR_ERR(ptr); 273 } else if (!ptr) { 274 err = -ENOENT; 275 } else { 276 err = 0; 277 if (flags & BPF_F_LOCK) 278 /* lock 'ptr' and copy everything but lock */ 279 copy_map_value_locked(map, value, ptr, true); 280 else 281 copy_map_value(map, value, ptr); 282 /* mask lock and timer, since value wasn't zero inited */ 283 check_and_init_map_value(map, value); 284 } 285 rcu_read_unlock(); 286 } 287 288 bpf_enable_instrumentation(); 289 maybe_wait_bpf_programs(map); 290 291 return err; 292 } 293 294 /* Please, do not use this function outside from the map creation path 295 * (e.g. in map update path) without taking care of setting the active 296 * memory cgroup (see at bpf_map_kmalloc_node() for example). 297 */ 298 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 299 { 300 /* We really just want to fail instead of triggering OOM killer 301 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 302 * which is used for lower order allocation requests. 303 * 304 * It has been observed that higher order allocation requests done by 305 * vmalloc with __GFP_NORETRY being set might fail due to not trying 306 * to reclaim memory from the page cache, thus we set 307 * __GFP_RETRY_MAYFAIL to avoid such situations. 308 */ 309 310 const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT; 311 unsigned int flags = 0; 312 unsigned long align = 1; 313 void *area; 314 315 if (size >= SIZE_MAX) 316 return NULL; 317 318 /* kmalloc()'ed memory can't be mmap()'ed */ 319 if (mmapable) { 320 BUG_ON(!PAGE_ALIGNED(size)); 321 align = SHMLBA; 322 flags = VM_USERMAP; 323 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 324 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, 325 numa_node); 326 if (area != NULL) 327 return area; 328 } 329 330 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, 331 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL, 332 flags, numa_node, __builtin_return_address(0)); 333 } 334 335 void *bpf_map_area_alloc(u64 size, int numa_node) 336 { 337 return __bpf_map_area_alloc(size, numa_node, false); 338 } 339 340 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 341 { 342 return __bpf_map_area_alloc(size, numa_node, true); 343 } 344 345 void bpf_map_area_free(void *area) 346 { 347 kvfree(area); 348 } 349 350 static u32 bpf_map_flags_retain_permanent(u32 flags) 351 { 352 /* Some map creation flags are not tied to the map object but 353 * rather to the map fd instead, so they have no meaning upon 354 * map object inspection since multiple file descriptors with 355 * different (access) properties can exist here. Thus, given 356 * this has zero meaning for the map itself, lets clear these 357 * from here. 
358 */ 359 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 360 } 361 362 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 363 { 364 map->map_type = attr->map_type; 365 map->key_size = attr->key_size; 366 map->value_size = attr->value_size; 367 map->max_entries = attr->max_entries; 368 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 369 map->numa_node = bpf_map_attr_numa_node(attr); 370 map->map_extra = attr->map_extra; 371 } 372 373 static int bpf_map_alloc_id(struct bpf_map *map) 374 { 375 int id; 376 377 idr_preload(GFP_KERNEL); 378 spin_lock_bh(&map_idr_lock); 379 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); 380 if (id > 0) 381 map->id = id; 382 spin_unlock_bh(&map_idr_lock); 383 idr_preload_end(); 384 385 if (WARN_ON_ONCE(!id)) 386 return -ENOSPC; 387 388 return id > 0 ? 0 : id; 389 } 390 391 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) 392 { 393 unsigned long flags; 394 395 /* Offloaded maps are removed from the IDR store when their device 396 * disappears - even if someone holds an fd to them they are unusable, 397 * the memory is gone, all ops will fail; they are simply waiting for 398 * refcnt to drop to be freed. 399 */ 400 if (!map->id) 401 return; 402 403 if (do_idr_lock) 404 spin_lock_irqsave(&map_idr_lock, flags); 405 else 406 __acquire(&map_idr_lock); 407 408 idr_remove(&map_idr, map->id); 409 map->id = 0; 410 411 if (do_idr_lock) 412 spin_unlock_irqrestore(&map_idr_lock, flags); 413 else 414 __release(&map_idr_lock); 415 } 416 417 #ifdef CONFIG_MEMCG_KMEM 418 static void bpf_map_save_memcg(struct bpf_map *map) 419 { 420 map->memcg = get_mem_cgroup_from_mm(current->mm); 421 } 422 423 static void bpf_map_release_memcg(struct bpf_map *map) 424 { 425 mem_cgroup_put(map->memcg); 426 } 427 428 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, 429 int node) 430 { 431 struct mem_cgroup *old_memcg; 432 void *ptr; 433 434 old_memcg = set_active_memcg(map->memcg); 435 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); 436 set_active_memcg(old_memcg); 437 438 return ptr; 439 } 440 441 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) 442 { 443 struct mem_cgroup *old_memcg; 444 void *ptr; 445 446 old_memcg = set_active_memcg(map->memcg); 447 ptr = kzalloc(size, flags | __GFP_ACCOUNT); 448 set_active_memcg(old_memcg); 449 450 return ptr; 451 } 452 453 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, 454 size_t align, gfp_t flags) 455 { 456 struct mem_cgroup *old_memcg; 457 void __percpu *ptr; 458 459 old_memcg = set_active_memcg(map->memcg); 460 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); 461 set_active_memcg(old_memcg); 462 463 return ptr; 464 } 465 466 #else 467 static void bpf_map_save_memcg(struct bpf_map *map) 468 { 469 } 470 471 static void bpf_map_release_memcg(struct bpf_map *map) 472 { 473 } 474 #endif 475 476 /* called from workqueue */ 477 static void bpf_map_free_deferred(struct work_struct *work) 478 { 479 struct bpf_map *map = container_of(work, struct bpf_map, work); 480 481 security_bpf_map_free(map); 482 bpf_map_release_memcg(map); 483 /* implementation dependent freeing */ 484 map->ops->map_free(map); 485 } 486 487 static void bpf_map_put_uref(struct bpf_map *map) 488 { 489 if (atomic64_dec_and_test(&map->usercnt)) { 490 if (map->ops->map_release_uref) 491 map->ops->map_release_uref(map); 492 } 493 } 494 495 /* decrement map refcnt and schedule it for freeing via workqueue 496 * (unrelying map 
implementation ops->map_free() might sleep) 497 */ 498 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) 499 { 500 if (atomic64_dec_and_test(&map->refcnt)) { 501 /* bpf_map_free_id() must be called first */ 502 bpf_map_free_id(map, do_idr_lock); 503 btf_put(map->btf); 504 INIT_WORK(&map->work, bpf_map_free_deferred); 505 schedule_work(&map->work); 506 } 507 } 508 509 void bpf_map_put(struct bpf_map *map) 510 { 511 __bpf_map_put(map, true); 512 } 513 EXPORT_SYMBOL_GPL(bpf_map_put); 514 515 void bpf_map_put_with_uref(struct bpf_map *map) 516 { 517 bpf_map_put_uref(map); 518 bpf_map_put(map); 519 } 520 521 static int bpf_map_release(struct inode *inode, struct file *filp) 522 { 523 struct bpf_map *map = filp->private_data; 524 525 if (map->ops->map_release) 526 map->ops->map_release(map, filp); 527 528 bpf_map_put_with_uref(map); 529 return 0; 530 } 531 532 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) 533 { 534 fmode_t mode = f.file->f_mode; 535 536 /* Our file permissions may have been overridden by global 537 * map permissions facing syscall side. 538 */ 539 if (READ_ONCE(map->frozen)) 540 mode &= ~FMODE_CAN_WRITE; 541 return mode; 542 } 543 544 #ifdef CONFIG_PROC_FS 545 /* Provides an approximation of the map's memory footprint. 546 * Used only to provide a backward compatibility and display 547 * a reasonable "memlock" info. 548 */ 549 static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) 550 { 551 unsigned long size; 552 553 size = round_up(map->key_size + bpf_map_value_size(map), 8); 554 555 return round_up(map->max_entries * size, PAGE_SIZE); 556 } 557 558 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 559 { 560 struct bpf_map *map = filp->private_data; 561 u32 type = 0, jited = 0; 562 563 if (map_type_contains_progs(map)) { 564 spin_lock(&map->owner.lock); 565 type = map->owner.type; 566 jited = map->owner.jited; 567 spin_unlock(&map->owner.lock); 568 } 569 570 seq_printf(m, 571 "map_type:\t%u\n" 572 "key_size:\t%u\n" 573 "value_size:\t%u\n" 574 "max_entries:\t%u\n" 575 "map_flags:\t%#x\n" 576 "map_extra:\t%#llx\n" 577 "memlock:\t%lu\n" 578 "map_id:\t%u\n" 579 "frozen:\t%u\n", 580 map->map_type, 581 map->key_size, 582 map->value_size, 583 map->max_entries, 584 map->map_flags, 585 (unsigned long long)map->map_extra, 586 bpf_map_memory_footprint(map), 587 map->id, 588 READ_ONCE(map->frozen)); 589 if (type) { 590 seq_printf(m, "owner_prog_type:\t%u\n", type); 591 seq_printf(m, "owner_jited:\t%u\n", jited); 592 } 593 } 594 #endif 595 596 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, 597 loff_t *ppos) 598 { 599 /* We need this handler such that alloc_file() enables 600 * f_mode with FMODE_CAN_READ. 601 */ 602 return -EINVAL; 603 } 604 605 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, 606 size_t siz, loff_t *ppos) 607 { 608 /* We need this handler such that alloc_file() enables 609 * f_mode with FMODE_CAN_WRITE. 
610 */ 611 return -EINVAL; 612 } 613 614 /* called for any extra memory-mapped regions (except initial) */ 615 static void bpf_map_mmap_open(struct vm_area_struct *vma) 616 { 617 struct bpf_map *map = vma->vm_file->private_data; 618 619 if (vma->vm_flags & VM_MAYWRITE) 620 bpf_map_write_active_inc(map); 621 } 622 623 /* called for all unmapped memory region (including initial) */ 624 static void bpf_map_mmap_close(struct vm_area_struct *vma) 625 { 626 struct bpf_map *map = vma->vm_file->private_data; 627 628 if (vma->vm_flags & VM_MAYWRITE) 629 bpf_map_write_active_dec(map); 630 } 631 632 static const struct vm_operations_struct bpf_map_default_vmops = { 633 .open = bpf_map_mmap_open, 634 .close = bpf_map_mmap_close, 635 }; 636 637 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 638 { 639 struct bpf_map *map = filp->private_data; 640 int err; 641 642 if (!map->ops->map_mmap || map_value_has_spin_lock(map) || 643 map_value_has_timer(map)) 644 return -ENOTSUPP; 645 646 if (!(vma->vm_flags & VM_SHARED)) 647 return -EINVAL; 648 649 mutex_lock(&map->freeze_mutex); 650 651 if (vma->vm_flags & VM_WRITE) { 652 if (map->frozen) { 653 err = -EPERM; 654 goto out; 655 } 656 /* map is meant to be read-only, so do not allow mapping as 657 * writable, because it's possible to leak a writable page 658 * reference and allows user-space to still modify it after 659 * freezing, while verifier will assume contents do not change 660 */ 661 if (map->map_flags & BPF_F_RDONLY_PROG) { 662 err = -EACCES; 663 goto out; 664 } 665 } 666 667 /* set default open/close callbacks */ 668 vma->vm_ops = &bpf_map_default_vmops; 669 vma->vm_private_data = map; 670 vma->vm_flags &= ~VM_MAYEXEC; 671 if (!(vma->vm_flags & VM_WRITE)) 672 /* disallow re-mapping with PROT_WRITE */ 673 vma->vm_flags &= ~VM_MAYWRITE; 674 675 err = map->ops->map_mmap(map, vma); 676 if (err) 677 goto out; 678 679 if (vma->vm_flags & VM_MAYWRITE) 680 bpf_map_write_active_inc(map); 681 out: 682 mutex_unlock(&map->freeze_mutex); 683 return err; 684 } 685 686 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 687 { 688 struct bpf_map *map = filp->private_data; 689 690 if (map->ops->map_poll) 691 return map->ops->map_poll(map, filp, pts); 692 693 return EPOLLERR; 694 } 695 696 const struct file_operations bpf_map_fops = { 697 #ifdef CONFIG_PROC_FS 698 .show_fdinfo = bpf_map_show_fdinfo, 699 #endif 700 .release = bpf_map_release, 701 .read = bpf_dummy_read, 702 .write = bpf_dummy_write, 703 .mmap = bpf_map_mmap, 704 .poll = bpf_map_poll, 705 }; 706 707 int bpf_map_new_fd(struct bpf_map *map, int flags) 708 { 709 int ret; 710 711 ret = security_bpf_map(map, OPEN_FMODE(flags)); 712 if (ret < 0) 713 return ret; 714 715 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 716 flags | O_CLOEXEC); 717 } 718 719 int bpf_get_file_flag(int flags) 720 { 721 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) 722 return -EINVAL; 723 if (flags & BPF_F_RDONLY) 724 return O_RDONLY; 725 if (flags & BPF_F_WRONLY) 726 return O_WRONLY; 727 return O_RDWR; 728 } 729 730 /* helper macro to check that unused fields 'union bpf_attr' are zero */ 731 #define CHECK_ATTR(CMD) \ 732 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ 733 sizeof(attr->CMD##_LAST_FIELD), 0, \ 734 sizeof(*attr) - \ 735 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ 736 sizeof(attr->CMD##_LAST_FIELD)) != NULL 737 738 /* dst and src must have at least "size" number of bytes. 739 * Return strlen on success and < 0 on error. 
740 */ 741 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 742 { 743 const char *end = src + size; 744 const char *orig_src = src; 745 746 memset(dst, 0, size); 747 /* Copy all isalnum(), '_' and '.' chars. */ 748 while (src < end && *src) { 749 if (!isalnum(*src) && 750 *src != '_' && *src != '.') 751 return -EINVAL; 752 *dst++ = *src++; 753 } 754 755 /* No '\0' found in "size" number of bytes */ 756 if (src == end) 757 return -EINVAL; 758 759 return src - orig_src; 760 } 761 762 int map_check_no_btf(const struct bpf_map *map, 763 const struct btf *btf, 764 const struct btf_type *key_type, 765 const struct btf_type *value_type) 766 { 767 return -ENOTSUPP; 768 } 769 770 static int map_check_btf(struct bpf_map *map, const struct btf *btf, 771 u32 btf_key_id, u32 btf_value_id) 772 { 773 const struct btf_type *key_type, *value_type; 774 u32 key_size, value_size; 775 int ret = 0; 776 777 /* Some maps allow key to be unspecified. */ 778 if (btf_key_id) { 779 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 780 if (!key_type || key_size != map->key_size) 781 return -EINVAL; 782 } else { 783 key_type = btf_type_by_id(btf, 0); 784 if (!map->ops->map_check_btf) 785 return -EINVAL; 786 } 787 788 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 789 if (!value_type || value_size != map->value_size) 790 return -EINVAL; 791 792 map->spin_lock_off = btf_find_spin_lock(btf, value_type); 793 794 if (map_value_has_spin_lock(map)) { 795 if (map->map_flags & BPF_F_RDONLY_PROG) 796 return -EACCES; 797 if (map->map_type != BPF_MAP_TYPE_HASH && 798 map->map_type != BPF_MAP_TYPE_ARRAY && 799 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 800 map->map_type != BPF_MAP_TYPE_SK_STORAGE && 801 map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 802 map->map_type != BPF_MAP_TYPE_TASK_STORAGE) 803 return -ENOTSUPP; 804 if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > 805 map->value_size) { 806 WARN_ONCE(1, 807 "verifier bug spin_lock_off %d value_size %d\n", 808 map->spin_lock_off, map->value_size); 809 return -EFAULT; 810 } 811 } 812 813 map->timer_off = btf_find_timer(btf, value_type); 814 if (map_value_has_timer(map)) { 815 if (map->map_flags & BPF_F_RDONLY_PROG) 816 return -EACCES; 817 if (map->map_type != BPF_MAP_TYPE_HASH && 818 map->map_type != BPF_MAP_TYPE_LRU_HASH && 819 map->map_type != BPF_MAP_TYPE_ARRAY) 820 return -EOPNOTSUPP; 821 } 822 823 if (map->ops->map_check_btf) 824 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 825 826 return ret; 827 } 828 829 #define BPF_MAP_CREATE_LAST_FIELD map_extra 830 /* called via syscall */ 831 static int map_create(union bpf_attr *attr) 832 { 833 int numa_node = bpf_map_attr_numa_node(attr); 834 struct bpf_map *map; 835 int f_flags; 836 int err; 837 838 err = CHECK_ATTR(BPF_MAP_CREATE); 839 if (err) 840 return -EINVAL; 841 842 if (attr->btf_vmlinux_value_type_id) { 843 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 844 attr->btf_key_type_id || attr->btf_value_type_id) 845 return -EINVAL; 846 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 847 return -EINVAL; 848 } 849 850 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && 851 attr->map_extra != 0) 852 return -EINVAL; 853 854 f_flags = bpf_get_file_flag(attr->map_flags); 855 if (f_flags < 0) 856 return f_flags; 857 858 if (numa_node != NUMA_NO_NODE && 859 ((unsigned int)numa_node >= nr_node_ids || 860 !node_online(numa_node))) 861 return -EINVAL; 862 863 /* find map type and init map: hashtable vs rbtree vs bloom vs ... 
*/ 864 map = find_and_alloc_map(attr); 865 if (IS_ERR(map)) 866 return PTR_ERR(map); 867 868 err = bpf_obj_name_cpy(map->name, attr->map_name, 869 sizeof(attr->map_name)); 870 if (err < 0) 871 goto free_map; 872 873 atomic64_set(&map->refcnt, 1); 874 atomic64_set(&map->usercnt, 1); 875 mutex_init(&map->freeze_mutex); 876 spin_lock_init(&map->owner.lock); 877 878 map->spin_lock_off = -EINVAL; 879 map->timer_off = -EINVAL; 880 if (attr->btf_key_type_id || attr->btf_value_type_id || 881 /* Even the map's value is a kernel's struct, 882 * the bpf_prog.o must have BTF to begin with 883 * to figure out the corresponding kernel's 884 * counter part. Thus, attr->btf_fd has 885 * to be valid also. 886 */ 887 attr->btf_vmlinux_value_type_id) { 888 struct btf *btf; 889 890 btf = btf_get_by_fd(attr->btf_fd); 891 if (IS_ERR(btf)) { 892 err = PTR_ERR(btf); 893 goto free_map; 894 } 895 if (btf_is_kernel(btf)) { 896 btf_put(btf); 897 err = -EACCES; 898 goto free_map; 899 } 900 map->btf = btf; 901 902 if (attr->btf_value_type_id) { 903 err = map_check_btf(map, btf, attr->btf_key_type_id, 904 attr->btf_value_type_id); 905 if (err) 906 goto free_map; 907 } 908 909 map->btf_key_type_id = attr->btf_key_type_id; 910 map->btf_value_type_id = attr->btf_value_type_id; 911 map->btf_vmlinux_value_type_id = 912 attr->btf_vmlinux_value_type_id; 913 } 914 915 err = security_bpf_map_alloc(map); 916 if (err) 917 goto free_map; 918 919 err = bpf_map_alloc_id(map); 920 if (err) 921 goto free_map_sec; 922 923 bpf_map_save_memcg(map); 924 925 err = bpf_map_new_fd(map, f_flags); 926 if (err < 0) { 927 /* failed to allocate fd. 928 * bpf_map_put_with_uref() is needed because the above 929 * bpf_map_alloc_id() has published the map 930 * to the userspace and the userspace may 931 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 932 */ 933 bpf_map_put_with_uref(map); 934 return err; 935 } 936 937 return err; 938 939 free_map_sec: 940 security_bpf_map_free(map); 941 free_map: 942 btf_put(map->btf); 943 map->ops->map_free(map); 944 return err; 945 } 946 947 /* if error is returned, fd is released. 
948 * On success caller should complete fd access with matching fdput() 949 */ 950 struct bpf_map *__bpf_map_get(struct fd f) 951 { 952 if (!f.file) 953 return ERR_PTR(-EBADF); 954 if (f.file->f_op != &bpf_map_fops) { 955 fdput(f); 956 return ERR_PTR(-EINVAL); 957 } 958 959 return f.file->private_data; 960 } 961 962 void bpf_map_inc(struct bpf_map *map) 963 { 964 atomic64_inc(&map->refcnt); 965 } 966 EXPORT_SYMBOL_GPL(bpf_map_inc); 967 968 void bpf_map_inc_with_uref(struct bpf_map *map) 969 { 970 atomic64_inc(&map->refcnt); 971 atomic64_inc(&map->usercnt); 972 } 973 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 974 975 struct bpf_map *bpf_map_get(u32 ufd) 976 { 977 struct fd f = fdget(ufd); 978 struct bpf_map *map; 979 980 map = __bpf_map_get(f); 981 if (IS_ERR(map)) 982 return map; 983 984 bpf_map_inc(map); 985 fdput(f); 986 987 return map; 988 } 989 EXPORT_SYMBOL(bpf_map_get); 990 991 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 992 { 993 struct fd f = fdget(ufd); 994 struct bpf_map *map; 995 996 map = __bpf_map_get(f); 997 if (IS_ERR(map)) 998 return map; 999 1000 bpf_map_inc_with_uref(map); 1001 fdput(f); 1002 1003 return map; 1004 } 1005 1006 /* map_idr_lock should have been held */ 1007 static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 1008 { 1009 int refold; 1010 1011 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 1012 if (!refold) 1013 return ERR_PTR(-ENOENT); 1014 if (uref) 1015 atomic64_inc(&map->usercnt); 1016 1017 return map; 1018 } 1019 1020 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1021 { 1022 spin_lock_bh(&map_idr_lock); 1023 map = __bpf_map_inc_not_zero(map, false); 1024 spin_unlock_bh(&map_idr_lock); 1025 1026 return map; 1027 } 1028 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1029 1030 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 1031 { 1032 return -ENOTSUPP; 1033 } 1034 1035 static void *__bpf_copy_key(void __user *ukey, u64 key_size) 1036 { 1037 if (key_size) 1038 return vmemdup_user(ukey, key_size); 1039 1040 if (ukey) 1041 return ERR_PTR(-EINVAL); 1042 1043 return NULL; 1044 } 1045 1046 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) 1047 { 1048 if (key_size) 1049 return kvmemdup_bpfptr(ukey, key_size); 1050 1051 if (!bpfptr_is_null(ukey)) 1052 return ERR_PTR(-EINVAL); 1053 1054 return NULL; 1055 } 1056 1057 /* last field in 'union bpf_attr' used by this command */ 1058 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1059 1060 static int map_lookup_elem(union bpf_attr *attr) 1061 { 1062 void __user *ukey = u64_to_user_ptr(attr->key); 1063 void __user *uvalue = u64_to_user_ptr(attr->value); 1064 int ufd = attr->map_fd; 1065 struct bpf_map *map; 1066 void *key, *value; 1067 u32 value_size; 1068 struct fd f; 1069 int err; 1070 1071 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) 1072 return -EINVAL; 1073 1074 if (attr->flags & ~BPF_F_LOCK) 1075 return -EINVAL; 1076 1077 f = fdget(ufd); 1078 map = __bpf_map_get(f); 1079 if (IS_ERR(map)) 1080 return PTR_ERR(map); 1081 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1082 err = -EPERM; 1083 goto err_put; 1084 } 1085 1086 if ((attr->flags & BPF_F_LOCK) && 1087 !map_value_has_spin_lock(map)) { 1088 err = -EINVAL; 1089 goto err_put; 1090 } 1091 1092 key = __bpf_copy_key(ukey, map->key_size); 1093 if (IS_ERR(key)) { 1094 err = PTR_ERR(key); 1095 goto err_put; 1096 } 1097 1098 value_size = bpf_map_value_size(map); 1099 1100 err = -ENOMEM; 1101 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1102 if (!value) 1103 goto free_key; 1104 1105 if 
(map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { 1106 if (copy_from_user(value, uvalue, value_size)) 1107 err = -EFAULT; 1108 else 1109 err = bpf_map_copy_value(map, key, value, attr->flags); 1110 goto free_value; 1111 } 1112 1113 err = bpf_map_copy_value(map, key, value, attr->flags); 1114 if (err) 1115 goto free_value; 1116 1117 err = -EFAULT; 1118 if (copy_to_user(uvalue, value, value_size) != 0) 1119 goto free_value; 1120 1121 err = 0; 1122 1123 free_value: 1124 kvfree(value); 1125 free_key: 1126 kvfree(key); 1127 err_put: 1128 fdput(f); 1129 return err; 1130 } 1131 1132 1133 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1134 1135 static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) 1136 { 1137 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel); 1138 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel); 1139 int ufd = attr->map_fd; 1140 struct bpf_map *map; 1141 void *key, *value; 1142 u32 value_size; 1143 struct fd f; 1144 int err; 1145 1146 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) 1147 return -EINVAL; 1148 1149 f = fdget(ufd); 1150 map = __bpf_map_get(f); 1151 if (IS_ERR(map)) 1152 return PTR_ERR(map); 1153 bpf_map_write_active_inc(map); 1154 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1155 err = -EPERM; 1156 goto err_put; 1157 } 1158 1159 if ((attr->flags & BPF_F_LOCK) && 1160 !map_value_has_spin_lock(map)) { 1161 err = -EINVAL; 1162 goto err_put; 1163 } 1164 1165 key = ___bpf_copy_key(ukey, map->key_size); 1166 if (IS_ERR(key)) { 1167 err = PTR_ERR(key); 1168 goto err_put; 1169 } 1170 1171 value_size = bpf_map_value_size(map); 1172 1173 err = -ENOMEM; 1174 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1175 if (!value) 1176 goto free_key; 1177 1178 err = -EFAULT; 1179 if (copy_from_bpfptr(value, uvalue, value_size) != 0) 1180 goto free_value; 1181 1182 err = bpf_map_update_value(map, f, key, value, attr->flags); 1183 1184 free_value: 1185 kvfree(value); 1186 free_key: 1187 kvfree(key); 1188 err_put: 1189 bpf_map_write_active_dec(map); 1190 fdput(f); 1191 return err; 1192 } 1193 1194 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1195 1196 static int map_delete_elem(union bpf_attr *attr) 1197 { 1198 void __user *ukey = u64_to_user_ptr(attr->key); 1199 int ufd = attr->map_fd; 1200 struct bpf_map *map; 1201 struct fd f; 1202 void *key; 1203 int err; 1204 1205 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) 1206 return -EINVAL; 1207 1208 f = fdget(ufd); 1209 map = __bpf_map_get(f); 1210 if (IS_ERR(map)) 1211 return PTR_ERR(map); 1212 bpf_map_write_active_inc(map); 1213 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1214 err = -EPERM; 1215 goto err_put; 1216 } 1217 1218 key = __bpf_copy_key(ukey, map->key_size); 1219 if (IS_ERR(key)) { 1220 err = PTR_ERR(key); 1221 goto err_put; 1222 } 1223 1224 if (bpf_map_is_dev_bound(map)) { 1225 err = bpf_map_offload_delete_elem(map, key); 1226 goto out; 1227 } else if (IS_FD_PROG_ARRAY(map) || 1228 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1229 /* These maps require sleepable context */ 1230 err = map->ops->map_delete_elem(map, key); 1231 goto out; 1232 } 1233 1234 bpf_disable_instrumentation(); 1235 rcu_read_lock(); 1236 err = map->ops->map_delete_elem(map, key); 1237 rcu_read_unlock(); 1238 bpf_enable_instrumentation(); 1239 maybe_wait_bpf_programs(map); 1240 out: 1241 kvfree(key); 1242 err_put: 1243 bpf_map_write_active_dec(map); 1244 fdput(f); 1245 return err; 1246 } 1247 1248 /* last field in 'union bpf_attr' used by this command */ 1249 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1250 1251 static int 
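/*
 * [Editorial aside, not part of the original file.]
 * BPF_MAP_GET_NEXT_KEY is the building block for map iteration from user
 * space: a NULL key returns the first key, and the call fails with ENOENT
 * once the last key has been handed out.  A minimal sketch, assuming a
 * hypothetical "map_fd" with 4-byte keys:
 *
 *	union bpf_attr attr = {};
 *	__u32 key, next_key;
 *	int err;
 *
 *	attr.map_fd = map_fd;
 *	attr.key = 0;			NULL key: start from the first key
 *	attr.next_key = (__u64)(unsigned long)&next_key;
 *	while (!(err = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY,
 *			       &attr, sizeof(attr)))) {
 *		key = next_key;		visit "key" here
 *		attr.key = (__u64)(unsigned long)&key;
 *	}
 *	errno == ENOENT here means the iteration completed
 */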
map_get_next_key(union bpf_attr *attr) 1252 { 1253 void __user *ukey = u64_to_user_ptr(attr->key); 1254 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1255 int ufd = attr->map_fd; 1256 struct bpf_map *map; 1257 void *key, *next_key; 1258 struct fd f; 1259 int err; 1260 1261 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) 1262 return -EINVAL; 1263 1264 f = fdget(ufd); 1265 map = __bpf_map_get(f); 1266 if (IS_ERR(map)) 1267 return PTR_ERR(map); 1268 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1269 err = -EPERM; 1270 goto err_put; 1271 } 1272 1273 if (ukey) { 1274 key = __bpf_copy_key(ukey, map->key_size); 1275 if (IS_ERR(key)) { 1276 err = PTR_ERR(key); 1277 goto err_put; 1278 } 1279 } else { 1280 key = NULL; 1281 } 1282 1283 err = -ENOMEM; 1284 next_key = kvmalloc(map->key_size, GFP_USER); 1285 if (!next_key) 1286 goto free_key; 1287 1288 if (bpf_map_is_dev_bound(map)) { 1289 err = bpf_map_offload_get_next_key(map, key, next_key); 1290 goto out; 1291 } 1292 1293 rcu_read_lock(); 1294 err = map->ops->map_get_next_key(map, key, next_key); 1295 rcu_read_unlock(); 1296 out: 1297 if (err) 1298 goto free_next_key; 1299 1300 err = -EFAULT; 1301 if (copy_to_user(unext_key, next_key, map->key_size) != 0) 1302 goto free_next_key; 1303 1304 err = 0; 1305 1306 free_next_key: 1307 kvfree(next_key); 1308 free_key: 1309 kvfree(key); 1310 err_put: 1311 fdput(f); 1312 return err; 1313 } 1314 1315 int generic_map_delete_batch(struct bpf_map *map, 1316 const union bpf_attr *attr, 1317 union bpf_attr __user *uattr) 1318 { 1319 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1320 u32 cp, max_count; 1321 int err = 0; 1322 void *key; 1323 1324 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1325 return -EINVAL; 1326 1327 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1328 !map_value_has_spin_lock(map)) { 1329 return -EINVAL; 1330 } 1331 1332 max_count = attr->batch.count; 1333 if (!max_count) 1334 return 0; 1335 1336 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1337 if (!key) 1338 return -ENOMEM; 1339 1340 for (cp = 0; cp < max_count; cp++) { 1341 err = -EFAULT; 1342 if (copy_from_user(key, keys + cp * map->key_size, 1343 map->key_size)) 1344 break; 1345 1346 if (bpf_map_is_dev_bound(map)) { 1347 err = bpf_map_offload_delete_elem(map, key); 1348 break; 1349 } 1350 1351 bpf_disable_instrumentation(); 1352 rcu_read_lock(); 1353 err = map->ops->map_delete_elem(map, key); 1354 rcu_read_unlock(); 1355 bpf_enable_instrumentation(); 1356 if (err) 1357 break; 1358 cond_resched(); 1359 } 1360 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1361 err = -EFAULT; 1362 1363 kvfree(key); 1364 1365 maybe_wait_bpf_programs(map); 1366 return err; 1367 } 1368 1369 int generic_map_update_batch(struct bpf_map *map, 1370 const union bpf_attr *attr, 1371 union bpf_attr __user *uattr) 1372 { 1373 void __user *values = u64_to_user_ptr(attr->batch.values); 1374 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1375 u32 value_size, cp, max_count; 1376 int ufd = attr->batch.map_fd; 1377 void *key, *value; 1378 struct fd f; 1379 int err = 0; 1380 1381 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1382 return -EINVAL; 1383 1384 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1385 !map_value_has_spin_lock(map)) { 1386 return -EINVAL; 1387 } 1388 1389 value_size = bpf_map_value_size(map); 1390 1391 max_count = attr->batch.count; 1392 if (!max_count) 1393 return 0; 1394 1395 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1396 if (!key) 1397 return -ENOMEM; 1398 1399 value = kvmalloc(value_size, GFP_USER | 
__GFP_NOWARN); 1400 if (!value) { 1401 kvfree(key); 1402 return -ENOMEM; 1403 } 1404 1405 f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ 1406 for (cp = 0; cp < max_count; cp++) { 1407 err = -EFAULT; 1408 if (copy_from_user(key, keys + cp * map->key_size, 1409 map->key_size) || 1410 copy_from_user(value, values + cp * value_size, value_size)) 1411 break; 1412 1413 err = bpf_map_update_value(map, f, key, value, 1414 attr->batch.elem_flags); 1415 1416 if (err) 1417 break; 1418 cond_resched(); 1419 } 1420 1421 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1422 err = -EFAULT; 1423 1424 kvfree(value); 1425 kvfree(key); 1426 fdput(f); 1427 return err; 1428 } 1429 1430 #define MAP_LOOKUP_RETRIES 3 1431 1432 int generic_map_lookup_batch(struct bpf_map *map, 1433 const union bpf_attr *attr, 1434 union bpf_attr __user *uattr) 1435 { 1436 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 1437 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1438 void __user *values = u64_to_user_ptr(attr->batch.values); 1439 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1440 void *buf, *buf_prevkey, *prev_key, *key, *value; 1441 int err, retry = MAP_LOOKUP_RETRIES; 1442 u32 value_size, cp, max_count; 1443 1444 if (attr->batch.elem_flags & ~BPF_F_LOCK) 1445 return -EINVAL; 1446 1447 if ((attr->batch.elem_flags & BPF_F_LOCK) && 1448 !map_value_has_spin_lock(map)) 1449 return -EINVAL; 1450 1451 value_size = bpf_map_value_size(map); 1452 1453 max_count = attr->batch.count; 1454 if (!max_count) 1455 return 0; 1456 1457 if (put_user(0, &uattr->batch.count)) 1458 return -EFAULT; 1459 1460 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1461 if (!buf_prevkey) 1462 return -ENOMEM; 1463 1464 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 1465 if (!buf) { 1466 kvfree(buf_prevkey); 1467 return -ENOMEM; 1468 } 1469 1470 err = -EFAULT; 1471 prev_key = NULL; 1472 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 1473 goto free_buf; 1474 key = buf; 1475 value = key + map->key_size; 1476 if (ubatch) 1477 prev_key = buf_prevkey; 1478 1479 for (cp = 0; cp < max_count;) { 1480 rcu_read_lock(); 1481 err = map->ops->map_get_next_key(map, prev_key, key); 1482 rcu_read_unlock(); 1483 if (err) 1484 break; 1485 err = bpf_map_copy_value(map, key, value, 1486 attr->batch.elem_flags); 1487 1488 if (err == -ENOENT) { 1489 if (retry) { 1490 retry--; 1491 continue; 1492 } 1493 err = -EINTR; 1494 break; 1495 } 1496 1497 if (err) 1498 goto free_buf; 1499 1500 if (copy_to_user(keys + cp * map->key_size, key, 1501 map->key_size)) { 1502 err = -EFAULT; 1503 goto free_buf; 1504 } 1505 if (copy_to_user(values + cp * value_size, value, value_size)) { 1506 err = -EFAULT; 1507 goto free_buf; 1508 } 1509 1510 if (!prev_key) 1511 prev_key = buf_prevkey; 1512 1513 swap(prev_key, key); 1514 retry = MAP_LOOKUP_RETRIES; 1515 cp++; 1516 cond_resched(); 1517 } 1518 1519 if (err == -EFAULT) 1520 goto free_buf; 1521 1522 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 1523 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 1524 err = -EFAULT; 1525 1526 free_buf: 1527 kvfree(buf_prevkey); 1528 kvfree(buf); 1529 return err; 1530 } 1531 1532 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags 1533 1534 static int map_lookup_and_delete_elem(union bpf_attr *attr) 1535 { 1536 void __user *ukey = u64_to_user_ptr(attr->key); 1537 void __user *uvalue = u64_to_user_ptr(attr->value); 1538 int ufd = attr->map_fd; 1539 struct bpf_map *map; 1540 
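	/*
	 * [Editorial aside, not part of the original file.]
	 * generic_map_lookup_batch() above implements BPF_MAP_LOOKUP_BATCH:
	 * user space passes NULL as in_batch to start, the kernel writes the
	 * number of copied elements back into batch.count plus a resume
	 * token into out_batch, and ENOENT signals that the whole map has
	 * been walked.  A minimal sketch, assuming a hypothetical "map_fd"
	 * with 4-byte keys and 8-byte values:
	 *
	 *	union bpf_attr attr = {};
	 *	__u32 in_batch, out_batch, keys[64];
	 *	__u64 vals[64];
	 *	bool first = true;
	 *	int err;
	 *
	 *	attr.batch.map_fd = map_fd;
	 *	attr.batch.keys = (__u64)(unsigned long)keys;
	 *	attr.batch.values = (__u64)(unsigned long)vals;
	 *	attr.batch.out_batch = (__u64)(unsigned long)&out_batch;
	 *	do {
	 *		attr.batch.in_batch = first ? 0 :
	 *				      (__u64)(unsigned long)&in_batch;
	 *		attr.batch.count = 64;
	 *		err = syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH,
	 *			      &attr, sizeof(attr));
	 *		consume attr.batch.count entries of keys[]/vals[] here
	 *		in_batch = out_batch;
	 *		first = false;
	 *	} while (!err);
	 */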
void *key, *value; 1541 u32 value_size; 1542 struct fd f; 1543 int err; 1544 1545 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 1546 return -EINVAL; 1547 1548 if (attr->flags & ~BPF_F_LOCK) 1549 return -EINVAL; 1550 1551 f = fdget(ufd); 1552 map = __bpf_map_get(f); 1553 if (IS_ERR(map)) 1554 return PTR_ERR(map); 1555 bpf_map_write_active_inc(map); 1556 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || 1557 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1558 err = -EPERM; 1559 goto err_put; 1560 } 1561 1562 if (attr->flags && 1563 (map->map_type == BPF_MAP_TYPE_QUEUE || 1564 map->map_type == BPF_MAP_TYPE_STACK)) { 1565 err = -EINVAL; 1566 goto err_put; 1567 } 1568 1569 if ((attr->flags & BPF_F_LOCK) && 1570 !map_value_has_spin_lock(map)) { 1571 err = -EINVAL; 1572 goto err_put; 1573 } 1574 1575 key = __bpf_copy_key(ukey, map->key_size); 1576 if (IS_ERR(key)) { 1577 err = PTR_ERR(key); 1578 goto err_put; 1579 } 1580 1581 value_size = bpf_map_value_size(map); 1582 1583 err = -ENOMEM; 1584 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); 1585 if (!value) 1586 goto free_key; 1587 1588 err = -ENOTSUPP; 1589 if (map->map_type == BPF_MAP_TYPE_QUEUE || 1590 map->map_type == BPF_MAP_TYPE_STACK) { 1591 err = map->ops->map_pop_elem(map, value); 1592 } else if (map->map_type == BPF_MAP_TYPE_HASH || 1593 map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1594 map->map_type == BPF_MAP_TYPE_LRU_HASH || 1595 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 1596 if (!bpf_map_is_dev_bound(map)) { 1597 bpf_disable_instrumentation(); 1598 rcu_read_lock(); 1599 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags); 1600 rcu_read_unlock(); 1601 bpf_enable_instrumentation(); 1602 } 1603 } 1604 1605 if (err) 1606 goto free_value; 1607 1608 if (copy_to_user(uvalue, value, value_size) != 0) { 1609 err = -EFAULT; 1610 goto free_value; 1611 } 1612 1613 err = 0; 1614 1615 free_value: 1616 kvfree(value); 1617 free_key: 1618 kvfree(key); 1619 err_put: 1620 bpf_map_write_active_dec(map); 1621 fdput(f); 1622 return err; 1623 } 1624 1625 #define BPF_MAP_FREEZE_LAST_FIELD map_fd 1626 1627 static int map_freeze(const union bpf_attr *attr) 1628 { 1629 int err = 0, ufd = attr->map_fd; 1630 struct bpf_map *map; 1631 struct fd f; 1632 1633 if (CHECK_ATTR(BPF_MAP_FREEZE)) 1634 return -EINVAL; 1635 1636 f = fdget(ufd); 1637 map = __bpf_map_get(f); 1638 if (IS_ERR(map)) 1639 return PTR_ERR(map); 1640 1641 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || 1642 map_value_has_timer(map)) { 1643 fdput(f); 1644 return -ENOTSUPP; 1645 } 1646 1647 mutex_lock(&map->freeze_mutex); 1648 if (bpf_map_write_active(map)) { 1649 err = -EBUSY; 1650 goto err_put; 1651 } 1652 if (READ_ONCE(map->frozen)) { 1653 err = -EBUSY; 1654 goto err_put; 1655 } 1656 if (!bpf_capable()) { 1657 err = -EPERM; 1658 goto err_put; 1659 } 1660 1661 WRITE_ONCE(map->frozen, true); 1662 err_put: 1663 mutex_unlock(&map->freeze_mutex); 1664 fdput(f); 1665 return err; 1666 } 1667 1668 static const struct bpf_prog_ops * const bpf_prog_types[] = { 1669 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 1670 [_id] = & _name ## _prog_ops, 1671 #define BPF_MAP_TYPE(_id, _ops) 1672 #define BPF_LINK_TYPE(_id, _name) 1673 #include <linux/bpf_types.h> 1674 #undef BPF_PROG_TYPE 1675 #undef BPF_MAP_TYPE 1676 #undef BPF_LINK_TYPE 1677 }; 1678 1679 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 1680 { 1681 const struct bpf_prog_ops *ops; 1682 1683 if (type >= ARRAY_SIZE(bpf_prog_types)) 1684 return -EINVAL; 1685 type = 
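	/*
	 * [Editorial aside, not part of the original file.]
	 * Relating to map_freeze() above: once BPF_MAP_FREEZE succeeds,
	 * map_get_sys_perms() drops FMODE_CAN_WRITE for every fd, so later
	 * syscall-side writes fail with EPERM, while programs can still read
	 * (and, unless BPF_F_RDONLY_PROG was set, write) the map.  A minimal
	 * sketch, assuming a hypothetical "map_fd":
	 *
	 *	union bpf_attr attr = {};
	 *
	 *	attr.map_fd = map_fd;
	 *	if (syscall(__NR_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr)))
	 *		handle EBUSY (pending writes or writable mmap) or
	 *		EPERM (missing CAP_BPF) here
	 */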
array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 1686 ops = bpf_prog_types[type]; 1687 if (!ops) 1688 return -EINVAL; 1689 1690 if (!bpf_prog_is_dev_bound(prog->aux)) 1691 prog->aux->ops = ops; 1692 else 1693 prog->aux->ops = &bpf_offload_prog_ops; 1694 prog->type = type; 1695 return 0; 1696 } 1697 1698 enum bpf_audit { 1699 BPF_AUDIT_LOAD, 1700 BPF_AUDIT_UNLOAD, 1701 BPF_AUDIT_MAX, 1702 }; 1703 1704 static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 1705 [BPF_AUDIT_LOAD] = "LOAD", 1706 [BPF_AUDIT_UNLOAD] = "UNLOAD", 1707 }; 1708 1709 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 1710 { 1711 struct audit_context *ctx = NULL; 1712 struct audit_buffer *ab; 1713 1714 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 1715 return; 1716 if (audit_enabled == AUDIT_OFF) 1717 return; 1718 if (op == BPF_AUDIT_LOAD) 1719 ctx = audit_context(); 1720 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 1721 if (unlikely(!ab)) 1722 return; 1723 audit_log_format(ab, "prog-id=%u op=%s", 1724 prog->aux->id, bpf_audit_str[op]); 1725 audit_log_end(ab); 1726 } 1727 1728 static int bpf_prog_alloc_id(struct bpf_prog *prog) 1729 { 1730 int id; 1731 1732 idr_preload(GFP_KERNEL); 1733 spin_lock_bh(&prog_idr_lock); 1734 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 1735 if (id > 0) 1736 prog->aux->id = id; 1737 spin_unlock_bh(&prog_idr_lock); 1738 idr_preload_end(); 1739 1740 /* id is in [1, INT_MAX) */ 1741 if (WARN_ON_ONCE(!id)) 1742 return -ENOSPC; 1743 1744 return id > 0 ? 0 : id; 1745 } 1746 1747 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) 1748 { 1749 unsigned long flags; 1750 1751 /* cBPF to eBPF migrations are currently not in the idr store. 1752 * Offloaded programs are removed from the store when their device 1753 * disappears - even if someone grabs an fd to them they are unusable, 1754 * simply waiting for refcnt to drop to be freed. 
1755 */ 1756 if (!prog->aux->id) 1757 return; 1758 1759 if (do_idr_lock) 1760 spin_lock_irqsave(&prog_idr_lock, flags); 1761 else 1762 __acquire(&prog_idr_lock); 1763 1764 idr_remove(&prog_idr, prog->aux->id); 1765 prog->aux->id = 0; 1766 1767 if (do_idr_lock) 1768 spin_unlock_irqrestore(&prog_idr_lock, flags); 1769 else 1770 __release(&prog_idr_lock); 1771 } 1772 1773 static void __bpf_prog_put_rcu(struct rcu_head *rcu) 1774 { 1775 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 1776 1777 kvfree(aux->func_info); 1778 kfree(aux->func_info_aux); 1779 free_uid(aux->user); 1780 security_bpf_prog_free(aux); 1781 bpf_prog_free(aux->prog); 1782 } 1783 1784 static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 1785 { 1786 bpf_prog_kallsyms_del_all(prog); 1787 btf_put(prog->aux->btf); 1788 kvfree(prog->aux->jited_linfo); 1789 kvfree(prog->aux->linfo); 1790 kfree(prog->aux->kfunc_tab); 1791 if (prog->aux->attach_btf) 1792 btf_put(prog->aux->attach_btf); 1793 1794 if (deferred) { 1795 if (prog->aux->sleepable) 1796 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu); 1797 else 1798 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); 1799 } else { 1800 __bpf_prog_put_rcu(&prog->aux->rcu); 1801 } 1802 } 1803 1804 static void bpf_prog_put_deferred(struct work_struct *work) 1805 { 1806 struct bpf_prog_aux *aux; 1807 struct bpf_prog *prog; 1808 1809 aux = container_of(work, struct bpf_prog_aux, work); 1810 prog = aux->prog; 1811 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 1812 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 1813 __bpf_prog_put_noref(prog, true); 1814 } 1815 1816 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) 1817 { 1818 struct bpf_prog_aux *aux = prog->aux; 1819 1820 if (atomic64_dec_and_test(&aux->refcnt)) { 1821 /* bpf_prog_free_id() must be called first */ 1822 bpf_prog_free_id(prog, do_idr_lock); 1823 1824 if (in_irq() || irqs_disabled()) { 1825 INIT_WORK(&aux->work, bpf_prog_put_deferred); 1826 schedule_work(&aux->work); 1827 } else { 1828 bpf_prog_put_deferred(&aux->work); 1829 } 1830 } 1831 } 1832 1833 void bpf_prog_put(struct bpf_prog *prog) 1834 { 1835 __bpf_prog_put(prog, true); 1836 } 1837 EXPORT_SYMBOL_GPL(bpf_prog_put); 1838 1839 static int bpf_prog_release(struct inode *inode, struct file *filp) 1840 { 1841 struct bpf_prog *prog = filp->private_data; 1842 1843 bpf_prog_put(prog); 1844 return 0; 1845 } 1846 1847 struct bpf_prog_kstats { 1848 u64 nsecs; 1849 u64 cnt; 1850 u64 misses; 1851 }; 1852 1853 static void bpf_prog_get_stats(const struct bpf_prog *prog, 1854 struct bpf_prog_kstats *stats) 1855 { 1856 u64 nsecs = 0, cnt = 0, misses = 0; 1857 int cpu; 1858 1859 for_each_possible_cpu(cpu) { 1860 const struct bpf_prog_stats *st; 1861 unsigned int start; 1862 u64 tnsecs, tcnt, tmisses; 1863 1864 st = per_cpu_ptr(prog->stats, cpu); 1865 do { 1866 start = u64_stats_fetch_begin_irq(&st->syncp); 1867 tnsecs = u64_stats_read(&st->nsecs); 1868 tcnt = u64_stats_read(&st->cnt); 1869 tmisses = u64_stats_read(&st->misses); 1870 } while (u64_stats_fetch_retry_irq(&st->syncp, start)); 1871 nsecs += tnsecs; 1872 cnt += tcnt; 1873 misses += tmisses; 1874 } 1875 stats->nsecs = nsecs; 1876 stats->cnt = cnt; 1877 stats->misses = misses; 1878 } 1879 1880 #ifdef CONFIG_PROC_FS 1881 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 1882 { 1883 const struct bpf_prog *prog = filp->private_data; 1884 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 1885 struct bpf_prog_kstats stats; 1886 1887 
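	/*
	 * [Editorial aside, not part of the original file.]
	 * bpf_prog_get_stats() above only yields non-zero run_time_ns and
	 * run_cnt when runtime statistics are switched on, either via the
	 * kernel.bpf_stats_enabled sysctl or by holding an fd returned from
	 * the BPF_ENABLE_STATS command.  A minimal sketch of the latter:
	 *
	 *	union bpf_attr attr = {};
	 *	int stats_fd;
	 *
	 *	attr.enable_stats.type = BPF_STATS_RUN_TIME;
	 *	stats_fd = syscall(__NR_bpf, BPF_ENABLE_STATS,
	 *			   &attr, sizeof(attr));
	 *	stats accumulate until stats_fd is closed
	 */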
bpf_prog_get_stats(prog, &stats); 1888 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 1889 seq_printf(m, 1890 "prog_type:\t%u\n" 1891 "prog_jited:\t%u\n" 1892 "prog_tag:\t%s\n" 1893 "memlock:\t%llu\n" 1894 "prog_id:\t%u\n" 1895 "run_time_ns:\t%llu\n" 1896 "run_cnt:\t%llu\n" 1897 "recursion_misses:\t%llu\n" 1898 "verified_insns:\t%u\n", 1899 prog->type, 1900 prog->jited, 1901 prog_tag, 1902 prog->pages * 1ULL << PAGE_SHIFT, 1903 prog->aux->id, 1904 stats.nsecs, 1905 stats.cnt, 1906 stats.misses, 1907 prog->aux->verified_insns); 1908 } 1909 #endif 1910 1911 const struct file_operations bpf_prog_fops = { 1912 #ifdef CONFIG_PROC_FS 1913 .show_fdinfo = bpf_prog_show_fdinfo, 1914 #endif 1915 .release = bpf_prog_release, 1916 .read = bpf_dummy_read, 1917 .write = bpf_dummy_write, 1918 }; 1919 1920 int bpf_prog_new_fd(struct bpf_prog *prog) 1921 { 1922 int ret; 1923 1924 ret = security_bpf_prog(prog); 1925 if (ret < 0) 1926 return ret; 1927 1928 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, 1929 O_RDWR | O_CLOEXEC); 1930 } 1931 1932 static struct bpf_prog *____bpf_prog_get(struct fd f) 1933 { 1934 if (!f.file) 1935 return ERR_PTR(-EBADF); 1936 if (f.file->f_op != &bpf_prog_fops) { 1937 fdput(f); 1938 return ERR_PTR(-EINVAL); 1939 } 1940 1941 return f.file->private_data; 1942 } 1943 1944 void bpf_prog_add(struct bpf_prog *prog, int i) 1945 { 1946 atomic64_add(i, &prog->aux->refcnt); 1947 } 1948 EXPORT_SYMBOL_GPL(bpf_prog_add); 1949 1950 void bpf_prog_sub(struct bpf_prog *prog, int i) 1951 { 1952 /* Only to be used for undoing previous bpf_prog_add() in some 1953 * error path. We still know that another entity in our call 1954 * path holds a reference to the program, thus atomic_sub() can 1955 * be safely used in such cases! 1956 */ 1957 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 1958 } 1959 EXPORT_SYMBOL_GPL(bpf_prog_sub); 1960 1961 void bpf_prog_inc(struct bpf_prog *prog) 1962 { 1963 atomic64_inc(&prog->aux->refcnt); 1964 } 1965 EXPORT_SYMBOL_GPL(bpf_prog_inc); 1966 1967 /* prog_idr_lock should have been held */ 1968 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 1969 { 1970 int refold; 1971 1972 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 1973 1974 if (!refold) 1975 return ERR_PTR(-ENOENT); 1976 1977 return prog; 1978 } 1979 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 1980 1981 bool bpf_prog_get_ok(struct bpf_prog *prog, 1982 enum bpf_prog_type *attach_type, bool attach_drv) 1983 { 1984 /* not an attachment, just a refcount inc, always allow */ 1985 if (!attach_type) 1986 return true; 1987 1988 if (prog->type != *attach_type) 1989 return false; 1990 if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) 1991 return false; 1992 1993 return true; 1994 } 1995 1996 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, 1997 bool attach_drv) 1998 { 1999 struct fd f = fdget(ufd); 2000 struct bpf_prog *prog; 2001 2002 prog = ____bpf_prog_get(f); 2003 if (IS_ERR(prog)) 2004 return prog; 2005 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 2006 prog = ERR_PTR(-EINVAL); 2007 goto out; 2008 } 2009 2010 bpf_prog_inc(prog); 2011 out: 2012 fdput(f); 2013 return prog; 2014 } 2015 2016 struct bpf_prog *bpf_prog_get(u32 ufd) 2017 { 2018 return __bpf_prog_get(ufd, NULL, false); 2019 } 2020 2021 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, 2022 bool attach_drv) 2023 { 2024 return __bpf_prog_get(ufd, &type, attach_drv); 2025 } 2026 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 2027 2028 /* Initially all 
BPF programs could be loaded w/o specifying 2029 * expected_attach_type. Later for some of them specifying expected_attach_type 2030 * at load time became required so that program could be validated properly. 2031 * Programs of types that are allowed to be loaded both w/ and w/o (for 2032 * backward compatibility) expected_attach_type, should have the default attach 2033 * type assigned to expected_attach_type for the latter case, so that it can be 2034 * validated later at attach time. 2035 * 2036 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 2037 * prog type requires it but has some attach types that have to be backward 2038 * compatible. 2039 */ 2040 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2041 { 2042 switch (attr->prog_type) { 2043 case BPF_PROG_TYPE_CGROUP_SOCK: 2044 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2045 * exist so checking for non-zero is the way to go here. 2046 */ 2047 if (!attr->expected_attach_type) 2048 attr->expected_attach_type = 2049 BPF_CGROUP_INET_SOCK_CREATE; 2050 break; 2051 case BPF_PROG_TYPE_SK_REUSEPORT: 2052 if (!attr->expected_attach_type) 2053 attr->expected_attach_type = 2054 BPF_SK_REUSEPORT_SELECT; 2055 break; 2056 } 2057 } 2058 2059 static int 2060 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 2061 enum bpf_attach_type expected_attach_type, 2062 struct btf *attach_btf, u32 btf_id, 2063 struct bpf_prog *dst_prog) 2064 { 2065 if (btf_id) { 2066 if (btf_id > BTF_MAX_TYPE) 2067 return -EINVAL; 2068 2069 if (!attach_btf && !dst_prog) 2070 return -EINVAL; 2071 2072 switch (prog_type) { 2073 case BPF_PROG_TYPE_TRACING: 2074 case BPF_PROG_TYPE_LSM: 2075 case BPF_PROG_TYPE_STRUCT_OPS: 2076 case BPF_PROG_TYPE_EXT: 2077 break; 2078 default: 2079 return -EINVAL; 2080 } 2081 } 2082 2083 if (attach_btf && (!btf_id || dst_prog)) 2084 return -EINVAL; 2085 2086 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING && 2087 prog_type != BPF_PROG_TYPE_EXT) 2088 return -EINVAL; 2089 2090 switch (prog_type) { 2091 case BPF_PROG_TYPE_CGROUP_SOCK: 2092 switch (expected_attach_type) { 2093 case BPF_CGROUP_INET_SOCK_CREATE: 2094 case BPF_CGROUP_INET_SOCK_RELEASE: 2095 case BPF_CGROUP_INET4_POST_BIND: 2096 case BPF_CGROUP_INET6_POST_BIND: 2097 return 0; 2098 default: 2099 return -EINVAL; 2100 } 2101 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2102 switch (expected_attach_type) { 2103 case BPF_CGROUP_INET4_BIND: 2104 case BPF_CGROUP_INET6_BIND: 2105 case BPF_CGROUP_INET4_CONNECT: 2106 case BPF_CGROUP_INET6_CONNECT: 2107 case BPF_CGROUP_INET4_GETPEERNAME: 2108 case BPF_CGROUP_INET6_GETPEERNAME: 2109 case BPF_CGROUP_INET4_GETSOCKNAME: 2110 case BPF_CGROUP_INET6_GETSOCKNAME: 2111 case BPF_CGROUP_UDP4_SENDMSG: 2112 case BPF_CGROUP_UDP6_SENDMSG: 2113 case BPF_CGROUP_UDP4_RECVMSG: 2114 case BPF_CGROUP_UDP6_RECVMSG: 2115 return 0; 2116 default: 2117 return -EINVAL; 2118 } 2119 case BPF_PROG_TYPE_CGROUP_SKB: 2120 switch (expected_attach_type) { 2121 case BPF_CGROUP_INET_INGRESS: 2122 case BPF_CGROUP_INET_EGRESS: 2123 return 0; 2124 default: 2125 return -EINVAL; 2126 } 2127 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2128 switch (expected_attach_type) { 2129 case BPF_CGROUP_SETSOCKOPT: 2130 case BPF_CGROUP_GETSOCKOPT: 2131 return 0; 2132 default: 2133 return -EINVAL; 2134 } 2135 case BPF_PROG_TYPE_SK_LOOKUP: 2136 if (expected_attach_type == BPF_SK_LOOKUP) 2137 return 0; 2138 return -EINVAL; 2139 case BPF_PROG_TYPE_SK_REUSEPORT: 2140 switch (expected_attach_type) { 2141 case BPF_SK_REUSEPORT_SELECT: 2142 case 
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: 2143 return 0; 2144 default: 2145 return -EINVAL; 2146 } 2147 case BPF_PROG_TYPE_SYSCALL: 2148 case BPF_PROG_TYPE_EXT: 2149 if (expected_attach_type) 2150 return -EINVAL; 2151 fallthrough; 2152 default: 2153 return 0; 2154 } 2155 } 2156 2157 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2158 { 2159 switch (prog_type) { 2160 case BPF_PROG_TYPE_SCHED_CLS: 2161 case BPF_PROG_TYPE_SCHED_ACT: 2162 case BPF_PROG_TYPE_XDP: 2163 case BPF_PROG_TYPE_LWT_IN: 2164 case BPF_PROG_TYPE_LWT_OUT: 2165 case BPF_PROG_TYPE_LWT_XMIT: 2166 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2167 case BPF_PROG_TYPE_SK_SKB: 2168 case BPF_PROG_TYPE_SK_MSG: 2169 case BPF_PROG_TYPE_LIRC_MODE2: 2170 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2171 case BPF_PROG_TYPE_CGROUP_DEVICE: 2172 case BPF_PROG_TYPE_CGROUP_SOCK: 2173 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2174 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2175 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2176 case BPF_PROG_TYPE_SOCK_OPS: 2177 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2178 return true; 2179 case BPF_PROG_TYPE_CGROUP_SKB: 2180 /* always unpriv */ 2181 case BPF_PROG_TYPE_SK_REUSEPORT: 2182 /* equivalent to SOCKET_FILTER. need CAP_BPF only */ 2183 default: 2184 return false; 2185 } 2186 } 2187 2188 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2189 { 2190 switch (prog_type) { 2191 case BPF_PROG_TYPE_KPROBE: 2192 case BPF_PROG_TYPE_TRACEPOINT: 2193 case BPF_PROG_TYPE_PERF_EVENT: 2194 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2195 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2196 case BPF_PROG_TYPE_TRACING: 2197 case BPF_PROG_TYPE_LSM: 2198 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2199 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2200 return true; 2201 default: 2202 return false; 2203 } 2204 } 2205 2206 /* last field in 'union bpf_attr' used by this command */ 2207 #define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size 2208 2209 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) 2210 { 2211 enum bpf_prog_type type = attr->prog_type; 2212 struct bpf_prog *prog, *dst_prog = NULL; 2213 struct btf *attach_btf = NULL; 2214 int err; 2215 char license[128]; 2216 bool is_gpl; 2217 2218 if (CHECK_ATTR(BPF_PROG_LOAD)) 2219 return -EINVAL; 2220 2221 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | 2222 BPF_F_ANY_ALIGNMENT | 2223 BPF_F_TEST_STATE_FREQ | 2224 BPF_F_SLEEPABLE | 2225 BPF_F_TEST_RND_HI32 | 2226 BPF_F_XDP_HAS_FRAGS)) 2227 return -EINVAL; 2228 2229 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 2230 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2231 !bpf_capable()) 2232 return -EPERM; 2233 2234 /* copy eBPF program license from user space */ 2235 if (strncpy_from_bpfptr(license, 2236 make_bpfptr(attr->license, uattr.is_kernel), 2237 sizeof(license) - 1) < 0) 2238 return -EFAULT; 2239 license[sizeof(license) - 1] = 0; 2240 2241 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 2242 is_gpl = license_is_gpl_compatible(license); 2243 2244 if (attr->insn_cnt == 0 || 2245 attr->insn_cnt > (bpf_capable() ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) 2246 return -E2BIG; 2247 if (type != BPF_PROG_TYPE_SOCKET_FILTER && 2248 type != BPF_PROG_TYPE_CGROUP_SKB && 2249 !bpf_capable()) 2250 return -EPERM; 2251 2252 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) 2253 return -EPERM; 2254 if (is_perfmon_prog_type(type) && !perfmon_capable()) 2255 return -EPERM; 2256 2257 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog 2258 * or btf, we need to check which one it is 2259 */ 2260 if (attr->attach_prog_fd) { 2261 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2262 if (IS_ERR(dst_prog)) { 2263 dst_prog = NULL; 2264 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); 2265 if (IS_ERR(attach_btf)) 2266 return -EINVAL; 2267 if (!btf_is_kernel(attach_btf)) { 2268 /* attaching through specifying bpf_prog's BTF 2269 * objects directly might be supported eventually 2270 */ 2271 btf_put(attach_btf); 2272 return -ENOTSUPP; 2273 } 2274 } 2275 } else if (attr->attach_btf_id) { 2276 /* fall back to vmlinux BTF, if BTF type ID is specified */ 2277 attach_btf = bpf_get_btf_vmlinux(); 2278 if (IS_ERR(attach_btf)) 2279 return PTR_ERR(attach_btf); 2280 if (!attach_btf) 2281 return -EINVAL; 2282 btf_get(attach_btf); 2283 } 2284 2285 bpf_prog_load_fixup_attach_type(attr); 2286 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 2287 attach_btf, attr->attach_btf_id, 2288 dst_prog)) { 2289 if (dst_prog) 2290 bpf_prog_put(dst_prog); 2291 if (attach_btf) 2292 btf_put(attach_btf); 2293 return -EINVAL; 2294 } 2295 2296 /* plain bpf_prog allocation */ 2297 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2298 if (!prog) { 2299 if (dst_prog) 2300 bpf_prog_put(dst_prog); 2301 if (attach_btf) 2302 btf_put(attach_btf); 2303 return -ENOMEM; 2304 } 2305 2306 prog->expected_attach_type = attr->expected_attach_type; 2307 prog->aux->attach_btf = attach_btf; 2308 prog->aux->attach_btf_id = attr->attach_btf_id; 2309 prog->aux->dst_prog = dst_prog; 2310 prog->aux->offload_requested = !!attr->prog_ifindex; 2311 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2312 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2313 2314 err = security_bpf_prog_alloc(prog->aux); 2315 if (err) 2316 goto free_prog; 2317 2318 prog->aux->user = get_current_user(); 2319 prog->len = attr->insn_cnt; 2320 2321 err = -EFAULT; 2322 if (copy_from_bpfptr(prog->insns, 2323 make_bpfptr(attr->insns, uattr.is_kernel), 2324 bpf_prog_insn_size(prog)) != 0) 2325 goto free_prog_sec; 2326 2327 prog->orig_prog = NULL; 2328 prog->jited = 0; 2329 2330 atomic64_set(&prog->aux->refcnt, 1); 2331 prog->gpl_compatible = is_gpl ? 
1 : 0;

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog_sec;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog_sec;

	prog->aux->load_time = ktime_get_boottime_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
			       sizeof(attr->prog_name));
	if (err < 0)
		goto free_prog_sec;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr, uattr);
	if (err < 0)
		goto free_used_maps;

	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
	 * effectively publicly exposed. However, retrieving via
	 * bpf_prog_get_fd_by_id() will take another reference,
	 * therefore it cannot be gone underneath us.
	 *
	 * Only for the time /after/ successful bpf_prog_new_fd()
	 * and before returning to userspace, we might just hold
	 * one reference and any parallel close on that fd could
	 * rip everything out. Hence, below notifications must
	 * happen before bpf_prog_new_fd().
	 *
	 * Also, any failure handling from this point onwards must
	 * be using bpf_prog_put() given the program is exposed.
	 */
	bpf_prog_kallsyms_add(prog);
	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
	bpf_audit_prog(prog, BPF_AUDIT_LOAD);

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		bpf_prog_put(prog);
	return err;

free_used_maps:
	/* In case we have subprogs, we need to wait for a grace
	 * period before we can tear down JIT memory since symbols
	 * are already exposed under kallsyms.
	 */
	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
	return err;
free_prog_sec:
	free_uid(prog->aux->user);
	security_bpf_prog_free(prog->aux);
free_prog:
	if (prog->aux->attach_btf)
		btf_put(prog->aux->attach_btf);
	bpf_prog_free(prog);
	return err;
}

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
{
	atomic64_set(&link->refcnt, 1);
	link->type = type;
	link->id = 0;
	link->ops = ops;
	link->prog = prog;
}

static void bpf_link_free_id(int id)
{
	if (!id)
		return;

	spin_lock_bh(&link_idr_lock);
	idr_remove(&link_idr, id);
	spin_unlock_bh(&link_idr_lock);
}

/* Clean up bpf_link and corresponding anon_inode file and FD. After
 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
 * anon_inode's release() call. This helper marks bpf_link as
 * defunct, releases anon_inode file and puts reserved FD.
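 * It is typically used on the error path after bpf_link_prime() has
 * succeeded but the actual attach step failed, roughly (a sketch of the
 * pattern used by the attach paths below, with attach_hook() standing in
 * for the hook-specific call):
 *
 *	err = bpf_link_prime(&link->link, &primer);
 *	if (err) {
 *		kfree(link);
 *		return err;
 *	}
 *	err = attach_hook(...);
 *	if (err) {
 *		bpf_link_cleanup(&primer);
 *		return err;
 *	}
 *	return bpf_link_settle(&primer);
 *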
bpf_prog's refcnt 2447 * is not decremented, it's the responsibility of a calling code that failed 2448 * to complete bpf_link initialization. 2449 */ 2450 void bpf_link_cleanup(struct bpf_link_primer *primer) 2451 { 2452 primer->link->prog = NULL; 2453 bpf_link_free_id(primer->id); 2454 fput(primer->file); 2455 put_unused_fd(primer->fd); 2456 } 2457 2458 void bpf_link_inc(struct bpf_link *link) 2459 { 2460 atomic64_inc(&link->refcnt); 2461 } 2462 2463 /* bpf_link_free is guaranteed to be called from process context */ 2464 static void bpf_link_free(struct bpf_link *link) 2465 { 2466 bpf_link_free_id(link->id); 2467 if (link->prog) { 2468 /* detach BPF program, clean up used resources */ 2469 link->ops->release(link); 2470 bpf_prog_put(link->prog); 2471 } 2472 /* free bpf_link and its containing memory */ 2473 link->ops->dealloc(link); 2474 } 2475 2476 static void bpf_link_put_deferred(struct work_struct *work) 2477 { 2478 struct bpf_link *link = container_of(work, struct bpf_link, work); 2479 2480 bpf_link_free(link); 2481 } 2482 2483 /* bpf_link_put can be called from atomic context, but ensures that resources 2484 * are freed from process context 2485 */ 2486 void bpf_link_put(struct bpf_link *link) 2487 { 2488 if (!atomic64_dec_and_test(&link->refcnt)) 2489 return; 2490 2491 if (in_atomic()) { 2492 INIT_WORK(&link->work, bpf_link_put_deferred); 2493 schedule_work(&link->work); 2494 } else { 2495 bpf_link_free(link); 2496 } 2497 } 2498 EXPORT_SYMBOL(bpf_link_put); 2499 2500 static int bpf_link_release(struct inode *inode, struct file *filp) 2501 { 2502 struct bpf_link *link = filp->private_data; 2503 2504 bpf_link_put(link); 2505 return 0; 2506 } 2507 2508 #ifdef CONFIG_PROC_FS 2509 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 2510 #define BPF_MAP_TYPE(_id, _ops) 2511 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 2512 static const char *bpf_link_type_strs[] = { 2513 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 2514 #include <linux/bpf_types.h> 2515 }; 2516 #undef BPF_PROG_TYPE 2517 #undef BPF_MAP_TYPE 2518 #undef BPF_LINK_TYPE 2519 2520 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 2521 { 2522 const struct bpf_link *link = filp->private_data; 2523 const struct bpf_prog *prog = link->prog; 2524 char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; 2525 2526 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2527 seq_printf(m, 2528 "link_type:\t%s\n" 2529 "link_id:\t%u\n" 2530 "prog_tag:\t%s\n" 2531 "prog_id:\t%u\n", 2532 bpf_link_type_strs[link->type], 2533 link->id, 2534 prog_tag, 2535 prog->aux->id); 2536 if (link->ops->show_fdinfo) 2537 link->ops->show_fdinfo(link, m); 2538 } 2539 #endif 2540 2541 static const struct file_operations bpf_link_fops = { 2542 #ifdef CONFIG_PROC_FS 2543 .show_fdinfo = bpf_link_show_fdinfo, 2544 #endif 2545 .release = bpf_link_release, 2546 .read = bpf_dummy_read, 2547 .write = bpf_dummy_write, 2548 }; 2549 2550 static int bpf_link_alloc_id(struct bpf_link *link) 2551 { 2552 int id; 2553 2554 idr_preload(GFP_KERNEL); 2555 spin_lock_bh(&link_idr_lock); 2556 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 2557 spin_unlock_bh(&link_idr_lock); 2558 idr_preload_end(); 2559 2560 return id; 2561 } 2562 2563 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 2564 * reserving unused FD and allocating ID from link_idr. This is to be paired 2565 * with bpf_link_settle() to install FD and ID and expose bpf_link to 2566 * user-space, if bpf_link is successfully attached. 
If not, bpf_link and 2567 * pre-allocated resources are to be freed with bpf_cleanup() call. All the 2568 * transient state is passed around in struct bpf_link_primer. 2569 * This is preferred way to create and initialize bpf_link, especially when 2570 * there are complicated and expensive operations in between creating bpf_link 2571 * itself and attaching it to BPF hook. By using bpf_link_prime() and 2572 * bpf_link_settle() kernel code using bpf_link doesn't have to perform 2573 * expensive (and potentially failing) roll back operations in a rare case 2574 * that file, FD, or ID can't be allocated. 2575 */ 2576 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 2577 { 2578 struct file *file; 2579 int fd, id; 2580 2581 fd = get_unused_fd_flags(O_CLOEXEC); 2582 if (fd < 0) 2583 return fd; 2584 2585 2586 id = bpf_link_alloc_id(link); 2587 if (id < 0) { 2588 put_unused_fd(fd); 2589 return id; 2590 } 2591 2592 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); 2593 if (IS_ERR(file)) { 2594 bpf_link_free_id(id); 2595 put_unused_fd(fd); 2596 return PTR_ERR(file); 2597 } 2598 2599 primer->link = link; 2600 primer->file = file; 2601 primer->fd = fd; 2602 primer->id = id; 2603 return 0; 2604 } 2605 2606 int bpf_link_settle(struct bpf_link_primer *primer) 2607 { 2608 /* make bpf_link fetchable by ID */ 2609 spin_lock_bh(&link_idr_lock); 2610 primer->link->id = primer->id; 2611 spin_unlock_bh(&link_idr_lock); 2612 /* make bpf_link fetchable by FD */ 2613 fd_install(primer->fd, primer->file); 2614 /* pass through installed FD */ 2615 return primer->fd; 2616 } 2617 2618 int bpf_link_new_fd(struct bpf_link *link) 2619 { 2620 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); 2621 } 2622 2623 struct bpf_link *bpf_link_get_from_fd(u32 ufd) 2624 { 2625 struct fd f = fdget(ufd); 2626 struct bpf_link *link; 2627 2628 if (!f.file) 2629 return ERR_PTR(-EBADF); 2630 if (f.file->f_op != &bpf_link_fops) { 2631 fdput(f); 2632 return ERR_PTR(-EINVAL); 2633 } 2634 2635 link = f.file->private_data; 2636 bpf_link_inc(link); 2637 fdput(f); 2638 2639 return link; 2640 } 2641 EXPORT_SYMBOL(bpf_link_get_from_fd); 2642 2643 struct bpf_tracing_link { 2644 struct bpf_link link; 2645 enum bpf_attach_type attach_type; 2646 struct bpf_trampoline *trampoline; 2647 struct bpf_prog *tgt_prog; 2648 }; 2649 2650 static void bpf_tracing_link_release(struct bpf_link *link) 2651 { 2652 struct bpf_tracing_link *tr_link = 2653 container_of(link, struct bpf_tracing_link, link); 2654 2655 WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, 2656 tr_link->trampoline)); 2657 2658 bpf_trampoline_put(tr_link->trampoline); 2659 2660 /* tgt_prog is NULL if target is a kernel function */ 2661 if (tr_link->tgt_prog) 2662 bpf_prog_put(tr_link->tgt_prog); 2663 } 2664 2665 static void bpf_tracing_link_dealloc(struct bpf_link *link) 2666 { 2667 struct bpf_tracing_link *tr_link = 2668 container_of(link, struct bpf_tracing_link, link); 2669 2670 kfree(tr_link); 2671 } 2672 2673 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, 2674 struct seq_file *seq) 2675 { 2676 struct bpf_tracing_link *tr_link = 2677 container_of(link, struct bpf_tracing_link, link); 2678 2679 seq_printf(seq, 2680 "attach_type:\t%d\n", 2681 tr_link->attach_type); 2682 } 2683 2684 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, 2685 struct bpf_link_info *info) 2686 { 2687 struct bpf_tracing_link *tr_link = 2688 container_of(link, struct bpf_tracing_link, link); 2689 2690 
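	/* Report the attach type and unpack the trampoline key back into the
	 * target object id and target BTF type id for user-space introspection.
	 */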
info->tracing.attach_type = tr_link->attach_type; 2691 bpf_trampoline_unpack_key(tr_link->trampoline->key, 2692 &info->tracing.target_obj_id, 2693 &info->tracing.target_btf_id); 2694 2695 return 0; 2696 } 2697 2698 static const struct bpf_link_ops bpf_tracing_link_lops = { 2699 .release = bpf_tracing_link_release, 2700 .dealloc = bpf_tracing_link_dealloc, 2701 .show_fdinfo = bpf_tracing_link_show_fdinfo, 2702 .fill_link_info = bpf_tracing_link_fill_link_info, 2703 }; 2704 2705 static int bpf_tracing_prog_attach(struct bpf_prog *prog, 2706 int tgt_prog_fd, 2707 u32 btf_id) 2708 { 2709 struct bpf_link_primer link_primer; 2710 struct bpf_prog *tgt_prog = NULL; 2711 struct bpf_trampoline *tr = NULL; 2712 struct bpf_tracing_link *link; 2713 u64 key = 0; 2714 int err; 2715 2716 switch (prog->type) { 2717 case BPF_PROG_TYPE_TRACING: 2718 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 2719 prog->expected_attach_type != BPF_TRACE_FEXIT && 2720 prog->expected_attach_type != BPF_MODIFY_RETURN) { 2721 err = -EINVAL; 2722 goto out_put_prog; 2723 } 2724 break; 2725 case BPF_PROG_TYPE_EXT: 2726 if (prog->expected_attach_type != 0) { 2727 err = -EINVAL; 2728 goto out_put_prog; 2729 } 2730 break; 2731 case BPF_PROG_TYPE_LSM: 2732 if (prog->expected_attach_type != BPF_LSM_MAC) { 2733 err = -EINVAL; 2734 goto out_put_prog; 2735 } 2736 break; 2737 default: 2738 err = -EINVAL; 2739 goto out_put_prog; 2740 } 2741 2742 if (!!tgt_prog_fd != !!btf_id) { 2743 err = -EINVAL; 2744 goto out_put_prog; 2745 } 2746 2747 if (tgt_prog_fd) { 2748 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ 2749 if (prog->type != BPF_PROG_TYPE_EXT) { 2750 err = -EINVAL; 2751 goto out_put_prog; 2752 } 2753 2754 tgt_prog = bpf_prog_get(tgt_prog_fd); 2755 if (IS_ERR(tgt_prog)) { 2756 err = PTR_ERR(tgt_prog); 2757 tgt_prog = NULL; 2758 goto out_put_prog; 2759 } 2760 2761 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); 2762 } 2763 2764 link = kzalloc(sizeof(*link), GFP_USER); 2765 if (!link) { 2766 err = -ENOMEM; 2767 goto out_put_prog; 2768 } 2769 bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, 2770 &bpf_tracing_link_lops, prog); 2771 link->attach_type = prog->expected_attach_type; 2772 2773 mutex_lock(&prog->aux->dst_mutex); 2774 2775 /* There are a few possible cases here: 2776 * 2777 * - if prog->aux->dst_trampoline is set, the program was just loaded 2778 * and not yet attached to anything, so we can use the values stored 2779 * in prog->aux 2780 * 2781 * - if prog->aux->dst_trampoline is NULL, the program has already been 2782 * attached to a target and its initial target was cleared (below) 2783 * 2784 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 2785 * target_btf_id using the link_create API. 2786 * 2787 * - if tgt_prog == NULL when this function was called using the old 2788 * raw_tracepoint_open API, and we need a target from prog->aux 2789 * 2790 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program 2791 * was detached and is going for re-attachment. 2792 */ 2793 if (!prog->aux->dst_trampoline && !tgt_prog) { 2794 /* 2795 * Allow re-attach for TRACING and LSM programs. If it's 2796 * currently linked, bpf_trampoline_link_prog will fail. 2797 * EXT programs need to specify tgt_prog_fd, so they 2798 * re-attach in separate code path. 
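 * In the re-attach case the target is recomputed below from
 * prog->aux->attach_btf and the BTF id saved in
 * prog->aux->attach_btf_id at load time.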
2799 */ 2800 if (prog->type != BPF_PROG_TYPE_TRACING && 2801 prog->type != BPF_PROG_TYPE_LSM) { 2802 err = -EINVAL; 2803 goto out_unlock; 2804 } 2805 btf_id = prog->aux->attach_btf_id; 2806 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id); 2807 } 2808 2809 if (!prog->aux->dst_trampoline || 2810 (key && key != prog->aux->dst_trampoline->key)) { 2811 /* If there is no saved target, or the specified target is 2812 * different from the destination specified at load time, we 2813 * need a new trampoline and a check for compatibility 2814 */ 2815 struct bpf_attach_target_info tgt_info = {}; 2816 2817 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, 2818 &tgt_info); 2819 if (err) 2820 goto out_unlock; 2821 2822 tr = bpf_trampoline_get(key, &tgt_info); 2823 if (!tr) { 2824 err = -ENOMEM; 2825 goto out_unlock; 2826 } 2827 } else { 2828 /* The caller didn't specify a target, or the target was the 2829 * same as the destination supplied during program load. This 2830 * means we can reuse the trampoline and reference from program 2831 * load time, and there is no need to allocate a new one. This 2832 * can only happen once for any program, as the saved values in 2833 * prog->aux are cleared below. 2834 */ 2835 tr = prog->aux->dst_trampoline; 2836 tgt_prog = prog->aux->dst_prog; 2837 } 2838 2839 err = bpf_link_prime(&link->link, &link_primer); 2840 if (err) 2841 goto out_unlock; 2842 2843 err = bpf_trampoline_link_prog(prog, tr); 2844 if (err) { 2845 bpf_link_cleanup(&link_primer); 2846 link = NULL; 2847 goto out_unlock; 2848 } 2849 2850 link->tgt_prog = tgt_prog; 2851 link->trampoline = tr; 2852 2853 /* Always clear the trampoline and target prog from prog->aux to make 2854 * sure the original attach destination is not kept alive after a 2855 * program is (re-)attached to another target. 
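 * The conditional puts below drop the references taken at load time
 * unless they are being transferred to this link.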
2856 */ 2857 if (prog->aux->dst_prog && 2858 (tgt_prog_fd || tr != prog->aux->dst_trampoline)) 2859 /* got extra prog ref from syscall, or attaching to different prog */ 2860 bpf_prog_put(prog->aux->dst_prog); 2861 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) 2862 /* we allocated a new trampoline, so free the old one */ 2863 bpf_trampoline_put(prog->aux->dst_trampoline); 2864 2865 prog->aux->dst_prog = NULL; 2866 prog->aux->dst_trampoline = NULL; 2867 mutex_unlock(&prog->aux->dst_mutex); 2868 2869 return bpf_link_settle(&link_primer); 2870 out_unlock: 2871 if (tr && tr != prog->aux->dst_trampoline) 2872 bpf_trampoline_put(tr); 2873 mutex_unlock(&prog->aux->dst_mutex); 2874 kfree(link); 2875 out_put_prog: 2876 if (tgt_prog_fd && tgt_prog) 2877 bpf_prog_put(tgt_prog); 2878 return err; 2879 } 2880 2881 struct bpf_raw_tp_link { 2882 struct bpf_link link; 2883 struct bpf_raw_event_map *btp; 2884 }; 2885 2886 static void bpf_raw_tp_link_release(struct bpf_link *link) 2887 { 2888 struct bpf_raw_tp_link *raw_tp = 2889 container_of(link, struct bpf_raw_tp_link, link); 2890 2891 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); 2892 bpf_put_raw_tracepoint(raw_tp->btp); 2893 } 2894 2895 static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 2896 { 2897 struct bpf_raw_tp_link *raw_tp = 2898 container_of(link, struct bpf_raw_tp_link, link); 2899 2900 kfree(raw_tp); 2901 } 2902 2903 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, 2904 struct seq_file *seq) 2905 { 2906 struct bpf_raw_tp_link *raw_tp_link = 2907 container_of(link, struct bpf_raw_tp_link, link); 2908 2909 seq_printf(seq, 2910 "tp_name:\t%s\n", 2911 raw_tp_link->btp->tp->name); 2912 } 2913 2914 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, 2915 struct bpf_link_info *info) 2916 { 2917 struct bpf_raw_tp_link *raw_tp_link = 2918 container_of(link, struct bpf_raw_tp_link, link); 2919 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 2920 const char *tp_name = raw_tp_link->btp->tp->name; 2921 u32 ulen = info->raw_tracepoint.tp_name_len; 2922 size_t tp_len = strlen(tp_name); 2923 2924 if (!ulen ^ !ubuf) 2925 return -EINVAL; 2926 2927 info->raw_tracepoint.tp_name_len = tp_len + 1; 2928 2929 if (!ubuf) 2930 return 0; 2931 2932 if (ulen >= tp_len + 1) { 2933 if (copy_to_user(ubuf, tp_name, tp_len + 1)) 2934 return -EFAULT; 2935 } else { 2936 char zero = '\0'; 2937 2938 if (copy_to_user(ubuf, tp_name, ulen - 1)) 2939 return -EFAULT; 2940 if (put_user(zero, ubuf + ulen - 1)) 2941 return -EFAULT; 2942 return -ENOSPC; 2943 } 2944 2945 return 0; 2946 } 2947 2948 static const struct bpf_link_ops bpf_raw_tp_link_lops = { 2949 .release = bpf_raw_tp_link_release, 2950 .dealloc = bpf_raw_tp_link_dealloc, 2951 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 2952 .fill_link_info = bpf_raw_tp_link_fill_link_info, 2953 }; 2954 2955 #ifdef CONFIG_PERF_EVENTS 2956 struct bpf_perf_link { 2957 struct bpf_link link; 2958 struct file *perf_file; 2959 }; 2960 2961 static void bpf_perf_link_release(struct bpf_link *link) 2962 { 2963 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 2964 struct perf_event *event = perf_link->perf_file->private_data; 2965 2966 perf_event_free_bpf_prog(event); 2967 fput(perf_link->perf_file); 2968 } 2969 2970 static void bpf_perf_link_dealloc(struct bpf_link *link) 2971 { 2972 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link); 2973 2974 kfree(perf_link); 2975 } 2976 2977 static const struct 
bpf_link_ops bpf_perf_link_lops = { 2978 .release = bpf_perf_link_release, 2979 .dealloc = bpf_perf_link_dealloc, 2980 }; 2981 2982 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 2983 { 2984 struct bpf_link_primer link_primer; 2985 struct bpf_perf_link *link; 2986 struct perf_event *event; 2987 struct file *perf_file; 2988 int err; 2989 2990 if (attr->link_create.flags) 2991 return -EINVAL; 2992 2993 perf_file = perf_event_get(attr->link_create.target_fd); 2994 if (IS_ERR(perf_file)) 2995 return PTR_ERR(perf_file); 2996 2997 link = kzalloc(sizeof(*link), GFP_USER); 2998 if (!link) { 2999 err = -ENOMEM; 3000 goto out_put_file; 3001 } 3002 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog); 3003 link->perf_file = perf_file; 3004 3005 err = bpf_link_prime(&link->link, &link_primer); 3006 if (err) { 3007 kfree(link); 3008 goto out_put_file; 3009 } 3010 3011 event = perf_file->private_data; 3012 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie); 3013 if (err) { 3014 bpf_link_cleanup(&link_primer); 3015 goto out_put_file; 3016 } 3017 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */ 3018 bpf_prog_inc(prog); 3019 3020 return bpf_link_settle(&link_primer); 3021 3022 out_put_file: 3023 fput(perf_file); 3024 return err; 3025 } 3026 #else 3027 static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3028 { 3029 return -EOPNOTSUPP; 3030 } 3031 #endif /* CONFIG_PERF_EVENTS */ 3032 3033 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3034 3035 static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3036 { 3037 struct bpf_link_primer link_primer; 3038 struct bpf_raw_tp_link *link; 3039 struct bpf_raw_event_map *btp; 3040 struct bpf_prog *prog; 3041 const char *tp_name; 3042 char buf[128]; 3043 int err; 3044 3045 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3046 return -EINVAL; 3047 3048 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3049 if (IS_ERR(prog)) 3050 return PTR_ERR(prog); 3051 3052 switch (prog->type) { 3053 case BPF_PROG_TYPE_TRACING: 3054 case BPF_PROG_TYPE_EXT: 3055 case BPF_PROG_TYPE_LSM: 3056 if (attr->raw_tracepoint.name) { 3057 /* The attach point for this category of programs 3058 * should be specified via btf_id during program load. 
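 * For BPF_TRACE_RAW_TP the tracepoint name is taken from
 * prog->aux->attach_func_name below; other expected attach types are
 * routed through bpf_tracing_prog_attach().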
3059 */ 3060 err = -EINVAL; 3061 goto out_put_prog; 3062 } 3063 if (prog->type == BPF_PROG_TYPE_TRACING && 3064 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 3065 tp_name = prog->aux->attach_func_name; 3066 break; 3067 } 3068 err = bpf_tracing_prog_attach(prog, 0, 0); 3069 if (err >= 0) 3070 return err; 3071 goto out_put_prog; 3072 case BPF_PROG_TYPE_RAW_TRACEPOINT: 3073 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 3074 if (strncpy_from_user(buf, 3075 u64_to_user_ptr(attr->raw_tracepoint.name), 3076 sizeof(buf) - 1) < 0) { 3077 err = -EFAULT; 3078 goto out_put_prog; 3079 } 3080 buf[sizeof(buf) - 1] = 0; 3081 tp_name = buf; 3082 break; 3083 default: 3084 err = -EINVAL; 3085 goto out_put_prog; 3086 } 3087 3088 btp = bpf_get_raw_tracepoint(tp_name); 3089 if (!btp) { 3090 err = -ENOENT; 3091 goto out_put_prog; 3092 } 3093 3094 link = kzalloc(sizeof(*link), GFP_USER); 3095 if (!link) { 3096 err = -ENOMEM; 3097 goto out_put_btp; 3098 } 3099 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, 3100 &bpf_raw_tp_link_lops, prog); 3101 link->btp = btp; 3102 3103 err = bpf_link_prime(&link->link, &link_primer); 3104 if (err) { 3105 kfree(link); 3106 goto out_put_btp; 3107 } 3108 3109 err = bpf_probe_register(link->btp, prog); 3110 if (err) { 3111 bpf_link_cleanup(&link_primer); 3112 goto out_put_btp; 3113 } 3114 3115 return bpf_link_settle(&link_primer); 3116 3117 out_put_btp: 3118 bpf_put_raw_tracepoint(btp); 3119 out_put_prog: 3120 bpf_prog_put(prog); 3121 return err; 3122 } 3123 3124 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3125 enum bpf_attach_type attach_type) 3126 { 3127 switch (prog->type) { 3128 case BPF_PROG_TYPE_CGROUP_SOCK: 3129 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3130 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3131 case BPF_PROG_TYPE_SK_LOOKUP: 3132 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3133 case BPF_PROG_TYPE_CGROUP_SKB: 3134 if (!capable(CAP_NET_ADMIN)) 3135 /* cg-skb progs can be loaded by unpriv user. 3136 * check permissions at attach time. 3137 */ 3138 return -EPERM; 3139 return prog->enforce_expected_attach_type && 3140 prog->expected_attach_type != attach_type ? 
3141 -EINVAL : 0; 3142 default: 3143 return 0; 3144 } 3145 } 3146 3147 static enum bpf_prog_type 3148 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3149 { 3150 switch (attach_type) { 3151 case BPF_CGROUP_INET_INGRESS: 3152 case BPF_CGROUP_INET_EGRESS: 3153 return BPF_PROG_TYPE_CGROUP_SKB; 3154 case BPF_CGROUP_INET_SOCK_CREATE: 3155 case BPF_CGROUP_INET_SOCK_RELEASE: 3156 case BPF_CGROUP_INET4_POST_BIND: 3157 case BPF_CGROUP_INET6_POST_BIND: 3158 return BPF_PROG_TYPE_CGROUP_SOCK; 3159 case BPF_CGROUP_INET4_BIND: 3160 case BPF_CGROUP_INET6_BIND: 3161 case BPF_CGROUP_INET4_CONNECT: 3162 case BPF_CGROUP_INET6_CONNECT: 3163 case BPF_CGROUP_INET4_GETPEERNAME: 3164 case BPF_CGROUP_INET6_GETPEERNAME: 3165 case BPF_CGROUP_INET4_GETSOCKNAME: 3166 case BPF_CGROUP_INET6_GETSOCKNAME: 3167 case BPF_CGROUP_UDP4_SENDMSG: 3168 case BPF_CGROUP_UDP6_SENDMSG: 3169 case BPF_CGROUP_UDP4_RECVMSG: 3170 case BPF_CGROUP_UDP6_RECVMSG: 3171 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 3172 case BPF_CGROUP_SOCK_OPS: 3173 return BPF_PROG_TYPE_SOCK_OPS; 3174 case BPF_CGROUP_DEVICE: 3175 return BPF_PROG_TYPE_CGROUP_DEVICE; 3176 case BPF_SK_MSG_VERDICT: 3177 return BPF_PROG_TYPE_SK_MSG; 3178 case BPF_SK_SKB_STREAM_PARSER: 3179 case BPF_SK_SKB_STREAM_VERDICT: 3180 case BPF_SK_SKB_VERDICT: 3181 return BPF_PROG_TYPE_SK_SKB; 3182 case BPF_LIRC_MODE2: 3183 return BPF_PROG_TYPE_LIRC_MODE2; 3184 case BPF_FLOW_DISSECTOR: 3185 return BPF_PROG_TYPE_FLOW_DISSECTOR; 3186 case BPF_CGROUP_SYSCTL: 3187 return BPF_PROG_TYPE_CGROUP_SYSCTL; 3188 case BPF_CGROUP_GETSOCKOPT: 3189 case BPF_CGROUP_SETSOCKOPT: 3190 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 3191 case BPF_TRACE_ITER: 3192 return BPF_PROG_TYPE_TRACING; 3193 case BPF_SK_LOOKUP: 3194 return BPF_PROG_TYPE_SK_LOOKUP; 3195 case BPF_XDP: 3196 return BPF_PROG_TYPE_XDP; 3197 default: 3198 return BPF_PROG_TYPE_UNSPEC; 3199 } 3200 } 3201 3202 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 3203 3204 #define BPF_F_ATTACH_MASK \ 3205 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 3206 3207 static int bpf_prog_attach(const union bpf_attr *attr) 3208 { 3209 enum bpf_prog_type ptype; 3210 struct bpf_prog *prog; 3211 int ret; 3212 3213 if (CHECK_ATTR(BPF_PROG_ATTACH)) 3214 return -EINVAL; 3215 3216 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) 3217 return -EINVAL; 3218 3219 ptype = attach_type_to_prog_type(attr->attach_type); 3220 if (ptype == BPF_PROG_TYPE_UNSPEC) 3221 return -EINVAL; 3222 3223 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 3224 if (IS_ERR(prog)) 3225 return PTR_ERR(prog); 3226 3227 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 3228 bpf_prog_put(prog); 3229 return -EINVAL; 3230 } 3231 3232 switch (ptype) { 3233 case BPF_PROG_TYPE_SK_SKB: 3234 case BPF_PROG_TYPE_SK_MSG: 3235 ret = sock_map_get_from_fd(attr, prog); 3236 break; 3237 case BPF_PROG_TYPE_LIRC_MODE2: 3238 ret = lirc_prog_attach(attr, prog); 3239 break; 3240 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3241 ret = netns_bpf_prog_attach(attr, prog); 3242 break; 3243 case BPF_PROG_TYPE_CGROUP_DEVICE: 3244 case BPF_PROG_TYPE_CGROUP_SKB: 3245 case BPF_PROG_TYPE_CGROUP_SOCK: 3246 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3247 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3248 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3249 case BPF_PROG_TYPE_SOCK_OPS: 3250 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3251 break; 3252 default: 3253 ret = -EINVAL; 3254 } 3255 3256 if (ret) 3257 bpf_prog_put(prog); 3258 return ret; 3259 } 3260 3261 #define BPF_PROG_DETACH_LAST_FIELD attach_type 3262 3263 static int 
bpf_prog_detach(const union bpf_attr *attr) 3264 { 3265 enum bpf_prog_type ptype; 3266 3267 if (CHECK_ATTR(BPF_PROG_DETACH)) 3268 return -EINVAL; 3269 3270 ptype = attach_type_to_prog_type(attr->attach_type); 3271 3272 switch (ptype) { 3273 case BPF_PROG_TYPE_SK_MSG: 3274 case BPF_PROG_TYPE_SK_SKB: 3275 return sock_map_prog_detach(attr, ptype); 3276 case BPF_PROG_TYPE_LIRC_MODE2: 3277 return lirc_prog_detach(attr); 3278 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3279 return netns_bpf_prog_detach(attr, ptype); 3280 case BPF_PROG_TYPE_CGROUP_DEVICE: 3281 case BPF_PROG_TYPE_CGROUP_SKB: 3282 case BPF_PROG_TYPE_CGROUP_SOCK: 3283 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3284 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3285 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3286 case BPF_PROG_TYPE_SOCK_OPS: 3287 return cgroup_bpf_prog_detach(attr, ptype); 3288 default: 3289 return -EINVAL; 3290 } 3291 } 3292 3293 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt 3294 3295 static int bpf_prog_query(const union bpf_attr *attr, 3296 union bpf_attr __user *uattr) 3297 { 3298 if (!capable(CAP_NET_ADMIN)) 3299 return -EPERM; 3300 if (CHECK_ATTR(BPF_PROG_QUERY)) 3301 return -EINVAL; 3302 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) 3303 return -EINVAL; 3304 3305 switch (attr->query.attach_type) { 3306 case BPF_CGROUP_INET_INGRESS: 3307 case BPF_CGROUP_INET_EGRESS: 3308 case BPF_CGROUP_INET_SOCK_CREATE: 3309 case BPF_CGROUP_INET_SOCK_RELEASE: 3310 case BPF_CGROUP_INET4_BIND: 3311 case BPF_CGROUP_INET6_BIND: 3312 case BPF_CGROUP_INET4_POST_BIND: 3313 case BPF_CGROUP_INET6_POST_BIND: 3314 case BPF_CGROUP_INET4_CONNECT: 3315 case BPF_CGROUP_INET6_CONNECT: 3316 case BPF_CGROUP_INET4_GETPEERNAME: 3317 case BPF_CGROUP_INET6_GETPEERNAME: 3318 case BPF_CGROUP_INET4_GETSOCKNAME: 3319 case BPF_CGROUP_INET6_GETSOCKNAME: 3320 case BPF_CGROUP_UDP4_SENDMSG: 3321 case BPF_CGROUP_UDP6_SENDMSG: 3322 case BPF_CGROUP_UDP4_RECVMSG: 3323 case BPF_CGROUP_UDP6_RECVMSG: 3324 case BPF_CGROUP_SOCK_OPS: 3325 case BPF_CGROUP_DEVICE: 3326 case BPF_CGROUP_SYSCTL: 3327 case BPF_CGROUP_GETSOCKOPT: 3328 case BPF_CGROUP_SETSOCKOPT: 3329 return cgroup_bpf_prog_query(attr, uattr); 3330 case BPF_LIRC_MODE2: 3331 return lirc_prog_query(attr, uattr); 3332 case BPF_FLOW_DISSECTOR: 3333 case BPF_SK_LOOKUP: 3334 return netns_bpf_prog_query(attr, uattr); 3335 case BPF_SK_SKB_STREAM_PARSER: 3336 case BPF_SK_SKB_STREAM_VERDICT: 3337 case BPF_SK_MSG_VERDICT: 3338 case BPF_SK_SKB_VERDICT: 3339 return sock_map_bpf_prog_query(attr, uattr); 3340 default: 3341 return -EINVAL; 3342 } 3343 } 3344 3345 #define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size 3346 3347 static int bpf_prog_test_run(const union bpf_attr *attr, 3348 union bpf_attr __user *uattr) 3349 { 3350 struct bpf_prog *prog; 3351 int ret = -ENOTSUPP; 3352 3353 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 3354 return -EINVAL; 3355 3356 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || 3357 (!attr->test.ctx_size_in && attr->test.ctx_in)) 3358 return -EINVAL; 3359 3360 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || 3361 (!attr->test.ctx_size_out && attr->test.ctx_out)) 3362 return -EINVAL; 3363 3364 prog = bpf_prog_get(attr->test.prog_fd); 3365 if (IS_ERR(prog)) 3366 return PTR_ERR(prog); 3367 3368 if (prog->aux->ops->test_run) 3369 ret = prog->aux->ops->test_run(prog, attr, uattr); 3370 3371 bpf_prog_put(prog); 3372 return ret; 3373 } 3374 3375 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 3376 3377 static int bpf_obj_get_next_id(const union bpf_attr *attr, 3378 union bpf_attr __user *uattr, 3379 struct idr *idr, 3380 
spinlock_t *lock) 3381 { 3382 u32 next_id = attr->start_id; 3383 int err = 0; 3384 3385 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) 3386 return -EINVAL; 3387 3388 if (!capable(CAP_SYS_ADMIN)) 3389 return -EPERM; 3390 3391 next_id++; 3392 spin_lock_bh(lock); 3393 if (!idr_get_next(idr, &next_id)) 3394 err = -ENOENT; 3395 spin_unlock_bh(lock); 3396 3397 if (!err) 3398 err = put_user(next_id, &uattr->next_id); 3399 3400 return err; 3401 } 3402 3403 struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 3404 { 3405 struct bpf_map *map; 3406 3407 spin_lock_bh(&map_idr_lock); 3408 again: 3409 map = idr_get_next(&map_idr, id); 3410 if (map) { 3411 map = __bpf_map_inc_not_zero(map, false); 3412 if (IS_ERR(map)) { 3413 (*id)++; 3414 goto again; 3415 } 3416 } 3417 spin_unlock_bh(&map_idr_lock); 3418 3419 return map; 3420 } 3421 3422 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 3423 { 3424 struct bpf_prog *prog; 3425 3426 spin_lock_bh(&prog_idr_lock); 3427 again: 3428 prog = idr_get_next(&prog_idr, id); 3429 if (prog) { 3430 prog = bpf_prog_inc_not_zero(prog); 3431 if (IS_ERR(prog)) { 3432 (*id)++; 3433 goto again; 3434 } 3435 } 3436 spin_unlock_bh(&prog_idr_lock); 3437 3438 return prog; 3439 } 3440 3441 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 3442 3443 struct bpf_prog *bpf_prog_by_id(u32 id) 3444 { 3445 struct bpf_prog *prog; 3446 3447 if (!id) 3448 return ERR_PTR(-ENOENT); 3449 3450 spin_lock_bh(&prog_idr_lock); 3451 prog = idr_find(&prog_idr, id); 3452 if (prog) 3453 prog = bpf_prog_inc_not_zero(prog); 3454 else 3455 prog = ERR_PTR(-ENOENT); 3456 spin_unlock_bh(&prog_idr_lock); 3457 return prog; 3458 } 3459 3460 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 3461 { 3462 struct bpf_prog *prog; 3463 u32 id = attr->prog_id; 3464 int fd; 3465 3466 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) 3467 return -EINVAL; 3468 3469 if (!capable(CAP_SYS_ADMIN)) 3470 return -EPERM; 3471 3472 prog = bpf_prog_by_id(id); 3473 if (IS_ERR(prog)) 3474 return PTR_ERR(prog); 3475 3476 fd = bpf_prog_new_fd(prog); 3477 if (fd < 0) 3478 bpf_prog_put(prog); 3479 3480 return fd; 3481 } 3482 3483 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 3484 3485 static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 3486 { 3487 struct bpf_map *map; 3488 u32 id = attr->map_id; 3489 int f_flags; 3490 int fd; 3491 3492 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || 3493 attr->open_flags & ~BPF_OBJ_FLAG_MASK) 3494 return -EINVAL; 3495 3496 if (!capable(CAP_SYS_ADMIN)) 3497 return -EPERM; 3498 3499 f_flags = bpf_get_file_flag(attr->open_flags); 3500 if (f_flags < 0) 3501 return f_flags; 3502 3503 spin_lock_bh(&map_idr_lock); 3504 map = idr_find(&map_idr, id); 3505 if (map) 3506 map = __bpf_map_inc_not_zero(map, true); 3507 else 3508 map = ERR_PTR(-ENOENT); 3509 spin_unlock_bh(&map_idr_lock); 3510 3511 if (IS_ERR(map)) 3512 return PTR_ERR(map); 3513 3514 fd = bpf_map_new_fd(map, f_flags); 3515 if (fd < 0) 3516 bpf_map_put_with_uref(map); 3517 3518 return fd; 3519 } 3520 3521 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, 3522 unsigned long addr, u32 *off, 3523 u32 *type) 3524 { 3525 const struct bpf_map *map; 3526 int i; 3527 3528 mutex_lock(&prog->aux->used_maps_mutex); 3529 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 3530 map = prog->aux->used_maps[i]; 3531 if (map == (void *)addr) { 3532 *type = BPF_PSEUDO_MAP_FD; 3533 goto out; 3534 } 3535 if (!map->ops->map_direct_value_meta) 3536 continue; 3537 if (!map->ops->map_direct_value_meta(map, addr, off)) { 3538 *type 
= BPF_PSEUDO_MAP_VALUE; 3539 goto out; 3540 } 3541 } 3542 map = NULL; 3543 3544 out: 3545 mutex_unlock(&prog->aux->used_maps_mutex); 3546 return map; 3547 } 3548 3549 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, 3550 const struct cred *f_cred) 3551 { 3552 const struct bpf_map *map; 3553 struct bpf_insn *insns; 3554 u32 off, type; 3555 u64 imm; 3556 u8 code; 3557 int i; 3558 3559 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), 3560 GFP_USER); 3561 if (!insns) 3562 return insns; 3563 3564 for (i = 0; i < prog->len; i++) { 3565 code = insns[i].code; 3566 3567 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 3568 insns[i].code = BPF_JMP | BPF_CALL; 3569 insns[i].imm = BPF_FUNC_tail_call; 3570 /* fall-through */ 3571 } 3572 if (code == (BPF_JMP | BPF_CALL) || 3573 code == (BPF_JMP | BPF_CALL_ARGS)) { 3574 if (code == (BPF_JMP | BPF_CALL_ARGS)) 3575 insns[i].code = BPF_JMP | BPF_CALL; 3576 if (!bpf_dump_raw_ok(f_cred)) 3577 insns[i].imm = 0; 3578 continue; 3579 } 3580 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 3581 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 3582 continue; 3583 } 3584 3585 if (code != (BPF_LD | BPF_IMM | BPF_DW)) 3586 continue; 3587 3588 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 3589 map = bpf_map_from_imm(prog, imm, &off, &type); 3590 if (map) { 3591 insns[i].src_reg = type; 3592 insns[i].imm = map->id; 3593 insns[i + 1].imm = off; 3594 continue; 3595 } 3596 } 3597 3598 return insns; 3599 } 3600 3601 static int set_info_rec_size(struct bpf_prog_info *info) 3602 { 3603 /* 3604 * Ensure info.*_rec_size is the same as kernel expected size 3605 * 3606 * or 3607 * 3608 * Only allow zero *_rec_size if both _rec_size and _cnt are 3609 * zero. In this case, the kernel will set the expected 3610 * _rec_size back to the info. 
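 * E.g. a caller that only wants to discover the expected record sizes can
 * pass all *_cnt and *_rec_size fields as zero and read the sizes back
 * from the info returned by BPF_OBJ_GET_INFO_BY_FD.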
3611 */ 3612 3613 if ((info->nr_func_info || info->func_info_rec_size) && 3614 info->func_info_rec_size != sizeof(struct bpf_func_info)) 3615 return -EINVAL; 3616 3617 if ((info->nr_line_info || info->line_info_rec_size) && 3618 info->line_info_rec_size != sizeof(struct bpf_line_info)) 3619 return -EINVAL; 3620 3621 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 3622 info->jited_line_info_rec_size != sizeof(__u64)) 3623 return -EINVAL; 3624 3625 info->func_info_rec_size = sizeof(struct bpf_func_info); 3626 info->line_info_rec_size = sizeof(struct bpf_line_info); 3627 info->jited_line_info_rec_size = sizeof(__u64); 3628 3629 return 0; 3630 } 3631 3632 static int bpf_prog_get_info_by_fd(struct file *file, 3633 struct bpf_prog *prog, 3634 const union bpf_attr *attr, 3635 union bpf_attr __user *uattr) 3636 { 3637 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3638 struct bpf_prog_info info; 3639 u32 info_len = attr->info.info_len; 3640 struct bpf_prog_kstats stats; 3641 char __user *uinsns; 3642 u32 ulen; 3643 int err; 3644 3645 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3646 if (err) 3647 return err; 3648 info_len = min_t(u32, sizeof(info), info_len); 3649 3650 memset(&info, 0, sizeof(info)); 3651 if (copy_from_user(&info, uinfo, info_len)) 3652 return -EFAULT; 3653 3654 info.type = prog->type; 3655 info.id = prog->aux->id; 3656 info.load_time = prog->aux->load_time; 3657 info.created_by_uid = from_kuid_munged(current_user_ns(), 3658 prog->aux->user->uid); 3659 info.gpl_compatible = prog->gpl_compatible; 3660 3661 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 3662 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 3663 3664 mutex_lock(&prog->aux->used_maps_mutex); 3665 ulen = info.nr_map_ids; 3666 info.nr_map_ids = prog->aux->used_map_cnt; 3667 ulen = min_t(u32, info.nr_map_ids, ulen); 3668 if (ulen) { 3669 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 3670 u32 i; 3671 3672 for (i = 0; i < ulen; i++) 3673 if (put_user(prog->aux->used_maps[i]->id, 3674 &user_map_ids[i])) { 3675 mutex_unlock(&prog->aux->used_maps_mutex); 3676 return -EFAULT; 3677 } 3678 } 3679 mutex_unlock(&prog->aux->used_maps_mutex); 3680 3681 err = set_info_rec_size(&info); 3682 if (err) 3683 return err; 3684 3685 bpf_prog_get_stats(prog, &stats); 3686 info.run_time_ns = stats.nsecs; 3687 info.run_cnt = stats.cnt; 3688 info.recursion_misses = stats.misses; 3689 3690 info.verified_insns = prog->aux->verified_insns; 3691 3692 if (!bpf_capable()) { 3693 info.jited_prog_len = 0; 3694 info.xlated_prog_len = 0; 3695 info.nr_jited_ksyms = 0; 3696 info.nr_jited_func_lens = 0; 3697 info.nr_func_info = 0; 3698 info.nr_line_info = 0; 3699 info.nr_jited_line_info = 0; 3700 goto done; 3701 } 3702 3703 ulen = info.xlated_prog_len; 3704 info.xlated_prog_len = bpf_prog_insn_size(prog); 3705 if (info.xlated_prog_len && ulen) { 3706 struct bpf_insn *insns_sanitized; 3707 bool fault; 3708 3709 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 3710 info.xlated_prog_insns = 0; 3711 goto done; 3712 } 3713 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 3714 if (!insns_sanitized) 3715 return -ENOMEM; 3716 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 3717 ulen = min_t(u32, info.xlated_prog_len, ulen); 3718 fault = copy_to_user(uinsns, insns_sanitized, ulen); 3719 kfree(insns_sanitized); 3720 if (fault) 3721 return -EFAULT; 3722 } 3723 3724 if (bpf_prog_is_dev_bound(prog->aux)) { 3725 err = bpf_prog_offload_info_fill(&info, 
prog); 3726 if (err) 3727 return err; 3728 goto done; 3729 } 3730 3731 /* NOTE: the following code is supposed to be skipped for offload. 3732 * bpf_prog_offload_info_fill() is the place to fill similar fields 3733 * for offload. 3734 */ 3735 ulen = info.jited_prog_len; 3736 if (prog->aux->func_cnt) { 3737 u32 i; 3738 3739 info.jited_prog_len = 0; 3740 for (i = 0; i < prog->aux->func_cnt; i++) 3741 info.jited_prog_len += prog->aux->func[i]->jited_len; 3742 } else { 3743 info.jited_prog_len = prog->jited_len; 3744 } 3745 3746 if (info.jited_prog_len && ulen) { 3747 if (bpf_dump_raw_ok(file->f_cred)) { 3748 uinsns = u64_to_user_ptr(info.jited_prog_insns); 3749 ulen = min_t(u32, info.jited_prog_len, ulen); 3750 3751 /* for multi-function programs, copy the JITed 3752 * instructions for all the functions 3753 */ 3754 if (prog->aux->func_cnt) { 3755 u32 len, free, i; 3756 u8 *img; 3757 3758 free = ulen; 3759 for (i = 0; i < prog->aux->func_cnt; i++) { 3760 len = prog->aux->func[i]->jited_len; 3761 len = min_t(u32, len, free); 3762 img = (u8 *) prog->aux->func[i]->bpf_func; 3763 if (copy_to_user(uinsns, img, len)) 3764 return -EFAULT; 3765 uinsns += len; 3766 free -= len; 3767 if (!free) 3768 break; 3769 } 3770 } else { 3771 if (copy_to_user(uinsns, prog->bpf_func, ulen)) 3772 return -EFAULT; 3773 } 3774 } else { 3775 info.jited_prog_insns = 0; 3776 } 3777 } 3778 3779 ulen = info.nr_jited_ksyms; 3780 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; 3781 if (ulen) { 3782 if (bpf_dump_raw_ok(file->f_cred)) { 3783 unsigned long ksym_addr; 3784 u64 __user *user_ksyms; 3785 u32 i; 3786 3787 /* copy the address of the kernel symbol 3788 * corresponding to each function 3789 */ 3790 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 3791 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 3792 if (prog->aux->func_cnt) { 3793 for (i = 0; i < ulen; i++) { 3794 ksym_addr = (unsigned long) 3795 prog->aux->func[i]->bpf_func; 3796 if (put_user((u64) ksym_addr, 3797 &user_ksyms[i])) 3798 return -EFAULT; 3799 } 3800 } else { 3801 ksym_addr = (unsigned long) prog->bpf_func; 3802 if (put_user((u64) ksym_addr, &user_ksyms[0])) 3803 return -EFAULT; 3804 } 3805 } else { 3806 info.jited_ksyms = 0; 3807 } 3808 } 3809 3810 ulen = info.nr_jited_func_lens; 3811 info.nr_jited_func_lens = prog->aux->func_cnt ? 
: 1; 3812 if (ulen) { 3813 if (bpf_dump_raw_ok(file->f_cred)) { 3814 u32 __user *user_lens; 3815 u32 func_len, i; 3816 3817 /* copy the JITed image lengths for each function */ 3818 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 3819 user_lens = u64_to_user_ptr(info.jited_func_lens); 3820 if (prog->aux->func_cnt) { 3821 for (i = 0; i < ulen; i++) { 3822 func_len = 3823 prog->aux->func[i]->jited_len; 3824 if (put_user(func_len, &user_lens[i])) 3825 return -EFAULT; 3826 } 3827 } else { 3828 func_len = prog->jited_len; 3829 if (put_user(func_len, &user_lens[0])) 3830 return -EFAULT; 3831 } 3832 } else { 3833 info.jited_func_lens = 0; 3834 } 3835 } 3836 3837 if (prog->aux->btf) 3838 info.btf_id = btf_obj_id(prog->aux->btf); 3839 3840 ulen = info.nr_func_info; 3841 info.nr_func_info = prog->aux->func_info_cnt; 3842 if (info.nr_func_info && ulen) { 3843 char __user *user_finfo; 3844 3845 user_finfo = u64_to_user_ptr(info.func_info); 3846 ulen = min_t(u32, info.nr_func_info, ulen); 3847 if (copy_to_user(user_finfo, prog->aux->func_info, 3848 info.func_info_rec_size * ulen)) 3849 return -EFAULT; 3850 } 3851 3852 ulen = info.nr_line_info; 3853 info.nr_line_info = prog->aux->nr_linfo; 3854 if (info.nr_line_info && ulen) { 3855 __u8 __user *user_linfo; 3856 3857 user_linfo = u64_to_user_ptr(info.line_info); 3858 ulen = min_t(u32, info.nr_line_info, ulen); 3859 if (copy_to_user(user_linfo, prog->aux->linfo, 3860 info.line_info_rec_size * ulen)) 3861 return -EFAULT; 3862 } 3863 3864 ulen = info.nr_jited_line_info; 3865 if (prog->aux->jited_linfo) 3866 info.nr_jited_line_info = prog->aux->nr_linfo; 3867 else 3868 info.nr_jited_line_info = 0; 3869 if (info.nr_jited_line_info && ulen) { 3870 if (bpf_dump_raw_ok(file->f_cred)) { 3871 __u64 __user *user_linfo; 3872 u32 i; 3873 3874 user_linfo = u64_to_user_ptr(info.jited_line_info); 3875 ulen = min_t(u32, info.nr_jited_line_info, ulen); 3876 for (i = 0; i < ulen; i++) { 3877 if (put_user((__u64)(long)prog->aux->jited_linfo[i], 3878 &user_linfo[i])) 3879 return -EFAULT; 3880 } 3881 } else { 3882 info.jited_line_info = 0; 3883 } 3884 } 3885 3886 ulen = info.nr_prog_tags; 3887 info.nr_prog_tags = prog->aux->func_cnt ? 
: 1; 3888 if (ulen) { 3889 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; 3890 u32 i; 3891 3892 user_prog_tags = u64_to_user_ptr(info.prog_tags); 3893 ulen = min_t(u32, info.nr_prog_tags, ulen); 3894 if (prog->aux->func_cnt) { 3895 for (i = 0; i < ulen; i++) { 3896 if (copy_to_user(user_prog_tags[i], 3897 prog->aux->func[i]->tag, 3898 BPF_TAG_SIZE)) 3899 return -EFAULT; 3900 } 3901 } else { 3902 if (copy_to_user(user_prog_tags[0], 3903 prog->tag, BPF_TAG_SIZE)) 3904 return -EFAULT; 3905 } 3906 } 3907 3908 done: 3909 if (copy_to_user(uinfo, &info, info_len) || 3910 put_user(info_len, &uattr->info.info_len)) 3911 return -EFAULT; 3912 3913 return 0; 3914 } 3915 3916 static int bpf_map_get_info_by_fd(struct file *file, 3917 struct bpf_map *map, 3918 const union bpf_attr *attr, 3919 union bpf_attr __user *uattr) 3920 { 3921 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3922 struct bpf_map_info info; 3923 u32 info_len = attr->info.info_len; 3924 int err; 3925 3926 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3927 if (err) 3928 return err; 3929 info_len = min_t(u32, sizeof(info), info_len); 3930 3931 memset(&info, 0, sizeof(info)); 3932 info.type = map->map_type; 3933 info.id = map->id; 3934 info.key_size = map->key_size; 3935 info.value_size = map->value_size; 3936 info.max_entries = map->max_entries; 3937 info.map_flags = map->map_flags; 3938 info.map_extra = map->map_extra; 3939 memcpy(info.name, map->name, sizeof(map->name)); 3940 3941 if (map->btf) { 3942 info.btf_id = btf_obj_id(map->btf); 3943 info.btf_key_type_id = map->btf_key_type_id; 3944 info.btf_value_type_id = map->btf_value_type_id; 3945 } 3946 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 3947 3948 if (bpf_map_is_dev_bound(map)) { 3949 err = bpf_map_offload_info_fill(&info, map); 3950 if (err) 3951 return err; 3952 } 3953 3954 if (copy_to_user(uinfo, &info, info_len) || 3955 put_user(info_len, &uattr->info.info_len)) 3956 return -EFAULT; 3957 3958 return 0; 3959 } 3960 3961 static int bpf_btf_get_info_by_fd(struct file *file, 3962 struct btf *btf, 3963 const union bpf_attr *attr, 3964 union bpf_attr __user *uattr) 3965 { 3966 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3967 u32 info_len = attr->info.info_len; 3968 int err; 3969 3970 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len); 3971 if (err) 3972 return err; 3973 3974 return btf_get_info_by_fd(btf, attr, uattr); 3975 } 3976 3977 static int bpf_link_get_info_by_fd(struct file *file, 3978 struct bpf_link *link, 3979 const union bpf_attr *attr, 3980 union bpf_attr __user *uattr) 3981 { 3982 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3983 struct bpf_link_info info; 3984 u32 info_len = attr->info.info_len; 3985 int err; 3986 3987 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len); 3988 if (err) 3989 return err; 3990 info_len = min_t(u32, sizeof(info), info_len); 3991 3992 memset(&info, 0, sizeof(info)); 3993 if (copy_from_user(&info, uinfo, info_len)) 3994 return -EFAULT; 3995 3996 info.type = link->type; 3997 info.id = link->id; 3998 info.prog_id = link->prog->aux->id; 3999 4000 if (link->ops->fill_link_info) { 4001 err = link->ops->fill_link_info(link, &info); 4002 if (err) 4003 return err; 4004 } 4005 4006 if (copy_to_user(uinfo, &info, info_len) || 4007 put_user(info_len, &uattr->info.info_len)) 4008 return -EFAULT; 4009 4010 return 0; 4011 } 4012 4013 4014 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD 
info.info 4015 4016 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, 4017 union bpf_attr __user *uattr) 4018 { 4019 int ufd = attr->info.bpf_fd; 4020 struct fd f; 4021 int err; 4022 4023 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) 4024 return -EINVAL; 4025 4026 f = fdget(ufd); 4027 if (!f.file) 4028 return -EBADFD; 4029 4030 if (f.file->f_op == &bpf_prog_fops) 4031 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, 4032 uattr); 4033 else if (f.file->f_op == &bpf_map_fops) 4034 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, 4035 uattr); 4036 else if (f.file->f_op == &btf_fops) 4037 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 4038 else if (f.file->f_op == &bpf_link_fops) 4039 err = bpf_link_get_info_by_fd(f.file, f.file->private_data, 4040 attr, uattr); 4041 else 4042 err = -EINVAL; 4043 4044 fdput(f); 4045 return err; 4046 } 4047 4048 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level 4049 4050 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr) 4051 { 4052 if (CHECK_ATTR(BPF_BTF_LOAD)) 4053 return -EINVAL; 4054 4055 if (!bpf_capable()) 4056 return -EPERM; 4057 4058 return btf_new_fd(attr, uattr); 4059 } 4060 4061 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id 4062 4063 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 4064 { 4065 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 4066 return -EINVAL; 4067 4068 if (!capable(CAP_SYS_ADMIN)) 4069 return -EPERM; 4070 4071 return btf_get_fd_by_id(attr->btf_id); 4072 } 4073 4074 static int bpf_task_fd_query_copy(const union bpf_attr *attr, 4075 union bpf_attr __user *uattr, 4076 u32 prog_id, u32 fd_type, 4077 const char *buf, u64 probe_offset, 4078 u64 probe_addr) 4079 { 4080 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 4081 u32 len = buf ? strlen(buf) : 0, input_len; 4082 int err = 0; 4083 4084 if (put_user(len, &uattr->task_fd_query.buf_len)) 4085 return -EFAULT; 4086 input_len = attr->task_fd_query.buf_len; 4087 if (input_len && ubuf) { 4088 if (!len) { 4089 /* nothing to copy, just make ubuf NULL terminated */ 4090 char zero = '\0'; 4091 4092 if (put_user(zero, ubuf)) 4093 return -EFAULT; 4094 } else if (input_len >= len + 1) { 4095 /* ubuf can hold the string with NULL terminator */ 4096 if (copy_to_user(ubuf, buf, len + 1)) 4097 return -EFAULT; 4098 } else { 4099 /* ubuf cannot hold the string with NULL terminator, 4100 * do a partial copy with NULL terminator. 
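 * Only input_len - 1 bytes are copied and the final byte is zeroed;
 * -ENOSPC tells the caller that the buffer was too small, while the full
 * length has already been reported via buf_len above.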
4101 */ 4102 char zero = '\0'; 4103 4104 err = -ENOSPC; 4105 if (copy_to_user(ubuf, buf, input_len - 1)) 4106 return -EFAULT; 4107 if (put_user(zero, ubuf + input_len - 1)) 4108 return -EFAULT; 4109 } 4110 } 4111 4112 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || 4113 put_user(fd_type, &uattr->task_fd_query.fd_type) || 4114 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 4115 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) 4116 return -EFAULT; 4117 4118 return err; 4119 } 4120 4121 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 4122 4123 static int bpf_task_fd_query(const union bpf_attr *attr, 4124 union bpf_attr __user *uattr) 4125 { 4126 pid_t pid = attr->task_fd_query.pid; 4127 u32 fd = attr->task_fd_query.fd; 4128 const struct perf_event *event; 4129 struct task_struct *task; 4130 struct file *file; 4131 int err; 4132 4133 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) 4134 return -EINVAL; 4135 4136 if (!capable(CAP_SYS_ADMIN)) 4137 return -EPERM; 4138 4139 if (attr->task_fd_query.flags != 0) 4140 return -EINVAL; 4141 4142 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 4143 if (!task) 4144 return -ENOENT; 4145 4146 err = 0; 4147 file = fget_task(task, fd); 4148 put_task_struct(task); 4149 if (!file) 4150 return -EBADF; 4151 4152 if (file->f_op == &bpf_link_fops) { 4153 struct bpf_link *link = file->private_data; 4154 4155 if (link->ops == &bpf_raw_tp_link_lops) { 4156 struct bpf_raw_tp_link *raw_tp = 4157 container_of(link, struct bpf_raw_tp_link, link); 4158 struct bpf_raw_event_map *btp = raw_tp->btp; 4159 4160 err = bpf_task_fd_query_copy(attr, uattr, 4161 raw_tp->link.prog->aux->id, 4162 BPF_FD_TYPE_RAW_TRACEPOINT, 4163 btp->tp->name, 0, 0); 4164 goto put_file; 4165 } 4166 goto out_not_supp; 4167 } 4168 4169 event = perf_get_event(file); 4170 if (!IS_ERR(event)) { 4171 u64 probe_offset, probe_addr; 4172 u32 prog_id, fd_type; 4173 const char *buf; 4174 4175 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, 4176 &buf, &probe_offset, 4177 &probe_addr); 4178 if (!err) 4179 err = bpf_task_fd_query_copy(attr, uattr, prog_id, 4180 fd_type, buf, 4181 probe_offset, 4182 probe_addr); 4183 goto put_file; 4184 } 4185 4186 out_not_supp: 4187 err = -ENOTSUPP; 4188 put_file: 4189 fput(file); 4190 return err; 4191 } 4192 4193 #define BPF_MAP_BATCH_LAST_FIELD batch.flags 4194 4195 #define BPF_DO_BATCH(fn) \ 4196 do { \ 4197 if (!fn) { \ 4198 err = -ENOTSUPP; \ 4199 goto err_put; \ 4200 } \ 4201 err = fn(map, attr, uattr); \ 4202 } while (0) 4203 4204 static int bpf_map_do_batch(const union bpf_attr *attr, 4205 union bpf_attr __user *uattr, 4206 int cmd) 4207 { 4208 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || 4209 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; 4210 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; 4211 struct bpf_map *map; 4212 int err, ufd; 4213 struct fd f; 4214 4215 if (CHECK_ATTR(BPF_MAP_BATCH)) 4216 return -EINVAL; 4217 4218 ufd = attr->batch.map_fd; 4219 f = fdget(ufd); 4220 map = __bpf_map_get(f); 4221 if (IS_ERR(map)) 4222 return PTR_ERR(map); 4223 if (has_write) 4224 bpf_map_write_active_inc(map); 4225 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 4226 err = -EPERM; 4227 goto err_put; 4228 } 4229 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 4230 err = -EPERM; 4231 goto err_put; 4232 } 4233 4234 if (cmd == BPF_MAP_LOOKUP_BATCH) 4235 BPF_DO_BATCH(map->ops->map_lookup_batch); 4236 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 4237 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); 4238 else if 

#define BPF_MAP_BATCH_LAST_FIELD batch.flags

#define BPF_DO_BATCH(fn)			\
	do {					\
		if (!fn) {			\
			err = -ENOTSUPP;	\
			goto err_put;		\
		}				\
		err = fn(map, attr, uattr);	\
	} while (0)

static int bpf_map_do_batch(const union bpf_attr *attr,
			    union bpf_attr __user *uattr,
			    int cmd)
{
	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
	struct bpf_map *map;
	int err, ufd;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_BATCH))
		return -EINVAL;

	ufd = attr->batch.map_fd;
	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (has_write)
		bpf_map_write_active_inc(map);
	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}
	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (cmd == BPF_MAP_LOOKUP_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_batch);
	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
	else if (cmd == BPF_MAP_UPDATE_BATCH)
		BPF_DO_BATCH(map->ops->map_update_batch);
	else
		BPF_DO_BATCH(map->ops->map_delete_batch);
err_put:
	if (has_write)
		bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
				   struct bpf_prog *prog)
{
	if (attr->link_create.attach_type != prog->expected_attach_type)
		return -EINVAL;

	if (prog->expected_attach_type == BPF_TRACE_ITER)
		return bpf_iter_link_attach(attr, uattr, prog);
	else if (prog->type == BPF_PROG_TYPE_EXT)
		return bpf_tracing_prog_attach(prog,
					       attr->link_create.target_fd,
					       attr->link_create.target_btf_id);
	return -EINVAL;
}

#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_LINK_CREATE))
		return -EINVAL;

	prog = bpf_prog_get(attr->link_create.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_attach_check_attach_type(prog,
						attr->link_create.attach_type);
	if (ret)
		goto out;

	switch (prog->type) {
	case BPF_PROG_TYPE_EXT:
		ret = tracing_bpf_link_attach(attr, uattr, prog);
		goto out;
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		if (attr->link_create.attach_type != BPF_PERF_EVENT) {
			ret = -EINVAL;
			goto out;
		}
		ptype = prog->type;
		break;
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type != BPF_PERF_EVENT &&
		    attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
			ret = -EINVAL;
			goto out;
		}
		ptype = prog->type;
		break;
	default:
		ptype = attach_type_to_prog_type(attr->link_create.attach_type);
		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
			ret = -EINVAL;
			goto out;
		}
		break;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		ret = cgroup_bpf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_TRACING:
		ret = tracing_bpf_link_attach(attr, uattr, prog);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		ret = netns_bpf_link_create(attr, prog);
		break;
#ifdef CONFIG_NET
	case BPF_PROG_TYPE_XDP:
		ret = bpf_xdp_link_attach(attr, prog);
		break;
#endif
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
		ret = bpf_perf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_KPROBE:
		if (attr->link_create.attach_type == BPF_PERF_EVENT)
			ret = bpf_perf_link_attach(attr, prog);
		else
			ret = bpf_kprobe_multi_link_attach(attr, prog);
		break;
	default:
		ret = -EINVAL;
	}

out:
	if (ret < 0)
		bpf_prog_put(prog);
	return ret;
}
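
/*
 * Illustrative userspace sketch (an assumption, not kernel code): creating a
 * cgroup link with BPF_LINK_CREATE.  prog_fd refers to an already loaded
 * BPF_PROG_TYPE_CGROUP_SKB program and target_fd to an open cgroup directory;
 * the attach type selects the expected prog type via
 * attach_type_to_prog_type() above.
 *
 *	union bpf_attr attr = {};
 *	int link_fd;
 *
 *	attr.link_create.prog_fd = prog_fd;
 *	attr.link_create.target_fd = cgroup_fd;
 *	attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
 *	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
 */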

#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd

static int link_update(union bpf_attr *attr)
{
	struct bpf_prog *old_prog = NULL, *new_prog;
	struct bpf_link *link;
	u32 flags;
	int ret;

	if (CHECK_ATTR(BPF_LINK_UPDATE))
		return -EINVAL;

	flags = attr->link_update.flags;
	if (flags & ~BPF_F_REPLACE)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_update.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
	if (IS_ERR(new_prog)) {
		ret = PTR_ERR(new_prog);
		goto out_put_link;
	}

	if (flags & BPF_F_REPLACE) {
		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
		if (IS_ERR(old_prog)) {
			ret = PTR_ERR(old_prog);
			old_prog = NULL;
			goto out_put_progs;
		}
	} else if (attr->link_update.old_prog_fd) {
		ret = -EINVAL;
		goto out_put_progs;
	}

	if (link->ops->update_prog)
		ret = link->ops->update_prog(link, new_prog, old_prog);
	else
		ret = -EINVAL;

out_put_progs:
	if (old_prog)
		bpf_prog_put(old_prog);
	if (ret)
		bpf_prog_put(new_prog);
out_put_link:
	bpf_link_put(link);
	return ret;
}

#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd

static int link_detach(union bpf_attr *attr)
{
	struct bpf_link *link;
	int ret;

	if (CHECK_ATTR(BPF_LINK_DETACH))
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (link->ops->detach)
		ret = link->ops->detach(link);
	else
		ret = -EOPNOTSUPP;

	bpf_link_put(link);
	return ret;
}

static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}

struct bpf_link *bpf_link_by_id(u32 id)
{
	struct bpf_link *link;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&link_idr_lock);
	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
	link = idr_find(&link_idr, id);
	if (link) {
		if (link->id)
			link = bpf_link_inc_not_zero(link);
		else
			link = ERR_PTR(-EAGAIN);
	} else {
		link = ERR_PTR(-ENOENT);
	}
	spin_unlock_bh(&link_idr_lock);
	return link;
}

#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id

static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_link *link;
	u32 id = attr->link_id;
	int fd;

	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	link = bpf_link_by_id(id);
	if (IS_ERR(link))
		return PTR_ERR(link);

	fd = bpf_link_new_fd(link);
	if (fd < 0)
		bpf_link_put(link);

	return fd;
}
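
/*
 * Illustrative userspace sketch (an assumption, not kernel code): atomically
 * swapping the program behind an existing link via BPF_LINK_UPDATE.  Without
 * BPF_F_REPLACE, old_prog_fd must stay 0; with it, the update only succeeds
 * if the link is still running old_prog_fd's program.
 *
 *	union bpf_attr attr = {};
 *
 *	attr.link_update.link_fd = link_fd;
 *	attr.link_update.new_prog_fd = new_prog_fd;
 *	attr.link_update.flags = BPF_F_REPLACE;
 *	attr.link_update.old_prog_fd = old_prog_fd;
 *	syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
 */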

DEFINE_MUTEX(bpf_stats_enabled_mutex);

static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}

static const struct file_operations bpf_stats_fops = {
	.release = bpf_stats_release,
};

static int bpf_enable_runtime_stats(void)
{
	int fd;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
		mutex_unlock(&bpf_stats_enabled_mutex);
		return -EBUSY;
	}

	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
	if (fd >= 0)
		static_key_slow_inc(&bpf_stats_enabled_key.key);

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}

#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

static int bpf_enable_stats(union bpf_attr *attr)
{

	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		break;
	}
	return -EINVAL;
}

#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int err;

	if (CHECK_ATTR(BPF_ITER_CREATE))
		return -EINVAL;

	if (attr->iter_create.flags)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	err = bpf_iter_new_fd(link);
	bpf_link_put(link);

	return err;
}

#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags

static int bpf_prog_bind_map(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	struct bpf_map **used_maps_old, **used_maps_new;
	int i, ret = 0;

	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
		return -EINVAL;

	if (attr->prog_bind_map.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	map = bpf_map_get(attr->prog_bind_map.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto out_prog_put;
	}

	mutex_lock(&prog->aux->used_maps_mutex);

	used_maps_old = prog->aux->used_maps;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (used_maps_old[i] == map) {
			bpf_map_put(map);
			goto out_unlock;
		}

	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
				      sizeof(used_maps_new[0]),
				      GFP_KERNEL);
	if (!used_maps_new) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	memcpy(used_maps_new, used_maps_old,
	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
	used_maps_new[prog->aux->used_map_cnt] = map;

	prog->aux->used_map_cnt++;
	prog->aux->used_maps = used_maps_new;

	kfree(used_maps_old);

out_unlock:
	mutex_unlock(&prog->aux->used_maps_mutex);

	if (ret)
		bpf_map_put(map);
out_prog_put:
	bpf_prog_put(prog);
	return ret;
}
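
/*
 * Illustrative userspace sketch (an assumption, not kernel code): binding an
 * extra map to a loaded program with BPF_PROG_BIND_MAP, so the map's lifetime
 * follows the program even though the program's instructions never reference
 * it (e.g. a map that only carries metadata).  flags must be 0.
 *
 *	union bpf_attr attr = {};
 *
 *	attr.prog_bind_map.prog_fd = prog_fd;
 *	attr.prog_bind_map.map_fd = map_fd;
 *	syscall(__NR_bpf, BPF_PROG_BIND_MAP, &attr, sizeof(attr));
 */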

static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}
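
/*
 * Illustrative userspace sketch (an assumption, not kernel code): libc
 * typically ships no bpf(2) wrapper, so callers invoke the syscall entry
 * point above through syscall(2).  The kernel clamps size to
 * sizeof(union bpf_attr) and rejects attrs whose bytes beyond the fields it
 * knows about are non-zero.
 *
 *	static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */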

BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
{
	struct bpf_prog * __maybe_unused prog;

	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
	case BPF_LINK_CREATE:
	case BPF_RAW_TRACEPOINT_OPEN:
		break;
#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
	case BPF_PROG_TEST_RUN:
		if (attr->test.data_in || attr->test.data_out ||
		    attr->test.ctx_out || attr->test.duration ||
		    attr->test.repeat || attr->test.flags)
			return -EINVAL;

		prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
		if (IS_ERR(prog))
			return PTR_ERR(prog);

		if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
		    attr->test.ctx_size_in > U16_MAX) {
			bpf_prog_put(prog);
			return -EINVAL;
		}

		if (!__bpf_prog_enter_sleepable(prog)) {
			/* recursion detected */
			bpf_prog_put(prog);
			return -EBUSY;
		}
		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
		bpf_prog_put(prog);
		return 0;
#endif
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}
EXPORT_SYMBOL(bpf_sys_bpf);

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func = bpf_sys_bpf,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func = bpf_sys_close,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};
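
/*
 * Illustrative sketch (an assumption, not kernel code): how a
 * BPF_PROG_TYPE_SYSCALL program, e.g. a libbpf light-skeleton loader, might
 * call the bpf_sys_bpf() helper above.  Only the commands whitelisted in the
 * switch above are accepted; anything else fails with -EINVAL.
 *
 *	SEC("syscall")
 *	int create_array(void *ctx)
 *	{
 *		union bpf_attr attr = {
 *			.map_type = BPF_MAP_TYPE_ARRAY,
 *			.key_size = 4,
 *			.value_size = 8,
 *			.max_entries = 1,
 *		};
 *
 *		return bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *	}
 */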

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func = bpf_kallsyms_lookup_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_MEM,
	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
	.arg3_type = ARG_ANYTHING,
	.arg4_type = ARG_PTR_TO_LONG,
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto = syscall_prog_func_proto,
	.is_valid_access = syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run = bpf_prog_test_run_syscall,
};
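
/*
 * Illustrative sketch (an assumption, not kernel code): resolving a kernel
 * symbol from a BPF_PROG_TYPE_SYSCALL program with the
 * bpf_kallsyms_lookup_name() helper above.  name_sz must include the
 * terminating NUL, flags must be 0, and the caller's creds must pass
 * bpf_dump_raw_ok().
 *
 *	SEC("syscall")
 *	int find_symbol(void *ctx)
 *	{
 *		const char name[] = "bpf_prog_put";
 *		__u64 addr = 0;
 *
 *		return bpf_kallsyms_lookup_name(name, sizeof(name), 0, &addr);
 *	}
 */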