--- syscall.c (2c0248d68880fc0e783af1048b3367ee5d4412f0)
+++ syscall.c (19809c2da28aee5860ad9a2eff760730a0710df0)
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of

--- 13 unchanged lines hidden ---

 #include <linux/filter.h>
 #include <linux/version.h>
 #include <linux/kernel.h>
 
 DEFINE_PER_CPU(int, bpf_prog_active);
 
 int sysctl_unprivileged_bpf_disabled __read_mostly;
 
-static LIST_HEAD(bpf_map_types);
+static const struct bpf_map_ops * const bpf_map_types[] = {
+#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_MAP_TYPE(_id, _ops) \
+        [_id] = &_ops,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+};
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 {
-        struct bpf_map_type_list *tl;
         struct bpf_map *map;
 
-        list_for_each_entry(tl, &bpf_map_types, list_node) {
-                if (tl->type == attr->map_type) {
-                        map = tl->ops->map_alloc(attr);
-                        if (IS_ERR(map))
-                                return map;
-                        map->ops = tl->ops;
-                        map->map_type = attr->map_type;
-                        return map;
-                }
-        }
-        return ERR_PTR(-EINVAL);
-}
+        if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
+            !bpf_map_types[attr->map_type])
+                return ERR_PTR(-EINVAL);
 
-/* boot time registration of different map implementations */
-void bpf_register_map_type(struct bpf_map_type_list *tl)
-{
-        list_add(&tl->list_node, &bpf_map_types);
+        map = bpf_map_types[attr->map_type]->map_alloc(attr);
+        if (IS_ERR(map))
+                return map;
+        map->ops = bpf_map_types[attr->map_type];
+        map->map_type = attr->map_type;
+        return map;
 }
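
The new table is built with the X-macro pattern: <linux/bpf_types.h> lists every type once as BPF_PROG_TYPE()/BPF_MAP_TYPE() invocations, and each includer defines only the macro it wants expanded, stubbing out the other. A minimal standalone sketch of the same idea (all names here, demo_types and friends, are hypothetical, and the list is inlined rather than kept in a header):

        #include <stdio.h>

        struct demo_ops { const char *name; };

        static const struct demo_ops array_ops = { "array" };
        static const struct demo_ops hash_ops  = { "hash" };

        /* stand-in for <linux/bpf_types.h>: one entry per registered type */
        #define DEMO_TYPES \
                DEMO_TYPE(0, array_ops) \
                DEMO_TYPE(1, hash_ops)

        /* expand the list into designated array initializers */
        #define DEMO_TYPE(_id, _ops) [_id] = &_ops,
        static const struct demo_ops * const demo_types[] = {
                DEMO_TYPES
        };
        #undef DEMO_TYPE

        int main(void)
        {
                /* lookup becomes a bounds check plus an array index */
                unsigned int type = 1;

                if (type < sizeof(demo_types) / sizeof(demo_types[0]) &&
                    demo_types[type])
                        printf("%s\n", demo_types[type]->name); /* "hash" */
                return 0;
        }

The payoff is visible in the rewritten find_and_alloc_map() above: the linked-list walk and the bpf_register_map_type() boot-time registration hook both disappear in favor of a constant table.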
 
 void *bpf_map_area_alloc(size_t size)
 {
         /* We definitely need __GFP_NORETRY, so OOM killer doesn't
          * trigger under memory pressure as we really just want to
          * fail instead.
          */
         const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
         void *area;
 
         if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
                 area = kmalloc(size, GFP_USER | flags);
                 if (area != NULL)
                         return area;
         }
 
-        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
-                         PAGE_KERNEL);
+        return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
 }
 
 void bpf_map_area_free(void *area)
 {
         kvfree(area);
 }
 
 int bpf_map_precharge_memlock(u32 pages)

--- 129 unchanged lines hidden ---

 /* helper macro to check that unused fields 'union bpf_attr' are zero */
 #define CHECK_ATTR(CMD) \
         memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
                    sizeof(attr->CMD##_LAST_FIELD), 0, \
                    sizeof(*attr) - \
                    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
                    sizeof(attr->CMD##_LAST_FIELD)) != NULL
 
-#define BPF_MAP_CREATE_LAST_FIELD map_flags
+#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
         struct bpf_map *map;
         int err;
 
         err = CHECK_ATTR(BPF_MAP_CREATE);
         if (err)

--- 120 unchanged lines hidden ---

 
         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                 err = bpf_percpu_hash_copy(map, key, value);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                 err = bpf_percpu_array_copy(map, key, value);
         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                 err = bpf_stackmap_copy(map, key, value);
+        } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+                   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+                err = -ENOTSUPP;
         } else {
                 rcu_read_lock();
                 ptr = map->ops->map_lookup_elem(map, key);
                 if (ptr)
                         memcpy(value, ptr, value_size);
                 rcu_read_unlock();
                 err = ptr ? 0 : -ENOENT;
         }

--- 70 unchanged lines hidden ---

         __this_cpu_inc(bpf_prog_active);
         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                 err = bpf_percpu_hash_update(map, key, value, attr->flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                 err = bpf_percpu_array_update(map, key, value, attr->flags);
         } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
                    map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
-                   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
+                   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
+                   map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
                 rcu_read_lock();
                 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
                                                    attr->flags);
                 rcu_read_unlock();
+        } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+                rcu_read_lock();
+                err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
+                                                  attr->flags);
+                rcu_read_unlock();
         } else {
                 rcu_read_lock();
                 err = map->ops->map_update_elem(map, key, value, attr->flags);
                 rcu_read_unlock();
         }
         __this_cpu_dec(bpf_prog_active);
         preempt_enable();

--- 69 unchanged lines hidden ---
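
Both new branches treat the element value as a map file descriptor and resolve it through f.file, the same trick prog-array updates already used for program fds. Below is a hedged sketch of the userspace half, creating an outer array-of-maps via the new inner_map_fd attribute; it assumes uapi headers that already carry the map-in-map additions, and bpf_sys() is a hypothetical thin syscall wrapper, not a library function:

        #include <linux/bpf.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/syscall.h>

        static long bpf_sys(int cmd, union bpf_attr *attr)
        {
                return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
        }

        int create_map_in_map(void)
        {
                union bpf_attr attr;
                int inner_fd, outer_fd;

                /* template inner map; the outer map records its shape */
                memset(&attr, 0, sizeof(attr));
                attr.map_type = BPF_MAP_TYPE_ARRAY;
                attr.key_size = 4;
                attr.value_size = 8;
                attr.max_entries = 16;
                inner_fd = bpf_sys(BPF_MAP_CREATE, &attr);
                if (inner_fd < 0)
                        return inner_fd;

                /* outer map: values are map fds, hence value_size == 4 */
                memset(&attr, 0, sizeof(attr));
                attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
                attr.key_size = 4;
                attr.value_size = 4;
                attr.max_entries = 8;
                attr.inner_map_fd = inner_fd;
                outer_fd = bpf_sys(BPF_MAP_CREATE, &attr);

                close(inner_fd);        /* template fd no longer needed */
                return outer_fd;
        }

Updating an element of the outer map then goes through the fd paths added above: the u32 value passed to BPF_MAP_UPDATE_ELEM is the fd of a map whose shape must match the template.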

         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
                 return -EINVAL;
 
         f = fdget(ufd);
         map = __bpf_map_get(f);
         if (IS_ERR(map))
                 return PTR_ERR(map);
 
-        err = -ENOMEM;
-        key = kmalloc(map->key_size, GFP_USER);
-        if (!key)
-                goto err_put;
+        if (ukey) {
+                err = -ENOMEM;
+                key = kmalloc(map->key_size, GFP_USER);
+                if (!key)
+                        goto err_put;
 
-        err = -EFAULT;
-        if (copy_from_user(key, ukey, map->key_size) != 0)
-                goto free_key;
+                err = -EFAULT;
+                if (copy_from_user(key, ukey, map->key_size) != 0)
+                        goto free_key;
+        } else {
+                key = NULL;
+        }
 
         err = -ENOMEM;
         next_key = kmalloc(map->key_size, GFP_USER);
         if (!next_key)
                 goto free_key;
 
         rcu_read_lock();
         err = map->ops->map_get_next_key(map, key, next_key);

--- 12 unchanged lines hidden ---

         kfree(next_key);
 free_key:
         kfree(key);
 err_put:
         fdput(f);
         return err;
 }
 
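The if (ukey) rework gives BPF_MAP_GET_NEXT_KEY defined semantics for a NULL key: the kernel returns the first key in the map, so iteration no longer has to start from a guessed nonexistent key. A hedged sketch of the resulting idiom, reusing the hypothetical bpf_sys() wrapper and includes from the previous sketch, and assuming key_size <= 64 and a 64-bit build for the pointer casts:

        int count_keys(int map_fd, unsigned int key_size)
        {
                unsigned char key[64], next[64];
                union bpf_attr attr;
                int n = 0;

                memset(&attr, 0, sizeof(attr));
                attr.map_fd = map_fd;
                attr.key = 0;                   /* NULL: ask for the first key */
                attr.next_key = (unsigned long)next;

                while (bpf_sys(BPF_MAP_GET_NEXT_KEY, &attr) == 0) {
                        n++;
                        memcpy(key, next, key_size);
                        attr.key = (unsigned long)key;  /* resume after this key */
                }
                return n;       /* loop ends with errno ENOENT at the last key */
        }
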
-static LIST_HEAD(bpf_prog_types);
+static const struct bpf_verifier_ops * const bpf_prog_types[] = {
+#define BPF_PROG_TYPE(_id, _ops) \
+        [_id] = &_ops,
+#define BPF_MAP_TYPE(_id, _ops)
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+};
 
 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 {
-        struct bpf_prog_type_list *tl;
-
-        list_for_each_entry(tl, &bpf_prog_types, list_node) {
-                if (tl->type == type) {
-                        prog->aux->ops = tl->ops;
-                        prog->type = type;
-                        return 0;
-                }
-        }
-
-        return -EINVAL;
+        if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
+                return -EINVAL;
+
+        prog->aux->ops = bpf_prog_types[type];
+        prog->type = type;
+        return 0;
 }
 
-void bpf_register_prog_type(struct bpf_prog_type_list *tl)
-{
-        list_add(&tl->list_node, &bpf_prog_types);
-}
-
-/* fixup insn->imm field of bpf_call instructions:
- * if (insn->imm == BPF_FUNC_map_lookup_elem)
- *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
- * else if (insn->imm == BPF_FUNC_map_update_elem)
- *      insn->imm = bpf_map_update_elem - __bpf_call_base;
- * else ...
- *
- * this function is called after eBPF program passed verification
- */
-static void fixup_bpf_calls(struct bpf_prog *prog)
-{
-        const struct bpf_func_proto *fn;
-        int i;
-
-        for (i = 0; i < prog->len; i++) {
-                struct bpf_insn *insn = &prog->insnsi[i];
-
-                if (insn->code == (BPF_JMP | BPF_CALL)) {
-                        /* we reach here when program has bpf_call instructions
-                         * and it passed bpf_check(), means that
-                         * ops->get_func_proto must have been supplied, check it
-                         */
-                        BUG_ON(!prog->aux->ops->get_func_proto);
-
-                        if (insn->imm == BPF_FUNC_get_route_realm)
-                                prog->dst_needed = 1;
-                        if (insn->imm == BPF_FUNC_get_prandom_u32)
-                                bpf_user_rnd_init_once();
-                        if (insn->imm == BPF_FUNC_xdp_adjust_head)
-                                prog->xdp_adjust_head = 1;
-                        if (insn->imm == BPF_FUNC_tail_call) {
-                                /* If we tail call into other programs, we
-                                 * cannot make any assumptions since they
-                                 * can be replaced dynamically during runtime
-                                 * in the program array.
-                                 */
-                                prog->cb_access = 1;
-                                prog->xdp_adjust_head = 1;
-
-                                /* mark bpf_tail_call as different opcode
-                                 * to avoid conditional branch in
-                                 * interpreter for every normal call
-                                 * and to prevent accidental JITing by
-                                 * JIT compiler that doesn't support
-                                 * bpf_tail_call yet
-                                 */
-                                insn->imm = 0;
-                                insn->code |= BPF_X;
-                                continue;
-                        }
-
-                        fn = prog->aux->ops->get_func_proto(insn->imm);
-                        /* all functions that have prototype and verifier allowed
-                         * programs to call them, must be real in-kernel functions
-                         */
-                        BUG_ON(!fn->func);
-                        insn->imm = fn->func - __bpf_call_base;
-                }
-        }
-}
-
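For reference, the trick this now-removed pass implements: a helper's absolute address does not fit the 32-bit imm field of a BPF_CALL instruction, so the pass stores the distance from __bpf_call_base instead, and the interpreter adds the base back at call time. A standalone illustration of that relocation arithmetic, with hypothetical stand-in functions (casting function pointers through char * is a common compiler extension, not strictly portable C):

        #include <stdio.h>

        typedef long (*helper_fn)(void);

        static long call_base(void) { return 0; }       /* __bpf_call_base */
        static long helper_b(void)  { return 42; }      /* some bpf helper */

        int main(void)
        {
                /* fixup: encode the helper as a 32-bit offset off the base */
                int imm = (int)((char *)helper_b - (char *)call_base);

                /* interpreter: rebuild the pointer and make the call */
                helper_fn fn = (helper_fn)((char *)call_base + imm);

                printf("%ld\n", fn());  /* 42 */
                return 0;
        }

The rewrite itself is not dropped by this change: the right-hand side simply no longer does it in syscall.c (bpf_prog_load() below goes straight from bpf_check() to bpf_prog_select_runtime()), the pass having moved into the verifier.
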
 /* drop refcnt on maps used by eBPF program and free auxiliary data */
 static void free_used_maps(struct bpf_prog_aux *aux)
 {
         int i;
 
         for (i = 0; i < aux->used_map_cnt; i++)
                 bpf_map_put(aux->used_maps[i]);

--- 237 unchanged lines hidden ---

         if (err < 0)
                 goto free_prog;
 
         /* run eBPF verifier */
         err = bpf_check(&prog, attr);
         if (err < 0)
                 goto free_used_maps;
 
-        /* fixup BPF_CALL->imm field */
-        fixup_bpf_calls(prog);
-
         /* eBPF program is ready to be JITed */
         prog = bpf_prog_select_runtime(prog, &err);
         if (err < 0)
                 goto free_used_maps;
 
         err = bpf_prog_new_fd(prog);
         if (err < 0)
                 /* failed to allocate fd */

--- 109 unchanged lines hidden ---

         default:
                 return -EINVAL;
         }
 
         return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
 
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
+
+static int bpf_prog_test_run(const union bpf_attr *attr,
+                             union bpf_attr __user *uattr)
+{
+        struct bpf_prog *prog;
+        int ret = -ENOTSUPP;
+
+        if (CHECK_ATTR(BPF_PROG_TEST_RUN))
+                return -EINVAL;
+
+        prog = bpf_prog_get(attr->test.prog_fd);
+        if (IS_ERR(prog))
+                return PTR_ERR(prog);
+
+        if (prog->aux->ops->test_run)
+                ret = prog->aux->ops->test_run(prog, attr, uattr);
+
+        bpf_prog_put(prog);
+        return ret;
+}
+
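bpf_prog_test_run() dispatches through a new per-program-type test_run callback, so only program types that implement it (the networking types, initially) can be exercised this way; everything else keeps returning -ENOTSUPP. A hedged userspace sketch of driving the new command, reusing the hypothetical bpf_sys() wrapper from the earlier sketch; the attr.test field names follow the uapi struct added together with this command:

        #include <stdio.h>      /* plus the includes from the earlier sketch */

        int test_run_once(int prog_fd, void *pkt_in, unsigned int len_in)
        {
                unsigned char out[1500];
                union bpf_attr attr;
                long err;

                memset(&attr, 0, sizeof(attr));
                attr.test.prog_fd = prog_fd;
                attr.test.data_in = (unsigned long)pkt_in;
                attr.test.data_size_in = len_in;
                attr.test.data_out = (unsigned long)out;
                attr.test.repeat = 1;

                err = bpf_sys(BPF_PROG_TEST_RUN, &attr);
                if (err)
                        return -1;      /* e.g. ENOTSUPP for other prog types */

                printf("retval=%u, %u bytes out, %u ns/run\n",
                       attr.test.retval, attr.test.data_size_out,
                       attr.test.duration);
                return 0;
        }
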
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
         union bpf_attr attr = {};
         int err;
 
         if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
                 return -EPERM;

--- 50 unchanged lines hidden ---

                 err = bpf_prog_load(&attr);
                 break;
         case BPF_OBJ_PIN:
                 err = bpf_obj_pin(&attr);
                 break;
         case BPF_OBJ_GET:
                 err = bpf_obj_get(&attr);
                 break;
-
 #ifdef CONFIG_CGROUP_BPF
         case BPF_PROG_ATTACH:
                 err = bpf_prog_attach(&attr);
                 break;
         case BPF_PROG_DETACH:
                 err = bpf_prog_detach(&attr);
                 break;
 #endif
-
+        case BPF_PROG_TEST_RUN:
+                err = bpf_prog_test_run(&attr, uattr);
+                break;
         default:
                 err = -EINVAL;
                 break;
         }
 
         return err;
 }