/* xref: /openbmc/linux/kernel/bpf/syscall.c (revision e1f7c9ee) */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
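
/* A registration sketch, modeled on kernel/bpf/arraymap.c of this era
 * (array_ops and register_array_map() belong to that file; treat the
 * details as illustrative). Each map implementation supplies its ops
 * table once at boot via late_initcall():
 *
 *	static struct bpf_map_type_list array_type __read_mostly = {
 *		.ops = &array_ops,
 *		.type = BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_array_map(void)
 *	{
 *		bpf_register_map_type(&array_type);
 *		return 0;
 *	}
 *	late_initcall(register_array_map);
 */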

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	/* implementation dependent freeing */
	map->ops->map_free(map);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (the underlying map implementation's ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	bpf_map_put(map);
	return 0;
}

static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
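
/* For instance, CHECK_ATTR(BPF_MAP_CREATE) below expands (via
 * BPF_MAP_CREATE_LAST_FIELD == max_entries) to a memchr_inv() over every
 * byte between the end of attr->max_entries and the end of 'union
 * bpf_attr', and evaluates to true iff any of those bytes is non-zero.
 */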

#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);

	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);

	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	map->ops->map_free(map);
	return err;
}
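
/* Userspace sketch of BPF_MAP_CREATE (attr layout per this kernel's uapi
 * <linux/bpf.h>; error handling omitted):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */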

/* If an error is returned, the fd is released.
 * On success the caller should complete its fd access with a matching fdput()
 */
struct bpf_map *bpf_map_get(struct fd f)
{
	struct bpf_map *map;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	map = f.file->private_data;

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	/* take the fd reference only after attr validation, so a malformed
	 * attr cannot leak a file reference
	 */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ESRCH;
	rcu_read_lock();
	value = map->ops->map_lookup_elem(map, key);
	if (!value)
		goto err_unlock;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto err_unlock;

	err = 0;

err_unlock:
	rcu_read_unlock();
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	/* as in map_lookup_elem(): validate attr before taking the fd */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF programs that use maps run under rcu_read_lock(), and all map
	 * accessors rely on that, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
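
/* Userspace sketch for the element commands (this revision has no flags
 * field yet; key and value are user pointers packed into __aligned_u64
 * fields, cast as u64_to_ptr() above expects):
 *
 *	__u32 key = 1;
 *	__u64 value = 42;
 *	union bpf_attr attr = {
 *		.map_fd = map_fd,
 *		.key    = (__u64)(unsigned long)&key,
 *		.value  = (__u64)(unsigned long)&value,
 *	};
 *	syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 *	syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 */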

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	/* as in map_lookup_elem(): validate attr before taking the fd */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	/* as in map_lookup_elem(): validate attr before taking the fd */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
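
/* Userspace iteration idiom built on BPF_MAP_GET_NEXT_KEY (a sketch; it
 * seeds the walk with a key assumed absent, which for the hash map
 * implementation of this era yields the first key, and stops when the
 * syscall fails; use() is a hypothetical visitor):
 *
 *	__u32 key = -1, next_key;
 *	union bpf_attr attr = {
 *		.map_fd   = map_fd,
 *		.key      = (__u64)(unsigned long)&key,
 *		.next_key = (__u64)(unsigned long)&next_key,
 *	};
 *	while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr,
 *		       sizeof(attr)) == 0) {
 *		use(next_key);
 *		key = next_key;
 *	}
 */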

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->aux->prog_type = type;
			return 0;
		}
	}
	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}
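
/* A registration sketch mirroring the map type registration above,
 * modeled on net/core/filter.c of this era (sk_filter_ops lives there;
 * treat the names as illustrative):
 *
 *	static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 *		.ops = &sk_filter_ops,
 *		.type = BPF_PROG_TYPE_SOCKET_FILTER,
 *	};
 *
 *	static int __init register_sk_filter_ops(void)
 *	{
 *		bpf_register_prog_type(&sk_filter_type);
 *		return 0;
 *	}
 *	late_initcall(register_sk_filter_ops);
 */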

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program has passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and has passed bpf_check(), which
			 * means ops->get_func_proto must have been
			 * supplied; check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* every function that has a prototype and that the
			 * verifier allowed programs to call must be a real
			 * in-kernel function
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

/* drop refcnt on maps used by the eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		free_used_maps(prog->aux);
		bpf_prog_free(prog);
	}
}

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

static struct bpf_prog *get_prog(struct fd f)
{
	struct bpf_prog *prog;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	prog = f.file->private_data;

	return prog;
}

/* called by sockets/tracing/seccomp before attaching a program to an event;
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = get_prog(f);

	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	fdput(f);
	return prog;
}

/* last field in 'union bpf_attr' used by this command */
#define	BPF_PROG_LOAD_LAST_FIELD log_buf

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = false;

	atomic_set(&prog->aux->refcnt, 1);
	prog->aux->is_gpl_compatible = is_gpl;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(prog, attr);

	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	bpf_prog_select_runtime(prog);

	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);

	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_free(prog);
	return err;
}
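
/* Userspace sketch: loading a trivial "return 0" socket filter. The two
 * instruction encodings are written out by hand, since the
 * BPF_MOV64_IMM()/BPF_EXIT_INSN() helpers of this era live in samples/bpf
 * rather than uapi; error handling and the optional log_buf are omitted:
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K },   // r0 = 0
 *		{ .code = BPF_JMP | BPF_EXIT },            // return r0
 *	};
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = 2,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */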

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	/* the syscall is temporarily limited to root. The restriction will be
	 * lifted once the security audit is clean. Note that eBPF+tracing must
	 * keep this restriction, since it may pass kernel data to user space
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end  = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space; may be less than sizeof(union bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
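
/* A thin userspace wrapper, in the spirit of samples/bpf of this era,
 * keeps call sites readable (sketch):
 *
 *	static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */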