xref: /openbmc/linux/net/bpf/test_run.c (revision d82a6c5ef9dc0aab296936e1aa4ad28fd5162a55)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2017 Facebook
3  */
4 #include <linux/bpf.h>
5 #include <linux/btf.h>
6 #include <linux/btf_ids.h>
7 #include <linux/slab.h>
8 #include <linux/init.h>
9 #include <linux/vmalloc.h>
10 #include <linux/etherdevice.h>
11 #include <linux/filter.h>
12 #include <linux/rcupdate_trace.h>
13 #include <linux/sched/signal.h>
14 #include <net/bpf_sk_storage.h>
15 #include <net/sock.h>
16 #include <net/tcp.h>
17 #include <net/net_namespace.h>
18 #include <linux/error-injection.h>
19 #include <linux/smp.h>
20 #include <linux/sock_diag.h>
21 #include <net/xdp.h>
22 
23 #define CREATE_TRACE_POINTS
24 #include <trace/events/bpf_test_run.h>
25 
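/* Helper state for timing repeated test runs: bpf_test_timer_enter() takes
 * rcu_read_lock() and disables either preemption (NO_PREEMPT) or migration
 * (NO_MIGRATE), bpf_test_timer_continue() accumulates the elapsed time and
 * decides whether another iteration is needed (rescheduling between runs
 * when necessary), and bpf_test_timer_leave() undoes the enter step. The
 * average per-run time is reported through *duration.
 */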
26 struct bpf_test_timer {
27 	enum { NO_PREEMPT, NO_MIGRATE } mode;
28 	u32 i;
29 	u64 time_start, time_spent;
30 };
31 
32 static void bpf_test_timer_enter(struct bpf_test_timer *t)
33 	__acquires(rcu)
34 {
35 	rcu_read_lock();
36 	if (t->mode == NO_PREEMPT)
37 		preempt_disable();
38 	else
39 		migrate_disable();
40 
41 	t->time_start = ktime_get_ns();
42 }
43 
44 static void bpf_test_timer_leave(struct bpf_test_timer *t)
45 	__releases(rcu)
46 {
47 	t->time_start = 0;
48 
49 	if (t->mode == NO_PREEMPT)
50 		preempt_enable();
51 	else
52 		migrate_enable();
53 	rcu_read_unlock();
54 }
55 
56 static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
57 	__must_hold(rcu)
58 {
59 	t->i++;
60 	if (t->i >= repeat) {
61 		/* We're done. */
62 		t->time_spent += ktime_get_ns() - t->time_start;
63 		do_div(t->time_spent, t->i);
64 		*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
65 		*err = 0;
66 		goto reset;
67 	}
68 
69 	if (signal_pending(current)) {
70 		/* During iteration: we've been cancelled, abort. */
71 		*err = -EINTR;
72 		goto reset;
73 	}
74 
75 	if (need_resched()) {
76 		/* During iteration: we need to reschedule between runs. */
77 		t->time_spent += ktime_get_ns() - t->time_start;
78 		bpf_test_timer_leave(t);
79 		cond_resched();
80 		bpf_test_timer_enter(t);
81 	}
82 
83 	/* Do another round. */
84 	return true;
85 
86 reset:
87 	t->i = 0;
88 	return false;
89 }
90 
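/* Core test-run loop: allocate cgroup local storage for every storage type,
 * install a cgroup run context, then invoke the program @repeat times (via
 * bpf_prog_run_xdp() when @xdp is set, bpf_prog_run() otherwise) under the
 * test timer. The last return value and the average run time are reported
 * through @retval and @time.
 */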
91 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
92 			u32 *retval, u32 *time, bool xdp)
93 {
94 	struct bpf_prog_array_item item = {.prog = prog};
95 	struct bpf_run_ctx *old_ctx;
96 	struct bpf_cg_run_ctx run_ctx;
97 	struct bpf_test_timer t = { NO_MIGRATE };
98 	enum bpf_cgroup_storage_type stype;
99 	int ret;
100 
101 	for_each_cgroup_storage_type(stype) {
102 		item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
103 		if (IS_ERR(item.cgroup_storage[stype])) {
104 			item.cgroup_storage[stype] = NULL;
105 			for_each_cgroup_storage_type(stype)
106 				bpf_cgroup_storage_free(item.cgroup_storage[stype]);
107 			return -ENOMEM;
108 		}
109 	}
110 
111 	if (!repeat)
112 		repeat = 1;
113 
114 	bpf_test_timer_enter(&t);
115 	old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
116 	do {
117 		run_ctx.prog_item = &item;
118 		if (xdp)
119 			*retval = bpf_prog_run_xdp(prog, ctx);
120 		else
121 			*retval = bpf_prog_run(prog, ctx);
122 	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
123 	bpf_reset_run_ctx(old_ctx);
124 	bpf_test_timer_leave(&t);
125 
126 	for_each_cgroup_storage_type(stype)
127 		bpf_cgroup_storage_free(item.cgroup_storage[stype]);
128 
129 	return ret;
130 }
131 
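/* Copy the test results back to user space: the (possibly clamped) output
 * data, including any frags described by @sinfo, plus data_size_out, retval
 * and duration in the user's bpf_attr. Returns -ENOSPC if the user buffer
 * was too small to hold the full output.
 */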
132 static int bpf_test_finish(const union bpf_attr *kattr,
133 			   union bpf_attr __user *uattr, const void *data,
134 			   struct skb_shared_info *sinfo, u32 size,
135 			   u32 retval, u32 duration)
136 {
137 	void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
138 	int err = -EFAULT;
139 	u32 copy_size = size;
140 
141 	/* Clamp the copy if the user has provided a size hint; otherwise copy
142 	 * the full buffer to retain the old behaviour.
143 	 */
144 	if (kattr->test.data_size_out &&
145 	    copy_size > kattr->test.data_size_out) {
146 		copy_size = kattr->test.data_size_out;
147 		err = -ENOSPC;
148 	}
149 
150 	if (data_out) {
151 		int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
152 
153 		if (len < 0) {
154 			err = -ENOSPC;
155 			goto out;
156 		}
157 
158 		if (copy_to_user(data_out, data, len))
159 			goto out;
160 
161 		if (sinfo) {
162 			int i, offset = len;
163 			u32 data_len;
164 
165 			for (i = 0; i < sinfo->nr_frags; i++) {
166 				skb_frag_t *frag = &sinfo->frags[i];
167 
168 				if (offset >= copy_size) {
169 					err = -ENOSPC;
170 					break;
171 				}
172 
173 				data_len = min_t(u32, copy_size - offset,
174 						 skb_frag_size(frag));
175 
176 				if (copy_to_user(data_out + offset,
177 						 skb_frag_address(frag),
178 						 data_len))
179 					goto out;
180 
181 				offset += data_len;
182 			}
183 		}
184 	}
185 
186 	if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
187 		goto out;
188 	if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
189 		goto out;
190 	if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
191 		goto out;
192 	if (err != -ENOSPC)
193 		err = 0;
194 out:
195 	trace_bpf_test_finish(&err);
196 	return err;
197 }
198 
199 /* Integer types of various sizes and pointer combinations cover a variety
200  * of architecture-dependent calling conventions. Tests taking seven or more
201  * arguments can be added in the future.
202  */
203 __diag_push();
204 __diag_ignore(GCC, 8, "-Wmissing-prototypes",
205 	      "Global functions as their definitions will be in vmlinux BTF");
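/* The bpf_fentry_test*(), bpf_modify_return_test() and bpf_kfunc_call_test*()
 * functions below are noinline on purpose: they serve as attach targets and
 * kfunc callees for BPF test programs, and are exercised directly by
 * bpf_prog_test_run_tracing() and via the kfunc id sets registered at init
 * time.
 */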
206 int noinline bpf_fentry_test1(int a)
207 {
208 	return a + 1;
209 }
210 EXPORT_SYMBOL_GPL(bpf_fentry_test1);
211 ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
212 
213 int noinline bpf_fentry_test2(int a, u64 b)
214 {
215 	return a + b;
216 }
217 
218 int noinline bpf_fentry_test3(char a, int b, u64 c)
219 {
220 	return a + b + c;
221 }
222 
223 int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
224 {
225 	return (long)a + b + c + d;
226 }
227 
228 int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
229 {
230 	return a + (long)b + c + d + e;
231 }
232 
233 int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
234 {
235 	return a + (long)b + c + d + (long)e + f;
236 }
237 
238 struct bpf_fentry_test_t {
239 	struct bpf_fentry_test_t *a;
240 };
241 
242 int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
243 {
244 	return (long)arg;
245 }
246 
247 int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
248 {
249 	return (long)arg->a;
250 }
251 
252 int noinline bpf_modify_return_test(int a, int *b)
253 {
254 	*b += 1;
255 	return a + *b;
256 }
257 
258 u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
259 {
260 	return a + b + c + d;
261 }
262 
263 int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
264 {
265 	return a + b;
266 }
267 
268 struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
269 {
270 	return sk;
271 }
272 
273 struct prog_test_ref_kfunc {
274 	int a;
275 	int b;
276 	struct prog_test_ref_kfunc *next;
277 };
278 
279 static struct prog_test_ref_kfunc prog_test_struct = {
280 	.a = 42,
281 	.b = 108,
282 	.next = &prog_test_struct,
283 };
284 
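/* Acquire/release kfunc pair used for reference-tracking tests: the acquire
 * variant hands out a pointer to the static prog_test_struct (or NULL, since
 * it is also listed in the ret_null set below), and the release variant is a
 * no-op because no real resource is held.
 */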
285 noinline struct prog_test_ref_kfunc *
286 bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
287 {
288 	/* pseudo-randomly return NULL, based on the parity of jiffies */
289 	if (get_jiffies_64() % 2)
290 		return NULL;
291 	return &prog_test_struct;
292 }
293 
294 noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
295 {
296 }
297 
298 struct prog_test_pass1 {
299 	int x0;
300 	struct {
301 		int x1;
302 		struct {
303 			int x2;
304 			struct {
305 				int x3;
306 			};
307 		};
308 	};
309 };
310 
311 struct prog_test_pass2 {
312 	int len;
313 	short arr1[4];
314 	struct {
315 		char arr2[4];
316 		unsigned long arr3[8];
317 	} x;
318 };
319 
320 struct prog_test_fail1 {
321 	void *p;
322 	int x;
323 };
324 
325 struct prog_test_fail2 {
326 	int x8;
327 	struct prog_test_pass1 x;
328 };
329 
330 struct prog_test_fail3 {
331 	int len;
332 	char arr1[2];
333 	char arr2[];
334 };
335 
336 noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
337 {
338 }
339 
340 noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
341 {
342 }
343 
344 noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
345 {
346 }
347 
348 noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
349 {
350 }
351 
352 noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
353 {
354 }
355 
356 noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
357 {
358 }
359 
360 noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
361 {
362 }
363 
364 noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
365 {
366 }
367 
368 noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
369 {
370 }
371 
372 __diag_pop();
373 
374 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
375 
376 BTF_SET_START(test_sk_check_kfunc_ids)
377 BTF_ID(func, bpf_kfunc_call_test1)
378 BTF_ID(func, bpf_kfunc_call_test2)
379 BTF_ID(func, bpf_kfunc_call_test3)
380 BTF_ID(func, bpf_kfunc_call_test_acquire)
381 BTF_ID(func, bpf_kfunc_call_test_release)
382 BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
383 BTF_ID(func, bpf_kfunc_call_test_pass1)
384 BTF_ID(func, bpf_kfunc_call_test_pass2)
385 BTF_ID(func, bpf_kfunc_call_test_fail1)
386 BTF_ID(func, bpf_kfunc_call_test_fail2)
387 BTF_ID(func, bpf_kfunc_call_test_fail3)
388 BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
389 BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
390 BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
391 BTF_SET_END(test_sk_check_kfunc_ids)
392 
393 BTF_SET_START(test_sk_acquire_kfunc_ids)
394 BTF_ID(func, bpf_kfunc_call_test_acquire)
395 BTF_SET_END(test_sk_acquire_kfunc_ids)
396 
397 BTF_SET_START(test_sk_release_kfunc_ids)
398 BTF_ID(func, bpf_kfunc_call_test_release)
399 BTF_SET_END(test_sk_release_kfunc_ids)
400 
401 BTF_SET_START(test_sk_ret_null_kfunc_ids)
402 BTF_ID(func, bpf_kfunc_call_test_acquire)
403 BTF_SET_END(test_sk_ret_null_kfunc_ids)
404 
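/* Allocate a zeroed buffer of @size plus the requested head- and tailroom and
 * copy @user_size bytes of test data from kattr->test.data_in in at offset
 * @headroom. Input smaller than ETH_HLEN, or larger than what fits in a page
 * alongside the head- and tailroom, is rejected.
 */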
405 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
406 			   u32 size, u32 headroom, u32 tailroom)
407 {
408 	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
409 	void *data;
410 
411 	if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
412 		return ERR_PTR(-EINVAL);
413 
414 	if (user_size > size)
415 		return ERR_PTR(-EMSGSIZE);
416 
417 	data = kzalloc(size + headroom + tailroom, GFP_USER);
418 	if (!data)
419 		return ERR_PTR(-ENOMEM);
420 
421 	if (copy_from_user(data + headroom, data_in, user_size)) {
422 		kfree(data);
423 		return ERR_PTR(-EFAULT);
424 	}
425 
426 	return data;
427 }
428 
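/* Test run for fentry/fexit and fmod_ret programs. The program under test is
 * expected to already be attached to one of the bpf_fentry_test*() /
 * bpf_modify_return_test() targets, so simply calling those functions here
 * triggers it; the observed side effect is packed into the upper 16 bits of
 * retval and the direct return value into the lower 16 bits.
 */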
429 int bpf_prog_test_run_tracing(struct bpf_prog *prog,
430 			      const union bpf_attr *kattr,
431 			      union bpf_attr __user *uattr)
432 {
433 	struct bpf_fentry_test_t arg = {};
434 	u16 side_effect = 0, ret = 0;
435 	int b = 2, err = -EFAULT;
436 	u32 retval = 0;
437 
438 	if (kattr->test.flags || kattr->test.cpu)
439 		return -EINVAL;
440 
441 	switch (prog->expected_attach_type) {
442 	case BPF_TRACE_FENTRY:
443 	case BPF_TRACE_FEXIT:
444 		if (bpf_fentry_test1(1) != 2 ||
445 		    bpf_fentry_test2(2, 3) != 5 ||
446 		    bpf_fentry_test3(4, 5, 6) != 15 ||
447 		    bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
448 		    bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
449 		    bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
450 		    bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
451 		    bpf_fentry_test8(&arg) != 0)
452 			goto out;
453 		break;
454 	case BPF_MODIFY_RETURN:
455 		ret = bpf_modify_return_test(1, &b);
456 		if (b != 2)
457 			side_effect = 1;
458 		break;
459 	default:
460 		goto out;
461 	}
462 
463 	retval = ((u32)side_effect << 16) | ret;
464 	if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
465 		goto out;
466 
467 	err = 0;
468 out:
469 	trace_bpf_test_finish(&err);
470 	return err;
471 }
472 
473 struct bpf_raw_tp_test_run_info {
474 	struct bpf_prog *prog;
475 	void *ctx;
476 	u32 retval;
477 };
478 
479 static void
480 __bpf_prog_test_run_raw_tp(void *data)
481 {
482 	struct bpf_raw_tp_test_run_info *info = data;
483 
484 	rcu_read_lock();
485 	info->retval = bpf_prog_run(info->prog, info->ctx);
486 	rcu_read_unlock();
487 }
488 
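/* Test run for raw tracepoint programs: the program is run exactly once with
 * the user-supplied context, either on the current CPU or, when
 * BPF_F_TEST_RUN_ON_CPU is set, on the requested CPU via
 * smp_call_function_single().
 */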
489 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
490 			     const union bpf_attr *kattr,
491 			     union bpf_attr __user *uattr)
492 {
493 	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
494 	__u32 ctx_size_in = kattr->test.ctx_size_in;
495 	struct bpf_raw_tp_test_run_info info;
496 	int cpu = kattr->test.cpu, err = 0;
497 	int current_cpu;
498 
499 	/* doesn't support data_in/out, ctx_out, duration, or repeat */
500 	if (kattr->test.data_in || kattr->test.data_out ||
501 	    kattr->test.ctx_out || kattr->test.duration ||
502 	    kattr->test.repeat)
503 		return -EINVAL;
504 
505 	if (ctx_size_in < prog->aux->max_ctx_offset ||
506 	    ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
507 		return -EINVAL;
508 
509 	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
510 		return -EINVAL;
511 
512 	if (ctx_size_in) {
513 		info.ctx = memdup_user(ctx_in, ctx_size_in);
514 		if (IS_ERR(info.ctx))
515 			return PTR_ERR(info.ctx);
516 	} else {
517 		info.ctx = NULL;
518 	}
519 
520 	info.prog = prog;
521 
522 	current_cpu = get_cpu();
523 	if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
524 	    cpu == current_cpu) {
525 		__bpf_prog_test_run_raw_tp(&info);
526 	} else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
527 		/* smp_call_function_single() also checks cpu_online()
528 		 * after csd_lock(). However, since cpu is from user
529 		 * space, let's do an extra quick check to filter out
530 		 * an invalid value before smp_call_function_single().
531 		 */
532 		err = -ENXIO;
533 	} else {
534 		err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
535 					       &info, 1);
536 	}
537 	put_cpu();
538 
539 	if (!err &&
540 	    copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
541 		err = -EFAULT;
542 
543 	kfree(info.ctx);
544 	return err;
545 }
546 
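/* Copy the optional user-supplied context into a zeroed buffer of @max_size
 * bytes. A context larger than @max_size is accepted only if the excess bytes
 * are zero (bpf_check_uarg_tail_zero()); returns NULL when neither ctx_in nor
 * ctx_out is set.
 */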
547 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
548 {
549 	void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
550 	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
551 	u32 size = kattr->test.ctx_size_in;
552 	void *data;
553 	int err;
554 
555 	if (!data_in && !data_out)
556 		return NULL;
557 
558 	data = kzalloc(max_size, GFP_USER);
559 	if (!data)
560 		return ERR_PTR(-ENOMEM);
561 
562 	if (data_in) {
563 		err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size);
564 		if (err) {
565 			kfree(data);
566 			return ERR_PTR(err);
567 		}
568 
569 		size = min_t(u32, max_size, size);
570 		if (copy_from_user(data, data_in, size)) {
571 			kfree(data);
572 			return ERR_PTR(-EFAULT);
573 		}
574 	}
575 	return data;
576 }
577 
578 static int bpf_ctx_finish(const union bpf_attr *kattr,
579 			  union bpf_attr __user *uattr, const void *data,
580 			  u32 size)
581 {
582 	void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
583 	int err = -EFAULT;
584 	u32 copy_size = size;
585 
586 	if (!data || !data_out)
587 		return 0;
588 
589 	if (copy_size > kattr->test.ctx_size_out) {
590 		copy_size = kattr->test.ctx_size_out;
591 		err = -ENOSPC;
592 	}
593 
594 	if (copy_to_user(data_out, data, copy_size))
595 		goto out;
596 	if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
597 		goto out;
598 	if (err != -ENOSPC)
599 		err = 0;
600 out:
601 	return err;
602 }
603 
604 /**
605  * range_is_zero - test whether buffer is initialized
606  * @buf: buffer to check
607  * @from: check from this position
608  * @to: check up until (excluding) this position
609  *
610  * This function returns true if every byte of @buf in the range
611  * [@from, @to) is zero, i.e. there is no non-zero byte in that range.
612  */
613 static inline bool range_is_zero(void *buf, size_t from, size_t to)
614 {
615 	return !memchr_inv((u8 *)buf + from, 0, to - from);
616 }
617 
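/* Apply a user-supplied __sk_buff context to the freshly built skb. Only a
 * small whitelist of fields (mark, priority, ingress_ifindex, ifindex, cb,
 * tstamp, wire_len, gso_segs, gso_size, hwtstamp) may be non-zero; everything
 * else must be zeroed, which range_is_zero() enforces field range by field
 * range below.
 */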
618 static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
619 {
620 	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
621 
622 	if (!__skb)
623 		return 0;
624 
625 	/* make sure the fields we don't use are zeroed */
626 	if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
627 		return -EINVAL;
628 
629 	/* mark is allowed */
630 
631 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
632 			   offsetof(struct __sk_buff, priority)))
633 		return -EINVAL;
634 
635 	/* priority is allowed */
636 	/* ingress_ifindex is allowed */
637 	/* ifindex is allowed */
638 
639 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
640 			   offsetof(struct __sk_buff, cb)))
641 		return -EINVAL;
642 
643 	/* cb is allowed */
644 
645 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
646 			   offsetof(struct __sk_buff, tstamp)))
647 		return -EINVAL;
648 
649 	/* tstamp is allowed */
650 	/* wire_len is allowed */
651 	/* gso_segs is allowed */
652 
653 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
654 			   offsetof(struct __sk_buff, gso_size)))
655 		return -EINVAL;
656 
657 	/* gso_size is allowed */
658 
659 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
660 			   offsetof(struct __sk_buff, hwtstamp)))
661 		return -EINVAL;
662 
663 	/* hwtstamp is allowed */
664 
665 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp),
666 			   sizeof(struct __sk_buff)))
667 		return -EINVAL;
668 
669 	skb->mark = __skb->mark;
670 	skb->priority = __skb->priority;
671 	skb->skb_iif = __skb->ingress_ifindex;
672 	skb->tstamp = __skb->tstamp;
673 	memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
674 
675 	if (__skb->wire_len == 0) {
676 		cb->pkt_len = skb->len;
677 	} else {
678 		if (__skb->wire_len < skb->len ||
679 		    __skb->wire_len > GSO_MAX_SIZE)
680 			return -EINVAL;
681 		cb->pkt_len = __skb->wire_len;
682 	}
683 
684 	if (__skb->gso_segs > GSO_MAX_SEGS)
685 		return -EINVAL;
686 	skb_shinfo(skb)->gso_segs = __skb->gso_segs;
687 	skb_shinfo(skb)->gso_size = __skb->gso_size;
688 	skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
689 
690 	return 0;
691 }
692 
693 static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
694 {
695 	struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
696 
697 	if (!__skb)
698 		return;
699 
700 	__skb->mark = skb->mark;
701 	__skb->priority = skb->priority;
702 	__skb->ingress_ifindex = skb->skb_iif;
703 	__skb->ifindex = skb->dev->ifindex;
704 	__skb->tstamp = skb->tstamp;
705 	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
706 	__skb->wire_len = cb->pkt_len;
707 	__skb->gso_segs = skb_shinfo(skb)->gso_segs;
708 	__skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp;
709 }
710 
711 static struct proto bpf_dummy_proto = {
712 	.name   = "bpf_dummy",
713 	.owner  = THIS_MODULE,
714 	.obj_size = sizeof(struct sock),
715 };
716 
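/* Test run for skb-based program types: wrap the user data in a real skb
 * owned by a dummy socket, populate it from the optional __sk_buff context,
 * run the program repeatedly, then convert the (possibly modified) skb back
 * into the __sk_buff context and copy data and results out to user space.
 */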
717 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
718 			  union bpf_attr __user *uattr)
719 {
720 	bool is_l2 = false, is_direct_pkt_access = false;
721 	struct net *net = current->nsproxy->net_ns;
722 	struct net_device *dev = net->loopback_dev;
723 	u32 size = kattr->test.data_size_in;
724 	u32 repeat = kattr->test.repeat;
725 	struct __sk_buff *ctx = NULL;
726 	u32 retval, duration;
727 	int hh_len = ETH_HLEN;
728 	struct sk_buff *skb;
729 	struct sock *sk;
730 	void *data;
731 	int ret;
732 
733 	if (kattr->test.flags || kattr->test.cpu)
734 		return -EINVAL;
735 
736 	data = bpf_test_init(kattr, kattr->test.data_size_in,
737 			     size, NET_SKB_PAD + NET_IP_ALIGN,
738 			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
739 	if (IS_ERR(data))
740 		return PTR_ERR(data);
741 
742 	ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
743 	if (IS_ERR(ctx)) {
744 		kfree(data);
745 		return PTR_ERR(ctx);
746 	}
747 
748 	switch (prog->type) {
749 	case BPF_PROG_TYPE_SCHED_CLS:
750 	case BPF_PROG_TYPE_SCHED_ACT:
751 		is_l2 = true;
752 		fallthrough;
753 	case BPF_PROG_TYPE_LWT_IN:
754 	case BPF_PROG_TYPE_LWT_OUT:
755 	case BPF_PROG_TYPE_LWT_XMIT:
756 		is_direct_pkt_access = true;
757 		break;
758 	default:
759 		break;
760 	}
761 
762 	sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
763 	if (!sk) {
764 		kfree(data);
765 		kfree(ctx);
766 		return -ENOMEM;
767 	}
768 	sock_init_data(NULL, sk);
769 
770 	skb = build_skb(data, 0);
771 	if (!skb) {
772 		kfree(data);
773 		kfree(ctx);
774 		sk_free(sk);
775 		return -ENOMEM;
776 	}
777 	skb->sk = sk;
778 
779 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
780 	__skb_put(skb, size);
781 	if (ctx && ctx->ifindex > 1) {
782 		dev = dev_get_by_index(net, ctx->ifindex);
783 		if (!dev) {
784 			ret = -ENODEV;
785 			goto out;
786 		}
787 	}
788 	skb->protocol = eth_type_trans(skb, dev);
789 	skb_reset_network_header(skb);
790 
791 	switch (skb->protocol) {
792 	case htons(ETH_P_IP):
793 		sk->sk_family = AF_INET;
794 		if (sizeof(struct iphdr) <= skb_headlen(skb)) {
795 			sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
796 			sk->sk_daddr = ip_hdr(skb)->daddr;
797 		}
798 		break;
799 #if IS_ENABLED(CONFIG_IPV6)
800 	case htons(ETH_P_IPV6):
801 		sk->sk_family = AF_INET6;
802 		if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
803 			sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
804 			sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
805 		}
806 		break;
807 #endif
808 	default:
809 		break;
810 	}
811 
812 	if (is_l2)
813 		__skb_push(skb, hh_len);
814 	if (is_direct_pkt_access)
815 		bpf_compute_data_pointers(skb);
816 	ret = convert___skb_to_skb(skb, ctx);
817 	if (ret)
818 		goto out;
819 	ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
820 	if (ret)
821 		goto out;
822 	if (!is_l2) {
823 		if (skb_headroom(skb) < hh_len) {
824 			int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
825 
826 			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
827 				ret = -ENOMEM;
828 				goto out;
829 			}
830 		}
831 		memset(__skb_push(skb, hh_len), 0, hh_len);
832 	}
833 	convert_skb_to___skb(skb, ctx);
834 
835 	size = skb->len;
836 	/* a BPF program can never convert a linear skb into a non-linear one */
837 	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
838 		size = skb_headlen(skb);
839 	ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
840 			      duration);
841 	if (!ret)
842 		ret = bpf_ctx_finish(kattr, uattr, ctx,
843 				     sizeof(struct __sk_buff));
844 out:
845 	if (dev && dev != net->loopback_dev)
846 		dev_put(dev);
847 	kfree_skb(skb);
848 	sk_free(sk);
849 	kfree(ctx);
850 	return ret;
851 }
852 
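/* Apply a user-supplied xdp_md context to the xdp_buff: egress_ifindex must
 * be zero and rx_queue_index is only valid together with ingress_ifindex.
 * When an ingress ifindex is given, the matching RX queue info is looked up
 * and a device reference is held until xdp_convert_buff_to_md() drops it;
 * finally xdp->data is moved up by the requested metadata length.
 */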
853 static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
854 {
855 	unsigned int ingress_ifindex, rx_queue_index;
856 	struct netdev_rx_queue *rxqueue;
857 	struct net_device *device;
858 
859 	if (!xdp_md)
860 		return 0;
861 
862 	if (xdp_md->egress_ifindex != 0)
863 		return -EINVAL;
864 
865 	ingress_ifindex = xdp_md->ingress_ifindex;
866 	rx_queue_index = xdp_md->rx_queue_index;
867 
868 	if (!ingress_ifindex && rx_queue_index)
869 		return -EINVAL;
870 
871 	if (ingress_ifindex) {
872 		device = dev_get_by_index(current->nsproxy->net_ns,
873 					  ingress_ifindex);
874 		if (!device)
875 			return -ENODEV;
876 
877 		if (rx_queue_index >= device->real_num_rx_queues)
878 			goto free_dev;
879 
880 		rxqueue = __netif_get_rx_queue(device, rx_queue_index);
881 
882 		if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
883 			goto free_dev;
884 
885 		xdp->rxq = &rxqueue->xdp_rxq;
886 		/* The device is now tracked in the xdp->rxq for later
887 		 * dev_put()
888 		 */
889 	}
890 
891 	xdp->data = xdp->data_meta + xdp_md->data;
892 	return 0;
893 
894 free_dev:
895 	dev_put(device);
896 	return -EINVAL;
897 }
898 
899 static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
900 {
901 	if (!xdp_md)
902 		return;
903 
904 	xdp_md->data = xdp->data - xdp->data_meta;
905 	xdp_md->data_end = xdp->data_end - xdp->data_meta;
906 
907 	if (xdp_md->ingress_ifindex)
908 		dev_put(xdp->rxq->dev);
909 }
910 
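/* Test run for XDP programs: build an xdp_buff around the copied-in data,
 * spilling anything beyond the linear area into page fragments, optionally
 * apply a user xdp_md context, run the program under the test timer (with
 * bpf_prog_change_xdp() bracketing repeated runs), and copy the resulting
 * packet, retval and duration back to user space.
 */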
911 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
912 			  union bpf_attr __user *uattr)
913 {
914 	u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
915 	u32 size = kattr->test.data_size_in;
916 	u32 headroom = XDP_PACKET_HEADROOM;
917 	u32 retval, duration, max_data_sz;
918 	u32 repeat = kattr->test.repeat;
919 	struct netdev_rx_queue *rxqueue;
920 	struct skb_shared_info *sinfo;
921 	struct xdp_buff xdp = {};
922 	int i, ret = -EINVAL;
923 	struct xdp_md *ctx;
924 	void *data;
925 
926 	if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
927 	    prog->expected_attach_type == BPF_XDP_CPUMAP)
928 		return -EINVAL;
929 
930 	ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
931 	if (IS_ERR(ctx))
932 		return PTR_ERR(ctx);
933 
934 	if (ctx) {
935 		/* There can't be user-provided data before the metadata */
936 		if (ctx->data_meta || ctx->data_end != size ||
937 		    ctx->data > ctx->data_end ||
938 		    unlikely(xdp_metalen_invalid(ctx->data)))
939 			goto free_ctx;
940 		/* Meta data is allocated from the headroom */
941 		headroom -= ctx->data;
942 	}
943 
944 	max_data_sz = 4096 - headroom - tailroom;
945 	size = min_t(u32, size, max_data_sz);
946 
947 	data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
948 	if (IS_ERR(data)) {
949 		ret = PTR_ERR(data);
950 		goto free_ctx;
951 	}
952 
953 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
954 	rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
955 	xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
956 	xdp_prepare_buff(&xdp, data, headroom, size, true);
957 	sinfo = xdp_get_shared_info_from_buff(&xdp);
958 
959 	ret = xdp_convert_md_to_buff(ctx, &xdp);
960 	if (ret)
961 		goto free_data;
962 
963 	if (unlikely(kattr->test.data_size_in > size)) {
964 		void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
965 
966 		while (size < kattr->test.data_size_in) {
967 			struct page *page;
968 			skb_frag_t *frag;
969 			u32 data_len;
970 
971 			if (sinfo->nr_frags == MAX_SKB_FRAGS) {
972 				ret = -ENOMEM;
973 				goto out;
974 			}
975 
976 			page = alloc_page(GFP_KERNEL);
977 			if (!page) {
978 				ret = -ENOMEM;
979 				goto out;
980 			}
981 
982 			frag = &sinfo->frags[sinfo->nr_frags++];
983 			__skb_frag_set_page(frag, page);
984 
985 			data_len = min_t(u32, kattr->test.data_size_in - size,
986 					 PAGE_SIZE);
987 			skb_frag_size_set(frag, data_len);
988 
989 			if (copy_from_user(page_address(page), data_in + size,
990 					   data_len)) {
991 				ret = -EFAULT;
992 				goto out;
993 			}
994 			sinfo->xdp_frags_size += data_len;
995 			size += data_len;
996 		}
997 		xdp_buff_set_frags_flag(&xdp);
998 	}
999 
1000 	if (repeat > 1)
1001 		bpf_prog_change_xdp(NULL, prog);
1002 
1003 	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
1004 	/* We convert the xdp_buff back to an xdp_md before checking the return
1005 	 * code so the reference count of any held netdevice will be decremented
1006 	 * even if the test run failed.
1007 	 */
1008 	xdp_convert_buff_to_md(&xdp, ctx);
1009 	if (ret)
1010 		goto out;
1011 
1012 	size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
1013 	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
1014 			      retval, duration);
1015 	if (!ret)
1016 		ret = bpf_ctx_finish(kattr, uattr, ctx,
1017 				     sizeof(struct xdp_md));
1018 
1019 out:
1020 	if (repeat > 1)
1021 		bpf_prog_change_xdp(prog, NULL);
1022 free_data:
1023 	for (i = 0; i < sinfo->nr_frags; i++)
1024 		__free_page(skb_frag_page(&sinfo->frags[i]));
1025 	kfree(data);
1026 free_ctx:
1027 	kfree(ctx);
1028 	return ret;
1029 }
1030 
1031 static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
1032 {
1033 	/* make sure the fields we don't use are zeroed */
1034 	if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
1035 		return -EINVAL;
1036 
1037 	/* flags is allowed */
1038 
1039 	if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
1040 			   sizeof(struct bpf_flow_keys)))
1041 		return -EINVAL;
1042 
1043 	return 0;
1044 }
1045 
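/* Test run for flow dissector programs: dissect the user-supplied packet
 * repeatedly via bpf_flow_dissect(), honouring the flags from an optional
 * bpf_flow_keys context, and return the resulting flow keys as the output
 * data.
 */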
1046 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
1047 				     const union bpf_attr *kattr,
1048 				     union bpf_attr __user *uattr)
1049 {
1050 	struct bpf_test_timer t = { NO_PREEMPT };
1051 	u32 size = kattr->test.data_size_in;
1052 	struct bpf_flow_dissector ctx = {};
1053 	u32 repeat = kattr->test.repeat;
1054 	struct bpf_flow_keys *user_ctx;
1055 	struct bpf_flow_keys flow_keys;
1056 	const struct ethhdr *eth;
1057 	unsigned int flags = 0;
1058 	u32 retval, duration;
1059 	void *data;
1060 	int ret;
1061 
1062 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
1063 		return -EINVAL;
1064 
1065 	if (kattr->test.flags || kattr->test.cpu)
1066 		return -EINVAL;
1067 
1068 	if (size < ETH_HLEN)
1069 		return -EINVAL;
1070 
1071 	data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0);
1072 	if (IS_ERR(data))
1073 		return PTR_ERR(data);
1074 
1075 	eth = (struct ethhdr *)data;
1076 
1077 	if (!repeat)
1078 		repeat = 1;
1079 
1080 	user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
1081 	if (IS_ERR(user_ctx)) {
1082 		kfree(data);
1083 		return PTR_ERR(user_ctx);
1084 	}
1085 	if (user_ctx) {
1086 		ret = verify_user_bpf_flow_keys(user_ctx);
1087 		if (ret)
1088 			goto out;
1089 		flags = user_ctx->flags;
1090 	}
1091 
1092 	ctx.flow_keys = &flow_keys;
1093 	ctx.data = data;
1094 	ctx.data_end = (__u8 *)data + size;
1095 
1096 	bpf_test_timer_enter(&t);
1097 	do {
1098 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
1099 					  size, flags);
1100 	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
1101 	bpf_test_timer_leave(&t);
1102 
1103 	if (ret < 0)
1104 		goto out;
1105 
1106 	ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
1107 			      sizeof(flow_keys), retval, duration);
1108 	if (!ret)
1109 		ret = bpf_ctx_finish(kattr, uattr, user_ctx,
1110 				     sizeof(struct bpf_flow_keys));
1111 
1112 out:
1113 	kfree(user_ctx);
1114 	kfree(data);
1115 	return ret;
1116 }
1117 
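/* Test run for sk_lookup programs: build a bpf_sk_lookup_kern context from
 * the mandatory user bpf_sk_lookup context, run the program through a
 * one-entry prog array, and report the cookie of any selected socket back
 * through user_ctx->cookie.
 */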
1118 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
1119 				union bpf_attr __user *uattr)
1120 {
1121 	struct bpf_test_timer t = { NO_PREEMPT };
1122 	struct bpf_prog_array *progs = NULL;
1123 	struct bpf_sk_lookup_kern ctx = {};
1124 	u32 repeat = kattr->test.repeat;
1125 	struct bpf_sk_lookup *user_ctx;
1126 	u32 retval, duration;
1127 	int ret = -EINVAL;
1128 
1129 	if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
1130 		return -EINVAL;
1131 
1132 	if (kattr->test.flags || kattr->test.cpu)
1133 		return -EINVAL;
1134 
1135 	if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
1136 	    kattr->test.data_size_out)
1137 		return -EINVAL;
1138 
1139 	if (!repeat)
1140 		repeat = 1;
1141 
1142 	user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
1143 	if (IS_ERR(user_ctx))
1144 		return PTR_ERR(user_ctx);
1145 
1146 	if (!user_ctx)
1147 		return -EINVAL;
1148 
1149 	if (user_ctx->sk)
1150 		goto out;
1151 
1152 	if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
1153 		goto out;
1154 
1155 	if (user_ctx->local_port > U16_MAX) {
1156 		ret = -ERANGE;
1157 		goto out;
1158 	}
1159 
1160 	ctx.family = (u16)user_ctx->family;
1161 	ctx.protocol = (u16)user_ctx->protocol;
1162 	ctx.dport = (u16)user_ctx->local_port;
1163 	ctx.sport = user_ctx->remote_port;
1164 
1165 	switch (ctx.family) {
1166 	case AF_INET:
1167 		ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
1168 		ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
1169 		break;
1170 
1171 #if IS_ENABLED(CONFIG_IPV6)
1172 	case AF_INET6:
1173 		ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
1174 		ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
1175 		break;
1176 #endif
1177 
1178 	default:
1179 		ret = -EAFNOSUPPORT;
1180 		goto out;
1181 	}
1182 
1183 	progs = bpf_prog_array_alloc(1, GFP_KERNEL);
1184 	if (!progs) {
1185 		ret = -ENOMEM;
1186 		goto out;
1187 	}
1188 
1189 	progs->items[0].prog = prog;
1190 
1191 	bpf_test_timer_enter(&t);
1192 	do {
1193 		ctx.selected_sk = NULL;
1194 		retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
1195 	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
1196 	bpf_test_timer_leave(&t);
1197 
1198 	if (ret < 0)
1199 		goto out;
1200 
1201 	user_ctx->cookie = 0;
1202 	if (ctx.selected_sk) {
1203 		if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
1204 			ret = -EOPNOTSUPP;
1205 			goto out;
1206 		}
1207 
1208 		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
1209 	}
1210 
1211 	ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
1212 	if (!ret)
1213 		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
1214 
1215 out:
1216 	bpf_prog_array_free(progs);
1217 	kfree(user_ctx);
1218 	return ret;
1219 }
1220 
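/* Test run for syscall programs: run the program once, pinned to the current
 * CPU and under rcu_read_lock_trace(), with an optional user-supplied context
 * that is copied back afterwards so the program's modifications are visible
 * to user space.
 */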
1221 int bpf_prog_test_run_syscall(struct bpf_prog *prog,
1222 			      const union bpf_attr *kattr,
1223 			      union bpf_attr __user *uattr)
1224 {
1225 	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
1226 	__u32 ctx_size_in = kattr->test.ctx_size_in;
1227 	void *ctx = NULL;
1228 	u32 retval;
1229 	int err = 0;
1230 
1231 	/* doesn't support data_in/out, ctx_out, duration, repeat, or flags */
1232 	if (kattr->test.data_in || kattr->test.data_out ||
1233 	    kattr->test.ctx_out || kattr->test.duration ||
1234 	    kattr->test.repeat || kattr->test.flags)
1235 		return -EINVAL;
1236 
1237 	if (ctx_size_in < prog->aux->max_ctx_offset ||
1238 	    ctx_size_in > U16_MAX)
1239 		return -EINVAL;
1240 
1241 	if (ctx_size_in) {
1242 		ctx = memdup_user(ctx_in, ctx_size_in);
1243 		if (IS_ERR(ctx))
1244 			return PTR_ERR(ctx);
1245 	}
1246 
1247 	rcu_read_lock_trace();
1248 	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
1249 	rcu_read_unlock_trace();
1250 
1251 	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
1252 		err = -EFAULT;
1253 		goto out;
1254 	}
1255 	if (ctx_size_in)
1256 		if (copy_to_user(ctx_in, ctx, ctx_size_in))
1257 			err = -EFAULT;
1258 out:
1259 	kfree(ctx);
1260 	return err;
1261 }
1262 
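/* Register the test kfunc id sets defined above for BPF_PROG_TYPE_SCHED_CLS,
 * making the bpf_kfunc_call_test*() helpers callable from scheduler-classifier
 * test programs. Registration happens from a late_initcall.
 */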
1263 static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
1264 	.owner        = THIS_MODULE,
1265 	.check_set    = &test_sk_check_kfunc_ids,
1266 	.acquire_set  = &test_sk_acquire_kfunc_ids,
1267 	.release_set  = &test_sk_release_kfunc_ids,
1268 	.ret_null_set = &test_sk_ret_null_kfunc_ids,
1269 };
1270 
1271 static int __init bpf_prog_test_run_init(void)
1272 {
1273 	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
1274 }
1275 late_initcall(bpf_prog_test_run_init);
1276