xref: /openbmc/linux/net/core/filter.c (revision f9900dd0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Linux Socket Filter - Kernel level socket filtering
4  *
5  * Based on the design of the Berkeley Packet Filter. The new
6  * internal format has been designed by PLUMgrid:
7  *
8  *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
9  *
10  * Authors:
11  *
12  *	Jay Schulist <jschlst@samba.org>
13  *	Alexei Starovoitov <ast@plumgrid.com>
14  *	Daniel Borkmann <dborkman@redhat.com>
15  *
16  * Andi Kleen - Fix a few bad bugs and races.
17  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
18  */
19 
20 #include <linux/atomic.h>
21 #include <linux/module.h>
22 #include <linux/types.h>
23 #include <linux/mm.h>
24 #include <linux/fcntl.h>
25 #include <linux/socket.h>
26 #include <linux/sock_diag.h>
27 #include <linux/in.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/if_packet.h>
31 #include <linux/if_arp.h>
32 #include <linux/gfp.h>
33 #include <net/inet_common.h>
34 #include <net/ip.h>
35 #include <net/protocol.h>
36 #include <net/netlink.h>
37 #include <linux/skbuff.h>
38 #include <linux/skmsg.h>
39 #include <net/sock.h>
40 #include <net/flow_dissector.h>
41 #include <linux/errno.h>
42 #include <linux/timer.h>
43 #include <linux/uaccess.h>
44 #include <asm/unaligned.h>
45 #include <linux/filter.h>
46 #include <linux/ratelimit.h>
47 #include <linux/seccomp.h>
48 #include <linux/if_vlan.h>
49 #include <linux/bpf.h>
50 #include <linux/btf.h>
51 #include <net/sch_generic.h>
52 #include <net/cls_cgroup.h>
53 #include <net/dst_metadata.h>
54 #include <net/dst.h>
55 #include <net/sock_reuseport.h>
56 #include <net/busy_poll.h>
57 #include <net/tcp.h>
58 #include <net/xfrm.h>
59 #include <net/udp.h>
60 #include <linux/bpf_trace.h>
61 #include <net/xdp_sock.h>
62 #include <linux/inetdevice.h>
63 #include <net/inet_hashtables.h>
64 #include <net/inet6_hashtables.h>
65 #include <net/ip_fib.h>
66 #include <net/nexthop.h>
67 #include <net/flow.h>
68 #include <net/arp.h>
69 #include <net/ipv6.h>
70 #include <net/net_namespace.h>
71 #include <linux/seg6_local.h>
72 #include <net/seg6.h>
73 #include <net/seg6_local.h>
74 #include <net/lwtunnel.h>
75 #include <net/ipv6_stubs.h>
76 #include <net/bpf_sk_storage.h>
77 #include <net/transp_v6.h>
78 #include <linux/btf_ids.h>
79 #include <net/tls.h>
80 #include <net/xdp.h>
81 
82 static const struct bpf_func_proto *
83 bpf_sk_base_func_proto(enum bpf_func_id func_id);
84 
85 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
86 {
87 	if (in_compat_syscall()) {
88 		struct compat_sock_fprog f32;
89 
90 		if (len != sizeof(f32))
91 			return -EINVAL;
92 		if (copy_from_sockptr(&f32, src, sizeof(f32)))
93 			return -EFAULT;
94 		memset(dst, 0, sizeof(*dst));
95 		dst->len = f32.len;
96 		dst->filter = compat_ptr(f32.filter);
97 	} else {
98 		if (len != sizeof(*dst))
99 			return -EINVAL;
100 		if (copy_from_sockptr(dst, src, sizeof(*dst)))
101 			return -EFAULT;
102 	}
103 
104 	return 0;
105 }
106 EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
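
/* Example (illustrative sketch, not part of this file): a setsockopt()-style
 * caller, along the lines of what the SO_ATTACH_FILTER handling in
 * net/core/sock.c is expected to do; "optval"/"optlen" stand for the
 * user-supplied sockptr_t and option length:
 *
 *	struct sock_fprog fprog;
 *	int err;
 *
 *	err = copy_bpf_fprog_from_user(&fprog, optval, optlen);
 *	if (!err)
 *		err = sk_attach_filter(&fprog, sk);
 */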
107 
108 /**
109  *	sk_filter_trim_cap - run a packet through a socket filter
110  *	@sk: sock associated with &sk_buff
111  *	@skb: buffer to filter
112  *	@cap: limit on how short the eBPF program may trim the packet
113  *
114  * Run the eBPF program and then cut skb->data to the correct size returned
115  * by the program. If pkt_len is 0 we toss the packet. If skb->len is smaller
116  * than pkt_len we keep the whole skb->data. This is the socket level
117  * wrapper to bpf_prog_run. It returns 0 if the packet should
118  * be accepted or -EPERM if the packet should be tossed.
119  *
120  */
121 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
122 {
123 	int err;
124 	struct sk_filter *filter;
125 
126 	/*
127 	 * If the skb was allocated from pfmemalloc reserves, only
128 	 * allow SOCK_MEMALLOC sockets to use it as this socket is
129 	 * helping free memory
130 	 */
131 	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
132 		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
133 		return -ENOMEM;
134 	}
135 	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
136 	if (err)
137 		return err;
138 
139 	err = security_sock_rcv_skb(sk, skb);
140 	if (err)
141 		return err;
142 
143 	rcu_read_lock();
144 	filter = rcu_dereference(sk->sk_filter);
145 	if (filter) {
146 		struct sock *save_sk = skb->sk;
147 		unsigned int pkt_len;
148 
149 		skb->sk = sk;
150 		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
151 		skb->sk = save_sk;
152 		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
153 	}
154 	rcu_read_unlock();
155 
156 	return err;
157 }
158 EXPORT_SYMBOL(sk_filter_trim_cap);
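
/* Example (illustrative, not from this file): callers pick @cap to bound how
 * far a filter may trim the packet. A minimal wrapper could look like the
 * sketch below ("my_sk_filter" is a made-up name); the in-tree sk_filter()
 * helper is expected to behave this way with cap == 1, while e.g. UDP may
 * pass sizeof(struct udphdr) so the transport header cannot be trimmed away:
 *
 *	static inline int my_sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 */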
159 
160 BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
161 {
162 	return skb_get_poff(skb);
163 }
164 
165 BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
166 {
167 	struct nlattr *nla;
168 
169 	if (skb_is_nonlinear(skb))
170 		return 0;
171 
172 	if (skb->len < sizeof(struct nlattr))
173 		return 0;
174 
175 	if (a > skb->len - sizeof(struct nlattr))
176 		return 0;
177 
178 	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
179 	if (nla)
180 		return (void *) nla - (void *) skb->data;
181 
182 	return 0;
183 }
184 
185 BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
186 {
187 	struct nlattr *nla;
188 
189 	if (skb_is_nonlinear(skb))
190 		return 0;
191 
192 	if (skb->len < sizeof(struct nlattr))
193 		return 0;
194 
195 	if (a > skb->len - sizeof(struct nlattr))
196 		return 0;
197 
198 	nla = (struct nlattr *) &skb->data[a];
199 	if (nla->nla_len > skb->len - a)
200 		return 0;
201 
202 	nla = nla_find_nested(nla, x);
203 	if (nla)
204 		return (void *) nla - (void *) skb->data;
205 
206 	return 0;
207 }
208 
209 BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
210 	   data, int, headlen, int, offset)
211 {
212 	u8 tmp, *ptr;
213 	const int len = sizeof(tmp);
214 
215 	if (offset >= 0) {
216 		if (headlen - offset >= len)
217 			return *(u8 *)(data + offset);
218 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
219 			return tmp;
220 	} else {
221 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
222 		if (likely(ptr))
223 			return *(u8 *)ptr;
224 	}
225 
226 	return -EFAULT;
227 }
228 
229 BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
230 	   int, offset)
231 {
232 	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
233 					 offset);
234 }
235 
236 BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
237 	   data, int, headlen, int, offset)
238 {
239 	u16 tmp, *ptr;
240 	const int len = sizeof(tmp);
241 
242 	if (offset >= 0) {
243 		if (headlen - offset >= len)
244 			return get_unaligned_be16(data + offset);
245 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
246 			return be16_to_cpu(tmp);
247 	} else {
248 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
249 		if (likely(ptr))
250 			return get_unaligned_be16(ptr);
251 	}
252 
253 	return -EFAULT;
254 }
255 
256 BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
257 	   int, offset)
258 {
259 	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
260 					  offset);
261 }
262 
263 BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
264 	   data, int, headlen, int, offset)
265 {
266 	u32 tmp, *ptr;
267 	const int len = sizeof(tmp);
268 
269 	if (likely(offset >= 0)) {
270 		if (headlen - offset >= len)
271 			return get_unaligned_be32(data + offset);
272 		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
273 			return be32_to_cpu(tmp);
274 	} else {
275 		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
276 		if (likely(ptr))
277 			return get_unaligned_be32(ptr);
278 	}
279 
280 	return -EFAULT;
281 }
282 
283 BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
284 	   int, offset)
285 {
286 	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
287 					  offset);
288 }
289 
290 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
291 			      struct bpf_insn *insn_buf)
292 {
293 	struct bpf_insn *insn = insn_buf;
294 
295 	switch (skb_field) {
296 	case SKF_AD_MARK:
297 		BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
298 
299 		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
300 				      offsetof(struct sk_buff, mark));
301 		break;
302 
303 	case SKF_AD_PKTTYPE:
304 		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
305 		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
306 #ifdef __BIG_ENDIAN_BITFIELD
307 		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
308 #endif
309 		break;
310 
311 	case SKF_AD_QUEUE:
312 		BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
313 
314 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
315 				      offsetof(struct sk_buff, queue_mapping));
316 		break;
317 
318 	case SKF_AD_VLAN_TAG:
319 		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
320 
321 		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
322 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
323 				      offsetof(struct sk_buff, vlan_tci));
324 		break;
325 	case SKF_AD_VLAN_TAG_PRESENT:
326 		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET);
327 		if (PKT_VLAN_PRESENT_BIT)
328 			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
329 		if (PKT_VLAN_PRESENT_BIT < 7)
330 			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
331 		break;
332 	}
333 
334 	return insn - insn_buf;
335 }
336 
337 static bool convert_bpf_extensions(struct sock_filter *fp,
338 				   struct bpf_insn **insnp)
339 {
340 	struct bpf_insn *insn = *insnp;
341 	u32 cnt;
342 
343 	switch (fp->k) {
344 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
345 		BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
346 
347 		/* A = *(u16 *) (CTX + offsetof(protocol)) */
348 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
349 				      offsetof(struct sk_buff, protocol));
350 		/* A = ntohs(A) [emitting a nop or swap16] */
351 		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
352 		break;
353 
354 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
355 		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
356 		insn += cnt - 1;
357 		break;
358 
359 	case SKF_AD_OFF + SKF_AD_IFINDEX:
360 	case SKF_AD_OFF + SKF_AD_HATYPE:
361 		BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
362 		BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
363 
364 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
365 				      BPF_REG_TMP, BPF_REG_CTX,
366 				      offsetof(struct sk_buff, dev));
367 		/* if (tmp != 0) goto pc + 1 */
368 		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
369 		*insn++ = BPF_EXIT_INSN();
370 		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
371 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
372 					    offsetof(struct net_device, ifindex));
373 		else
374 			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
375 					    offsetof(struct net_device, type));
376 		break;
377 
378 	case SKF_AD_OFF + SKF_AD_MARK:
379 		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
380 		insn += cnt - 1;
381 		break;
382 
383 	case SKF_AD_OFF + SKF_AD_RXHASH:
384 		BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
385 
386 		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
387 				    offsetof(struct sk_buff, hash));
388 		break;
389 
390 	case SKF_AD_OFF + SKF_AD_QUEUE:
391 		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
392 		insn += cnt - 1;
393 		break;
394 
395 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
396 		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
397 					 BPF_REG_A, BPF_REG_CTX, insn);
398 		insn += cnt - 1;
399 		break;
400 
401 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
402 		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
403 					 BPF_REG_A, BPF_REG_CTX, insn);
404 		insn += cnt - 1;
405 		break;
406 
407 	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
408 		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
409 
410 		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
411 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
412 				      offsetof(struct sk_buff, vlan_proto));
413 		/* A = ntohs(A) [emitting a nop or swap16] */
414 		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
415 		break;
416 
417 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
418 	case SKF_AD_OFF + SKF_AD_NLATTR:
419 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
420 	case SKF_AD_OFF + SKF_AD_CPU:
421 	case SKF_AD_OFF + SKF_AD_RANDOM:
422 		/* arg1 = CTX */
423 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
424 		/* arg2 = A */
425 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
426 		/* arg3 = X */
427 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
428 		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
429 		switch (fp->k) {
430 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
431 			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
432 			break;
433 		case SKF_AD_OFF + SKF_AD_NLATTR:
434 			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
435 			break;
436 		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
437 			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
438 			break;
439 		case SKF_AD_OFF + SKF_AD_CPU:
440 			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
441 			break;
442 		case SKF_AD_OFF + SKF_AD_RANDOM:
443 			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
444 			bpf_user_rnd_init_once();
445 			break;
446 		}
447 		break;
448 
449 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
450 		/* A ^= X */
451 		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
452 		break;
453 
454 	default:
455 		/* This is just a dummy call to avoid letting the compiler
456 		 * evict __bpf_call_base() as an optimization. Placed here
457 		 * where no-one bothers.
458 		 */
459 		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
460 		return false;
461 	}
462 
463 	*insnp = insn;
464 	return true;
465 }
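
/* Example (illustrative): the classic ancillary load
 *
 *	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, SKF_AD_OFF + SKF_AD_PROTOCOL)
 *
 * is rewritten by convert_bpf_extensions() above into roughly
 *
 *	A = *(u16 *)(CTX + offsetof(struct sk_buff, protocol));
 *	A = ntohs(A);
 *
 * i.e. a direct field access plus endianness fixup instead of a call into
 * the generic skb load helpers.
 */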
466 
467 static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
468 {
469 	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
470 	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
471 	bool endian = BPF_SIZE(fp->code) == BPF_H ||
472 		      BPF_SIZE(fp->code) == BPF_W;
473 	bool indirect = BPF_MODE(fp->code) == BPF_IND;
474 	const int ip_align = NET_IP_ALIGN;
475 	struct bpf_insn *insn = *insnp;
476 	int offset = fp->k;
477 
478 	if (!indirect &&
479 	    ((unaligned_ok && offset >= 0) ||
480 	     (!unaligned_ok && offset >= 0 &&
481 	      offset + ip_align >= 0 &&
482 	      offset + ip_align % size == 0))) {
483 		bool ldx_off_ok = offset <= S16_MAX;
484 
485 		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
486 		if (offset)
487 			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
488 		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
489 				      size, 2 + endian + (!ldx_off_ok * 2));
490 		if (ldx_off_ok) {
491 			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
492 					      BPF_REG_D, offset);
493 		} else {
494 			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
495 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
496 			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
497 					      BPF_REG_TMP, 0);
498 		}
499 		if (endian)
500 			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
501 		*insn++ = BPF_JMP_A(8);
502 	}
503 
504 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
505 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
506 	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
507 	if (!indirect) {
508 		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
509 	} else {
510 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
511 		if (fp->k)
512 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
513 	}
514 
515 	switch (BPF_SIZE(fp->code)) {
516 	case BPF_B:
517 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
518 		break;
519 	case BPF_H:
520 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
521 		break;
522 	case BPF_W:
523 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
524 		break;
525 	default:
526 		return false;
527 	}
528 
529 	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
530 	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
531 	*insn   = BPF_EXIT_INSN();
532 
533 	*insnp = insn;
534 	return true;
535 }
536 
537 /**
538  *	bpf_convert_filter - convert filter program
539  *	@prog: the user passed filter program
540  *	@len: the length of the user passed filter program
541  *	@new_prog: allocated 'struct bpf_prog' or NULL
542  *	@new_len: pointer to store length of converted program
543  *	@seen_ld_abs: bool whether we've seen ld_abs/ind
544  *
545  * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
546  * style extended BPF (eBPF).
547  * Conversion workflow:
548  *
549  * 1) First pass for calculating the new program length:
550  *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
551  *
552  * 2) 2nd call to remap, itself done in two passes: the 1st pass finds the
553  *    new jump offsets, the 2nd pass does the remapping:
554  *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
555  */
556 static int bpf_convert_filter(struct sock_filter *prog, int len,
557 			      struct bpf_prog *new_prog, int *new_len,
558 			      bool *seen_ld_abs)
559 {
560 	int new_flen = 0, pass = 0, target, i, stack_off;
561 	struct bpf_insn *new_insn, *first_insn = NULL;
562 	struct sock_filter *fp;
563 	int *addrs = NULL;
564 	u8 bpf_src;
565 
566 	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
567 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
568 
569 	if (len <= 0 || len > BPF_MAXINSNS)
570 		return -EINVAL;
571 
572 	if (new_prog) {
573 		first_insn = new_prog->insnsi;
574 		addrs = kcalloc(len, sizeof(*addrs),
575 				GFP_KERNEL | __GFP_NOWARN);
576 		if (!addrs)
577 			return -ENOMEM;
578 	}
579 
580 do_pass:
581 	new_insn = first_insn;
582 	fp = prog;
583 
584 	/* Classic BPF related prologue emission. */
585 	if (new_prog) {
586 		/* Classic BPF expects A and X to be reset first. These need
587 		 * to be guaranteed to be the first two instructions.
588 		 */
589 		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
590 		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
591 
592 		/* All programs must keep CTX in callee saved BPF_REG_CTX.
593 		 * In eBPF case it's done by the compiler, here we need to
594 		 * do this ourself. Initial CTX is present in BPF_REG_ARG1.
595 		 */
596 		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
597 		if (*seen_ld_abs) {
598 			/* For packet access in classic BPF, cache skb->data
599 			 * in callee-saved BPF R8 and skb->len - skb->data_len
600 			 * (headlen) in BPF R9. Since classic BPF is read-only
601 			 * on CTX, we only need to cache it once.
602 			 */
603 			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
604 						  BPF_REG_D, BPF_REG_CTX,
605 						  offsetof(struct sk_buff, data));
606 			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
607 						  offsetof(struct sk_buff, len));
608 			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
609 						  offsetof(struct sk_buff, data_len));
610 			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
611 		}
612 	} else {
613 		new_insn += 3;
614 	}
615 
616 	for (i = 0; i < len; fp++, i++) {
617 		struct bpf_insn tmp_insns[32] = { };
618 		struct bpf_insn *insn = tmp_insns;
619 
620 		if (addrs)
621 			addrs[i] = new_insn - first_insn;
622 
623 		switch (fp->code) {
624 		/* All arithmetic insns and skb loads map as-is. */
625 		case BPF_ALU | BPF_ADD | BPF_X:
626 		case BPF_ALU | BPF_ADD | BPF_K:
627 		case BPF_ALU | BPF_SUB | BPF_X:
628 		case BPF_ALU | BPF_SUB | BPF_K:
629 		case BPF_ALU | BPF_AND | BPF_X:
630 		case BPF_ALU | BPF_AND | BPF_K:
631 		case BPF_ALU | BPF_OR | BPF_X:
632 		case BPF_ALU | BPF_OR | BPF_K:
633 		case BPF_ALU | BPF_LSH | BPF_X:
634 		case BPF_ALU | BPF_LSH | BPF_K:
635 		case BPF_ALU | BPF_RSH | BPF_X:
636 		case BPF_ALU | BPF_RSH | BPF_K:
637 		case BPF_ALU | BPF_XOR | BPF_X:
638 		case BPF_ALU | BPF_XOR | BPF_K:
639 		case BPF_ALU | BPF_MUL | BPF_X:
640 		case BPF_ALU | BPF_MUL | BPF_K:
641 		case BPF_ALU | BPF_DIV | BPF_X:
642 		case BPF_ALU | BPF_DIV | BPF_K:
643 		case BPF_ALU | BPF_MOD | BPF_X:
644 		case BPF_ALU | BPF_MOD | BPF_K:
645 		case BPF_ALU | BPF_NEG:
646 		case BPF_LD | BPF_ABS | BPF_W:
647 		case BPF_LD | BPF_ABS | BPF_H:
648 		case BPF_LD | BPF_ABS | BPF_B:
649 		case BPF_LD | BPF_IND | BPF_W:
650 		case BPF_LD | BPF_IND | BPF_H:
651 		case BPF_LD | BPF_IND | BPF_B:
652 			/* Check for overloaded BPF extension and
653 			 * directly convert it if found, otherwise
654 			 * just move on with mapping.
655 			 */
656 			if (BPF_CLASS(fp->code) == BPF_LD &&
657 			    BPF_MODE(fp->code) == BPF_ABS &&
658 			    convert_bpf_extensions(fp, &insn))
659 				break;
660 			if (BPF_CLASS(fp->code) == BPF_LD &&
661 			    convert_bpf_ld_abs(fp, &insn)) {
662 				*seen_ld_abs = true;
663 				break;
664 			}
665 
666 			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
667 			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
668 				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
669 				/* Error with exception code on div/mod by 0.
670 				 * For cBPF programs, this always meant returning 0.
671 				 */
672 				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
673 				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
674 				*insn++ = BPF_EXIT_INSN();
675 			}
676 
677 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
678 			break;
679 
680 		/* Jump transformation cannot use BPF block macros
681 		 * everywhere as offset calculation and target updates
682 		 * require a bit more work than the rest, i.e. jump
683 		 * opcodes map as-is, but offsets need adjustment.
684 		 */
685 
686 #define BPF_EMIT_JMP							\
687 	do {								\
688 		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
689 		s32 off;						\
690 									\
691 		if (target >= len || target < 0)			\
692 			goto err;					\
693 		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
694 		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
695 		off -= insn - tmp_insns;				\
696 		/* Reject anything not fitting into insn->off. */	\
697 		if (off < off_min || off > off_max)			\
698 			goto err;					\
699 		insn->off = off;					\
700 	} while (0)
701 
702 		case BPF_JMP | BPF_JA:
703 			target = i + fp->k + 1;
704 			insn->code = fp->code;
705 			BPF_EMIT_JMP;
706 			break;
707 
708 		case BPF_JMP | BPF_JEQ | BPF_K:
709 		case BPF_JMP | BPF_JEQ | BPF_X:
710 		case BPF_JMP | BPF_JSET | BPF_K:
711 		case BPF_JMP | BPF_JSET | BPF_X:
712 		case BPF_JMP | BPF_JGT | BPF_K:
713 		case BPF_JMP | BPF_JGT | BPF_X:
714 		case BPF_JMP | BPF_JGE | BPF_K:
715 		case BPF_JMP | BPF_JGE | BPF_X:
716 			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
717 				/* BPF immediates are signed, zero extend
718 				 * immediate into tmp register and use it
719 				 * in compare insn.
720 				 */
721 				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
722 
723 				insn->dst_reg = BPF_REG_A;
724 				insn->src_reg = BPF_REG_TMP;
725 				bpf_src = BPF_X;
726 			} else {
727 				insn->dst_reg = BPF_REG_A;
728 				insn->imm = fp->k;
729 				bpf_src = BPF_SRC(fp->code);
730 				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
731 			}
732 
733 			/* Common case where 'jump_false' is next insn. */
734 			if (fp->jf == 0) {
735 				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
736 				target = i + fp->jt + 1;
737 				BPF_EMIT_JMP;
738 				break;
739 			}
740 
741 			/* Convert some jumps when 'jump_true' is next insn. */
742 			if (fp->jt == 0) {
743 				switch (BPF_OP(fp->code)) {
744 				case BPF_JEQ:
745 					insn->code = BPF_JMP | BPF_JNE | bpf_src;
746 					break;
747 				case BPF_JGT:
748 					insn->code = BPF_JMP | BPF_JLE | bpf_src;
749 					break;
750 				case BPF_JGE:
751 					insn->code = BPF_JMP | BPF_JLT | bpf_src;
752 					break;
753 				default:
754 					goto jmp_rest;
755 				}
756 
757 				target = i + fp->jf + 1;
758 				BPF_EMIT_JMP;
759 				break;
760 			}
761 jmp_rest:
762 			/* Other jumps are mapped into two insns: Jxx and JA. */
763 			target = i + fp->jt + 1;
764 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
765 			BPF_EMIT_JMP;
766 			insn++;
767 
768 			insn->code = BPF_JMP | BPF_JA;
769 			target = i + fp->jf + 1;
770 			BPF_EMIT_JMP;
771 			break;
772 
773 		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
774 		case BPF_LDX | BPF_MSH | BPF_B: {
775 			struct sock_filter tmp = {
776 				.code	= BPF_LD | BPF_ABS | BPF_B,
777 				.k	= fp->k,
778 			};
779 
780 			*seen_ld_abs = true;
781 
782 			/* X = A */
783 			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
784 			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
785 			convert_bpf_ld_abs(&tmp, &insn);
786 			insn++;
787 			/* A &= 0xf */
788 			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
789 			/* A <<= 2 */
790 			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
791 			/* tmp = X */
792 			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
793 			/* X = A */
794 			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
795 			/* A = tmp */
796 			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
797 			break;
798 		}
799 		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
800 		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
801 		 */
802 		case BPF_RET | BPF_A:
803 		case BPF_RET | BPF_K:
804 			if (BPF_RVAL(fp->code) == BPF_K)
805 				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
806 							0, fp->k);
807 			*insn = BPF_EXIT_INSN();
808 			break;
809 
810 		/* Store to stack. */
811 		case BPF_ST:
812 		case BPF_STX:
813 			stack_off = fp->k * 4  + 4;
814 			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
815 					    BPF_ST ? BPF_REG_A : BPF_REG_X,
816 					    -stack_off);
817 			/* check_load_and_stores() verifies that classic BPF can
818 			 * load from stack only after write, so tracking
819 			 * stack_depth for ST|STX insns is enough
820 			 */
821 			if (new_prog && new_prog->aux->stack_depth < stack_off)
822 				new_prog->aux->stack_depth = stack_off;
823 			break;
824 
825 		/* Load from stack. */
826 		case BPF_LD | BPF_MEM:
827 		case BPF_LDX | BPF_MEM:
828 			stack_off = fp->k * 4  + 4;
829 			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
830 					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
831 					    -stack_off);
832 			break;
833 
834 		/* A = K or X = K */
835 		case BPF_LD | BPF_IMM:
836 		case BPF_LDX | BPF_IMM:
837 			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
838 					      BPF_REG_A : BPF_REG_X, fp->k);
839 			break;
840 
841 		/* X = A */
842 		case BPF_MISC | BPF_TAX:
843 			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
844 			break;
845 
846 		/* A = X */
847 		case BPF_MISC | BPF_TXA:
848 			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
849 			break;
850 
851 		/* A = skb->len or X = skb->len */
852 		case BPF_LD | BPF_W | BPF_LEN:
853 		case BPF_LDX | BPF_W | BPF_LEN:
854 			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
855 					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
856 					    offsetof(struct sk_buff, len));
857 			break;
858 
859 		/* Access seccomp_data fields. */
860 		case BPF_LDX | BPF_ABS | BPF_W:
861 			/* A = *(u32 *) (ctx + K) */
862 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
863 			break;
864 
865 		/* Unknown instruction. */
866 		default:
867 			goto err;
868 		}
869 
870 		insn++;
871 		if (new_prog)
872 			memcpy(new_insn, tmp_insns,
873 			       sizeof(*insn) * (insn - tmp_insns));
874 		new_insn += insn - tmp_insns;
875 	}
876 
877 	if (!new_prog) {
878 		/* Only calculating new length. */
879 		*new_len = new_insn - first_insn;
880 		if (*seen_ld_abs)
881 			*new_len += 4; /* Prologue bits. */
882 		return 0;
883 	}
884 
885 	pass++;
886 	if (new_flen != new_insn - first_insn) {
887 		new_flen = new_insn - first_insn;
888 		if (pass > 2)
889 			goto err;
890 		goto do_pass;
891 	}
892 
893 	kfree(addrs);
894 	BUG_ON(*new_len != new_flen);
895 	return 0;
896 err:
897 	kfree(addrs);
898 	return -EINVAL;
899 }
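
/* Example (illustrative): with the mapping above, the classic program
 *
 *	BPF_STMT(BPF_RET | BPF_K, 0xffff)
 *
 * becomes roughly the two eBPF instructions
 *
 *	BPF_MOV32_IMM(BPF_REG_0, 0xffff),
 *	BPF_EXIT_INSN(),
 *
 * while "ret a" needs only the exit, since classic A is mapped onto R0.
 */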
900 
901 /* Security:
902  *
903  * As we don't want to clear the mem[] array for each packet going through
904  * __bpf_prog_run(), we check that a filter loaded by the user never tries
905  * to read a cell that was not previously written, and we check all branches
906  * to be sure a malicious user doesn't try to abuse us.
907  */
908 static int check_load_and_stores(const struct sock_filter *filter, int flen)
909 {
910 	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
911 	int pc, ret = 0;
912 
913 	BUILD_BUG_ON(BPF_MEMWORDS > 16);
914 
915 	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
916 	if (!masks)
917 		return -ENOMEM;
918 
919 	memset(masks, 0xff, flen * sizeof(*masks));
920 
921 	for (pc = 0; pc < flen; pc++) {
922 		memvalid &= masks[pc];
923 
924 		switch (filter[pc].code) {
925 		case BPF_ST:
926 		case BPF_STX:
927 			memvalid |= (1 << filter[pc].k);
928 			break;
929 		case BPF_LD | BPF_MEM:
930 		case BPF_LDX | BPF_MEM:
931 			if (!(memvalid & (1 << filter[pc].k))) {
932 				ret = -EINVAL;
933 				goto error;
934 			}
935 			break;
936 		case BPF_JMP | BPF_JA:
937 			/* A jump must set masks on target */
938 			masks[pc + 1 + filter[pc].k] &= memvalid;
939 			memvalid = ~0;
940 			break;
941 		case BPF_JMP | BPF_JEQ | BPF_K:
942 		case BPF_JMP | BPF_JEQ | BPF_X:
943 		case BPF_JMP | BPF_JGE | BPF_K:
944 		case BPF_JMP | BPF_JGE | BPF_X:
945 		case BPF_JMP | BPF_JGT | BPF_K:
946 		case BPF_JMP | BPF_JGT | BPF_X:
947 		case BPF_JMP | BPF_JSET | BPF_K:
948 		case BPF_JMP | BPF_JSET | BPF_X:
949 			/* A jump must set masks on targets */
950 			masks[pc + 1 + filter[pc].jt] &= memvalid;
951 			masks[pc + 1 + filter[pc].jf] &= memvalid;
952 			memvalid = ~0;
953 			break;
954 		}
955 	}
956 error:
957 	kfree(masks);
958 	return ret;
959 }
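
/* Example (illustrative): a classic filter reading a scratch cell before
 * writing it, such as
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),		A = M[0], never written
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 *
 * is rejected here with -EINVAL, whereas prefixing it with
 *
 *	BPF_STMT(BPF_ST, 0),			M[0] = A
 *
 * makes it pass this check.
 */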
960 
961 static bool chk_code_allowed(u16 code_to_probe)
962 {
963 	static const bool codes[] = {
964 		/* 32 bit ALU operations */
965 		[BPF_ALU | BPF_ADD | BPF_K] = true,
966 		[BPF_ALU | BPF_ADD | BPF_X] = true,
967 		[BPF_ALU | BPF_SUB | BPF_K] = true,
968 		[BPF_ALU | BPF_SUB | BPF_X] = true,
969 		[BPF_ALU | BPF_MUL | BPF_K] = true,
970 		[BPF_ALU | BPF_MUL | BPF_X] = true,
971 		[BPF_ALU | BPF_DIV | BPF_K] = true,
972 		[BPF_ALU | BPF_DIV | BPF_X] = true,
973 		[BPF_ALU | BPF_MOD | BPF_K] = true,
974 		[BPF_ALU | BPF_MOD | BPF_X] = true,
975 		[BPF_ALU | BPF_AND | BPF_K] = true,
976 		[BPF_ALU | BPF_AND | BPF_X] = true,
977 		[BPF_ALU | BPF_OR | BPF_K] = true,
978 		[BPF_ALU | BPF_OR | BPF_X] = true,
979 		[BPF_ALU | BPF_XOR | BPF_K] = true,
980 		[BPF_ALU | BPF_XOR | BPF_X] = true,
981 		[BPF_ALU | BPF_LSH | BPF_K] = true,
982 		[BPF_ALU | BPF_LSH | BPF_X] = true,
983 		[BPF_ALU | BPF_RSH | BPF_K] = true,
984 		[BPF_ALU | BPF_RSH | BPF_X] = true,
985 		[BPF_ALU | BPF_NEG] = true,
986 		/* Load instructions */
987 		[BPF_LD | BPF_W | BPF_ABS] = true,
988 		[BPF_LD | BPF_H | BPF_ABS] = true,
989 		[BPF_LD | BPF_B | BPF_ABS] = true,
990 		[BPF_LD | BPF_W | BPF_LEN] = true,
991 		[BPF_LD | BPF_W | BPF_IND] = true,
992 		[BPF_LD | BPF_H | BPF_IND] = true,
993 		[BPF_LD | BPF_B | BPF_IND] = true,
994 		[BPF_LD | BPF_IMM] = true,
995 		[BPF_LD | BPF_MEM] = true,
996 		[BPF_LDX | BPF_W | BPF_LEN] = true,
997 		[BPF_LDX | BPF_B | BPF_MSH] = true,
998 		[BPF_LDX | BPF_IMM] = true,
999 		[BPF_LDX | BPF_MEM] = true,
1000 		/* Store instructions */
1001 		[BPF_ST] = true,
1002 		[BPF_STX] = true,
1003 		/* Misc instructions */
1004 		[BPF_MISC | BPF_TAX] = true,
1005 		[BPF_MISC | BPF_TXA] = true,
1006 		/* Return instructions */
1007 		[BPF_RET | BPF_K] = true,
1008 		[BPF_RET | BPF_A] = true,
1009 		/* Jump instructions */
1010 		[BPF_JMP | BPF_JA] = true,
1011 		[BPF_JMP | BPF_JEQ | BPF_K] = true,
1012 		[BPF_JMP | BPF_JEQ | BPF_X] = true,
1013 		[BPF_JMP | BPF_JGE | BPF_K] = true,
1014 		[BPF_JMP | BPF_JGE | BPF_X] = true,
1015 		[BPF_JMP | BPF_JGT | BPF_K] = true,
1016 		[BPF_JMP | BPF_JGT | BPF_X] = true,
1017 		[BPF_JMP | BPF_JSET | BPF_K] = true,
1018 		[BPF_JMP | BPF_JSET | BPF_X] = true,
1019 	};
1020 
1021 	if (code_to_probe >= ARRAY_SIZE(codes))
1022 		return false;
1023 
1024 	return codes[code_to_probe];
1025 }
1026 
1027 static bool bpf_check_basics_ok(const struct sock_filter *filter,
1028 				unsigned int flen)
1029 {
1030 	if (filter == NULL)
1031 		return false;
1032 	if (flen == 0 || flen > BPF_MAXINSNS)
1033 		return false;
1034 
1035 	return true;
1036 }
1037 
1038 /**
1039  *	bpf_check_classic - verify socket filter code
1040  *	@filter: filter to verify
1041  *	@flen: length of filter
1042  *
1043  * Check the user's filter code. If we let some ugly
1044  * filter code slip through kaboom! The filter must contain
1045  * no references or jumps that are out of range, no illegal
1046  * instructions, and must end with a RET instruction.
1047  *
1048  * All jumps are forward as they are not signed.
1049  *
1050  * Returns 0 if the rule set is legal or -EINVAL if not.
1051  */
1052 static int bpf_check_classic(const struct sock_filter *filter,
1053 			     unsigned int flen)
1054 {
1055 	bool anc_found;
1056 	int pc;
1057 
1058 	/* Check the filter code now */
1059 	for (pc = 0; pc < flen; pc++) {
1060 		const struct sock_filter *ftest = &filter[pc];
1061 
1062 		/* May we actually operate on this code? */
1063 		if (!chk_code_allowed(ftest->code))
1064 			return -EINVAL;
1065 
1066 		/* Some instructions need special checks */
1067 		switch (ftest->code) {
1068 		case BPF_ALU | BPF_DIV | BPF_K:
1069 		case BPF_ALU | BPF_MOD | BPF_K:
1070 			/* Check for division by zero */
1071 			if (ftest->k == 0)
1072 				return -EINVAL;
1073 			break;
1074 		case BPF_ALU | BPF_LSH | BPF_K:
1075 		case BPF_ALU | BPF_RSH | BPF_K:
1076 			if (ftest->k >= 32)
1077 				return -EINVAL;
1078 			break;
1079 		case BPF_LD | BPF_MEM:
1080 		case BPF_LDX | BPF_MEM:
1081 		case BPF_ST:
1082 		case BPF_STX:
1083 			/* Check for invalid memory addresses */
1084 			if (ftest->k >= BPF_MEMWORDS)
1085 				return -EINVAL;
1086 			break;
1087 		case BPF_JMP | BPF_JA:
1088 			/* Note, a large ftest->k might cause loops.
1089 			 * Compare this with conditional jumps below,
1090 			 * where offsets are limited. --ANK (981016)
1091 			 */
1092 			if (ftest->k >= (unsigned int)(flen - pc - 1))
1093 				return -EINVAL;
1094 			break;
1095 		case BPF_JMP | BPF_JEQ | BPF_K:
1096 		case BPF_JMP | BPF_JEQ | BPF_X:
1097 		case BPF_JMP | BPF_JGE | BPF_K:
1098 		case BPF_JMP | BPF_JGE | BPF_X:
1099 		case BPF_JMP | BPF_JGT | BPF_K:
1100 		case BPF_JMP | BPF_JGT | BPF_X:
1101 		case BPF_JMP | BPF_JSET | BPF_K:
1102 		case BPF_JMP | BPF_JSET | BPF_X:
1103 			/* Both conditionals must be safe */
1104 			if (pc + ftest->jt + 1 >= flen ||
1105 			    pc + ftest->jf + 1 >= flen)
1106 				return -EINVAL;
1107 			break;
1108 		case BPF_LD | BPF_W | BPF_ABS:
1109 		case BPF_LD | BPF_H | BPF_ABS:
1110 		case BPF_LD | BPF_B | BPF_ABS:
1111 			anc_found = false;
1112 			if (bpf_anc_helper(ftest) & BPF_ANC)
1113 				anc_found = true;
1114 			/* Ancillary operation unknown or unsupported */
1115 			if (anc_found == false && ftest->k >= SKF_AD_OFF)
1116 				return -EINVAL;
1117 		}
1118 	}
1119 
1120 	/* Last instruction must be a RET code */
1121 	switch (filter[flen - 1].code) {
1122 	case BPF_RET | BPF_K:
1123 	case BPF_RET | BPF_A:
1124 		return check_load_and_stores(filter, flen);
1125 	}
1126 
1127 	return -EINVAL;
1128 }
1129 
1130 static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1131 				      const struct sock_fprog *fprog)
1132 {
1133 	unsigned int fsize = bpf_classic_proglen(fprog);
1134 	struct sock_fprog_kern *fkprog;
1135 
1136 	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1137 	if (!fp->orig_prog)
1138 		return -ENOMEM;
1139 
1140 	fkprog = fp->orig_prog;
1141 	fkprog->len = fprog->len;
1142 
1143 	fkprog->filter = kmemdup(fp->insns, fsize,
1144 				 GFP_KERNEL | __GFP_NOWARN);
1145 	if (!fkprog->filter) {
1146 		kfree(fp->orig_prog);
1147 		return -ENOMEM;
1148 	}
1149 
1150 	return 0;
1151 }
1152 
1153 static void bpf_release_orig_filter(struct bpf_prog *fp)
1154 {
1155 	struct sock_fprog_kern *fprog = fp->orig_prog;
1156 
1157 	if (fprog) {
1158 		kfree(fprog->filter);
1159 		kfree(fprog);
1160 	}
1161 }
1162 
1163 static void __bpf_prog_release(struct bpf_prog *prog)
1164 {
1165 	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1166 		bpf_prog_put(prog);
1167 	} else {
1168 		bpf_release_orig_filter(prog);
1169 		bpf_prog_free(prog);
1170 	}
1171 }
1172 
1173 static void __sk_filter_release(struct sk_filter *fp)
1174 {
1175 	__bpf_prog_release(fp->prog);
1176 	kfree(fp);
1177 }
1178 
1179 /**
1180  * 	sk_filter_release_rcu - Release a socket filter by rcu_head
1181  *	@rcu: rcu_head that contains the sk_filter to free
1182  */
1183 static void sk_filter_release_rcu(struct rcu_head *rcu)
1184 {
1185 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1186 
1187 	__sk_filter_release(fp);
1188 }
1189 
1190 /**
1191  *	sk_filter_release - release a socket filter
1192  *	@fp: filter to remove
1193  *
1194  *	Remove a filter from a socket and release its resources.
1195  */
1196 static void sk_filter_release(struct sk_filter *fp)
1197 {
1198 	if (refcount_dec_and_test(&fp->refcnt))
1199 		call_rcu(&fp->rcu, sk_filter_release_rcu);
1200 }
1201 
1202 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1203 {
1204 	u32 filter_size = bpf_prog_size(fp->prog->len);
1205 
1206 	atomic_sub(filter_size, &sk->sk_omem_alloc);
1207 	sk_filter_release(fp);
1208 }
1209 
1210 /* try to charge the socket memory if there is space available
1211  * return true on success
1212  */
1213 static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1214 {
1215 	u32 filter_size = bpf_prog_size(fp->prog->len);
1216 
1217 	/* same check as in sock_kmalloc() */
1218 	if (filter_size <= sysctl_optmem_max &&
1219 	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1220 		atomic_add(filter_size, &sk->sk_omem_alloc);
1221 		return true;
1222 	}
1223 	return false;
1224 }
1225 
1226 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1227 {
1228 	if (!refcount_inc_not_zero(&fp->refcnt))
1229 		return false;
1230 
1231 	if (!__sk_filter_charge(sk, fp)) {
1232 		sk_filter_release(fp);
1233 		return false;
1234 	}
1235 	return true;
1236 }
1237 
1238 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1239 {
1240 	struct sock_filter *old_prog;
1241 	struct bpf_prog *old_fp;
1242 	int err, new_len, old_len = fp->len;
1243 	bool seen_ld_abs = false;
1244 
1245 	/* We are free to overwrite insns et al right here as they won't be used
1246 	 * internally anymore after the migration to the eBPF instruction
1247 	 * representation.
1248 	 */
1249 	BUILD_BUG_ON(sizeof(struct sock_filter) !=
1250 		     sizeof(struct bpf_insn));
1251 
1252 	/* Conversion cannot happen on overlapping memory areas,
1253 	 * so we need to keep the user BPF around until the 2nd
1254 	 * pass. At this time, the user BPF is stored in fp->insns.
1255 	 */
1256 	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1257 			   GFP_KERNEL | __GFP_NOWARN);
1258 	if (!old_prog) {
1259 		err = -ENOMEM;
1260 		goto out_err;
1261 	}
1262 
1263 	/* 1st pass: calculate the new program length. */
1264 	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
1265 				 &seen_ld_abs);
1266 	if (err)
1267 		goto out_err_free;
1268 
1269 	/* Expand fp for appending the new filter representation. */
1270 	old_fp = fp;
1271 	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
1272 	if (!fp) {
1273 		/* The old_fp is still around in case we couldn't
1274 		 * allocate new memory, so uncharge on that one.
1275 		 */
1276 		fp = old_fp;
1277 		err = -ENOMEM;
1278 		goto out_err_free;
1279 	}
1280 
1281 	fp->len = new_len;
1282 
1283 	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
1284 	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
1285 				 &seen_ld_abs);
1286 	if (err)
1287 		/* The 2nd bpf_convert_filter() can fail only if it fails
1288 		 * to allocate memory; the remapping itself must succeed.
1289 		 * Note that at this time old_fp has already been released
1290 		 * by krealloc().
1291 		 */
1292 		goto out_err_free;
1293 
1294 	fp = bpf_prog_select_runtime(fp, &err);
1295 	if (err)
1296 		goto out_err_free;
1297 
1298 	kfree(old_prog);
1299 	return fp;
1300 
1301 out_err_free:
1302 	kfree(old_prog);
1303 out_err:
1304 	__bpf_prog_release(fp);
1305 	return ERR_PTR(err);
1306 }
1307 
1308 static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1309 					   bpf_aux_classic_check_t trans)
1310 {
1311 	int err;
1312 
1313 	fp->bpf_func = NULL;
1314 	fp->jited = 0;
1315 
1316 	err = bpf_check_classic(fp->insns, fp->len);
1317 	if (err) {
1318 		__bpf_prog_release(fp);
1319 		return ERR_PTR(err);
1320 	}
1321 
1322 	/* There might be additional checks and transformations
1323 	 * needed on classic filters, e.g. in the case of seccomp.
1324 	 */
1325 	if (trans) {
1326 		err = trans(fp->insns, fp->len);
1327 		if (err) {
1328 			__bpf_prog_release(fp);
1329 			return ERR_PTR(err);
1330 		}
1331 	}
1332 
1333 	/* Probe if we can JIT compile the filter and if so, do
1334 	 * the compilation of the filter.
1335 	 */
1336 	bpf_jit_compile(fp);
1337 
1338 	/* JIT compiler couldn't process this filter, so do the eBPF translation
1339 	 * for the optimized interpreter.
1340 	 */
1341 	if (!fp->jited)
1342 		fp = bpf_migrate_filter(fp);
1343 
1344 	return fp;
1345 }
1346 
1347 /**
1348  *	bpf_prog_create - create an unattached filter
1349  *	@pfp: the unattached filter that is created
1350  *	@fprog: the filter program
1351  *
1352  * Create a filter independent of any socket. We first run some
1353  * sanity checks on it to make sure it does not explode on us later.
1354  * If an error occurs or there is insufficient memory for the filter
1355  * a negative errno code is returned. On success the return is zero.
1356  */
1357 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1358 {
1359 	unsigned int fsize = bpf_classic_proglen(fprog);
1360 	struct bpf_prog *fp;
1361 
1362 	/* Make sure new filter is there and in the right amounts. */
1363 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1364 		return -EINVAL;
1365 
1366 	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1367 	if (!fp)
1368 		return -ENOMEM;
1369 
1370 	memcpy(fp->insns, fprog->filter, fsize);
1371 
1372 	fp->len = fprog->len;
1373 	/* Since unattached filters are not copied back to user
1374 	 * space through sk_get_filter(), we do not need to hold
1375 	 * a copy here, and can spare us the work.
1376 	 */
1377 	fp->orig_prog = NULL;
1378 
1379 	/* bpf_prepare_filter() already takes care of freeing
1380 	 * memory in case something goes wrong.
1381 	 */
1382 	fp = bpf_prepare_filter(fp, NULL);
1383 	if (IS_ERR(fp))
1384 		return PTR_ERR(fp);
1385 
1386 	*pfp = fp;
1387 	return 0;
1388 }
1389 EXPORT_SYMBOL_GPL(bpf_prog_create);
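
/* Example (illustrative sketch): how kernel code might use bpf_prog_create()
 * with a kernel-resident classic program; the filter below is the canonical
 * "accept only IPv4" snippet and assumes ETH_P_IP from <linux/if_ether.h>:
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *		BPF_STMT(BPF_RET | BPF_K, 0),
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &fprog);
 *
 * and later bpf_prog_destroy(prog) to release it again.
 */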
1390 
1391 /**
1392  *	bpf_prog_create_from_user - create an unattached filter from user buffer
1393  *	@pfp: the unattached filter that is created
1394  *	@fprog: the filter program
1395  *	@trans: post-classic verifier transformation handler
1396  *	@save_orig: save classic BPF program
1397  *
1398  * This function effectively does the same as bpf_prog_create(), only
1399  * that it builds up its insns buffer from user space provided buffer.
1400  * It also allows for passing a bpf_aux_classic_check_t handler.
1401  */
1402 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1403 			      bpf_aux_classic_check_t trans, bool save_orig)
1404 {
1405 	unsigned int fsize = bpf_classic_proglen(fprog);
1406 	struct bpf_prog *fp;
1407 	int err;
1408 
1409 	/* Make sure new filter is there and in the right amounts. */
1410 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1411 		return -EINVAL;
1412 
1413 	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1414 	if (!fp)
1415 		return -ENOMEM;
1416 
1417 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1418 		__bpf_prog_free(fp);
1419 		return -EFAULT;
1420 	}
1421 
1422 	fp->len = fprog->len;
1423 	fp->orig_prog = NULL;
1424 
1425 	if (save_orig) {
1426 		err = bpf_prog_store_orig_filter(fp, fprog);
1427 		if (err) {
1428 			__bpf_prog_free(fp);
1429 			return -ENOMEM;
1430 		}
1431 	}
1432 
1433 	/* bpf_prepare_filter() already takes care of freeing
1434 	 * memory in case something goes wrong.
1435 	 */
1436 	fp = bpf_prepare_filter(fp, trans);
1437 	if (IS_ERR(fp))
1438 		return PTR_ERR(fp);
1439 
1440 	*pfp = fp;
1441 	return 0;
1442 }
1443 EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1444 
1445 void bpf_prog_destroy(struct bpf_prog *fp)
1446 {
1447 	__bpf_prog_release(fp);
1448 }
1449 EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1450 
1451 static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1452 {
1453 	struct sk_filter *fp, *old_fp;
1454 
1455 	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1456 	if (!fp)
1457 		return -ENOMEM;
1458 
1459 	fp->prog = prog;
1460 
1461 	if (!__sk_filter_charge(sk, fp)) {
1462 		kfree(fp);
1463 		return -ENOMEM;
1464 	}
1465 	refcount_set(&fp->refcnt, 1);
1466 
1467 	old_fp = rcu_dereference_protected(sk->sk_filter,
1468 					   lockdep_sock_is_held(sk));
1469 	rcu_assign_pointer(sk->sk_filter, fp);
1470 
1471 	if (old_fp)
1472 		sk_filter_uncharge(sk, old_fp);
1473 
1474 	return 0;
1475 }
1476 
1477 static
1478 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1479 {
1480 	unsigned int fsize = bpf_classic_proglen(fprog);
1481 	struct bpf_prog *prog;
1482 	int err;
1483 
1484 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
1485 		return ERR_PTR(-EPERM);
1486 
1487 	/* Make sure new filter is there and in the right amounts. */
1488 	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1489 		return ERR_PTR(-EINVAL);
1490 
1491 	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1492 	if (!prog)
1493 		return ERR_PTR(-ENOMEM);
1494 
1495 	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1496 		__bpf_prog_free(prog);
1497 		return ERR_PTR(-EFAULT);
1498 	}
1499 
1500 	prog->len = fprog->len;
1501 
1502 	err = bpf_prog_store_orig_filter(prog, fprog);
1503 	if (err) {
1504 		__bpf_prog_free(prog);
1505 		return ERR_PTR(-ENOMEM);
1506 	}
1507 
1508 	/* bpf_prepare_filter() already takes care of freeing
1509 	 * memory in case something goes wrong.
1510 	 */
1511 	return bpf_prepare_filter(prog, NULL);
1512 }
1513 
1514 /**
1515  *	sk_attach_filter - attach a socket filter
1516  *	@fprog: the filter program
1517  *	@sk: the socket to use
1518  *
1519  * Attach the user's filter code. We first run some sanity checks on
1520  * it to make sure it does not explode on us later. If an error
1521  * occurs or there is insufficient memory for the filter a negative
1522  * errno code is returned. On success the return is zero.
1523  */
1524 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1525 {
1526 	struct bpf_prog *prog = __get_filter(fprog, sk);
1527 	int err;
1528 
1529 	if (IS_ERR(prog))
1530 		return PTR_ERR(prog);
1531 
1532 	err = __sk_attach_prog(prog, sk);
1533 	if (err < 0) {
1534 		__bpf_prog_release(prog);
1535 		return err;
1536 	}
1537 
1538 	return 0;
1539 }
1540 EXPORT_SYMBOL_GPL(sk_attach_filter);
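
/* Example (illustrative, userspace side): attaching a classic filter via
 * setsockopt(); "sock" is assumed to be an open socket fd and the single
 * instruction accepts every packet, truncated to 0xffff bytes:
 *
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 */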
1541 
1542 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1543 {
1544 	struct bpf_prog *prog = __get_filter(fprog, sk);
1545 	int err;
1546 
1547 	if (IS_ERR(prog))
1548 		return PTR_ERR(prog);
1549 
1550 	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1551 		err = -ENOMEM;
1552 	else
1553 		err = reuseport_attach_prog(sk, prog);
1554 
1555 	if (err)
1556 		__bpf_prog_release(prog);
1557 
1558 	return err;
1559 }
1560 
1561 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1562 {
1563 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
1564 		return ERR_PTR(-EPERM);
1565 
1566 	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1567 }
1568 
1569 int sk_attach_bpf(u32 ufd, struct sock *sk)
1570 {
1571 	struct bpf_prog *prog = __get_bpf(ufd, sk);
1572 	int err;
1573 
1574 	if (IS_ERR(prog))
1575 		return PTR_ERR(prog);
1576 
1577 	err = __sk_attach_prog(prog, sk);
1578 	if (err < 0) {
1579 		bpf_prog_put(prog);
1580 		return err;
1581 	}
1582 
1583 	return 0;
1584 }
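
/* Example (illustrative, userspace side): attaching an eBPF socket filter by
 * fd; "prog_fd" is assumed to come from a prior BPF_PROG_LOAD of a
 * BPF_PROG_TYPE_SOCKET_FILTER program (e.g. via libbpf):
 *
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
 */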
1585 
1586 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1587 {
1588 	struct bpf_prog *prog;
1589 	int err;
1590 
1591 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
1592 		return -EPERM;
1593 
1594 	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1595 	if (PTR_ERR(prog) == -EINVAL)
1596 		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
1597 	if (IS_ERR(prog))
1598 		return PTR_ERR(prog);
1599 
1600 	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1601 		/* Like other non-BPF_PROG_TYPE_SOCKET_FILTER
1602 		 * bpf progs (e.g. sockmap), it relies on the
1603 		 * limits imposed by bpf_prog_load().
1604 		 * Hence, sysctl_optmem_max is not checked.
1605 		 */
1606 		if ((sk->sk_type != SOCK_STREAM &&
1607 		     sk->sk_type != SOCK_DGRAM) ||
1608 		    (sk->sk_protocol != IPPROTO_UDP &&
1609 		     sk->sk_protocol != IPPROTO_TCP) ||
1610 		    (sk->sk_family != AF_INET &&
1611 		     sk->sk_family != AF_INET6)) {
1612 			err = -ENOTSUPP;
1613 			goto err_prog_put;
1614 		}
1615 	} else {
1616 		/* BPF_PROG_TYPE_SOCKET_FILTER */
1617 		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
1618 			err = -ENOMEM;
1619 			goto err_prog_put;
1620 		}
1621 	}
1622 
1623 	err = reuseport_attach_prog(sk, prog);
1624 err_prog_put:
1625 	if (err)
1626 		bpf_prog_put(prog);
1627 
1628 	return err;
1629 }
1630 
1631 void sk_reuseport_prog_free(struct bpf_prog *prog)
1632 {
1633 	if (!prog)
1634 		return;
1635 
1636 	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1637 		bpf_prog_put(prog);
1638 	else
1639 		bpf_prog_destroy(prog);
1640 }
1641 
1642 struct bpf_scratchpad {
1643 	union {
1644 		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1645 		u8     buff[MAX_BPF_STACK];
1646 	};
1647 };
1648 
1649 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1650 
1651 static inline int __bpf_try_make_writable(struct sk_buff *skb,
1652 					  unsigned int write_len)
1653 {
1654 	return skb_ensure_writable(skb, write_len);
1655 }
1656 
1657 static inline int bpf_try_make_writable(struct sk_buff *skb,
1658 					unsigned int write_len)
1659 {
1660 	int err = __bpf_try_make_writable(skb, write_len);
1661 
1662 	bpf_compute_data_pointers(skb);
1663 	return err;
1664 }
1665 
1666 static int bpf_try_make_head_writable(struct sk_buff *skb)
1667 {
1668 	return bpf_try_make_writable(skb, skb_headlen(skb));
1669 }
1670 
1671 static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1672 {
1673 	if (skb_at_tc_ingress(skb))
1674 		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1675 }
1676 
1677 static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1678 {
1679 	if (skb_at_tc_ingress(skb))
1680 		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1681 }
1682 
1683 BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1684 	   const void *, from, u32, len, u64, flags)
1685 {
1686 	void *ptr;
1687 
1688 	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1689 		return -EINVAL;
1690 	if (unlikely(offset > 0xffff))
1691 		return -EFAULT;
1692 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
1693 		return -EFAULT;
1694 
1695 	ptr = skb->data + offset;
1696 	if (flags & BPF_F_RECOMPUTE_CSUM)
1697 		__skb_postpull_rcsum(skb, ptr, len, offset);
1698 
1699 	memcpy(ptr, from, len);
1700 
1701 	if (flags & BPF_F_RECOMPUTE_CSUM)
1702 		__skb_postpush_rcsum(skb, ptr, len, offset);
1703 	if (flags & BPF_F_INVALIDATE_HASH)
1704 		skb_clear_hash(skb);
1705 
1706 	return 0;
1707 }
1708 
1709 static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1710 	.func		= bpf_skb_store_bytes,
1711 	.gpl_only	= false,
1712 	.ret_type	= RET_INTEGER,
1713 	.arg1_type	= ARG_PTR_TO_CTX,
1714 	.arg2_type	= ARG_ANYTHING,
1715 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
1716 	.arg4_type	= ARG_CONST_SIZE,
1717 	.arg5_type	= ARG_ANYTHING,
1718 };
1719 
1720 BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1721 	   void *, to, u32, len)
1722 {
1723 	void *ptr;
1724 
1725 	if (unlikely(offset > 0xffff))
1726 		goto err_clear;
1727 
1728 	ptr = skb_header_pointer(skb, offset, len, to);
1729 	if (unlikely(!ptr))
1730 		goto err_clear;
1731 	if (ptr != to)
1732 		memcpy(to, ptr, len);
1733 
1734 	return 0;
1735 err_clear:
1736 	memset(to, 0, len);
1737 	return -EFAULT;
1738 }
1739 
1740 static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1741 	.func		= bpf_skb_load_bytes,
1742 	.gpl_only	= false,
1743 	.ret_type	= RET_INTEGER,
1744 	.arg1_type	= ARG_PTR_TO_CTX,
1745 	.arg2_type	= ARG_ANYTHING,
1746 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
1747 	.arg4_type	= ARG_CONST_SIZE,
1748 };
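
/* Example (illustrative, BPF program side): pulling the Ethernet header into
 * a local copy, which also works when the bytes are not in the linear area;
 * "skb" is the program's struct __sk_buff context, and bpf_htons()/ETH_P_IP
 * come from the usual BPF program headers:
 *
 *	struct ethhdr eth;
 *
 *	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
 *		return TC_ACT_OK;
 *	if (eth.h_proto == bpf_htons(ETH_P_IP))
 *		...
 */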
1749 
1750 BPF_CALL_4(bpf_flow_dissector_load_bytes,
1751 	   const struct bpf_flow_dissector *, ctx, u32, offset,
1752 	   void *, to, u32, len)
1753 {
1754 	void *ptr;
1755 
1756 	if (unlikely(offset > 0xffff))
1757 		goto err_clear;
1758 
1759 	if (unlikely(!ctx->skb))
1760 		goto err_clear;
1761 
1762 	ptr = skb_header_pointer(ctx->skb, offset, len, to);
1763 	if (unlikely(!ptr))
1764 		goto err_clear;
1765 	if (ptr != to)
1766 		memcpy(to, ptr, len);
1767 
1768 	return 0;
1769 err_clear:
1770 	memset(to, 0, len);
1771 	return -EFAULT;
1772 }
1773 
1774 static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1775 	.func		= bpf_flow_dissector_load_bytes,
1776 	.gpl_only	= false,
1777 	.ret_type	= RET_INTEGER,
1778 	.arg1_type	= ARG_PTR_TO_CTX,
1779 	.arg2_type	= ARG_ANYTHING,
1780 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
1781 	.arg4_type	= ARG_CONST_SIZE,
1782 };
1783 
1784 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1785 	   u32, offset, void *, to, u32, len, u32, start_header)
1786 {
1787 	u8 *end = skb_tail_pointer(skb);
1788 	u8 *start, *ptr;
1789 
1790 	if (unlikely(offset > 0xffff))
1791 		goto err_clear;
1792 
1793 	switch (start_header) {
1794 	case BPF_HDR_START_MAC:
1795 		if (unlikely(!skb_mac_header_was_set(skb)))
1796 			goto err_clear;
1797 		start = skb_mac_header(skb);
1798 		break;
1799 	case BPF_HDR_START_NET:
1800 		start = skb_network_header(skb);
1801 		break;
1802 	default:
1803 		goto err_clear;
1804 	}
1805 
1806 	ptr = start + offset;
1807 
1808 	if (likely(ptr + len <= end)) {
1809 		memcpy(to, ptr, len);
1810 		return 0;
1811 	}
1812 
1813 err_clear:
1814 	memset(to, 0, len);
1815 	return -EFAULT;
1816 }
1817 
1818 static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1819 	.func		= bpf_skb_load_bytes_relative,
1820 	.gpl_only	= false,
1821 	.ret_type	= RET_INTEGER,
1822 	.arg1_type	= ARG_PTR_TO_CTX,
1823 	.arg2_type	= ARG_ANYTHING,
1824 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
1825 	.arg4_type	= ARG_CONST_SIZE,
1826 	.arg5_type	= ARG_ANYTHING,
1827 };
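
/* Example (illustrative, BPF program side): reading the IPv4 header relative
 * to the network header, independent of whether a MAC header is present;
 * "skb" is the program's struct __sk_buff context and the error return
 * convention (a plain 0 here) depends on the program type:
 *
 *	struct iphdr iph;
 *
 *	if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
 *					BPF_HDR_START_NET))
 *		return 0;
 */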
1828 
1829 BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1830 {
1831 	/* The idea is the following: should a needed direct read/write
1832 	 * test fail at runtime, we can pull in more data and retry,
1833 	 * since this implicitly invalidates the previous checks.
1834 	 *
1835 	 * Or, since we know how much we need to make readable/writable,
1836 	 * this can be done once at the beginning of the program for the
1837 	 * direct access case. This overcomes the limitation of only the
1838 	 * current headroom being accessible.
1839 	 */
1840 	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1841 }
1842 
1843 static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1844 	.func		= bpf_skb_pull_data,
1845 	.gpl_only	= false,
1846 	.ret_type	= RET_INTEGER,
1847 	.arg1_type	= ARG_PTR_TO_CTX,
1848 	.arg2_type	= ARG_ANYTHING,
1849 };
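
/* Example (illustrative, BPF program side): the usual retry pattern the
 * comment above describes, for a tc program wanting linear access to the
 * Ethernet + IPv4 headers; ETH_HLEN and TC_ACT_SHOT come from the usual
 * uapi headers and "skb" is the program's struct __sk_buff context:
 *
 *	void *data     = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + ETH_HLEN + sizeof(struct iphdr) > data_end) {
 *		if (bpf_skb_pull_data(skb, ETH_HLEN + sizeof(struct iphdr)))
 *			return TC_ACT_SHOT;
 *		data     = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + ETH_HLEN + sizeof(struct iphdr) > data_end)
 *			return TC_ACT_SHOT;
 *	}
 */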
1850 
1851 BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1852 {
1853 	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1854 }
1855 
1856 static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1857 	.func		= bpf_sk_fullsock,
1858 	.gpl_only	= false,
1859 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
1860 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
1861 };
1862 
1863 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1864 					   unsigned int write_len)
1865 {
1866 	return __bpf_try_make_writable(skb, write_len);
1867 }
1868 
1869 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1870 {
1871 	/* The idea is the following: should a needed direct read/write
1872 	 * test fail at runtime, we can pull in more data and retry,
1873 	 * since this implicitly invalidates the previous checks.
1874 	 *
1875 	 * Or, since we know how much we need to make readable/writable,
1876 	 * this can be done once at the beginning of the program for the
1877 	 * direct access case. This overcomes the limitation of only the
1878 	 * current headroom being accessible.
1879 	 */
1880 	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1881 }
1882 
1883 static const struct bpf_func_proto sk_skb_pull_data_proto = {
1884 	.func		= sk_skb_pull_data,
1885 	.gpl_only	= false,
1886 	.ret_type	= RET_INTEGER,
1887 	.arg1_type	= ARG_PTR_TO_CTX,
1888 	.arg2_type	= ARG_ANYTHING,
1889 };
1890 
1891 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1892 	   u64, from, u64, to, u64, flags)
1893 {
1894 	__sum16 *ptr;
1895 
1896 	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1897 		return -EINVAL;
1898 	if (unlikely(offset > 0xffff || offset & 1))
1899 		return -EFAULT;
1900 	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1901 		return -EFAULT;
1902 
1903 	ptr = (__sum16 *)(skb->data + offset);
1904 	switch (flags & BPF_F_HDR_FIELD_MASK) {
1905 	case 0:
1906 		if (unlikely(from != 0))
1907 			return -EINVAL;
1908 
1909 		csum_replace_by_diff(ptr, to);
1910 		break;
1911 	case 2:
1912 		csum_replace2(ptr, from, to);
1913 		break;
1914 	case 4:
1915 		csum_replace4(ptr, from, to);
1916 		break;
1917 	default:
1918 		return -EINVAL;
1919 	}
1920 
1921 	return 0;
1922 }
1923 
1924 static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1925 	.func		= bpf_l3_csum_replace,
1926 	.gpl_only	= false,
1927 	.ret_type	= RET_INTEGER,
1928 	.arg1_type	= ARG_PTR_TO_CTX,
1929 	.arg2_type	= ARG_ANYTHING,
1930 	.arg3_type	= ARG_ANYTHING,
1931 	.arg4_type	= ARG_ANYTHING,
1932 	.arg5_type	= ARG_ANYTHING,
1933 };
1934 
1935 BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1936 	   u64, from, u64, to, u64, flags)
1937 {
1938 	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1939 	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1940 	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1941 	__sum16 *ptr;
1942 
1943 	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1944 			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1945 		return -EINVAL;
1946 	if (unlikely(offset > 0xffff || offset & 1))
1947 		return -EFAULT;
1948 	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1949 		return -EFAULT;
1950 
1951 	ptr = (__sum16 *)(skb->data + offset);
1952 	if (is_mmzero && !do_mforce && !*ptr)
1953 		return 0;
1954 
1955 	switch (flags & BPF_F_HDR_FIELD_MASK) {
1956 	case 0:
1957 		if (unlikely(from != 0))
1958 			return -EINVAL;
1959 
1960 		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1961 		break;
1962 	case 2:
1963 		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1964 		break;
1965 	case 4:
1966 		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1967 		break;
1968 	default:
1969 		return -EINVAL;
1970 	}
1971 
1972 	if (is_mmzero && !*ptr)
1973 		*ptr = CSUM_MANGLED_0;
1974 	return 0;
1975 }
1976 
1977 static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1978 	.func		= bpf_l4_csum_replace,
1979 	.gpl_only	= false,
1980 	.ret_type	= RET_INTEGER,
1981 	.arg1_type	= ARG_PTR_TO_CTX,
1982 	.arg2_type	= ARG_ANYTHING,
1983 	.arg3_type	= ARG_ANYTHING,
1984 	.arg4_type	= ARG_ANYTHING,
1985 	.arg5_type	= ARG_ANYTHING,
1986 };
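
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Rewriting the IPv4 destination address from a tc program and patching
 * both checksums with the helpers above. Offsets assume an untagged
 * Ethernet + 20-byte IPv4 + TCP packet; the new address is a placeholder.
 */
#if 0
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define IP_DADDR_OFF	(ETH_HLEN + offsetof(struct iphdr, daddr))
#define IP_CSUM_OFF	(ETH_HLEN + offsetof(struct iphdr, check))
#define TCP_CSUM_OFF	(ETH_HLEN + sizeof(struct iphdr) + \
			 offsetof(struct tcphdr, check))

SEC("tc")
int dnat_v4(struct __sk_buff *skb)
{
	__be32 old_daddr, new_daddr = bpf_htonl(0xc0a80101); /* placeholder */

	if (bpf_skb_load_bytes(skb, IP_DADDR_OFF, &old_daddr,
			       sizeof(old_daddr)) < 0)
		return TC_ACT_OK;

	/* The TCP checksum covers the pseudo header, hence BPF_F_PSEUDO_HDR. */
	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_daddr, new_daddr,
			    BPF_F_PSEUDO_HDR | sizeof(new_daddr));
	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_daddr, new_daddr,
			    sizeof(new_daddr));
	bpf_skb_store_bytes(skb, IP_DADDR_OFF, &new_daddr,
			    sizeof(new_daddr), 0);
	return TC_ACT_OK;
}
#endif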
1987 
1988 BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1989 	   __be32 *, to, u32, to_size, __wsum, seed)
1990 {
1991 	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1992 	u32 diff_size = from_size + to_size;
1993 	int i, j = 0;
1994 
1995 	/* This is quite flexible, some examples:
1996 	 *
1997 	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
1998 	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
1999 	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
2000 	 *
2001 	 * Even for diffing, from_size and to_size don't need to be equal.
2002 	 */
2003 	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
2004 		     diff_size > sizeof(sp->diff)))
2005 		return -EINVAL;
2006 
2007 	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
2008 		sp->diff[j] = ~from[i];
2009 	for (i = 0; i <   to_size / sizeof(__be32); i++, j++)
2010 		sp->diff[j] = to[i];
2011 
2012 	return csum_partial(sp->diff, diff_size, seed);
2013 }
2014 
2015 static const struct bpf_func_proto bpf_csum_diff_proto = {
2016 	.func		= bpf_csum_diff,
2017 	.gpl_only	= false,
2018 	.pkt_access	= true,
2019 	.ret_type	= RET_INTEGER,
2020 	.arg1_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
2021 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
2022 	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
2023 	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
2024 	.arg5_type	= ARG_ANYTHING,
2025 };
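
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * The "diffing data" mode described above: compute a checksum delta over
 * an old vs. new IPv6 destination address and feed it to
 * bpf_l4_csum_replace() in its field-size-0 form, which selects the
 * inet_proto_csum_replace_by_diff() path above. Offsets assume untagged
 * Ethernet + IPv6 + TCP; the new address is a placeholder.
 */
#if 0
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define IP6_DADDR_OFF	(ETH_HLEN + offsetof(struct ipv6hdr, daddr))
#define TCP6_CSUM_OFF	(ETH_HLEN + sizeof(struct ipv6hdr) + \
			 offsetof(struct tcphdr, check))

SEC("tc")
int rewrite_v6_daddr(struct __sk_buff *skb)
{
	struct in6_addr old_d;
	struct in6_addr new_d = { .s6_addr = { 0x20, 0x01, 0x0d, 0xb8 } };
	__s64 diff;

	if (bpf_skb_load_bytes(skb, IP6_DADDR_OFF, &old_d, sizeof(old_d)) < 0)
		return TC_ACT_OK;

	diff = bpf_csum_diff((__be32 *)&old_d, sizeof(old_d),
			     (__be32 *)&new_d, sizeof(new_d), 0);
	if (diff < 0)
		return TC_ACT_OK;

	bpf_skb_store_bytes(skb, IP6_DADDR_OFF, &new_d, sizeof(new_d), 0);
	bpf_l4_csum_replace(skb, TCP6_CSUM_OFF, 0, diff, BPF_F_PSEUDO_HDR);
	return TC_ACT_OK;
}
#endif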
2026 
2027 BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2028 {
2029 	/* The interface is to be used in combination with bpf_csum_diff()
2030 	 * for direct packet writes. csum rotation for alignment as well
2031 	 * as emulating csum_sub() can be done from the eBPF program.
2032 	 */
2033 	if (skb->ip_summed == CHECKSUM_COMPLETE)
2034 		return (skb->csum = csum_add(skb->csum, csum));
2035 
2036 	return -ENOTSUPP;
2037 }
2038 
2039 static const struct bpf_func_proto bpf_csum_update_proto = {
2040 	.func		= bpf_csum_update,
2041 	.gpl_only	= false,
2042 	.ret_type	= RET_INTEGER,
2043 	.arg1_type	= ARG_PTR_TO_CTX,
2044 	.arg2_type	= ARG_ANYTHING,
2045 };
2046 
2047 BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2048 {
2049 	/* The interface is to be used in combination with bpf_skb_adjust_room()
2050 	 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2051 	 * is passed as flags, for example.
2052 	 */
2053 	switch (level) {
2054 	case BPF_CSUM_LEVEL_INC:
2055 		__skb_incr_checksum_unnecessary(skb);
2056 		break;
2057 	case BPF_CSUM_LEVEL_DEC:
2058 		__skb_decr_checksum_unnecessary(skb);
2059 		break;
2060 	case BPF_CSUM_LEVEL_RESET:
2061 		__skb_reset_checksum_unnecessary(skb);
2062 		break;
2063 	case BPF_CSUM_LEVEL_QUERY:
2064 		return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2065 		       skb->csum_level : -EACCES;
2066 	default:
2067 		return -EINVAL;
2068 	}
2069 
2070 	return 0;
2071 }
2072 
2073 static const struct bpf_func_proto bpf_csum_level_proto = {
2074 	.func		= bpf_csum_level,
2075 	.gpl_only	= false,
2076 	.ret_type	= RET_INTEGER,
2077 	.arg1_type	= ARG_PTR_TO_CTX,
2078 	.arg2_type	= ARG_ANYTHING,
2079 };
2080 
2081 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2082 {
2083 	return dev_forward_skb_nomtu(dev, skb);
2084 }
2085 
2086 static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2087 				      struct sk_buff *skb)
2088 {
2089 	int ret = ____dev_forward_skb(dev, skb, false);
2090 
2091 	if (likely(!ret)) {
2092 		skb->dev = dev;
2093 		ret = netif_rx(skb);
2094 	}
2095 
2096 	return ret;
2097 }
2098 
2099 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2100 {
2101 	int ret;
2102 
2103 	if (dev_xmit_recursion()) {
2104 		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2105 		kfree_skb(skb);
2106 		return -ENETDOWN;
2107 	}
2108 
2109 	skb->dev = dev;
2110 	skb->tstamp = 0;
2111 
2112 	dev_xmit_recursion_inc();
2113 	ret = dev_queue_xmit(skb);
2114 	dev_xmit_recursion_dec();
2115 
2116 	return ret;
2117 }
2118 
2119 static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2120 				 u32 flags)
2121 {
2122 	unsigned int mlen = skb_network_offset(skb);
2123 
2124 	if (mlen) {
2125 		__skb_pull(skb, mlen);
2126 
2127 		/* At ingress, the mac header has already been pulled once.
2128 		 * At egress, skb_postpull_rcsum() has to be done in case
2129 		 * the skb originated from ingress (i.e. a forwarded skb)
2130 		 * to ensure that rcsum starts at net header.
2131 		 */
2132 		if (!skb_at_tc_ingress(skb))
2133 			skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2134 	}
2135 	skb_pop_mac_header(skb);
2136 	skb_reset_mac_len(skb);
2137 	return flags & BPF_F_INGRESS ?
2138 	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2139 }
2140 
2141 static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2142 				 u32 flags)
2143 {
2144 	/* Verify that a link layer header is carried */
2145 	if (unlikely(skb->mac_header >= skb->network_header)) {
2146 		kfree_skb(skb);
2147 		return -ERANGE;
2148 	}
2149 
2150 	bpf_push_mac_rcsum(skb);
2151 	return flags & BPF_F_INGRESS ?
2152 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2153 }
2154 
2155 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2156 			  u32 flags)
2157 {
2158 	if (dev_is_mac_header_xmit(dev))
2159 		return __bpf_redirect_common(skb, dev, flags);
2160 	else
2161 		return __bpf_redirect_no_mac(skb, dev, flags);
2162 }
2163 
2164 #if IS_ENABLED(CONFIG_IPV6)
2165 static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
2166 			    struct net_device *dev, struct bpf_nh_params *nh)
2167 {
2168 	u32 hh_len = LL_RESERVED_SPACE(dev);
2169 	const struct in6_addr *nexthop;
2170 	struct dst_entry *dst = NULL;
2171 	struct neighbour *neigh;
2172 
2173 	if (dev_xmit_recursion()) {
2174 		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2175 		goto out_drop;
2176 	}
2177 
2178 	skb->dev = dev;
2179 	skb->tstamp = 0;
2180 
2181 	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2182 		skb = skb_expand_head(skb, hh_len);
2183 		if (!skb)
2184 			return -ENOMEM;
2185 	}
2186 
2187 	rcu_read_lock_bh();
2188 	if (!nh) {
2189 		dst = skb_dst(skb);
2190 		nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2191 				      &ipv6_hdr(skb)->daddr);
2192 	} else {
2193 		nexthop = &nh->ipv6_nh;
2194 	}
2195 	neigh = ip_neigh_gw6(dev, nexthop);
2196 	if (likely(!IS_ERR(neigh))) {
2197 		int ret;
2198 
2199 		sock_confirm_neigh(skb, neigh);
2200 		dev_xmit_recursion_inc();
2201 		ret = neigh_output(neigh, skb, false);
2202 		dev_xmit_recursion_dec();
2203 		rcu_read_unlock_bh();
2204 		return ret;
2205 	}
2206 	rcu_read_unlock_bh();
2207 	if (dst)
2208 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2209 out_drop:
2210 	kfree_skb(skb);
2211 	return -ENETDOWN;
2212 }
2213 
2214 static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2215 				   struct bpf_nh_params *nh)
2216 {
2217 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2218 	struct net *net = dev_net(dev);
2219 	int err, ret = NET_XMIT_DROP;
2220 
2221 	if (!nh) {
2222 		struct dst_entry *dst;
2223 		struct flowi6 fl6 = {
2224 			.flowi6_flags = FLOWI_FLAG_ANYSRC,
2225 			.flowi6_mark  = skb->mark,
2226 			.flowlabel    = ip6_flowinfo(ip6h),
2227 			.flowi6_oif   = dev->ifindex,
2228 			.flowi6_proto = ip6h->nexthdr,
2229 			.daddr	      = ip6h->daddr,
2230 			.saddr	      = ip6h->saddr,
2231 		};
2232 
2233 		dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2234 		if (IS_ERR(dst))
2235 			goto out_drop;
2236 
2237 		skb_dst_set(skb, dst);
2238 	} else if (nh->nh_family != AF_INET6) {
2239 		goto out_drop;
2240 	}
2241 
2242 	err = bpf_out_neigh_v6(net, skb, dev, nh);
2243 	if (unlikely(net_xmit_eval(err)))
2244 		dev->stats.tx_errors++;
2245 	else
2246 		ret = NET_XMIT_SUCCESS;
2247 	goto out_xmit;
2248 out_drop:
2249 	dev->stats.tx_errors++;
2250 	kfree_skb(skb);
2251 out_xmit:
2252 	return ret;
2253 }
2254 #else
2255 static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2256 				   struct bpf_nh_params *nh)
2257 {
2258 	kfree_skb(skb);
2259 	return NET_XMIT_DROP;
2260 }
2261 #endif /* CONFIG_IPV6 */
2262 
2263 #if IS_ENABLED(CONFIG_INET)
2264 static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
2265 			    struct net_device *dev, struct bpf_nh_params *nh)
2266 {
2267 	u32 hh_len = LL_RESERVED_SPACE(dev);
2268 	struct neighbour *neigh;
2269 	bool is_v6gw = false;
2270 
2271 	if (dev_xmit_recursion()) {
2272 		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2273 		goto out_drop;
2274 	}
2275 
2276 	skb->dev = dev;
2277 	skb->tstamp = 0;
2278 
2279 	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2280 		skb = skb_expand_head(skb, hh_len);
2281 		if (!skb)
2282 			return -ENOMEM;
2283 	}
2284 
2285 	rcu_read_lock_bh();
2286 	if (!nh) {
2287 		struct dst_entry *dst = skb_dst(skb);
2288 		struct rtable *rt = container_of(dst, struct rtable, dst);
2289 
2290 		neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2291 	} else if (nh->nh_family == AF_INET6) {
2292 		neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
2293 		is_v6gw = true;
2294 	} else if (nh->nh_family == AF_INET) {
2295 		neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
2296 	} else {
2297 		rcu_read_unlock_bh();
2298 		goto out_drop;
2299 	}
2300 
2301 	if (likely(!IS_ERR(neigh))) {
2302 		int ret;
2303 
2304 		sock_confirm_neigh(skb, neigh);
2305 		dev_xmit_recursion_inc();
2306 		ret = neigh_output(neigh, skb, is_v6gw);
2307 		dev_xmit_recursion_dec();
2308 		rcu_read_unlock_bh();
2309 		return ret;
2310 	}
2311 	rcu_read_unlock_bh();
2312 out_drop:
2313 	kfree_skb(skb);
2314 	return -ENETDOWN;
2315 }
2316 
2317 static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2318 				   struct bpf_nh_params *nh)
2319 {
2320 	const struct iphdr *ip4h = ip_hdr(skb);
2321 	struct net *net = dev_net(dev);
2322 	int err, ret = NET_XMIT_DROP;
2323 
2324 	if (!nh) {
2325 		struct flowi4 fl4 = {
2326 			.flowi4_flags = FLOWI_FLAG_ANYSRC,
2327 			.flowi4_mark  = skb->mark,
2328 			.flowi4_tos   = RT_TOS(ip4h->tos),
2329 			.flowi4_oif   = dev->ifindex,
2330 			.flowi4_proto = ip4h->protocol,
2331 			.daddr	      = ip4h->daddr,
2332 			.saddr	      = ip4h->saddr,
2333 		};
2334 		struct rtable *rt;
2335 
2336 		rt = ip_route_output_flow(net, &fl4, NULL);
2337 		if (IS_ERR(rt))
2338 			goto out_drop;
2339 		if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2340 			ip_rt_put(rt);
2341 			goto out_drop;
2342 		}
2343 
2344 		skb_dst_set(skb, &rt->dst);
2345 	}
2346 
2347 	err = bpf_out_neigh_v4(net, skb, dev, nh);
2348 	if (unlikely(net_xmit_eval(err)))
2349 		dev->stats.tx_errors++;
2350 	else
2351 		ret = NET_XMIT_SUCCESS;
2352 	goto out_xmit;
2353 out_drop:
2354 	dev->stats.tx_errors++;
2355 	kfree_skb(skb);
2356 out_xmit:
2357 	return ret;
2358 }
2359 #else
2360 static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2361 				   struct bpf_nh_params *nh)
2362 {
2363 	kfree_skb(skb);
2364 	return NET_XMIT_DROP;
2365 }
2366 #endif /* CONFIG_INET */
2367 
2368 static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
2369 				struct bpf_nh_params *nh)
2370 {
2371 	struct ethhdr *ethh = eth_hdr(skb);
2372 
2373 	if (unlikely(skb->mac_header >= skb->network_header))
2374 		goto out;
2375 	bpf_push_mac_rcsum(skb);
2376 	if (is_multicast_ether_addr(ethh->h_dest))
2377 		goto out;
2378 
2379 	skb_pull(skb, sizeof(*ethh));
2380 	skb_unset_mac_header(skb);
2381 	skb_reset_network_header(skb);
2382 
2383 	if (skb->protocol == htons(ETH_P_IP))
2384 		return __bpf_redirect_neigh_v4(skb, dev, nh);
2385 	else if (skb->protocol == htons(ETH_P_IPV6))
2386 		return __bpf_redirect_neigh_v6(skb, dev, nh);
2387 out:
2388 	kfree_skb(skb);
2389 	return -ENOTSUPP;
2390 }
2391 
2392 /* Internal, non-exposed redirect flags. */
2393 enum {
2394 	BPF_F_NEIGH	= (1ULL << 1),
2395 	BPF_F_PEER	= (1ULL << 2),
2396 	BPF_F_NEXTHOP	= (1ULL << 3),
2397 #define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
2398 };
2399 
2400 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2401 {
2402 	struct net_device *dev;
2403 	struct sk_buff *clone;
2404 	int ret;
2405 
2406 	if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2407 		return -EINVAL;
2408 
2409 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2410 	if (unlikely(!dev))
2411 		return -EINVAL;
2412 
2413 	clone = skb_clone(skb, GFP_ATOMIC);
2414 	if (unlikely(!clone))
2415 		return -ENOMEM;
2416 
2417 	/* For direct write, we need to keep the invariant that the skbs
2418 	 * we're dealing with are uncloned. Should uncloning fail
2419 	 * here, we need to free the just generated clone to unclone once
2420 	 * again.
2421 	 */
2422 	ret = bpf_try_make_head_writable(skb);
2423 	if (unlikely(ret)) {
2424 		kfree_skb(clone);
2425 		return -ENOMEM;
2426 	}
2427 
2428 	return __bpf_redirect(clone, dev, flags);
2429 }
2430 
2431 static const struct bpf_func_proto bpf_clone_redirect_proto = {
2432 	.func           = bpf_clone_redirect,
2433 	.gpl_only       = false,
2434 	.ret_type       = RET_INTEGER,
2435 	.arg1_type      = ARG_PTR_TO_CTX,
2436 	.arg2_type      = ARG_ANYTHING,
2437 	.arg3_type      = ARG_ANYTHING,
2438 };
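
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Port mirroring with bpf_clone_redirect(): a copy of each packet is sent
 * out of MIRROR_IFINDEX (placeholder) while the original continues on its
 * normal path. Unlike bpf_redirect(), the clone is forwarded from within
 * the helper call itself.
 */
#if 0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define MIRROR_IFINDEX	42	/* placeholder target ifindex */

SEC("tc")
int mirror(struct __sk_buff *skb)
{
	/* Egress direction on the target; BPF_F_INGRESS would instead loop
	 * the clone into the target's receive path.
	 */
	bpf_clone_redirect(skb, MIRROR_IFINDEX, 0);
	return TC_ACT_OK;
}
#endif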
2439 
2440 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2441 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
2442 
2443 int skb_do_redirect(struct sk_buff *skb)
2444 {
2445 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2446 	struct net *net = dev_net(skb->dev);
2447 	struct net_device *dev;
2448 	u32 flags = ri->flags;
2449 
2450 	dev = dev_get_by_index_rcu(net, ri->tgt_index);
2451 	ri->tgt_index = 0;
2452 	ri->flags = 0;
2453 	if (unlikely(!dev))
2454 		goto out_drop;
2455 	if (flags & BPF_F_PEER) {
2456 		const struct net_device_ops *ops = dev->netdev_ops;
2457 
2458 		if (unlikely(!ops->ndo_get_peer_dev ||
2459 			     !skb_at_tc_ingress(skb)))
2460 			goto out_drop;
2461 		dev = ops->ndo_get_peer_dev(dev);
2462 		if (unlikely(!dev ||
2463 			     !(dev->flags & IFF_UP) ||
2464 			     net_eq(net, dev_net(dev))))
2465 			goto out_drop;
2466 		skb->dev = dev;
2467 		return -EAGAIN;
2468 	}
2469 	return flags & BPF_F_NEIGH ?
2470 	       __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
2471 				    &ri->nh : NULL) :
2472 	       __bpf_redirect(skb, dev, flags);
2473 out_drop:
2474 	kfree_skb(skb);
2475 	return -EINVAL;
2476 }
2477 
2478 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2479 {
2480 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2481 
2482 	if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2483 		return TC_ACT_SHOT;
2484 
2485 	ri->flags = flags;
2486 	ri->tgt_index = ifindex;
2487 
2488 	return TC_ACT_REDIRECT;
2489 }
2490 
2491 static const struct bpf_func_proto bpf_redirect_proto = {
2492 	.func           = bpf_redirect,
2493 	.gpl_only       = false,
2494 	.ret_type       = RET_INTEGER,
2495 	.arg1_type      = ARG_ANYTHING,
2496 	.arg2_type      = ARG_ANYTHING,
2497 };
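
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * bpf_redirect() only records the target in bpf_redirect_info and returns
 * TC_ACT_REDIRECT; the actual forwarding happens later in skb_do_redirect()
 * above. TARGET_IFINDEX is a placeholder.
 */
#if 0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define TARGET_IFINDEX	7	/* placeholder */

SEC("tc")
int redirect_ingress(struct __sk_buff *skb)
{
	/* Deliver into the target device's ingress path. */
	return bpf_redirect(TARGET_IFINDEX, BPF_F_INGRESS);
}
#endif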
2498 
2499 BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2500 {
2501 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2502 
2503 	if (unlikely(flags))
2504 		return TC_ACT_SHOT;
2505 
2506 	ri->flags = BPF_F_PEER;
2507 	ri->tgt_index = ifindex;
2508 
2509 	return TC_ACT_REDIRECT;
2510 }
2511 
2512 static const struct bpf_func_proto bpf_redirect_peer_proto = {
2513 	.func           = bpf_redirect_peer,
2514 	.gpl_only       = false,
2515 	.ret_type       = RET_INTEGER,
2516 	.arg1_type      = ARG_ANYTHING,
2517 	.arg2_type      = ARG_ANYTHING,
2518 };
2519 
2520 BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2521 	   int, plen, u64, flags)
2522 {
2523 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2524 
2525 	if (unlikely((plen && plen < sizeof(*params)) || flags))
2526 		return TC_ACT_SHOT;
2527 
2528 	ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
2529 	ri->tgt_index = ifindex;
2530 
2531 	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2532 	if (plen)
2533 		memcpy(&ri->nh, params, sizeof(ri->nh));
2534 
2535 	return TC_ACT_REDIRECT;
2536 }
2537 
2538 static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2539 	.func		= bpf_redirect_neigh,
2540 	.gpl_only	= false,
2541 	.ret_type	= RET_INTEGER,
2542 	.arg1_type	= ARG_ANYTHING,
2543 	.arg2_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
2544 	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
2545 	.arg4_type	= ARG_ANYTHING,
2546 };
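
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * L3 forwarding with bpf_redirect_neigh(): the kernel fills in the L2
 * header via the neighbour subsystem. Passing params/plen pins the
 * nexthop (BPF_F_NEXTHOP internally); with NULL/0 the route lookup in
 * __bpf_redirect_neigh_v4/v6() decides. Ifindex and gateway are
 * placeholders.
 */
#if 0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#ifndef AF_INET
#define AF_INET 2
#endif

#define UPLINK_IFINDEX	3	/* placeholder */

SEC("tc")
int fwd_via_gw(struct __sk_buff *skb)
{
	struct bpf_redir_neigh nh = {
		.nh_family = AF_INET,
		.ipv4_nh   = bpf_htonl(0x0a000001),	/* 10.0.0.1 */
	};

	return bpf_redirect_neigh(UPLINK_IFINDEX, &nh, sizeof(nh), 0);
}
#endif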
2547 
2548 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2549 {
2550 	msg->apply_bytes = bytes;
2551 	return 0;
2552 }
2553 
2554 static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2555 	.func           = bpf_msg_apply_bytes,
2556 	.gpl_only       = false,
2557 	.ret_type       = RET_INTEGER,
2558 	.arg1_type	= ARG_PTR_TO_CTX,
2559 	.arg2_type      = ARG_ANYTHING,
2560 };
2561 
2562 BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
2563 {
2564 	msg->cork_bytes = bytes;
2565 	return 0;
2566 }
2567 
2568 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2569 	.func           = bpf_msg_cork_bytes,
2570 	.gpl_only       = false,
2571 	.ret_type       = RET_INTEGER,
2572 	.arg1_type	= ARG_PTR_TO_CTX,
2573 	.arg2_type      = ARG_ANYTHING,
2574 };
2575 
2576 BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2577 	   u32, end, u64, flags)
2578 {
2579 	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2580 	u32 first_sge, last_sge, i, shift, bytes_sg_total;
2581 	struct scatterlist *sge;
2582 	u8 *raw, *to, *from;
2583 	struct page *page;
2584 
2585 	if (unlikely(flags || end <= start))
2586 		return -EINVAL;
2587 
2588 	/* First find the starting scatterlist element */
2589 	i = msg->sg.start;
2590 	do {
2591 		offset += len;
2592 		len = sk_msg_elem(msg, i)->length;
2593 		if (start < offset + len)
2594 			break;
2595 		sk_msg_iter_var_next(i);
2596 	} while (i != msg->sg.end);
2597 
2598 	if (unlikely(start >= offset + len))
2599 		return -EINVAL;
2600 
2601 	first_sge = i;
2602 	/* The start may point into the sg element so we need to also
2603 	 * account for the headroom.
2604 	 */
2605 	bytes_sg_total = start - offset + bytes;
2606 	if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
2607 		goto out;
2608 
2609 	/* At this point we need to linearize multiple scatterlist
2610 	 * elements or a single shared page. Either way we need to
2611 	 * copy into a linear buffer exclusively owned by BPF. Then
2612 	 * place the buffer in the scatterlist and fixup the original
2613 	 * entries by removing the entries now in the linear buffer
2614 	 * and shifting the remaining entries. For now we do not try
2615 	 * to copy partial entries to avoid complexity of running out
2616 	 * of sg_entry slots. The downside is reading a single byte
2617 	 * will copy the entire sg entry.
2618 	 */
2619 	do {
2620 		copy += sk_msg_elem(msg, i)->length;
2621 		sk_msg_iter_var_next(i);
2622 		if (bytes_sg_total <= copy)
2623 			break;
2624 	} while (i != msg->sg.end);
2625 	last_sge = i;
2626 
2627 	if (unlikely(bytes_sg_total > copy))
2628 		return -EINVAL;
2629 
2630 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2631 			   get_order(copy));
2632 	if (unlikely(!page))
2633 		return -ENOMEM;
2634 
2635 	raw = page_address(page);
2636 	i = first_sge;
2637 	do {
2638 		sge = sk_msg_elem(msg, i);
2639 		from = sg_virt(sge);
2640 		len = sge->length;
2641 		to = raw + poffset;
2642 
2643 		memcpy(to, from, len);
2644 		poffset += len;
2645 		sge->length = 0;
2646 		put_page(sg_page(sge));
2647 
2648 		sk_msg_iter_var_next(i);
2649 	} while (i != last_sge);
2650 
2651 	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
2652 
2653 	/* To repair sg ring we need to shift entries. If we only
2654 	 * had a single entry though we can just replace it and
2655 	 * be done. Otherwise walk the ring and shift the entries.
2656 	 */
2657 	WARN_ON_ONCE(last_sge == first_sge);
2658 	shift = last_sge > first_sge ?
2659 		last_sge - first_sge - 1 :
2660 		NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
2661 	if (!shift)
2662 		goto out;
2663 
2664 	i = first_sge;
2665 	sk_msg_iter_var_next(i);
2666 	do {
2667 		u32 move_from;
2668 
2669 		if (i + shift >= NR_MSG_FRAG_IDS)
2670 			move_from = i + shift - NR_MSG_FRAG_IDS;
2671 		else
2672 			move_from = i + shift;
2673 		if (move_from == msg->sg.end)
2674 			break;
2675 
2676 		msg->sg.data[i] = msg->sg.data[move_from];
2677 		msg->sg.data[move_from].length = 0;
2678 		msg->sg.data[move_from].page_link = 0;
2679 		msg->sg.data[move_from].offset = 0;
2680 		sk_msg_iter_var_next(i);
2681 	} while (1);
2682 
2683 	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2684 		      msg->sg.end - shift + NR_MSG_FRAG_IDS :
2685 		      msg->sg.end - shift;
2686 out:
2687 	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
2688 	msg->data_end = msg->data + bytes;
2689 	return 0;
2690 }
2691 
2692 static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2693 	.func		= bpf_msg_pull_data,
2694 	.gpl_only	= false,
2695 	.ret_type	= RET_INTEGER,
2696 	.arg1_type	= ARG_PTR_TO_CTX,
2697 	.arg2_type	= ARG_ANYTHING,
2698 	.arg3_type	= ARG_ANYTHING,
2699 	.arg4_type	= ARG_ANYTHING,
2700 };
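
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * A sk_msg verdict program must make the bytes it wants to inspect
 * directly accessible first; bpf_msg_pull_data() linearizes the requested
 * range as implemented above, after which msg->data/data_end cover it.
 */
#if 0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_inspect(struct sk_msg_md *msg)
{
	void *data, *data_end;
	__u8 *p;

	if (bpf_msg_pull_data(msg, 0, 4, 0) < 0)
		return SK_PASS;

	data = msg->data;
	data_end = msg->data_end;
	p = data;
	if ((void *)(p + 4) > data_end)
		return SK_PASS;

	return p[0] == 0xff ? SK_DROP : SK_PASS;
}
#endif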
2701 
2702 BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2703 	   u32, len, u64, flags)
2704 {
2705 	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2706 	u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
2707 	u8 *raw, *to, *from;
2708 	struct page *page;
2709 
2710 	if (unlikely(flags))
2711 		return -EINVAL;
2712 
2713 	/* First find the starting scatterlist element */
2714 	i = msg->sg.start;
2715 	do {
2716 		offset += l;
2717 		l = sk_msg_elem(msg, i)->length;
2718 
2719 		if (start < offset + l)
2720 			break;
2721 		sk_msg_iter_var_next(i);
2722 	} while (i != msg->sg.end);
2723 
2724 	if (start >= offset + l)
2725 		return -EINVAL;
2726 
2727 	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2728 
2729 	/* If no space is available we will fall back to copy; we need at
2730 	 * least one scatterlist elem available to push data into
2731 	 * when start aligns to the beginning of an element, or two
2732 	 * when it falls inside an element. We handle the start equals
2733 	 * offset case because it's the common case for inserting a
2734 	 * header.
2735 	 */
2736 	if (!space || (space == 1 && start != offset))
2737 		copy = msg->sg.data[i].length;
2738 
2739 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2740 			   get_order(copy + len));
2741 	if (unlikely(!page))
2742 		return -ENOMEM;
2743 
2744 	if (copy) {
2745 		int front, back;
2746 
2747 		raw = page_address(page);
2748 
2749 		psge = sk_msg_elem(msg, i);
2750 		front = start - offset;
2751 		back = psge->length - front;
2752 		from = sg_virt(psge);
2753 
2754 		if (front)
2755 			memcpy(raw, from, front);
2756 
2757 		if (back) {
2758 			from += front;
2759 			to = raw + front + len;
2760 
2761 			memcpy(to, from, back);
2762 		}
2763 
2764 		put_page(sg_page(psge));
2765 	} else if (start - offset) {
2766 		psge = sk_msg_elem(msg, i);
2767 		rsge = sk_msg_elem_cpy(msg, i);
2768 
2769 		psge->length = start - offset;
2770 		rsge.length -= psge->length;
2771 		rsge.offset += start;
2772 
2773 		sk_msg_iter_var_next(i);
2774 		sg_unmark_end(psge);
2775 		sg_unmark_end(&rsge);
2776 		sk_msg_iter_next(msg, end);
2777 	}
2778 
2779 	/* Slot(s) to place newly allocated data */
2780 	new = i;
2781 
2782 	/* Shift one or two slots as needed */
2783 	if (!copy) {
2784 		sge = sk_msg_elem_cpy(msg, i);
2785 
2786 		sk_msg_iter_var_next(i);
2787 		sg_unmark_end(&sge);
2788 		sk_msg_iter_next(msg, end);
2789 
2790 		nsge = sk_msg_elem_cpy(msg, i);
2791 		if (rsge.length) {
2792 			sk_msg_iter_var_next(i);
2793 			nnsge = sk_msg_elem_cpy(msg, i);
2794 		}
2795 
2796 		while (i != msg->sg.end) {
2797 			msg->sg.data[i] = sge;
2798 			sge = nsge;
2799 			sk_msg_iter_var_next(i);
2800 			if (rsge.length) {
2801 				nsge = nnsge;
2802 				nnsge = sk_msg_elem_cpy(msg, i);
2803 			} else {
2804 				nsge = sk_msg_elem_cpy(msg, i);
2805 			}
2806 		}
2807 	}
2808 
2809 	/* Place newly allocated data buffer */
2810 	sk_mem_charge(msg->sk, len);
2811 	msg->sg.size += len;
2812 	__clear_bit(new, &msg->sg.copy);
2813 	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2814 	if (rsge.length) {
2815 		get_page(sg_page(&rsge));
2816 		sk_msg_iter_var_next(new);
2817 		msg->sg.data[new] = rsge;
2818 	}
2819 
2820 	sk_msg_compute_data_pointers(msg);
2821 	return 0;
2822 }
2823 
2824 static const struct bpf_func_proto bpf_msg_push_data_proto = {
2825 	.func		= bpf_msg_push_data,
2826 	.gpl_only	= false,
2827 	.ret_type	= RET_INTEGER,
2828 	.arg1_type	= ARG_PTR_TO_CTX,
2829 	.arg2_type	= ARG_ANYTHING,
2830 	.arg3_type	= ARG_ANYTHING,
2831 	.arg4_type	= ARG_ANYTHING,
2832 };
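
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Inserting a small application header in front of the payload with
 * bpf_msg_push_data(), then filling it through the recomputed
 * msg->data/data_end window. The 4-byte tag format is a placeholder.
 */
#if 0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_add_hdr(struct sk_msg_md *msg)
{
	__u32 *hdr;

	if (bpf_msg_push_data(msg, 0, sizeof(*hdr), 0) < 0)
		return SK_PASS;

	hdr = msg->data;
	if ((void *)(hdr + 1) > msg->data_end)
		return SK_PASS;

	*hdr = 0xfeedcafe;	/* placeholder tag */
	return SK_PASS;
}
#endif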
2833 
2834 static void sk_msg_shift_left(struct sk_msg *msg, int i)
2835 {
2836 	int prev;
2837 
2838 	do {
2839 		prev = i;
2840 		sk_msg_iter_var_next(i);
2841 		msg->sg.data[prev] = msg->sg.data[i];
2842 	} while (i != msg->sg.end);
2843 
2844 	sk_msg_iter_prev(msg, end);
2845 }
2846 
2847 static void sk_msg_shift_right(struct sk_msg *msg, int i)
2848 {
2849 	struct scatterlist tmp, sge;
2850 
2851 	sk_msg_iter_next(msg, end);
2852 	sge = sk_msg_elem_cpy(msg, i);
2853 	sk_msg_iter_var_next(i);
2854 	tmp = sk_msg_elem_cpy(msg, i);
2855 
2856 	while (i != msg->sg.end) {
2857 		msg->sg.data[i] = sge;
2858 		sk_msg_iter_var_next(i);
2859 		sge = tmp;
2860 		tmp = sk_msg_elem_cpy(msg, i);
2861 	}
2862 }
2863 
2864 BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2865 	   u32, len, u64, flags)
2866 {
2867 	u32 i = 0, l = 0, space, offset = 0;
2868 	u64 last = start + len;
2869 	int pop;
2870 
2871 	if (unlikely(flags))
2872 		return -EINVAL;
2873 
2874 	/* First find the starting scatterlist element */
2875 	i = msg->sg.start;
2876 	do {
2877 		offset += l;
2878 		l = sk_msg_elem(msg, i)->length;
2879 
2880 		if (start < offset + l)
2881 			break;
2882 		sk_msg_iter_var_next(i);
2883 	} while (i != msg->sg.end);
2884 
2885 	/* Bounds checks: start and pop must be inside message */
2886 	if (start >= offset + l || last >= msg->sg.size)
2887 		return -EINVAL;
2888 
2889 	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2890 
2891 	pop = len;
2892 	/* --------------| offset
2893 	 * -| start      |-------- len -------|
2894 	 *
2895 	 *  |----- a ----|-------- pop -------|----- b ----|
2896 	 *  |______________________________________________| length
2897 	 *
2898 	 *
2899 	 * a:   region at front of scatter element to save
2900 	 * b:   region at back of scatter element to save when length > a + pop
2901 	 * pop: region to pop from element; same as the input 'pop', it will
2902 	 *      be decremented below per iteration.
2903 	 *
2904 	 * Two top-level cases to handle when start != offset: first, b is
2905 	 * non-zero; second, b is zero, corresponding to a pop that spans
2906 	 * more than one element.
2907 	 *
2908 	 * Then, if b is non-zero AND there is no space, allocate space and
2909 	 * compact the a and b regions into a page. If there is space, shift
2910 	 * the ring to the right, freeing the next element in the ring to
2911 	 * place b, leaving a untouched except to reduce its length.
2912 	 */
2913 	if (start != offset) {
2914 		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2915 		int a = start;
2916 		int b = sge->length - pop - a;
2917 
2918 		sk_msg_iter_var_next(i);
2919 
2920 		if (pop < sge->length - a) {
2921 			if (space) {
2922 				sge->length = a;
2923 				sk_msg_shift_right(msg, i);
2924 				nsge = sk_msg_elem(msg, i);
2925 				get_page(sg_page(sge));
2926 				sg_set_page(nsge,
2927 					    sg_page(sge),
2928 					    b, sge->offset + pop + a);
2929 			} else {
2930 				struct page *page, *orig;
2931 				u8 *to, *from;
2932 
2933 				page = alloc_pages(__GFP_NOWARN |
2934 						   __GFP_COMP   | GFP_ATOMIC,
2935 						   get_order(a + b));
2936 				if (unlikely(!page))
2937 					return -ENOMEM;
2938 
2939 				sge->length = a;
2940 				orig = sg_page(sge);
2941 				from = sg_virt(sge);
2942 				to = page_address(page);
2943 				memcpy(to, from, a);
2944 				memcpy(to + a, from + a + pop, b);
2945 				sg_set_page(sge, page, a + b, 0);
2946 				put_page(orig);
2947 			}
2948 			pop = 0;
2949 		} else if (pop >= sge->length - a) {
2950 			pop -= (sge->length - a);
2951 			sge->length = a;
2952 		}
2953 	}
2954 
2955 	/* From above the current layout _must_ be as follows,
2956 	 *
2957 	 * -| offset
2958 	 * -| start
2959 	 *
2960 	 *  |---- pop ---|---------------- b ------------|
2961 	 *  |____________________________________________| length
2962 	 *
2963 	 * Offset and start of the current msg elem are equal because in the
2964 	 * previous case we handled offset != start and either consumed the
2965 	 * entire element and advanced to the next element OR pop == 0.
2966 	 *
2967 	 * Two cases to handle here: first, pop is less than the length,
2968 	 * leaving some remainder b above. Simply adjust the element's layout
2969 	 * in this case. Or pop >= length of the element, so that b = 0. In
2970 	 * this case advance to the next element, decrementing pop.
2971 	 */
2972 	while (pop) {
2973 		struct scatterlist *sge = sk_msg_elem(msg, i);
2974 
2975 		if (pop < sge->length) {
2976 			sge->length -= pop;
2977 			sge->offset += pop;
2978 			pop = 0;
2979 		} else {
2980 			pop -= sge->length;
2981 			sk_msg_shift_left(msg, i);
2982 		}
2983 		sk_msg_iter_var_next(i);
2984 	}
2985 
2986 	sk_mem_uncharge(msg->sk, len - pop);
2987 	msg->sg.size -= (len - pop);
2988 	sk_msg_compute_data_pointers(msg);
2989 	return 0;
2990 }
2991 
2992 static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2993 	.func		= bpf_msg_pop_data,
2994 	.gpl_only	= false,
2995 	.ret_type	= RET_INTEGER,
2996 	.arg1_type	= ARG_PTR_TO_CTX,
2997 	.arg2_type	= ARG_ANYTHING,
2998 	.arg3_type	= ARG_ANYTHING,
2999 	.arg4_type	= ARG_ANYTHING,
3000 };
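
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * The inverse of a push: strip a 4-byte application header from the front
 * of the message before it is passed on.
 */
#if 0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_strip_hdr(struct sk_msg_md *msg)
{
	/* Remove 4 bytes starting at offset 0; on error the message is
	 * passed through unmodified.
	 */
	bpf_msg_pop_data(msg, 0, 4, 0);
	return SK_PASS;
}
#endif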
3001 
3002 #ifdef CONFIG_CGROUP_NET_CLASSID
3003 BPF_CALL_0(bpf_get_cgroup_classid_curr)
3004 {
3005 	return __task_get_classid(current);
3006 }
3007 
3008 static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
3009 	.func		= bpf_get_cgroup_classid_curr,
3010 	.gpl_only	= false,
3011 	.ret_type	= RET_INTEGER,
3012 };
3013 
3014 BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3015 {
3016 	struct sock *sk = skb_to_full_sk(skb);
3017 
3018 	if (!sk || !sk_fullsock(sk))
3019 		return 0;
3020 
3021 	return sock_cgroup_classid(&sk->sk_cgrp_data);
3022 }
3023 
3024 static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3025 	.func		= bpf_skb_cgroup_classid,
3026 	.gpl_only	= false,
3027 	.ret_type	= RET_INTEGER,
3028 	.arg1_type	= ARG_PTR_TO_CTX,
3029 };
3030 #endif
3031 
3032 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
3033 {
3034 	return task_get_classid(skb);
3035 }
3036 
3037 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3038 	.func           = bpf_get_cgroup_classid,
3039 	.gpl_only       = false,
3040 	.ret_type       = RET_INTEGER,
3041 	.arg1_type      = ARG_PTR_TO_CTX,
3042 };
3043 
3044 BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
3045 {
3046 	return dst_tclassid(skb);
3047 }
3048 
3049 static const struct bpf_func_proto bpf_get_route_realm_proto = {
3050 	.func           = bpf_get_route_realm,
3051 	.gpl_only       = false,
3052 	.ret_type       = RET_INTEGER,
3053 	.arg1_type      = ARG_PTR_TO_CTX,
3054 };
3055 
3056 BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
3057 {
3058 	/* If skb_clear_hash() was called due to mangling, we can
3059 	 * trigger SW recalculation here. Later access to hash
3060 	 * can then use the inline skb->hash via context directly
3061 	 * instead of calling this helper again.
3062 	 */
3063 	return skb_get_hash(skb);
3064 }
3065 
3066 static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3067 	.func		= bpf_get_hash_recalc,
3068 	.gpl_only	= false,
3069 	.ret_type	= RET_INTEGER,
3070 	.arg1_type	= ARG_PTR_TO_CTX,
3071 };
3072 
3073 BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3074 {
3075 	/* After all direct packet writes, this can be used once to
3076 	 * trigger a lazy recalc on the next skb_get_hash() invocation.
3077 	 */
3078 	skb_clear_hash(skb);
3079 	return 0;
3080 }
3081 
3082 static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3083 	.func		= bpf_set_hash_invalid,
3084 	.gpl_only	= false,
3085 	.ret_type	= RET_INTEGER,
3086 	.arg1_type	= ARG_PTR_TO_CTX,
3087 };
3088 
3089 BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3090 {
3091 	/* Set user specified hash as L4(+), so that it gets returned
3092 	 * on skb_get_hash() call unless BPF prog later on triggers a
3093 	 * skb_clear_hash().
3094 	 */
3095 	__skb_set_sw_hash(skb, hash, true);
3096 	return 0;
3097 }
3098 
3099 static const struct bpf_func_proto bpf_set_hash_proto = {
3100 	.func		= bpf_set_hash,
3101 	.gpl_only	= false,
3102 	.ret_type	= RET_INTEGER,
3103 	.arg1_type	= ARG_PTR_TO_CTX,
3104 	.arg2_type	= ARG_ANYTHING,
3105 };
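
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * The pattern described in the comments above: after direct packet writes
 * that change flow-relevant fields, invalidate the stale hash so it is
 * recomputed lazily, or force the recalculation right away.
 */
#if 0
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int refresh_hash(struct __sk_buff *skb)
{
	/* ... direct packet writes that mangle the 5-tuple go here ... */

	bpf_set_hash_invalid(skb);		/* lazy: recalc on next use */
	(void)bpf_get_hash_recalc(skb);		/* or: recalc immediately  */
	return TC_ACT_OK;
}
#endif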
3106 
3107 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3108 	   u16, vlan_tci)
3109 {
3110 	int ret;
3111 
3112 	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3113 		     vlan_proto != htons(ETH_P_8021AD)))
3114 		vlan_proto = htons(ETH_P_8021Q);
3115 
3116 	bpf_push_mac_rcsum(skb);
3117 	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
3118 	bpf_pull_mac_rcsum(skb);
3119 
3120 	bpf_compute_data_pointers(skb);
3121 	return ret;
3122 }
3123 
3124 static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
3125 	.func           = bpf_skb_vlan_push,
3126 	.gpl_only       = false,
3127 	.ret_type       = RET_INTEGER,
3128 	.arg1_type      = ARG_PTR_TO_CTX,
3129 	.arg2_type      = ARG_ANYTHING,
3130 	.arg3_type      = ARG_ANYTHING,
3131 };
3132 
3133 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
3134 {
3135 	int ret;
3136 
3137 	bpf_push_mac_rcsum(skb);
3138 	ret = skb_vlan_pop(skb);
3139 	bpf_pull_mac_rcsum(skb);
3140 
3141 	bpf_compute_data_pointers(skb);
3142 	return ret;
3143 }
3144 
3145 static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
3146 	.func           = bpf_skb_vlan_pop,
3147 	.gpl_only       = false,
3148 	.ret_type       = RET_INTEGER,
3149 	.arg1_type      = ARG_PTR_TO_CTX,
3150 };
3151 
3152 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3153 {
3154 	/* Caller already did skb_cow() with len as headroom,
3155 	 * so no need to do it here.
3156 	 */
3157 	skb_push(skb, len);
3158 	memmove(skb->data, skb->data + len, off);
3159 	memset(skb->data + off, 0, len);
3160 
3161 	/* No skb_postpush_rcsum(skb, skb->data + off, len)
3162 	 * needed here as it does not change the skb->csum
3163 	 * result for checksum complete when summing over
3164 	 * zeroed blocks.
3165 	 */
3166 	return 0;
3167 }
3168 
3169 static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3170 {
3171 	/* skb_ensure_writable() is not needed here, as we're
3172 	 * already working on an uncloned skb.
3173 	 */
3174 	if (unlikely(!pskb_may_pull(skb, off + len)))
3175 		return -ENOMEM;
3176 
3177 	skb_postpull_rcsum(skb, skb->data + off, len);
3178 	memmove(skb->data + len, skb->data, off);
3179 	__skb_pull(skb, len);
3180 
3181 	return 0;
3182 }
3183 
3184 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3185 {
3186 	bool trans_same = skb->transport_header == skb->network_header;
3187 	int ret;
3188 
3189 	/* There's no need for __skb_push()/__skb_pull() pair to
3190 	 * get to the start of the mac header as we're guaranteed
3191 	 * to always start from here under eBPF.
3192 	 */
3193 	ret = bpf_skb_generic_push(skb, off, len);
3194 	if (likely(!ret)) {
3195 		skb->mac_header -= len;
3196 		skb->network_header -= len;
3197 		if (trans_same)
3198 			skb->transport_header = skb->network_header;
3199 	}
3200 
3201 	return ret;
3202 }
3203 
3204 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3205 {
3206 	bool trans_same = skb->transport_header == skb->network_header;
3207 	int ret;
3208 
3209 	/* Same here, __skb_push()/__skb_pull() pair not needed. */
3210 	ret = bpf_skb_generic_pop(skb, off, len);
3211 	if (likely(!ret)) {
3212 		skb->mac_header += len;
3213 		skb->network_header += len;
3214 		if (trans_same)
3215 			skb->transport_header = skb->network_header;
3216 	}
3217 
3218 	return ret;
3219 }
3220 
3221 static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3222 {
3223 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3224 	u32 off = skb_mac_header_len(skb);
3225 	int ret;
3226 
3227 	ret = skb_cow(skb, len_diff);
3228 	if (unlikely(ret < 0))
3229 		return ret;
3230 
3231 	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3232 	if (unlikely(ret < 0))
3233 		return ret;
3234 
3235 	if (skb_is_gso(skb)) {
3236 		struct skb_shared_info *shinfo = skb_shinfo(skb);
3237 
3238 		/* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
3239 		if (shinfo->gso_type & SKB_GSO_TCPV4) {
3240 			shinfo->gso_type &= ~SKB_GSO_TCPV4;
3241 			shinfo->gso_type |=  SKB_GSO_TCPV6;
3242 		}
3243 	}
3244 
3245 	skb->protocol = htons(ETH_P_IPV6);
3246 	skb_clear_hash(skb);
3247 
3248 	return 0;
3249 }
3250 
3251 static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3252 {
3253 	const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3254 	u32 off = skb_mac_header_len(skb);
3255 	int ret;
3256 
3257 	ret = skb_unclone(skb, GFP_ATOMIC);
3258 	if (unlikely(ret < 0))
3259 		return ret;
3260 
3261 	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3262 	if (unlikely(ret < 0))
3263 		return ret;
3264 
3265 	if (skb_is_gso(skb)) {
3266 		struct skb_shared_info *shinfo = skb_shinfo(skb);
3267 
3268 		/* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
3269 		if (shinfo->gso_type & SKB_GSO_TCPV6) {
3270 			shinfo->gso_type &= ~SKB_GSO_TCPV6;
3271 			shinfo->gso_type |=  SKB_GSO_TCPV4;
3272 		}
3273 	}
3274 
3275 	skb->protocol = htons(ETH_P_IP);
3276 	skb_clear_hash(skb);
3277 
3278 	return 0;
3279 }
3280 
3281 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3282 {
3283 	__be16 from_proto = skb->protocol;
3284 
3285 	if (from_proto == htons(ETH_P_IP) &&
3286 	      to_proto == htons(ETH_P_IPV6))
3287 		return bpf_skb_proto_4_to_6(skb);
3288 
3289 	if (from_proto == htons(ETH_P_IPV6) &&
3290 	      to_proto == htons(ETH_P_IP))
3291 		return bpf_skb_proto_6_to_4(skb);
3292 
3293 	return -ENOTSUPP;
3294 }
3295 
3296 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3297 	   u64, flags)
3298 {
3299 	int ret;
3300 
3301 	if (unlikely(flags))
3302 		return -EINVAL;
3303 
3304 	/* General idea is that this helper does the basic groundwork
3305 	 * needed for changing the protocol, and eBPF program fills the
3306 	 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3307 	 * and other helpers, rather than passing a raw buffer here.
3308 	 *
3309 	 * The rationale is to keep this minimal and without a need to
3310 	 * deal with raw packet data. F.e. even if we would pass buffers
3311 	 * here, the program still needs to call the bpf_lX_csum_replace()
3312 	 * helpers anyway. Plus, this way we keep also separation of
3313 	 * concerns, since f.e. bpf_skb_store_bytes() should only take
3314 	 * care of stores.
3315 	 *
3316 	 * Currently, additional options and extension header space are
3317 	 * not supported, but flags register is reserved so we can adapt
3318 	 * that. For offloads, we mark packet as dodgy, so that headers
3319 	 * need to be verified first.
3320 	 */
3321 	ret = bpf_skb_proto_xlat(skb, proto);
3322 	bpf_compute_data_pointers(skb);
3323 	return ret;
3324 }
3325 
3326 static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3327 	.func		= bpf_skb_change_proto,
3328 	.gpl_only	= false,
3329 	.ret_type	= RET_INTEGER,
3330 	.arg1_type	= ARG_PTR_TO_CTX,
3331 	.arg2_type	= ARG_ANYTHING,
3332 	.arg3_type	= ARG_ANYTHING,
3333 };
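
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Skeleton of a 4-to-6 translation on tc: bpf_skb_change_proto() only
 * resizes and retags the skb as described above; the program is still
 * responsible for writing a valid IPv6 header and fixing the L4 checksum
 * via the csum helpers. Header construction is elided here.
 */
#if 0
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int nat46(struct __sk_buff *skb)
{
	if (skb->protocol != bpf_htons(ETH_P_IP))
		return TC_ACT_OK;

	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0) < 0)
		return TC_ACT_SHOT;

	/* ...build the IPv6 header with bpf_skb_store_bytes() and patch
	 * the L4 checksum with bpf_csum_diff()/bpf_l4_csum_replace()...
	 */
	return TC_ACT_OK;
}
#endif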
3334 
3335 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3336 {
3337 	/* We only allow a restricted subset to be changed for now. */
3338 	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3339 		     !skb_pkt_type_ok(pkt_type)))
3340 		return -EINVAL;
3341 
3342 	skb->pkt_type = pkt_type;
3343 	return 0;
3344 }
3345 
3346 static const struct bpf_func_proto bpf_skb_change_type_proto = {
3347 	.func		= bpf_skb_change_type,
3348 	.gpl_only	= false,
3349 	.ret_type	= RET_INTEGER,
3350 	.arg1_type	= ARG_PTR_TO_CTX,
3351 	.arg2_type	= ARG_ANYTHING,
3352 };
3353 
3354 static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3355 {
3356 	switch (skb->protocol) {
3357 	case htons(ETH_P_IP):
3358 		return sizeof(struct iphdr);
3359 	case htons(ETH_P_IPV6):
3360 		return sizeof(struct ipv6hdr);
3361 	default:
3362 		return ~0U;
3363 	}
3364 }
3365 
3366 #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK	(BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3367 					 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3368 
3369 #define BPF_F_ADJ_ROOM_MASK		(BPF_F_ADJ_ROOM_FIXED_GSO | \
3370 					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3371 					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
3372 					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
3373 					 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
3374 					 BPF_F_ADJ_ROOM_ENCAP_L2( \
3375 					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
3376 
3377 static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3378 			    u64 flags)
3379 {
3380 	u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3381 	bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3382 	u16 mac_len = 0, inner_net = 0, inner_trans = 0;
3383 	unsigned int gso_type = SKB_GSO_DODGY;
3384 	int ret;
3385 
3386 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3387 		/* udp gso_size delineates datagrams, only allow if fixed */
3388 		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3389 		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3390 			return -ENOTSUPP;
3391 	}
3392 
3393 	ret = skb_cow_head(skb, len_diff);
3394 	if (unlikely(ret < 0))
3395 		return ret;
3396 
3397 	if (encap) {
3398 		if (skb->protocol != htons(ETH_P_IP) &&
3399 		    skb->protocol != htons(ETH_P_IPV6))
3400 			return -ENOTSUPP;
3401 
3402 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3403 		    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3404 			return -EINVAL;
3405 
3406 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3407 		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3408 			return -EINVAL;
3409 
3410 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3411 		    inner_mac_len < ETH_HLEN)
3412 			return -EINVAL;
3413 
3414 		if (skb->encapsulation)
3415 			return -EALREADY;
3416 
3417 		mac_len = skb->network_header - skb->mac_header;
3418 		inner_net = skb->network_header;
3419 		if (inner_mac_len > len_diff)
3420 			return -EINVAL;
3421 		inner_trans = skb->transport_header;
3422 	}
3423 
3424 	ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3425 	if (unlikely(ret < 0))
3426 		return ret;
3427 
3428 	if (encap) {
3429 		skb->inner_mac_header = inner_net - inner_mac_len;
3430 		skb->inner_network_header = inner_net;
3431 		skb->inner_transport_header = inner_trans;
3432 
3433 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3434 			skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3435 		else
3436 			skb_set_inner_protocol(skb, skb->protocol);
3437 
3438 		skb->encapsulation = 1;
3439 		skb_set_network_header(skb, mac_len);
3440 
3441 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3442 			gso_type |= SKB_GSO_UDP_TUNNEL;
3443 		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3444 			gso_type |= SKB_GSO_GRE;
3445 		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3446 			gso_type |= SKB_GSO_IPXIP6;
3447 		else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3448 			gso_type |= SKB_GSO_IPXIP4;
3449 
3450 		if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3451 		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3452 			int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3453 					sizeof(struct ipv6hdr) :
3454 					sizeof(struct iphdr);
3455 
3456 			skb_set_transport_header(skb, mac_len + nh_len);
3457 		}
3458 
3459 		/* Match skb->protocol to new outer l3 protocol */
3460 		if (skb->protocol == htons(ETH_P_IP) &&
3461 		    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3462 			skb->protocol = htons(ETH_P_IPV6);
3463 		else if (skb->protocol == htons(ETH_P_IPV6) &&
3464 			 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3465 			skb->protocol = htons(ETH_P_IP);
3466 	}
3467 
3468 	if (skb_is_gso(skb)) {
3469 		struct skb_shared_info *shinfo = skb_shinfo(skb);
3470 
3471 		/* Due to header grow, MSS needs to be downgraded. */
3472 		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3473 			skb_decrease_gso_size(shinfo, len_diff);
3474 
3475 		/* Header must be checked, and gso_segs recomputed. */
3476 		shinfo->gso_type |= gso_type;
3477 		shinfo->gso_segs = 0;
3478 	}
3479 
3480 	return 0;
3481 }
3482 
3483 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3484 			      u64 flags)
3485 {
3486 	int ret;
3487 
3488 	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3489 			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3490 		return -EINVAL;
3491 
3492 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3493 		/* udp gso_size delineates datagrams, only allow if fixed */
3494 		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3495 		    !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3496 			return -ENOTSUPP;
3497 	}
3498 
3499 	ret = skb_unclone(skb, GFP_ATOMIC);
3500 	if (unlikely(ret < 0))
3501 		return ret;
3502 
3503 	ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3504 	if (unlikely(ret < 0))
3505 		return ret;
3506 
3507 	if (skb_is_gso(skb)) {
3508 		struct skb_shared_info *shinfo = skb_shinfo(skb);
3509 
3510 		/* Due to header shrink, MSS can be upgraded. */
3511 		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3512 			skb_increase_gso_size(shinfo, len_diff);
3513 
3514 		/* Header must be checked, and gso_segs recomputed. */
3515 		shinfo->gso_type |= SKB_GSO_DODGY;
3516 		shinfo->gso_segs = 0;
3517 	}
3518 
3519 	return 0;
3520 }
3521 
3522 #define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
3523 
3524 BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3525 	   u32, mode, u64, flags)
3526 {
3527 	u32 len_diff_abs = abs(len_diff);
3528 	bool shrink = len_diff < 0;
3529 	int ret = 0;
3530 
3531 	if (unlikely(flags || mode))
3532 		return -EINVAL;
3533 	if (unlikely(len_diff_abs > 0xfffU))
3534 		return -EFAULT;
3535 
3536 	if (!shrink) {
3537 		ret = skb_cow(skb, len_diff);
3538 		if (unlikely(ret < 0))
3539 			return ret;
3540 		__skb_push(skb, len_diff_abs);
3541 		memset(skb->data, 0, len_diff_abs);
3542 	} else {
3543 		if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3544 			return -ENOMEM;
3545 		__skb_pull(skb, len_diff_abs);
3546 	}
3547 	if (tls_sw_has_ctx_rx(skb->sk)) {
3548 		struct strp_msg *rxm = strp_msg(skb);
3549 
3550 		rxm->full_len += len_diff;
3551 	}
3552 	return ret;
3553 }
3554 
3555 static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3556 	.func		= sk_skb_adjust_room,
3557 	.gpl_only	= false,
3558 	.ret_type	= RET_INTEGER,
3559 	.arg1_type	= ARG_PTR_TO_CTX,
3560 	.arg2_type	= ARG_ANYTHING,
3561 	.arg3_type	= ARG_ANYTHING,
3562 	.arg4_type	= ARG_ANYTHING,
3563 };
3564 
3565 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3566 	   u32, mode, u64, flags)
3567 {
3568 	u32 len_cur, len_diff_abs = abs(len_diff);
3569 	u32 len_min = bpf_skb_net_base_len(skb);
3570 	u32 len_max = BPF_SKB_MAX_LEN;
3571 	__be16 proto = skb->protocol;
3572 	bool shrink = len_diff < 0;
3573 	u32 off;
3574 	int ret;
3575 
3576 	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3577 			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3578 		return -EINVAL;
3579 	if (unlikely(len_diff_abs > 0xfffU))
3580 		return -EFAULT;
3581 	if (unlikely(proto != htons(ETH_P_IP) &&
3582 		     proto != htons(ETH_P_IPV6)))
3583 		return -ENOTSUPP;
3584 
3585 	off = skb_mac_header_len(skb);
3586 	switch (mode) {
3587 	case BPF_ADJ_ROOM_NET:
3588 		off += bpf_skb_net_base_len(skb);
3589 		break;
3590 	case BPF_ADJ_ROOM_MAC:
3591 		break;
3592 	default:
3593 		return -ENOTSUPP;
3594 	}
3595 
3596 	len_cur = skb->len - skb_network_offset(skb);
3597 	if ((shrink && (len_diff_abs >= len_cur ||
3598 			len_cur - len_diff_abs < len_min)) ||
3599 	    (!shrink && (skb->len + len_diff_abs > len_max &&
3600 			 !skb_is_gso(skb))))
3601 		return -ENOTSUPP;
3602 
3603 	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3604 		       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
3605 	if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3606 		__skb_reset_checksum_unnecessary(skb);
3607 
3608 	bpf_compute_data_pointers(skb);
3609 	return ret;
3610 }
3611 
3612 static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3613 	.func		= bpf_skb_adjust_room,
3614 	.gpl_only	= false,
3615 	.ret_type	= RET_INTEGER,
3616 	.arg1_type	= ARG_PTR_TO_CTX,
3617 	.arg2_type	= ARG_ANYTHING,
3618 	.arg3_type	= ARG_ANYTHING,
3619 	.arg4_type	= ARG_ANYTHING,
3620 };
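
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Growing room at the mac layer to encapsulate an IPv4 packet in outer
 * IPv4 + UDP, letting the flags above set up the inner header offsets and
 * GSO bits; the outer headers still have to be written by the program
 * (not shown). BPF_F_ADJ_ROOM_NO_CSUM_RESET could additionally be passed
 * to preserve the checksum level, paired with bpf_csum_level() as noted
 * earlier.
 */
#if 0
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int encap_udp(struct __sk_buff *skb)
{
	__s32 grow = sizeof(struct iphdr) + sizeof(struct udphdr);
	__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
		      BPF_F_ADJ_ROOM_ENCAP_L4_UDP;

	if (bpf_skb_adjust_room(skb, grow, BPF_ADJ_ROOM_MAC, flags) < 0)
		return TC_ACT_SHOT;

	/* ...write the outer IPv4 + UDP headers via bpf_skb_store_bytes()... */
	return TC_ACT_OK;
}
#endif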
3621 
3622 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3623 {
3624 	u32 min_len = skb_network_offset(skb);
3625 
3626 	if (skb_transport_header_was_set(skb))
3627 		min_len = skb_transport_offset(skb);
3628 	if (skb->ip_summed == CHECKSUM_PARTIAL)
3629 		min_len = skb_checksum_start_offset(skb) +
3630 			  skb->csum_offset + sizeof(__sum16);
3631 	return min_len;
3632 }
3633 
3634 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3635 {
3636 	unsigned int old_len = skb->len;
3637 	int ret;
3638 
3639 	ret = __skb_grow_rcsum(skb, new_len);
3640 	if (!ret)
3641 		memset(skb->data + old_len, 0, new_len - old_len);
3642 	return ret;
3643 }
3644 
3645 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3646 {
3647 	return __skb_trim_rcsum(skb, new_len);
3648 }
3649 
3650 static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3651 					u64 flags)
3652 {
3653 	u32 max_len = BPF_SKB_MAX_LEN;
3654 	u32 min_len = __bpf_skb_min_len(skb);
3655 	int ret;
3656 
3657 	if (unlikely(flags || new_len > max_len || new_len < min_len))
3658 		return -EINVAL;
3659 	if (skb->encapsulation)
3660 		return -ENOTSUPP;
3661 
3662 	/* The basic idea of this helper is that it's performing the
3663 	 * needed work to either grow or trim an skb, and eBPF program
3664 	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
3665 	 * bpf_lX_csum_replace() and others rather than passing a raw
3666 	 * buffer here. This one is a slow path helper and intended
3667 	 * for replies with control messages.
3668 	 *
3669 	 * Like in bpf_skb_change_proto(), we want to keep this rather
3670 	 * minimal and without protocol specifics so that we are able
3671 	 * to separate concerns as in bpf_skb_store_bytes() should only
3672 	 * be the one responsible for writing buffers.
3673 	 *
3674 	 * It's really expected to be a slow path operation here for
3675 	 * control message replies, so we're implicitly linearizing,
3676 	 * uncloning and dropping offloads from the skb by this.
3677 	 */
3678 	ret = __bpf_try_make_writable(skb, skb->len);
3679 	if (!ret) {
3680 		if (new_len > skb->len)
3681 			ret = bpf_skb_grow_rcsum(skb, new_len);
3682 		else if (new_len < skb->len)
3683 			ret = bpf_skb_trim_rcsum(skb, new_len);
3684 		if (!ret && skb_is_gso(skb))
3685 			skb_gso_reset(skb);
3686 	}
3687 	return ret;
3688 }
3689 
3690 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3691 	   u64, flags)
3692 {
3693 	int ret = __bpf_skb_change_tail(skb, new_len, flags);
3694 
3695 	bpf_compute_data_pointers(skb);
3696 	return ret;
3697 }
3698 
3699 static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3700 	.func		= bpf_skb_change_tail,
3701 	.gpl_only	= false,
3702 	.ret_type	= RET_INTEGER,
3703 	.arg1_type	= ARG_PTR_TO_CTX,
3704 	.arg2_type	= ARG_ANYTHING,
3705 	.arg3_type	= ARG_ANYTHING,
3706 };
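
/* Editor's note: illustrative BPF-C usage sketch, not part of this file.
 * Trimming a packet down to its headers, e.g. before turning it into a
 * minimal control-message style reply as the comment above suggests. The
 * amount kept (8 payload bytes) is a placeholder.
 */
#if 0
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int trim_to_headers(struct __sk_buff *skb)
{
	__u32 keep = ETH_HLEN + sizeof(struct iphdr) + 8;

	if (skb->len > keep)
		bpf_skb_change_tail(skb, keep, 0);
	return TC_ACT_OK;
}
#endif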
3707 
3708 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3709 	   u64, flags)
3710 {
3711 	return __bpf_skb_change_tail(skb, new_len, flags);
3712 }
3713 
3714 static const struct bpf_func_proto sk_skb_change_tail_proto = {
3715 	.func		= sk_skb_change_tail,
3716 	.gpl_only	= false,
3717 	.ret_type	= RET_INTEGER,
3718 	.arg1_type	= ARG_PTR_TO_CTX,
3719 	.arg2_type	= ARG_ANYTHING,
3720 	.arg3_type	= ARG_ANYTHING,
3721 };
3722 
3723 static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3724 					u64 flags)
3725 {
3726 	u32 max_len = BPF_SKB_MAX_LEN;
3727 	u32 new_len = skb->len + head_room;
3728 	int ret;
3729 
3730 	if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3731 		     new_len < skb->len))
3732 		return -EINVAL;
3733 
3734 	ret = skb_cow(skb, head_room);
3735 	if (likely(!ret)) {
3736 		/* The idea for this helper is that we currently only
3737 		 * allow expanding on the mac header. This means that
3738 		 * skb->protocol, the network header, etc., stay as is.
3739 		 * Compared to bpf_skb_change_tail(), we're more
3740 		 * flexible due to not needing to linearize or
3741 		 * reset GSO. Intention for this helper is to be
3742 		 * used by an L3 skb that needs to push mac header
3743 		 * for redirection into L2 device.
3744 		 */
3745 		__skb_push(skb, head_room);
3746 		memset(skb->data, 0, head_room);
3747 		skb_reset_mac_header(skb);
3748 		skb_reset_mac_len(skb);
3749 	}
3750 
3751 	return ret;
3752 }
3753 
3754 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3755 	   u64, flags)
3756 {
3757 	int ret = __bpf_skb_change_head(skb, head_room, flags);
3758 
3759 	bpf_compute_data_pointers(skb);
3760 	return ret;
3761 }
3762 
3763 static const struct bpf_func_proto bpf_skb_change_head_proto = {
3764 	.func		= bpf_skb_change_head,
3765 	.gpl_only	= false,
3766 	.ret_type	= RET_INTEGER,
3767 	.arg1_type	= ARG_PTR_TO_CTX,
3768 	.arg2_type	= ARG_ANYTHING,
3769 	.arg3_type	= ARG_ANYTHING,
3770 };
3771 
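/* A sketch of the kind of program the comment above has in mind, from an
 * lwt_xmit program under the same libbpf conventions as the earlier
 * sketch; the MAC addresses and target_ifindex are hypothetical
 * placeholders for whatever neighbour/device state a real program would
 * consult before handing the skb to the L2 device:
 *
 *	#include <linux/bpf.h>
 *	#include <linux/if_ether.h>
 *	#include <bpf/bpf_helpers.h>
 *	#include <bpf/bpf_endian.h>
 *
 *	SEC("lwt_xmit")
 *	int push_eth_hdr(struct __sk_buff *skb)
 *	{
 *		const int target_ifindex = 4;
 *		struct ethhdr eth = {
 *			.h_dest   = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
 *			.h_source = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 },
 *			.h_proto  = bpf_htons(ETH_P_IP),
 *		};
 *
 *		if (bpf_skb_change_head(skb, sizeof(eth), 0) ||
 *		    bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
 *			return BPF_DROP;
 *		return bpf_redirect(target_ifindex, 0);
 *	}
 */
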
3772 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3773 	   u64, flags)
3774 {
3775 	return __bpf_skb_change_head(skb, head_room, flags);
3776 }
3777 
3778 static const struct bpf_func_proto sk_skb_change_head_proto = {
3779 	.func		= sk_skb_change_head,
3780 	.gpl_only	= false,
3781 	.ret_type	= RET_INTEGER,
3782 	.arg1_type	= ARG_PTR_TO_CTX,
3783 	.arg2_type	= ARG_ANYTHING,
3784 	.arg3_type	= ARG_ANYTHING,
3785 };

3786 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3787 {
3788 	return xdp_data_meta_unsupported(xdp) ? 0 :
3789 	       xdp->data - xdp->data_meta;
3790 }
3791 
3792 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3793 {
3794 	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3795 	unsigned long metalen = xdp_get_metalen(xdp);
3796 	void *data_start = xdp_frame_end + metalen;
3797 	void *data = xdp->data + offset;
3798 
3799 	if (unlikely(data < data_start ||
3800 		     data > xdp->data_end - ETH_HLEN))
3801 		return -EINVAL;
3802 
3803 	if (metalen)
3804 		memmove(xdp->data_meta + offset,
3805 			xdp->data_meta, metalen);
3806 	xdp->data_meta += offset;
3807 	xdp->data = data;
3808 
3809 	return 0;
3810 }
3811 
3812 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3813 	.func		= bpf_xdp_adjust_head,
3814 	.gpl_only	= false,
3815 	.ret_type	= RET_INTEGER,
3816 	.arg1_type	= ARG_PTR_TO_CTX,
3817 	.arg2_type	= ARG_ANYTHING,
3818 };
3819 
3820 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3821 {
3822 	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
3823 	void *data_end = xdp->data_end + offset;
3824 
3825 	/* Notice that xdp_data_hard_end has reserved some tailroom */
3826 	if (unlikely(data_end > data_hard_end))
3827 		return -EINVAL;
3828 
3829 	/* ALL drivers MUST init xdp->frame_sz, chicken check below */
3830 	if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
3831 		WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
3832 		return -EINVAL;
3833 	}
3834 
3835 	if (unlikely(data_end < xdp->data + ETH_HLEN))
3836 		return -EINVAL;
3837 
3838 	/* Clear memory area on grow, can contain uninit kernel memory */
3839 	if (offset > 0)
3840 		memset(xdp->data_end, 0, offset);
3841 
3842 	xdp->data_end = data_end;
3843 
3844 	return 0;
3845 }
3846 
3847 static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3848 	.func		= bpf_xdp_adjust_tail,
3849 	.gpl_only	= false,
3850 	.ret_type	= RET_INTEGER,
3851 	.arg1_type	= ARG_PTR_TO_CTX,
3852 	.arg2_type	= ARG_ANYTHING,
3853 };
3854 
3855 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3856 {
3857 	void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3858 	void *meta = xdp->data_meta + offset;
3859 	unsigned long metalen = xdp->data - meta;
3860 
3861 	if (xdp_data_meta_unsupported(xdp))
3862 		return -ENOTSUPP;
3863 	if (unlikely(meta < xdp_frame_end ||
3864 		     meta > xdp->data))
3865 		return -EINVAL;
3866 	if (unlikely(xdp_metalen_invalid(metalen)))
3867 		return -EACCES;
3868 
3869 	xdp->data_meta = meta;
3870 
3871 	return 0;
3872 }
3873 
3874 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3875 	.func		= bpf_xdp_adjust_meta,
3876 	.gpl_only	= false,
3877 	.ret_type	= RET_INTEGER,
3878 	.arg1_type	= ARG_PTR_TO_CTX,
3879 	.arg2_type	= ARG_ANYTHING,
3880 };
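/* A short sketch of the usual metadata pattern from an XDP program: make
 * room in front of the packet with bpf_xdp_adjust_meta(), re-read the
 * data/data_meta pointers, bounds-check, and store a tag that a later
 * consumer (e.g. a tc program via __sk_buff->data_meta) can pick up.
 * Written against the usual libbpf conventions; the 0xcafe tag is an
 * arbitrary example value:
 *
 *	SEC("xdp")
 *	int tag_pkt(struct xdp_md *ctx)
 *	{
 *		__u32 *meta;
 *		void *data;
 *
 *		if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
 *			return XDP_PASS;
 *
 *		data = (void *)(long)ctx->data;
 *		meta = (void *)(long)ctx->data_meta;
 *		if ((void *)(meta + 1) > data)
 *			return XDP_PASS;
 *
 *		*meta = 0xcafe;
 *		return XDP_PASS;
 *	}
 */
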
3881 
3882 /* XDP_REDIRECT works by a three-step process, implemented in the functions
3883  * below:
3884  *
3885  * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
3886  *    of the redirect and store it (along with some other metadata) in a per-CPU
3887  *    struct bpf_redirect_info.
3888  *
3889  * 2. When the program returns the XDP_REDIRECT return code, the driver will
3890  *    call xdp_do_redirect() which will use the information in struct
3891  *    bpf_redirect_info to actually enqueue the frame into a map type-specific
3892  *    bulk queue structure.
3893  *
3894  * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
3895  *    which will flush all the different bulk queues, thus completing the
3896  *    redirect.
3897  *
3898  * Pointers to the map entries will be kept around for this whole sequence of
3899  * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
3900  * the core code; instead, the RCU protection relies on everything happening
3901  * inside a single NAPI poll sequence, which means it's between a pair of calls
3902  * to local_bh_disable()/local_bh_enable().
3903  *
3904  * The map entries are marked as __rcu and the map code makes sure to
3905  * dereference those pointers with rcu_dereference_check() in a way that works
3906  * for both sections that hold an rcu_read_lock() and sections that are
3907  * called from NAPI without a separate rcu_read_lock(). The code below does not
3908  * use RCU annotations, but relies on those in the map code.
3909  */
3910 void xdp_do_flush(void)
3911 {
3912 	__dev_flush();
3913 	__cpu_map_flush();
3914 	__xsk_map_flush();
3915 }
3916 EXPORT_SYMBOL_GPL(xdp_do_flush);
3917 
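/* A condensed driver-side sketch of the three steps above. The mydrv_*()
 * calls and the ring structure are hypothetical placeholders for the
 * driver-specific parts (descriptor handling, recycling, other verdicts,
 * NAPI completion); only bpf_prog_run_xdp(), xdp_do_redirect() and
 * xdp_do_flush() are the real interfaces being illustrated:
 *
 *	static int mydrv_napi_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct mydrv_ring *ring = mydrv_ring_from_napi(napi);
 *		struct bpf_prog *xdp_prog = READ_ONCE(ring->xdp_prog);
 *		bool flush = false;
 *		struct xdp_buff xdp;
 *		int done = 0;
 *
 *		while (done < budget && mydrv_fill_xdp_buff(ring, &xdp)) {
 *			u32 act = bpf_prog_run_xdp(xdp_prog, &xdp);
 *
 *			if (act == XDP_REDIRECT) {
 *				if (!xdp_do_redirect(ring->netdev, &xdp, xdp_prog))
 *					flush = true;
 *				else
 *					mydrv_recycle(ring, &xdp);
 *			} else {
 *				mydrv_handle_verdict(ring, &xdp, act);
 *			}
 *			done++;
 *		}
 *
 *		if (flush)
 *			xdp_do_flush();
 *		return done;
 *	}
 */
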
3918 void bpf_clear_redirect_map(struct bpf_map *map)
3919 {
3920 	struct bpf_redirect_info *ri;
3921 	int cpu;
3922 
3923 	for_each_possible_cpu(cpu) {
3924 		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3925 		/* Avoid polluting the remote cacheline with writes if
3926 		 * not needed. Once we pass this test, we need the
3927 		 * cmpxchg() to make sure it hasn't been changed in
3928 		 * the meantime by a remote CPU.
3929 		 */
3930 		if (unlikely(READ_ONCE(ri->map) == map))
3931 			cmpxchg(&ri->map, map, NULL);
3932 	}
3933 }
3934 
3935 DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
3936 EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
3937 
3938 u32 xdp_master_redirect(struct xdp_buff *xdp)
3939 {
3940 	struct net_device *master, *slave;
3941 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3942 
3943 	master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
3944 	slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
3945 	if (slave && slave != xdp->rxq->dev) {
3946 		/* The target device is different from the receiving device, so
3947 		 * redirect it to the new device.
3948 		 * Using XDP_REDIRECT gets the correct behaviour from XDP-enabled
3949 		 * drivers, which then unmap the packet from their rx ring.
3950 		 */
3951 		ri->tgt_index = slave->ifindex;
3952 		ri->map_id = INT_MAX;
3953 		ri->map_type = BPF_MAP_TYPE_UNSPEC;
3954 		return XDP_REDIRECT;
3955 	}
3956 	return XDP_TX;
3957 }
3958 EXPORT_SYMBOL_GPL(xdp_master_redirect);
3959 
3960 static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
3961 					struct net_device *dev,
3962 					struct xdp_buff *xdp,
3963 					struct bpf_prog *xdp_prog)
3964 {
3965 	enum bpf_map_type map_type = ri->map_type;
3966 	void *fwd = ri->tgt_value;
3967 	u32 map_id = ri->map_id;
3968 	int err;
3969 
3970 	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
3971 	ri->map_type = BPF_MAP_TYPE_UNSPEC;
3972 
3973 	err = __xsk_map_redirect(fwd, xdp);
3974 	if (unlikely(err))
3975 		goto err;
3976 
3977 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
3978 	return 0;
3979 err:
3980 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
3981 	return err;
3982 }
3983 
3984 static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
3985 						   struct net_device *dev,
3986 						   struct xdp_frame *xdpf,
3987 						   struct bpf_prog *xdp_prog)
3988 {
3989 	enum bpf_map_type map_type = ri->map_type;
3990 	void *fwd = ri->tgt_value;
3991 	u32 map_id = ri->map_id;
3992 	struct bpf_map *map;
3993 	int err;
3994 
3995 	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
3996 	ri->map_type = BPF_MAP_TYPE_UNSPEC;
3997 
3998 	if (unlikely(!xdpf)) {
3999 		err = -EOVERFLOW;
4000 		goto err;
4001 	}
4002 
4003 	switch (map_type) {
4004 	case BPF_MAP_TYPE_DEVMAP:
4005 		fallthrough;
4006 	case BPF_MAP_TYPE_DEVMAP_HASH:
4007 		map = READ_ONCE(ri->map);
4008 		if (unlikely(map)) {
4009 			WRITE_ONCE(ri->map, NULL);
4010 			err = dev_map_enqueue_multi(xdpf, dev, map,
4011 						    ri->flags & BPF_F_EXCLUDE_INGRESS);
4012 		} else {
4013 			err = dev_map_enqueue(fwd, xdpf, dev);
4014 		}
4015 		break;
4016 	case BPF_MAP_TYPE_CPUMAP:
4017 		err = cpu_map_enqueue(fwd, xdpf, dev);
4018 		break;
4019 	case BPF_MAP_TYPE_UNSPEC:
4020 		if (map_id == INT_MAX) {
4021 			fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4022 			if (unlikely(!fwd)) {
4023 				err = -EINVAL;
4024 				break;
4025 			}
4026 			err = dev_xdp_enqueue(fwd, xdpf, dev);
4027 			break;
4028 		}
4029 		fallthrough;
4030 	default:
4031 		err = -EBADRQC;
4032 	}
4033 
4034 	if (unlikely(err))
4035 		goto err;
4036 
4037 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4038 	return 0;
4039 err:
4040 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4041 	return err;
4042 }
4043 
4044 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
4045 		    struct bpf_prog *xdp_prog)
4046 {
4047 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4048 	enum bpf_map_type map_type = ri->map_type;
4049 
4050 	if (map_type == BPF_MAP_TYPE_XSKMAP)
4051 		return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4052 
4053 	return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
4054 				       xdp_prog);
4055 }
4056 EXPORT_SYMBOL_GPL(xdp_do_redirect);
4057 
4058 int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
4059 			  struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
4060 {
4061 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4062 	enum bpf_map_type map_type = ri->map_type;
4063 
4064 	if (map_type == BPF_MAP_TYPE_XSKMAP)
4065 		return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4066 
4067 	return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
4068 }
4069 EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
4070 
4071 static int xdp_do_generic_redirect_map(struct net_device *dev,
4072 				       struct sk_buff *skb,
4073 				       struct xdp_buff *xdp,
4074 				       struct bpf_prog *xdp_prog,
4075 				       void *fwd,
4076 				       enum bpf_map_type map_type, u32 map_id)
4077 {
4078 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4079 	struct bpf_map *map;
4080 	int err;
4081 
4082 	switch (map_type) {
4083 	case BPF_MAP_TYPE_DEVMAP:
4084 		fallthrough;
4085 	case BPF_MAP_TYPE_DEVMAP_HASH:
4086 		map = READ_ONCE(ri->map);
4087 		if (unlikely(map)) {
4088 			WRITE_ONCE(ri->map, NULL);
4089 			err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4090 						     ri->flags & BPF_F_EXCLUDE_INGRESS);
4091 		} else {
4092 			err = dev_map_generic_redirect(fwd, skb, xdp_prog);
4093 		}
4094 		if (unlikely(err))
4095 			goto err;
4096 		break;
4097 	case BPF_MAP_TYPE_XSKMAP:
4098 		err = xsk_generic_rcv(fwd, xdp);
4099 		if (err)
4100 			goto err;
4101 		consume_skb(skb);
4102 		break;
4103 	case BPF_MAP_TYPE_CPUMAP:
4104 		err = cpu_map_generic_redirect(fwd, skb);
4105 		if (unlikely(err))
4106 			goto err;
4107 		break;
4108 	default:
4109 		err = -EBADRQC;
4110 		goto err;
4111 	}
4112 
4113 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4114 	return 0;
4115 err:
4116 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4117 	return err;
4118 }
4119 
4120 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
4121 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
4122 {
4123 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4124 	enum bpf_map_type map_type = ri->map_type;
4125 	void *fwd = ri->tgt_value;
4126 	u32 map_id = ri->map_id;
4127 	int err;
4128 
4129 	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4130 	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4131 
4132 	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4133 		fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4134 		if (unlikely(!fwd)) {
4135 			err = -EINVAL;
4136 			goto err;
4137 		}
4138 
4139 		err = xdp_ok_fwd_dev(fwd, skb->len);
4140 		if (unlikely(err))
4141 			goto err;
4142 
4143 		skb->dev = fwd;
4144 		_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4145 		generic_xdp_tx(skb, xdp_prog);
4146 		return 0;
4147 	}
4148 
4149 	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
4150 err:
4151 	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
4152 	return err;
4153 }
4154 
4155 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4156 {
4157 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4158 
4159 	if (unlikely(flags))
4160 		return XDP_ABORTED;
4161 
4162 	/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
4163 	 * by map_idr) are used for ifindex-based XDP redirect.
4164 	 */
4165 	ri->tgt_index = ifindex;
4166 	ri->map_id = INT_MAX;
4167 	ri->map_type = BPF_MAP_TYPE_UNSPEC;
4168 
4169 	return XDP_REDIRECT;
4170 }
4171 
4172 static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4173 	.func           = bpf_xdp_redirect,
4174 	.gpl_only       = false,
4175 	.ret_type       = RET_INTEGER,
4176 	.arg1_type      = ARG_ANYTHING,
4177 	.arg2_type      = ARG_ANYTHING,
4178 };
4179 
4180 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
4181 	   u64, flags)
4182 {
4183 	return map->ops->map_redirect(map, ifindex, flags);
4184 }
4185 
4186 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4187 	.func           = bpf_xdp_redirect_map,
4188 	.gpl_only       = false,
4189 	.ret_type       = RET_INTEGER,
4190 	.arg1_type      = ARG_CONST_MAP_PTR,
4191 	.arg2_type      = ARG_ANYTHING,
4192 	.arg3_type      = ARG_ANYTHING,
4193 };
4194 
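/* A minimal XDP-side sketch of the map-based variant: a BPF_MAP_TYPE_DEVMAP
 * whose slot 0 is assumed to have been populated with a target ifindex by
 * user space, with XDP_PASS carried in the lower bits of the flags argument
 * as the fallback action (libbpf conventions as in the earlier sketches):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_DEVMAP);
 *		__uint(max_entries, 64);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} tx_ports SEC(".maps");
 *
 *	SEC("xdp")
 *	int redirect_slot0(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&tx_ports, 0, XDP_PASS);
 *	}
 */
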
4195 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
4196 				  unsigned long off, unsigned long len)
4197 {
4198 	void *ptr = skb_header_pointer(skb, off, len, dst_buff);
4199 
4200 	if (unlikely(!ptr))
4201 		return len;
4202 	if (ptr != dst_buff)
4203 		memcpy(dst_buff, ptr, len);
4204 
4205 	return 0;
4206 }
4207 
4208 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
4209 	   u64, flags, void *, meta, u64, meta_size)
4210 {
4211 	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4212 
4213 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4214 		return -EINVAL;
4215 	if (unlikely(!skb || skb_size > skb->len))
4216 		return -EFAULT;
4217 
4218 	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
4219 				bpf_skb_copy);
4220 }
4221 
4222 static const struct bpf_func_proto bpf_skb_event_output_proto = {
4223 	.func		= bpf_skb_event_output,
4224 	.gpl_only	= true,
4225 	.ret_type	= RET_INTEGER,
4226 	.arg1_type	= ARG_PTR_TO_CTX,
4227 	.arg2_type	= ARG_CONST_MAP_PTR,
4228 	.arg3_type	= ARG_ANYTHING,
4229 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4230 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
4231 };
4232 
4233 BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
4234 
4235 const struct bpf_func_proto bpf_skb_output_proto = {
4236 	.func		= bpf_skb_event_output,
4237 	.gpl_only	= true,
4238 	.ret_type	= RET_INTEGER,
4239 	.arg1_type	= ARG_PTR_TO_BTF_ID,
4240 	.arg1_btf_id	= &bpf_skb_output_btf_ids[0],
4241 	.arg2_type	= ARG_CONST_MAP_PTR,
4242 	.arg3_type	= ARG_ANYTHING,
4243 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4244 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
4245 };
4246 
4247 static unsigned short bpf_tunnel_key_af(u64 flags)
4248 {
4249 	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4250 }
4251 
4252 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
4253 	   u32, size, u64, flags)
4254 {
4255 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4256 	u8 compat[sizeof(struct bpf_tunnel_key)];
4257 	void *to_orig = to;
4258 	int err;
4259 
4260 	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
4261 		err = -EINVAL;
4262 		goto err_clear;
4263 	}
4264 	if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
4265 		err = -EPROTO;
4266 		goto err_clear;
4267 	}
4268 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4269 		err = -EINVAL;
4270 		switch (size) {
4271 		case offsetof(struct bpf_tunnel_key, tunnel_label):
4272 		case offsetof(struct bpf_tunnel_key, tunnel_ext):
4273 			goto set_compat;
4274 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4275 			/* Fix up deprecated structure layouts here, so we have
4276 			 * a common path later on.
4277 			 */
4278 			if (ip_tunnel_info_af(info) != AF_INET)
4279 				goto err_clear;
4280 set_compat:
4281 			to = (struct bpf_tunnel_key *)compat;
4282 			break;
4283 		default:
4284 			goto err_clear;
4285 		}
4286 	}
4287 
4288 	to->tunnel_id = be64_to_cpu(info->key.tun_id);
4289 	to->tunnel_tos = info->key.tos;
4290 	to->tunnel_ttl = info->key.ttl;
4291 	to->tunnel_ext = 0;
4292 
4293 	if (flags & BPF_F_TUNINFO_IPV6) {
4294 		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4295 		       sizeof(to->remote_ipv6));
4296 		to->tunnel_label = be32_to_cpu(info->key.label);
4297 	} else {
4298 		to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4299 		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
4300 		to->tunnel_label = 0;
4301 	}
4302 
4303 	if (unlikely(size != sizeof(struct bpf_tunnel_key)))
4304 		memcpy(to_orig, to, size);
4305 
4306 	return 0;
4307 err_clear:
4308 	memset(to_orig, 0, size);
4309 	return err;
4310 }
4311 
4312 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
4313 	.func		= bpf_skb_get_tunnel_key,
4314 	.gpl_only	= false,
4315 	.ret_type	= RET_INTEGER,
4316 	.arg1_type	= ARG_PTR_TO_CTX,
4317 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
4318 	.arg3_type	= ARG_CONST_SIZE,
4319 	.arg4_type	= ARG_ANYTHING,
4320 };
4321 
4322 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
4323 {
4324 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4325 	int err;
4326 
4327 	if (unlikely(!info ||
4328 		     !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
4329 		err = -ENOENT;
4330 		goto err_clear;
4331 	}
4332 	if (unlikely(size < info->options_len)) {
4333 		err = -ENOMEM;
4334 		goto err_clear;
4335 	}
4336 
4337 	ip_tunnel_info_opts_get(to, info);
4338 	if (size > info->options_len)
4339 		memset(to + info->options_len, 0, size - info->options_len);
4340 
4341 	return info->options_len;
4342 err_clear:
4343 	memset(to, 0, size);
4344 	return err;
4345 }
4346 
4347 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4348 	.func		= bpf_skb_get_tunnel_opt,
4349 	.gpl_only	= false,
4350 	.ret_type	= RET_INTEGER,
4351 	.arg1_type	= ARG_PTR_TO_CTX,
4352 	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
4353 	.arg3_type	= ARG_CONST_SIZE,
4354 };
4355 
4356 static struct metadata_dst __percpu *md_dst;
4357 
4358 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4359 	   const struct bpf_tunnel_key *, from, u32, size, u64, flags)
4360 {
4361 	struct metadata_dst *md = this_cpu_ptr(md_dst);
4362 	u8 compat[sizeof(struct bpf_tunnel_key)];
4363 	struct ip_tunnel_info *info;
4364 
4365 	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
4366 			       BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
4367 		return -EINVAL;
4368 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4369 		switch (size) {
4370 		case offsetof(struct bpf_tunnel_key, tunnel_label):
4371 		case offsetof(struct bpf_tunnel_key, tunnel_ext):
4372 		case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4373 			/* Fix up deprecated structure layouts here, so we have
4374 			 * a common path later on.
4375 			 */
4376 			memcpy(compat, from, size);
4377 			memset(compat + size, 0, sizeof(compat) - size);
4378 			from = (const struct bpf_tunnel_key *) compat;
4379 			break;
4380 		default:
4381 			return -EINVAL;
4382 		}
4383 	}
4384 	if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
4385 		     from->tunnel_ext))
4386 		return -EINVAL;
4387 
4388 	skb_dst_drop(skb);
4389 	dst_hold((struct dst_entry *) md);
4390 	skb_dst_set(skb, (struct dst_entry *) md);
4391 
4392 	info = &md->u.tun_info;
4393 	memset(info, 0, sizeof(*info));
4394 	info->mode = IP_TUNNEL_INFO_TX;
4395 
4396 	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
4397 	if (flags & BPF_F_DONT_FRAGMENT)
4398 		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
4399 	if (flags & BPF_F_ZERO_CSUM_TX)
4400 		info->key.tun_flags &= ~TUNNEL_CSUM;
4401 	if (flags & BPF_F_SEQ_NUMBER)
4402 		info->key.tun_flags |= TUNNEL_SEQ;
4403 
4404 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
4405 	info->key.tos = from->tunnel_tos;
4406 	info->key.ttl = from->tunnel_ttl;
4407 
4408 	if (flags & BPF_F_TUNINFO_IPV6) {
4409 		info->mode |= IP_TUNNEL_INFO_IPV6;
4410 		memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4411 		       sizeof(from->remote_ipv6));
4412 		info->key.label = cpu_to_be32(from->tunnel_label) &
4413 				  IPV6_FLOWLABEL_MASK;
4414 	} else {
4415 		info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
4416 	}
4417 
4418 	return 0;
4419 }
4420 
4421 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
4422 	.func		= bpf_skb_set_tunnel_key,
4423 	.gpl_only	= false,
4424 	.ret_type	= RET_INTEGER,
4425 	.arg1_type	= ARG_PTR_TO_CTX,
4426 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4427 	.arg3_type	= ARG_CONST_SIZE,
4428 	.arg4_type	= ARG_ANYTHING,
4429 };
4430 
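/* A small sketch of the typical collect_md use from a tc egress program
 * attached to a metadata-mode tunnel device (e.g. a VXLAN/GENEVE netdev
 * created with external/collect_md set); the tunnel id and the 0xac100164
 * remote address (172.16.1.100, host byte order) are example values:
 *
 *	SEC("tc")
 *	int set_tunnel(struct __sk_buff *skb)
 *	{
 *		struct bpf_tunnel_key key = {
 *			.tunnel_id	= 42,
 *			.remote_ipv4	= 0xac100164,
 *			.tunnel_ttl	= 64,
 *		};
 *
 *		if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 *					   BPF_F_ZERO_CSUM_TX))
 *			return TC_ACT_SHOT;
 *		return TC_ACT_OK;
 *	}
 */
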
4431 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4432 	   const u8 *, from, u32, size)
4433 {
4434 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
4435 	const struct metadata_dst *md = this_cpu_ptr(md_dst);
4436 
4437 	if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4438 		return -EINVAL;
4439 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4440 		return -ENOMEM;
4441 
4442 	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
4443 
4444 	return 0;
4445 }
4446 
4447 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4448 	.func		= bpf_skb_set_tunnel_opt,
4449 	.gpl_only	= false,
4450 	.ret_type	= RET_INTEGER,
4451 	.arg1_type	= ARG_PTR_TO_CTX,
4452 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4453 	.arg3_type	= ARG_CONST_SIZE,
4454 };
4455 
4456 static const struct bpf_func_proto *
4457 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4458 {
4459 	if (!md_dst) {
4460 		struct metadata_dst __percpu *tmp;
4461 
4462 		tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4463 						METADATA_IP_TUNNEL,
4464 						GFP_KERNEL);
4465 		if (!tmp)
4466 			return NULL;
4467 		if (cmpxchg(&md_dst, NULL, tmp))
4468 			metadata_dst_free_percpu(tmp);
4469 	}
4470 
4471 	switch (which) {
4472 	case BPF_FUNC_skb_set_tunnel_key:
4473 		return &bpf_skb_set_tunnel_key_proto;
4474 	case BPF_FUNC_skb_set_tunnel_opt:
4475 		return &bpf_skb_set_tunnel_opt_proto;
4476 	default:
4477 		return NULL;
4478 	}
4479 }
4480 
4481 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
4482 	   u32, idx)
4483 {
4484 	struct bpf_array *array = container_of(map, struct bpf_array, map);
4485 	struct cgroup *cgrp;
4486 	struct sock *sk;
4487 
4488 	sk = skb_to_full_sk(skb);
4489 	if (!sk || !sk_fullsock(sk))
4490 		return -ENOENT;
4491 	if (unlikely(idx >= array->map.max_entries))
4492 		return -E2BIG;
4493 
4494 	cgrp = READ_ONCE(array->ptrs[idx]);
4495 	if (unlikely(!cgrp))
4496 		return -EAGAIN;
4497 
4498 	return sk_under_cgroup_hierarchy(sk, cgrp);
4499 }
4500 
4501 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4502 	.func		= bpf_skb_under_cgroup,
4503 	.gpl_only	= false,
4504 	.ret_type	= RET_INTEGER,
4505 	.arg1_type	= ARG_PTR_TO_CTX,
4506 	.arg2_type	= ARG_CONST_MAP_PTR,
4507 	.arg3_type	= ARG_ANYTHING,
4508 };
4509 
4510 #ifdef CONFIG_SOCK_CGROUP_DATA
4511 static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4512 {
4513 	struct cgroup *cgrp;
4514 
4515 	sk = sk_to_full_sk(sk);
4516 	if (!sk || !sk_fullsock(sk))
4517 		return 0;
4518 
4519 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4520 	return cgroup_id(cgrp);
4521 }
4522 
4523 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4524 {
4525 	return __bpf_sk_cgroup_id(skb->sk);
4526 }
4527 
4528 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4529 	.func           = bpf_skb_cgroup_id,
4530 	.gpl_only       = false,
4531 	.ret_type       = RET_INTEGER,
4532 	.arg1_type      = ARG_PTR_TO_CTX,
4533 };
4534 
4535 static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4536 					      int ancestor_level)
4537 {
4538 	struct cgroup *ancestor;
4539 	struct cgroup *cgrp;
4540 
4541 	sk = sk_to_full_sk(sk);
4542 	if (!sk || !sk_fullsock(sk))
4543 		return 0;
4544 
4545 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4546 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
4547 	if (!ancestor)
4548 		return 0;
4549 
4550 	return cgroup_id(ancestor);
4551 }
4552 
4553 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4554 	   ancestor_level)
4555 {
4556 	return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
4557 }
4558 
4559 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4560 	.func           = bpf_skb_ancestor_cgroup_id,
4561 	.gpl_only       = false,
4562 	.ret_type       = RET_INTEGER,
4563 	.arg1_type      = ARG_PTR_TO_CTX,
4564 	.arg2_type      = ARG_ANYTHING,
4565 };
4566 
4567 BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4568 {
4569 	return __bpf_sk_cgroup_id(sk);
4570 }
4571 
4572 static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4573 	.func           = bpf_sk_cgroup_id,
4574 	.gpl_only       = false,
4575 	.ret_type       = RET_INTEGER,
4576 	.arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4577 };
4578 
4579 BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4580 {
4581 	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4582 }
4583 
4584 static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4585 	.func           = bpf_sk_ancestor_cgroup_id,
4586 	.gpl_only       = false,
4587 	.ret_type       = RET_INTEGER,
4588 	.arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4589 	.arg2_type      = ARG_ANYTHING,
4590 };
4591 #endif
4592 
4593 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
4594 				  unsigned long off, unsigned long len)
4595 {
4596 	memcpy(dst_buff, src_buff + off, len);
4597 	return 0;
4598 }
4599 
4600 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4601 	   u64, flags, void *, meta, u64, meta_size)
4602 {
4603 	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4604 
4605 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4606 		return -EINVAL;
4607 	if (unlikely(!xdp ||
4608 		     xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
4609 		return -EFAULT;
4610 
4611 	return bpf_event_output(map, flags, meta, meta_size, xdp->data,
4612 				xdp_size, bpf_xdp_copy);
4613 }
4614 
4615 static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4616 	.func		= bpf_xdp_event_output,
4617 	.gpl_only	= true,
4618 	.ret_type	= RET_INTEGER,
4619 	.arg1_type	= ARG_PTR_TO_CTX,
4620 	.arg2_type	= ARG_CONST_MAP_PTR,
4621 	.arg3_type	= ARG_ANYTHING,
4622 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4623 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
4624 };
4625 
4626 BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
4627 
4628 const struct bpf_func_proto bpf_xdp_output_proto = {
4629 	.func		= bpf_xdp_event_output,
4630 	.gpl_only	= true,
4631 	.ret_type	= RET_INTEGER,
4632 	.arg1_type	= ARG_PTR_TO_BTF_ID,
4633 	.arg1_btf_id	= &bpf_xdp_output_btf_ids[0],
4634 	.arg2_type	= ARG_CONST_MAP_PTR,
4635 	.arg3_type	= ARG_ANYTHING,
4636 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
4637 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
4638 };
4639 
4640 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4641 {
4642 	return skb->sk ? __sock_gen_cookie(skb->sk) : 0;
4643 }
4644 
4645 static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4646 	.func           = bpf_get_socket_cookie,
4647 	.gpl_only       = false,
4648 	.ret_type       = RET_INTEGER,
4649 	.arg1_type      = ARG_PTR_TO_CTX,
4650 };
4651 
4652 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4653 {
4654 	return __sock_gen_cookie(ctx->sk);
4655 }
4656 
4657 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4658 	.func		= bpf_get_socket_cookie_sock_addr,
4659 	.gpl_only	= false,
4660 	.ret_type	= RET_INTEGER,
4661 	.arg1_type	= ARG_PTR_TO_CTX,
4662 };
4663 
4664 BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4665 {
4666 	return __sock_gen_cookie(ctx);
4667 }
4668 
4669 static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4670 	.func		= bpf_get_socket_cookie_sock,
4671 	.gpl_only	= false,
4672 	.ret_type	= RET_INTEGER,
4673 	.arg1_type	= ARG_PTR_TO_CTX,
4674 };
4675 
4676 BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
4677 {
4678 	return sk ? sock_gen_cookie(sk) : 0;
4679 }
4680 
4681 const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
4682 	.func		= bpf_get_socket_ptr_cookie,
4683 	.gpl_only	= false,
4684 	.ret_type	= RET_INTEGER,
4685 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4686 };
4687 
4688 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4689 {
4690 	return __sock_gen_cookie(ctx->sk);
4691 }
4692 
4693 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4694 	.func		= bpf_get_socket_cookie_sock_ops,
4695 	.gpl_only	= false,
4696 	.ret_type	= RET_INTEGER,
4697 	.arg1_type	= ARG_PTR_TO_CTX,
4698 };
4699 
4700 static u64 __bpf_get_netns_cookie(struct sock *sk)
4701 {
4702 	const struct net *net = sk ? sock_net(sk) : &init_net;
4703 
4704 	return net->net_cookie;
4705 }
4706 
4707 BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4708 {
4709 	return __bpf_get_netns_cookie(ctx);
4710 }
4711 
4712 static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4713 	.func		= bpf_get_netns_cookie_sock,
4714 	.gpl_only	= false,
4715 	.ret_type	= RET_INTEGER,
4716 	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
4717 };
4718 
4719 BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4720 {
4721 	return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4722 }
4723 
4724 static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4725 	.func		= bpf_get_netns_cookie_sock_addr,
4726 	.gpl_only	= false,
4727 	.ret_type	= RET_INTEGER,
4728 	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
4729 };
4730 
4731 BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4732 {
4733 	return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4734 }
4735 
4736 static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
4737 	.func		= bpf_get_netns_cookie_sock_ops,
4738 	.gpl_only	= false,
4739 	.ret_type	= RET_INTEGER,
4740 	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
4741 };
4742 
4743 BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
4744 {
4745 	return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4746 }
4747 
4748 static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
4749 	.func		= bpf_get_netns_cookie_sk_msg,
4750 	.gpl_only	= false,
4751 	.ret_type	= RET_INTEGER,
4752 	.arg1_type	= ARG_PTR_TO_CTX_OR_NULL,
4753 };
4754 
4755 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
4756 {
4757 	struct sock *sk = sk_to_full_sk(skb->sk);
4758 	kuid_t kuid;
4759 
4760 	if (!sk || !sk_fullsock(sk))
4761 		return overflowuid;
4762 	kuid = sock_net_uid(sock_net(sk), sk);
4763 	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
4764 }
4765 
4766 static const struct bpf_func_proto bpf_get_socket_uid_proto = {
4767 	.func           = bpf_get_socket_uid,
4768 	.gpl_only       = false,
4769 	.ret_type       = RET_INTEGER,
4770 	.arg1_type      = ARG_PTR_TO_CTX,
4771 };
4772 
4773 static int _bpf_setsockopt(struct sock *sk, int level, int optname,
4774 			   char *optval, int optlen)
4775 {
4776 	char devname[IFNAMSIZ];
4777 	int val, valbool;
4778 	struct net *net;
4779 	int ifindex;
4780 	int ret = 0;
4781 
4782 	if (!sk_fullsock(sk))
4783 		return -EINVAL;
4784 
4785 	sock_owned_by_me(sk);
4786 
4787 	if (level == SOL_SOCKET) {
4788 		if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
4789 			return -EINVAL;
4790 		val = *((int *)optval);
4791 		valbool = val ? 1 : 0;
4792 
4793 		/* Only some socketops are supported */
4794 		switch (optname) {
4795 		case SO_RCVBUF:
4796 			val = min_t(u32, val, sysctl_rmem_max);
4797 			val = min_t(int, val, INT_MAX / 2);
4798 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
4799 			WRITE_ONCE(sk->sk_rcvbuf,
4800 				   max_t(int, val * 2, SOCK_MIN_RCVBUF));
4801 			break;
4802 		case SO_SNDBUF:
4803 			val = min_t(u32, val, sysctl_wmem_max);
4804 			val = min_t(int, val, INT_MAX / 2);
4805 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
4806 			WRITE_ONCE(sk->sk_sndbuf,
4807 				   max_t(int, val * 2, SOCK_MIN_SNDBUF));
4808 			break;
4809 		case SO_MAX_PACING_RATE: /* 32bit version */
4810 			if (val != ~0U)
4811 				cmpxchg(&sk->sk_pacing_status,
4812 					SK_PACING_NONE,
4813 					SK_PACING_NEEDED);
4814 			sk->sk_max_pacing_rate = (val == ~0U) ?
4815 						 ~0UL : (unsigned int)val;
4816 			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
4817 						 sk->sk_max_pacing_rate);
4818 			break;
4819 		case SO_PRIORITY:
4820 			sk->sk_priority = val;
4821 			break;
4822 		case SO_RCVLOWAT:
4823 			if (val < 0)
4824 				val = INT_MAX;
4825 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
4826 			break;
4827 		case SO_MARK:
4828 			if (sk->sk_mark != val) {
4829 				sk->sk_mark = val;
4830 				sk_dst_reset(sk);
4831 			}
4832 			break;
4833 		case SO_BINDTODEVICE:
4834 			optlen = min_t(long, optlen, IFNAMSIZ - 1);
4835 			strncpy(devname, optval, optlen);
4836 			devname[optlen] = 0;
4837 
4838 			ifindex = 0;
4839 			if (devname[0] != '\0') {
4840 				struct net_device *dev;
4841 
4842 				ret = -ENODEV;
4843 
4844 				net = sock_net(sk);
4845 				dev = dev_get_by_name(net, devname);
4846 				if (!dev)
4847 					break;
4848 				ifindex = dev->ifindex;
4849 				dev_put(dev);
4850 			}
4851 			fallthrough;
4852 		case SO_BINDTOIFINDEX:
4853 			if (optname == SO_BINDTOIFINDEX)
4854 				ifindex = val;
4855 			ret = sock_bindtoindex(sk, ifindex, false);
4856 			break;
4857 		case SO_KEEPALIVE:
4858 			if (sk->sk_prot->keepalive)
4859 				sk->sk_prot->keepalive(sk, valbool);
4860 			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
4861 			break;
4862 		case SO_REUSEPORT:
4863 			sk->sk_reuseport = valbool;
4864 			break;
4865 		default:
4866 			ret = -EINVAL;
4867 		}
4868 #ifdef CONFIG_INET
4869 	} else if (level == SOL_IP) {
4870 		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4871 			return -EINVAL;
4872 
4873 		val = *((int *)optval);
4874 		/* Only some options are supported */
4875 		switch (optname) {
4876 		case IP_TOS:
4877 			if (val < -1 || val > 0xff) {
4878 				ret = -EINVAL;
4879 			} else {
4880 				struct inet_sock *inet = inet_sk(sk);
4881 
4882 				if (val == -1)
4883 					val = 0;
4884 				inet->tos = val;
4885 			}
4886 			break;
4887 		default:
4888 			ret = -EINVAL;
4889 		}
4890 #if IS_ENABLED(CONFIG_IPV6)
4891 	} else if (level == SOL_IPV6) {
4892 		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4893 			return -EINVAL;
4894 
4895 		val = *((int *)optval);
4896 		/* Only some options are supported */
4897 		switch (optname) {
4898 		case IPV6_TCLASS:
4899 			if (val < -1 || val > 0xff) {
4900 				ret = -EINVAL;
4901 			} else {
4902 				struct ipv6_pinfo *np = inet6_sk(sk);
4903 
4904 				if (val == -1)
4905 					val = 0;
4906 				np->tclass = val;
4907 			}
4908 			break;
4909 		default:
4910 			ret = -EINVAL;
4911 		}
4912 #endif
4913 	} else if (level == SOL_TCP &&
4914 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
4915 		if (optname == TCP_CONGESTION) {
4916 			char name[TCP_CA_NAME_MAX];
4917 
4918 			strncpy(name, optval, min_t(long, optlen,
4919 						    TCP_CA_NAME_MAX-1));
4920 			name[TCP_CA_NAME_MAX-1] = 0;
4921 			ret = tcp_set_congestion_control(sk, name, false, true);
4922 		} else {
4923 			struct inet_connection_sock *icsk = inet_csk(sk);
4924 			struct tcp_sock *tp = tcp_sk(sk);
4925 			unsigned long timeout;
4926 
4927 			if (optlen != sizeof(int))
4928 				return -EINVAL;
4929 
4930 			val = *((int *)optval);
4931 			/* Only some options are supported */
4932 			switch (optname) {
4933 			case TCP_BPF_IW:
4934 				if (val <= 0 || tp->data_segs_out > tp->syn_data)
4935 					ret = -EINVAL;
4936 				else
4937 					tp->snd_cwnd = val;
4938 				break;
4939 			case TCP_BPF_SNDCWND_CLAMP:
4940 				if (val <= 0) {
4941 					ret = -EINVAL;
4942 				} else {
4943 					tp->snd_cwnd_clamp = val;
4944 					tp->snd_ssthresh = val;
4945 				}
4946 				break;
4947 			case TCP_BPF_DELACK_MAX:
4948 				timeout = usecs_to_jiffies(val);
4949 				if (timeout > TCP_DELACK_MAX ||
4950 				    timeout < TCP_TIMEOUT_MIN)
4951 					return -EINVAL;
4952 				inet_csk(sk)->icsk_delack_max = timeout;
4953 				break;
4954 			case TCP_BPF_RTO_MIN:
4955 				timeout = usecs_to_jiffies(val);
4956 				if (timeout > TCP_RTO_MIN ||
4957 				    timeout < TCP_TIMEOUT_MIN)
4958 					return -EINVAL;
4959 				inet_csk(sk)->icsk_rto_min = timeout;
4960 				break;
4961 			case TCP_SAVE_SYN:
4962 				if (val < 0 || val > 1)
4963 					ret = -EINVAL;
4964 				else
4965 					tp->save_syn = val;
4966 				break;
4967 			case TCP_KEEPIDLE:
4968 				ret = tcp_sock_set_keepidle_locked(sk, val);
4969 				break;
4970 			case TCP_KEEPINTVL:
4971 				if (val < 1 || val > MAX_TCP_KEEPINTVL)
4972 					ret = -EINVAL;
4973 				else
4974 					tp->keepalive_intvl = val * HZ;
4975 				break;
4976 			case TCP_KEEPCNT:
4977 				if (val < 1 || val > MAX_TCP_KEEPCNT)
4978 					ret = -EINVAL;
4979 				else
4980 					tp->keepalive_probes = val;
4981 				break;
4982 			case TCP_SYNCNT:
4983 				if (val < 1 || val > MAX_TCP_SYNCNT)
4984 					ret = -EINVAL;
4985 				else
4986 					icsk->icsk_syn_retries = val;
4987 				break;
4988 			case TCP_USER_TIMEOUT:
4989 				if (val < 0)
4990 					ret = -EINVAL;
4991 				else
4992 					icsk->icsk_user_timeout = val;
4993 				break;
4994 			case TCP_NOTSENT_LOWAT:
4995 				tp->notsent_lowat = val;
4996 				sk->sk_write_space(sk);
4997 				break;
4998 			case TCP_WINDOW_CLAMP:
4999 				ret = tcp_set_window_clamp(sk, val);
5000 				break;
5001 			default:
5002 				ret = -EINVAL;
5003 			}
5004 		}
5005 #endif
5006 	} else {
5007 		ret = -EINVAL;
5008 	}
5009 	return ret;
5010 }
5011 
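/* A sketch of how the socket options above are reached from a sockops
 * program via bpf_setsockopt(), assuming libbpf conventions and that the
 * SOL_, TCP_ and SO_ constants are available from the usual uapi headers;
 * "bbr" and the 1 MiB send buffer are example values:
 *
 *	SEC("sockops")
 *	int tune_sock(struct bpf_sock_ops *skops)
 *	{
 *		char cc[] = "bbr";
 *		int sndbuf = 1 << 20;
 *
 *		if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
 *		    skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) {
 *			bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
 *				       cc, sizeof(cc));
 *			bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
 *				       &sndbuf, sizeof(sndbuf));
 *		}
 *		return 1;
 *	}
 */
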
5012 static int _bpf_getsockopt(struct sock *sk, int level, int optname,
5013 			   char *optval, int optlen)
5014 {
5015 	if (!sk_fullsock(sk))
5016 		goto err_clear;
5017 
5018 	sock_owned_by_me(sk);
5019 
5020 	if (level == SOL_SOCKET) {
5021 		if (optlen != sizeof(int))
5022 			goto err_clear;
5023 
5024 		switch (optname) {
5025 		case SO_RCVBUF:
5026 			*((int *)optval) = sk->sk_rcvbuf;
5027 			break;
5028 		case SO_SNDBUF:
5029 			*((int *)optval) = sk->sk_sndbuf;
5030 			break;
5031 		case SO_MARK:
5032 			*((int *)optval) = sk->sk_mark;
5033 			break;
5034 		case SO_PRIORITY:
5035 			*((int *)optval) = sk->sk_priority;
5036 			break;
5037 		case SO_BINDTOIFINDEX:
5038 			*((int *)optval) = sk->sk_bound_dev_if;
5039 			break;
5040 		case SO_REUSEPORT:
5041 			*((int *)optval) = sk->sk_reuseport;
5042 			break;
5043 		default:
5044 			goto err_clear;
5045 		}
5046 #ifdef CONFIG_INET
5047 	} else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
5048 		struct inet_connection_sock *icsk;
5049 		struct tcp_sock *tp;
5050 
5051 		switch (optname) {
5052 		case TCP_CONGESTION:
5053 			icsk = inet_csk(sk);
5054 
5055 			if (!icsk->icsk_ca_ops || optlen <= 1)
5056 				goto err_clear;
5057 			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
5058 			optval[optlen - 1] = 0;
5059 			break;
5060 		case TCP_SAVED_SYN:
5061 			tp = tcp_sk(sk);
5062 
5063 			if (optlen <= 0 || !tp->saved_syn ||
5064 			    optlen > tcp_saved_syn_len(tp->saved_syn))
5065 				goto err_clear;
5066 			memcpy(optval, tp->saved_syn->data, optlen);
5067 			break;
5068 		default:
5069 			goto err_clear;
5070 		}
5071 	} else if (level == SOL_IP) {
5072 		struct inet_sock *inet = inet_sk(sk);
5073 
5074 		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
5075 			goto err_clear;
5076 
5077 		/* Only some options are supported */
5078 		switch (optname) {
5079 		case IP_TOS:
5080 			*((int *)optval) = (int)inet->tos;
5081 			break;
5082 		default:
5083 			goto err_clear;
5084 		}
5085 #if IS_ENABLED(CONFIG_IPV6)
5086 	} else if (level == SOL_IPV6) {
5087 		struct ipv6_pinfo *np = inet6_sk(sk);
5088 
5089 		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
5090 			goto err_clear;
5091 
5092 		/* Only some options are supported */
5093 		switch (optname) {
5094 		case IPV6_TCLASS:
5095 			*((int *)optval) = (int)np->tclass;
5096 			break;
5097 		default:
5098 			goto err_clear;
5099 		}
5100 #endif
5101 #endif
5102 	} else {
5103 		goto err_clear;
5104 	}
5105 	return 0;
5106 err_clear:
5107 	memset(optval, 0, optlen);
5108 	return -EINVAL;
5109 }
5110 
5111 BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
5112 	   int, optname, char *, optval, int, optlen)
5113 {
5114 	if (level == SOL_TCP && optname == TCP_CONGESTION) {
5115 		if (optlen >= sizeof("cdg") - 1 &&
5116 		    !strncmp("cdg", optval, optlen))
5117 			return -ENOTSUPP;
5118 	}
5119 
5120 	return _bpf_setsockopt(sk, level, optname, optval, optlen);
5121 }
5122 
5123 const struct bpf_func_proto bpf_sk_setsockopt_proto = {
5124 	.func		= bpf_sk_setsockopt,
5125 	.gpl_only	= false,
5126 	.ret_type	= RET_INTEGER,
5127 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5128 	.arg2_type	= ARG_ANYTHING,
5129 	.arg3_type	= ARG_ANYTHING,
5130 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5131 	.arg5_type	= ARG_CONST_SIZE,
5132 };
5133 
5134 BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
5135 	   int, optname, char *, optval, int, optlen)
5136 {
5137 	return _bpf_getsockopt(sk, level, optname, optval, optlen);
5138 }
5139 
5140 const struct bpf_func_proto bpf_sk_getsockopt_proto = {
5141 	.func		= bpf_sk_getsockopt,
5142 	.gpl_only	= false,
5143 	.ret_type	= RET_INTEGER,
5144 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5145 	.arg2_type	= ARG_ANYTHING,
5146 	.arg3_type	= ARG_ANYTHING,
5147 	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
5148 	.arg5_type	= ARG_CONST_SIZE,
5149 };
5150 
5151 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
5152 	   int, level, int, optname, char *, optval, int, optlen)
5153 {
5154 	return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
5155 }
5156 
5157 static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
5158 	.func		= bpf_sock_addr_setsockopt,
5159 	.gpl_only	= false,
5160 	.ret_type	= RET_INTEGER,
5161 	.arg1_type	= ARG_PTR_TO_CTX,
5162 	.arg2_type	= ARG_ANYTHING,
5163 	.arg3_type	= ARG_ANYTHING,
5164 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5165 	.arg5_type	= ARG_CONST_SIZE,
5166 };
5167 
5168 BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
5169 	   int, level, int, optname, char *, optval, int, optlen)
5170 {
5171 	return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
5172 }
5173 
5174 static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
5175 	.func		= bpf_sock_addr_getsockopt,
5176 	.gpl_only	= false,
5177 	.ret_type	= RET_INTEGER,
5178 	.arg1_type	= ARG_PTR_TO_CTX,
5179 	.arg2_type	= ARG_ANYTHING,
5180 	.arg3_type	= ARG_ANYTHING,
5181 	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
5182 	.arg5_type	= ARG_CONST_SIZE,
5183 };
5184 
5185 BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5186 	   int, level, int, optname, char *, optval, int, optlen)
5187 {
5188 	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
5189 }
5190 
5191 static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
5192 	.func		= bpf_sock_ops_setsockopt,
5193 	.gpl_only	= false,
5194 	.ret_type	= RET_INTEGER,
5195 	.arg1_type	= ARG_PTR_TO_CTX,
5196 	.arg2_type	= ARG_ANYTHING,
5197 	.arg3_type	= ARG_ANYTHING,
5198 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5199 	.arg5_type	= ARG_CONST_SIZE,
5200 };
5201 
5202 static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
5203 				int optname, const u8 **start)
5204 {
5205 	struct sk_buff *syn_skb = bpf_sock->syn_skb;
5206 	const u8 *hdr_start;
5207 	int ret;
5208 
5209 	if (syn_skb) {
5210 		/* sk is a request_sock here */
5211 
5212 		if (optname == TCP_BPF_SYN) {
5213 			hdr_start = syn_skb->data;
5214 			ret = tcp_hdrlen(syn_skb);
5215 		} else if (optname == TCP_BPF_SYN_IP) {
5216 			hdr_start = skb_network_header(syn_skb);
5217 			ret = skb_network_header_len(syn_skb) +
5218 				tcp_hdrlen(syn_skb);
5219 		} else {
5220 			/* optname == TCP_BPF_SYN_MAC */
5221 			hdr_start = skb_mac_header(syn_skb);
5222 			ret = skb_mac_header_len(syn_skb) +
5223 				skb_network_header_len(syn_skb) +
5224 				tcp_hdrlen(syn_skb);
5225 		}
5226 	} else {
5227 		struct sock *sk = bpf_sock->sk;
5228 		struct saved_syn *saved_syn;
5229 
5230 		if (sk->sk_state == TCP_NEW_SYN_RECV)
5231 			/* synack retransmit. bpf_sock->syn_skb will
5232 			 * not be available, so it has to fall back to
5233 			 * saved_syn (if it was saved).
5234 			 */
5235 			saved_syn = inet_reqsk(sk)->saved_syn;
5236 		else
5237 			saved_syn = tcp_sk(sk)->saved_syn;
5238 
5239 		if (!saved_syn)
5240 			return -ENOENT;
5241 
5242 		if (optname == TCP_BPF_SYN) {
5243 			hdr_start = saved_syn->data +
5244 				saved_syn->mac_hdrlen +
5245 				saved_syn->network_hdrlen;
5246 			ret = saved_syn->tcp_hdrlen;
5247 		} else if (optname == TCP_BPF_SYN_IP) {
5248 			hdr_start = saved_syn->data +
5249 				saved_syn->mac_hdrlen;
5250 			ret = saved_syn->network_hdrlen +
5251 				saved_syn->tcp_hdrlen;
5252 		} else {
5253 			/* optname == TCP_BPF_SYN_MAC */
5254 
5255 			/* TCP_SAVE_SYN may not have saved the mac hdr */
5256 			if (!saved_syn->mac_hdrlen)
5257 				return -ENOENT;
5258 
5259 			hdr_start = saved_syn->data;
5260 			ret = saved_syn->mac_hdrlen +
5261 				saved_syn->network_hdrlen +
5262 				saved_syn->tcp_hdrlen;
5263 		}
5264 	}
5265 
5266 	*start = hdr_start;
5267 	return ret;
5268 }
5269 
5270 BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5271 	   int, level, int, optname, char *, optval, int, optlen)
5272 {
5273 	if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
5274 	    optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
5275 		int ret, copy_len = 0;
5276 		const u8 *start;
5277 
5278 		ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
5279 		if (ret > 0) {
5280 			copy_len = ret;
5281 			if (optlen < copy_len) {
5282 				copy_len = optlen;
5283 				ret = -ENOSPC;
5284 			}
5285 
5286 			memcpy(optval, start, copy_len);
5287 		}
5288 
5289 		/* Zero out unused buffer at the end */
5290 		memset(optval + copy_len, 0, optlen - copy_len);
5291 
5292 		return ret;
5293 	}
5294 
5295 	return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
5296 }
5297 
5298 static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
5299 	.func		= bpf_sock_ops_getsockopt,
5300 	.gpl_only	= false,
5301 	.ret_type	= RET_INTEGER,
5302 	.arg1_type	= ARG_PTR_TO_CTX,
5303 	.arg2_type	= ARG_ANYTHING,
5304 	.arg3_type	= ARG_ANYTHING,
5305 	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
5306 	.arg5_type	= ARG_CONST_SIZE,
5307 };
5308 
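/* A sketch of reading the headers of the original SYN from a sockops
 * program with bpf_getsockopt() and the TCP_BPF_SYN_IP pseudo-optname,
 * assuming it runs at a callback where either bpf_sock->syn_skb or a
 * saved SYN (TCP_SAVE_SYN) is available; the 64-byte buffer is just an
 * example size, and a too-small buffer is reported as -ENOSPC:
 *
 *	SEC("sockops")
 *	int read_syn(struct bpf_sock_ops *skops)
 *	{
 *		char syn[64] = {};
 *		int len;
 *
 *		if (skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
 *			return 1;
 *
 *		len = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP,
 *				     syn, sizeof(syn));
 *		if (len > 0)
 *			bpf_printk("ip+tcp hdrs of SYN: %d bytes", len);
 *		return 1;
 *	}
 */
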
5309 BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
5310 	   int, argval)
5311 {
5312 	struct sock *sk = bpf_sock->sk;
5313 	int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
5314 
5315 	if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
5316 		return -EINVAL;
5317 
5318 	tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
5319 
5320 	return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
5321 }
5322 
5323 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
5324 	.func		= bpf_sock_ops_cb_flags_set,
5325 	.gpl_only	= false,
5326 	.ret_type	= RET_INTEGER,
5327 	.arg1_type	= ARG_PTR_TO_CTX,
5328 	.arg2_type	= ARG_ANYTHING,
5329 };
5330 
5331 const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
5332 EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
5333 
5334 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
5335 	   int, addr_len)
5336 {
5337 #ifdef CONFIG_INET
5338 	struct sock *sk = ctx->sk;
5339 	u32 flags = BIND_FROM_BPF;
5340 	int err;
5341 
5342 	err = -EINVAL;
5343 	if (addr_len < offsetofend(struct sockaddr, sa_family))
5344 		return err;
5345 	if (addr->sa_family == AF_INET) {
5346 		if (addr_len < sizeof(struct sockaddr_in))
5347 			return err;
5348 		if (((struct sockaddr_in *)addr)->sin_port == htons(0))
5349 			flags |= BIND_FORCE_ADDRESS_NO_PORT;
5350 		return __inet_bind(sk, addr, addr_len, flags);
5351 #if IS_ENABLED(CONFIG_IPV6)
5352 	} else if (addr->sa_family == AF_INET6) {
5353 		if (addr_len < SIN6_LEN_RFC2133)
5354 			return err;
5355 		if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
5356 			flags |= BIND_FORCE_ADDRESS_NO_PORT;
5357 		/* ipv6_bpf_stub cannot be NULL, since this is called from the
5358 		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded.
5359 		 */
5360 		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
5361 #endif /* CONFIG_IPV6 */
5362 	}
5363 #endif /* CONFIG_INET */
5364 
5365 	return -EAFNOSUPPORT;
5366 }
5367 
5368 static const struct bpf_func_proto bpf_bind_proto = {
5369 	.func		= bpf_bind,
5370 	.gpl_only	= false,
5371 	.ret_type	= RET_INTEGER,
5372 	.arg1_type	= ARG_PTR_TO_CTX,
5373 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5374 	.arg3_type	= ARG_CONST_SIZE,
5375 };
5376 
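/* A sketch of the intended caller: a cgroup/connect4 program that pins the
 * source address of outgoing connections before connect() proceeds, leaving
 * sin_port at 0 so that BIND_FORCE_ADDRESS_NO_PORT defers port selection;
 * 192.168.10.2 is an example address and the includes follow the usual
 * libbpf/uapi conventions:
 *
 *	SEC("cgroup/connect4")
 *	int bind_src(struct bpf_sock_addr *ctx)
 *	{
 *		struct sockaddr_in addr = {
 *			.sin_family	= AF_INET,
 *			.sin_addr.s_addr = bpf_htonl(0xc0a80a02),
 *		};
 *
 *		bpf_bind(ctx, (struct sockaddr *)&addr, sizeof(addr));
 *		return 1;
 *	}
 */
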
5377 #ifdef CONFIG_XFRM
5378 BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
5379 	   struct bpf_xfrm_state *, to, u32, size, u64, flags)
5380 {
5381 	const struct sec_path *sp = skb_sec_path(skb);
5382 	const struct xfrm_state *x;
5383 
5384 	if (!sp || unlikely(index >= sp->len || flags))
5385 		goto err_clear;
5386 
5387 	x = sp->xvec[index];
5388 
5389 	if (unlikely(size != sizeof(struct bpf_xfrm_state)))
5390 		goto err_clear;
5391 
5392 	to->reqid = x->props.reqid;
5393 	to->spi = x->id.spi;
5394 	to->family = x->props.family;
5395 	to->ext = 0;
5396 
5397 	if (to->family == AF_INET6) {
5398 		memcpy(to->remote_ipv6, x->props.saddr.a6,
5399 		       sizeof(to->remote_ipv6));
5400 	} else {
5401 		to->remote_ipv4 = x->props.saddr.a4;
5402 		memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
5403 	}
5404 
5405 	return 0;
5406 err_clear:
5407 	memset(to, 0, size);
5408 	return -EINVAL;
5409 }
5410 
5411 static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
5412 	.func		= bpf_skb_get_xfrm_state,
5413 	.gpl_only	= false,
5414 	.ret_type	= RET_INTEGER,
5415 	.arg1_type	= ARG_PTR_TO_CTX,
5416 	.arg2_type	= ARG_ANYTHING,
5417 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
5418 	.arg4_type	= ARG_CONST_SIZE,
5419 	.arg5_type	= ARG_ANYTHING,
5420 };
5421 #endif
5422 
5423 #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
5424 static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
5425 				  const struct neighbour *neigh,
5426 				  const struct net_device *dev, u32 mtu)
5427 {
5428 	memcpy(params->dmac, neigh->ha, ETH_ALEN);
5429 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
5430 	params->h_vlan_TCI = 0;
5431 	params->h_vlan_proto = 0;
5432 	if (mtu)
5433 		params->mtu_result = mtu; /* union with tot_len */
5434 
5435 	return 0;
5436 }
5437 #endif
5438 
5439 #if IS_ENABLED(CONFIG_INET)
5440 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
5441 			       u32 flags, bool check_mtu)
5442 {
5443 	struct fib_nh_common *nhc;
5444 	struct in_device *in_dev;
5445 	struct neighbour *neigh;
5446 	struct net_device *dev;
5447 	struct fib_result res;
5448 	struct flowi4 fl4;
5449 	u32 mtu = 0;
5450 	int err;
5451 
5452 	dev = dev_get_by_index_rcu(net, params->ifindex);
5453 	if (unlikely(!dev))
5454 		return -ENODEV;
5455 
5456 	/* verify forwarding is enabled on this interface */
5457 	in_dev = __in_dev_get_rcu(dev);
5458 	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
5459 		return BPF_FIB_LKUP_RET_FWD_DISABLED;
5460 
5461 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5462 		fl4.flowi4_iif = 1;
5463 		fl4.flowi4_oif = params->ifindex;
5464 	} else {
5465 		fl4.flowi4_iif = params->ifindex;
5466 		fl4.flowi4_oif = 0;
5467 	}
5468 	fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
5469 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
5470 	fl4.flowi4_flags = 0;
5471 
5472 	fl4.flowi4_proto = params->l4_protocol;
5473 	fl4.daddr = params->ipv4_dst;
5474 	fl4.saddr = params->ipv4_src;
5475 	fl4.fl4_sport = params->sport;
5476 	fl4.fl4_dport = params->dport;
5477 	fl4.flowi4_multipath_hash = 0;
5478 
5479 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
5480 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5481 		struct fib_table *tb;
5482 
5483 		tb = fib_get_table(net, tbid);
5484 		if (unlikely(!tb))
5485 			return BPF_FIB_LKUP_RET_NOT_FWDED;
5486 
5487 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
5488 	} else {
5489 		fl4.flowi4_mark = 0;
5490 		fl4.flowi4_secid = 0;
5491 		fl4.flowi4_tun_key.tun_id = 0;
5492 		fl4.flowi4_uid = sock_net_uid(net, NULL);
5493 
5494 		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
5495 	}
5496 
5497 	if (err) {
5498 		/* map fib lookup errors to RTN_ type */
5499 		if (err == -EINVAL)
5500 			return BPF_FIB_LKUP_RET_BLACKHOLE;
5501 		if (err == -EHOSTUNREACH)
5502 			return BPF_FIB_LKUP_RET_UNREACHABLE;
5503 		if (err == -EACCES)
5504 			return BPF_FIB_LKUP_RET_PROHIBIT;
5505 
5506 		return BPF_FIB_LKUP_RET_NOT_FWDED;
5507 	}
5508 
5509 	if (res.type != RTN_UNICAST)
5510 		return BPF_FIB_LKUP_RET_NOT_FWDED;
5511 
5512 	if (fib_info_num_path(res.fi) > 1)
5513 		fib_select_path(net, &res, &fl4, NULL);
5514 
5515 	if (check_mtu) {
5516 		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
5517 		if (params->tot_len > mtu) {
5518 			params->mtu_result = mtu; /* union with tot_len */
5519 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5520 		}
5521 	}
5522 
5523 	nhc = res.nhc;
5524 
5525 	/* do not handle lwt encaps right now */
5526 	if (nhc->nhc_lwtstate)
5527 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5528 
5529 	dev = nhc->nhc_dev;
5530 
5531 	params->rt_metric = res.fi->fib_priority;
5532 	params->ifindex = dev->ifindex;
5533 
5534 	/* xdp and cls_bpf programs are run in RCU-bh so
5535 	 * rcu_read_lock_bh is not needed here
5536 	 */
5537 	if (likely(nhc->nhc_gw_family != AF_INET6)) {
5538 		if (nhc->nhc_gw_family)
5539 			params->ipv4_dst = nhc->nhc_gw.ipv4;
5540 
5541 		neigh = __ipv4_neigh_lookup_noref(dev,
5542 						 (__force u32)params->ipv4_dst);
5543 	} else {
5544 		struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
5545 
5546 		params->family = AF_INET6;
5547 		*dst = nhc->nhc_gw.ipv6;
5548 		neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5549 	}
5550 
5551 	if (!neigh)
5552 		return BPF_FIB_LKUP_RET_NO_NEIGH;
5553 
5554 	return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
5555 }
5556 #endif
5557 
5558 #if IS_ENABLED(CONFIG_IPV6)
5559 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
5560 			       u32 flags, bool check_mtu)
5561 {
5562 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
5563 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
5564 	struct fib6_result res = {};
5565 	struct neighbour *neigh;
5566 	struct net_device *dev;
5567 	struct inet6_dev *idev;
5568 	struct flowi6 fl6;
5569 	int strict = 0;
5570 	int oif, err;
5571 	u32 mtu = 0;
5572 
5573 	/* link local addresses are never forwarded */
5574 	if (rt6_need_strict(dst) || rt6_need_strict(src))
5575 		return BPF_FIB_LKUP_RET_NOT_FWDED;
5576 
5577 	dev = dev_get_by_index_rcu(net, params->ifindex);
5578 	if (unlikely(!dev))
5579 		return -ENODEV;
5580 
5581 	idev = __in6_dev_get_safely(dev);
5582 	if (unlikely(!idev || !idev->cnf.forwarding))
5583 		return BPF_FIB_LKUP_RET_FWD_DISABLED;
5584 
5585 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5586 		fl6.flowi6_iif = 1;
5587 		oif = fl6.flowi6_oif = params->ifindex;
5588 	} else {
5589 		oif = fl6.flowi6_iif = params->ifindex;
5590 		fl6.flowi6_oif = 0;
5591 		strict = RT6_LOOKUP_F_HAS_SADDR;
5592 	}
5593 	fl6.flowlabel = params->flowinfo;
5594 	fl6.flowi6_scope = 0;
5595 	fl6.flowi6_flags = 0;
5596 	fl6.mp_hash = 0;
5597 
5598 	fl6.flowi6_proto = params->l4_protocol;
5599 	fl6.daddr = *dst;
5600 	fl6.saddr = *src;
5601 	fl6.fl6_sport = params->sport;
5602 	fl6.fl6_dport = params->dport;
5603 
5604 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
5605 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5606 		struct fib6_table *tb;
5607 
5608 		tb = ipv6_stub->fib6_get_table(net, tbid);
5609 		if (unlikely(!tb))
5610 			return BPF_FIB_LKUP_RET_NOT_FWDED;
5611 
5612 		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
5613 						   strict);
5614 	} else {
5615 		fl6.flowi6_mark = 0;
5616 		fl6.flowi6_secid = 0;
5617 		fl6.flowi6_tun_key.tun_id = 0;
5618 		fl6.flowi6_uid = sock_net_uid(net, NULL);
5619 
5620 		err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
5621 	}
5622 
5623 	if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
5624 		     res.f6i == net->ipv6.fib6_null_entry))
5625 		return BPF_FIB_LKUP_RET_NOT_FWDED;
5626 
5627 	switch (res.fib6_type) {
5628 	/* only unicast is forwarded */
5629 	case RTN_UNICAST:
5630 		break;
5631 	case RTN_BLACKHOLE:
5632 		return BPF_FIB_LKUP_RET_BLACKHOLE;
5633 	case RTN_UNREACHABLE:
5634 		return BPF_FIB_LKUP_RET_UNREACHABLE;
5635 	case RTN_PROHIBIT:
5636 		return BPF_FIB_LKUP_RET_PROHIBIT;
5637 	default:
5638 		return BPF_FIB_LKUP_RET_NOT_FWDED;
5639 	}
5640 
5641 	ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5642 				    fl6.flowi6_oif != 0, NULL, strict);
5643 
5644 	if (check_mtu) {
5645 		mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
5646 		if (params->tot_len > mtu) {
5647 			params->mtu_result = mtu; /* union with tot_len */
5648 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
5649 		}
5650 	}
5651 
5652 	if (res.nh->fib_nh_lws)
5653 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
5654 
5655 	if (res.nh->fib_nh_gw_family)
5656 		*dst = res.nh->fib_nh_gw6;
5657 
5658 	dev = res.nh->fib_nh_dev;
5659 	params->rt_metric = res.f6i->fib6_metric;
5660 	params->ifindex = dev->ifindex;
5661 
5662 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
5663 	 * not needed here.
5664 	 */
5665 	neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5666 	if (!neigh)
5667 		return BPF_FIB_LKUP_RET_NO_NEIGH;
5668 
5669 	return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
5670 }
5671 #endif
5672 
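/* XDP flavour of the fib_lookup helper: validate the caller-supplied params
 * size and flags, then dispatch to the address-family specific lookup above
 * with MTU checking always enabled.
 *
 * A minimal sketch (not from this file) of how an XDP program might use the
 * helper, assuming the packet has already been parsed:
 *
 *	struct bpf_fib_lookup fib = {};
 *
 *	fib.family  = AF_INET;
 *	fib.ifindex = ctx->ingress_ifindex;
 *	(fill l4_protocol, tot_len, addresses and ports from the packet)
 *	rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
 *	if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
 *		(rewrite the Ethernet header from fib.smac / fib.dmac)
 *		return bpf_redirect(fib.ifindex, 0);
 *	}
 */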
5673 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
5674 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
5675 {
5676 	if (plen < sizeof(*params))
5677 		return -EINVAL;
5678 
5679 	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5680 		return -EINVAL;
5681 
5682 	switch (params->family) {
5683 #if IS_ENABLED(CONFIG_INET)
5684 	case AF_INET:
5685 		return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
5686 					   flags, true);
5687 #endif
5688 #if IS_ENABLED(CONFIG_IPV6)
5689 	case AF_INET6:
5690 		return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
5691 					   flags, true);
5692 #endif
5693 	}
5694 	return -EAFNOSUPPORT;
5695 }
5696 
5697 static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
5698 	.func		= bpf_xdp_fib_lookup,
5699 	.gpl_only	= true,
5700 	.ret_type	= RET_INTEGER,
5701 	.arg1_type      = ARG_PTR_TO_CTX,
5702 	.arg2_type      = ARG_PTR_TO_MEM,
5703 	.arg3_type      = ARG_CONST_SIZE,
5704 	.arg4_type	= ARG_ANYTHING,
5705 };
5706 
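/* skb flavour of the fib_lookup helper.  MTU checking is only done against
 * params->tot_len when the caller provides it; otherwise the skb itself is
 * checked below against the MTU of the net_device found by the lookup.
 */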
5707 BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
5708 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
5709 {
5710 	struct net *net = dev_net(skb->dev);
5711 	int rc = -EAFNOSUPPORT;
5712 	bool check_mtu = false;
5713 
5714 	if (plen < sizeof(*params))
5715 		return -EINVAL;
5716 
5717 	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5718 		return -EINVAL;
5719 
5720 	if (params->tot_len)
5721 		check_mtu = true;
5722 
5723 	switch (params->family) {
5724 #if IS_ENABLED(CONFIG_INET)
5725 	case AF_INET:
5726 		rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
5727 		break;
5728 #endif
5729 #if IS_ENABLED(CONFIG_IPV6)
5730 	case AF_INET6:
5731 		rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
5732 		break;
5733 #endif
5734 	}
5735 
5736 	if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
5737 		struct net_device *dev;
5738 
5739 		/* When tot_len isn't provided by the user, check the skb
5740 		 * against the MTU of the net_device found by the FIB lookup
5741 		 */
5742 		dev = dev_get_by_index_rcu(net, params->ifindex);
5743 		if (!is_skb_forwardable(dev, skb))
5744 			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
5745 
5746 		params->mtu_result = dev->mtu; /* union with tot_len */
5747 	}
5748 
5749 	return rc;
5750 }
5751 
5752 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
5753 	.func		= bpf_skb_fib_lookup,
5754 	.gpl_only	= true,
5755 	.ret_type	= RET_INTEGER,
5756 	.arg1_type      = ARG_PTR_TO_CTX,
5757 	.arg2_type      = ARG_PTR_TO_MEM,
5758 	.arg3_type      = ARG_CONST_SIZE,
5759 	.arg4_type	= ARG_ANYTHING,
5760 };
5761 
5762 static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
5763 					    u32 ifindex)
5764 {
5765 	struct net *netns = dev_net(dev_curr);
5766 
5767 	/* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */
5768 	if (ifindex == 0)
5769 		return dev_curr;
5770 
5771 	return dev_get_by_index_rcu(netns, ifindex);
5772 }
5773 
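/* bpf_check_mtu() flavour for skb-based programs: compare either *mtu_len
 * (an L3 length) or skb->len, adjusted by len_diff, against the L2 size
 * derived from the MTU of the device selected via ifindex.  A GSO skb that
 * exceeds the MTU is only rejected when BPF_MTU_CHK_SEGS is set and a
 * segment would not fit.  On return, *mtu_len holds the device MTU.
 */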
5774 BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
5775 	   u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
5776 {
5777 	int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
5778 	struct net_device *dev = skb->dev;
5779 	int skb_len, dev_len;
5780 	int mtu;
5781 
5782 	if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
5783 		return -EINVAL;
5784 
5785 	if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
5786 		return -EINVAL;
5787 
5788 	dev = __dev_via_ifindex(dev, ifindex);
5789 	if (unlikely(!dev))
5790 		return -ENODEV;
5791 
5792 	mtu = READ_ONCE(dev->mtu);
5793 
5794 	dev_len = mtu + dev->hard_header_len;
5795 
5796 	/* If set, use *mtu_len as the input L3 length, like iph->tot_len (as in fib_lookup) */
5797 	skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
5798 
5799 	skb_len += len_diff; /* a negative len_diff can make the check pass */
5800 	if (skb_len <= dev_len) {
5801 		ret = BPF_MTU_CHK_RET_SUCCESS;
5802 		goto out;
5803 	}
5804 	/* At this point, skb->len exceeds the MTU, but as it includes the
5805 	 * length of all segments, it can still be below the MTU.  The SKB can
5806 	 * possibly get re-segmented in the transmit path (see validate_xmit_skb).
5807 	 * Thus, the user must choose whether segs are to be MTU checked.
5808 	 */
5809 	if (skb_is_gso(skb)) {
5810 		ret = BPF_MTU_CHK_RET_SUCCESS;
5811 
5812 		if (flags & BPF_MTU_CHK_SEGS &&
5813 		    !skb_gso_validate_network_len(skb, mtu))
5814 			ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
5815 	}
5816 out:
5817 	/* BPF verifier guarantees valid pointer */
5818 	*mtu_len = mtu;
5819 
5820 	return ret;
5821 }
5822 
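/* bpf_check_mtu() flavour for XDP programs.  Same L2-level comparison as the
 * skb variant above, but without any GSO/segment handling.
 */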
5823 BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
5824 	   u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
5825 {
5826 	struct net_device *dev = xdp->rxq->dev;
5827 	int xdp_len = xdp->data_end - xdp->data;
5828 	int ret = BPF_MTU_CHK_RET_SUCCESS;
5829 	int mtu, dev_len;
5830 
5831 	/* XDP variant doesn't support multi-buffer segment check (yet) */
5832 	if (unlikely(flags))
5833 		return -EINVAL;
5834 
5835 	dev = __dev_via_ifindex(dev, ifindex);
5836 	if (unlikely(!dev))
5837 		return -ENODEV;
5838 
5839 	mtu = READ_ONCE(dev->mtu);
5840 
5841 	/* Add L2-header as dev MTU is L3 size */
5842 	dev_len = mtu + dev->hard_header_len;
5843 
5844 	/* Use *mtu_len as the input L3 length, like iph->tot_len (as in fib_lookup) */
5845 	if (*mtu_len)
5846 		xdp_len = *mtu_len + dev->hard_header_len;
5847 
5848 	xdp_len += len_diff; /* a negative len_diff can make the check pass */
5849 	if (xdp_len > dev_len)
5850 		ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
5851 
5852 	/* BPF verifier guarantees valid pointer */
5853 	*mtu_len = mtu;
5854 
5855 	return ret;
5856 }
5857 
5858 static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
5859 	.func		= bpf_skb_check_mtu,
5860 	.gpl_only	= true,
5861 	.ret_type	= RET_INTEGER,
5862 	.arg1_type      = ARG_PTR_TO_CTX,
5863 	.arg2_type      = ARG_ANYTHING,
5864 	.arg3_type      = ARG_PTR_TO_INT,
5865 	.arg4_type      = ARG_ANYTHING,
5866 	.arg5_type      = ARG_ANYTHING,
5867 };
5868 
5869 static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
5870 	.func		= bpf_xdp_check_mtu,
5871 	.gpl_only	= true,
5872 	.ret_type	= RET_INTEGER,
5873 	.arg1_type      = ARG_PTR_TO_CTX,
5874 	.arg2_type      = ARG_ANYTHING,
5875 	.arg3_type      = ARG_PTR_TO_INT,
5876 	.arg4_type      = ARG_ANYTHING,
5877 	.arg5_type      = ARG_ANYTHING,
5878 };
5879 
5880 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
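/* Validate @hdr as an SRH and push it onto the packet, either inlined into
 * the existing IPv6 header chain or as a full outer IPv6 + SRH
 * encapsulation, then re-resolve the nexthop for the (possibly new)
 * destination.
 */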
5881 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
5882 {
5883 	int err;
5884 	struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
5885 
5886 	if (!seg6_validate_srh(srh, len, false))
5887 		return -EINVAL;
5888 
5889 	switch (type) {
5890 	case BPF_LWT_ENCAP_SEG6_INLINE:
5891 		if (skb->protocol != htons(ETH_P_IPV6))
5892 			return -EBADMSG;
5893 
5894 		err = seg6_do_srh_inline(skb, srh);
5895 		break;
5896 	case BPF_LWT_ENCAP_SEG6:
5897 		skb_reset_inner_headers(skb);
5898 		skb->encapsulation = 1;
5899 		err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
5900 		break;
5901 	default:
5902 		return -EINVAL;
5903 	}
5904 
5905 	bpf_compute_data_pointers(skb);
5906 	if (err)
5907 		return err;
5908 
5909 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5910 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
5911 
5912 	return seg6_lookup_nexthop(skb, NULL, 0);
5913 }
5914 #endif /* CONFIG_IPV6_SEG6_BPF */
5915 
5916 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5917 static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
5918 			     bool ingress)
5919 {
5920 	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
5921 }
5922 #endif
5923 
5924 BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
5925 	   u32, len)
5926 {
5927 	switch (type) {
5928 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5929 	case BPF_LWT_ENCAP_SEG6:
5930 	case BPF_LWT_ENCAP_SEG6_INLINE:
5931 		return bpf_push_seg6_encap(skb, type, hdr, len);
5932 #endif
5933 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5934 	case BPF_LWT_ENCAP_IP:
5935 		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
5936 #endif
5937 	default:
5938 		return -EINVAL;
5939 	}
5940 }
5941 
5942 BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
5943 	   void *, hdr, u32, len)
5944 {
5945 	switch (type) {
5946 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5947 	case BPF_LWT_ENCAP_IP:
5948 		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
5949 #endif
5950 	default:
5951 		return -EINVAL;
5952 	}
5953 }
5954 
5955 static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
5956 	.func		= bpf_lwt_in_push_encap,
5957 	.gpl_only	= false,
5958 	.ret_type	= RET_INTEGER,
5959 	.arg1_type	= ARG_PTR_TO_CTX,
5960 	.arg2_type	= ARG_ANYTHING,
5961 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5962 	.arg4_type	= ARG_CONST_SIZE
5963 };
5964 
5965 static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
5966 	.func		= bpf_lwt_xmit_push_encap,
5967 	.gpl_only	= false,
5968 	.ret_type	= RET_INTEGER,
5969 	.arg1_type	= ARG_PTR_TO_CTX,
5970 	.arg2_type	= ARG_ANYTHING,
5971 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
5972 	.arg4_type	= ARG_CONST_SIZE
5973 };
5974 
5975 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
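/* Write @len bytes from @from into the packet at @offset.  Only the SRH may
 * be modified: writes into the TLV area invalidate the cached SRH state,
 * while the flags/tag area before the segment list can be written without
 * invalidation.  Anything else is rejected with -EFAULT.
 */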
5976 BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
5977 	   const void *, from, u32, len)
5978 {
5979 	struct seg6_bpf_srh_state *srh_state =
5980 		this_cpu_ptr(&seg6_bpf_srh_states);
5981 	struct ipv6_sr_hdr *srh = srh_state->srh;
5982 	void *srh_tlvs, *srh_end, *ptr;
5983 	int srhoff = 0;
5984 
5985 	if (srh == NULL)
5986 		return -EINVAL;
5987 
5988 	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
5989 	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
5990 
5991 	ptr = skb->data + offset;
5992 	if (ptr >= srh_tlvs && ptr + len <= srh_end)
5993 		srh_state->valid = false;
5994 	else if (ptr < (void *)&srh->flags ||
5995 		 ptr + len > (void *)&srh->segments)
5996 		return -EFAULT;
5997 
5998 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
5999 		return -EFAULT;
6000 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
6001 		return -EINVAL;
6002 	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6003 
6004 	memcpy(skb->data + offset, from, len);
6005 	return 0;
6006 }
6007 
6008 static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
6009 	.func		= bpf_lwt_seg6_store_bytes,
6010 	.gpl_only	= false,
6011 	.ret_type	= RET_INTEGER,
6012 	.arg1_type	= ARG_PTR_TO_CTX,
6013 	.arg2_type	= ARG_ANYTHING,
6014 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6015 	.arg4_type	= ARG_CONST_SIZE
6016 };
6017 
6018 static void bpf_update_srh_state(struct sk_buff *skb)
6019 {
6020 	struct seg6_bpf_srh_state *srh_state =
6021 		this_cpu_ptr(&seg6_bpf_srh_states);
6022 	int srhoff = 0;
6023 
6024 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
6025 		srh_state->srh = NULL;
6026 	} else {
6027 		srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6028 		srh_state->hdrlen = srh_state->srh->hdrlen << 3;
6029 		srh_state->valid = true;
6030 	}
6031 }
6032 
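/* Apply one of the SRv6 End* behaviours from a seg6local BPF program.  The
 * @param layout depends on @action: an IPv6 nexthop address for End.X, a
 * table id for End.T and End.DT6, and a complete SRH for End.B6 and
 * End.B6.Encap.
 */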
6033 BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
6034 	   u32, action, void *, param, u32, param_len)
6035 {
6036 	struct seg6_bpf_srh_state *srh_state =
6037 		this_cpu_ptr(&seg6_bpf_srh_states);
6038 	int hdroff = 0;
6039 	int err;
6040 
6041 	switch (action) {
6042 	case SEG6_LOCAL_ACTION_END_X:
6043 		if (!seg6_bpf_has_valid_srh(skb))
6044 			return -EBADMSG;
6045 		if (param_len != sizeof(struct in6_addr))
6046 			return -EINVAL;
6047 		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
6048 	case SEG6_LOCAL_ACTION_END_T:
6049 		if (!seg6_bpf_has_valid_srh(skb))
6050 			return -EBADMSG;
6051 		if (param_len != sizeof(int))
6052 			return -EINVAL;
6053 		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
6054 	case SEG6_LOCAL_ACTION_END_DT6:
6055 		if (!seg6_bpf_has_valid_srh(skb))
6056 			return -EBADMSG;
6057 		if (param_len != sizeof(int))
6058 			return -EINVAL;
6059 
6060 		if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
6061 			return -EBADMSG;
6062 		if (!pskb_pull(skb, hdroff))
6063 			return -EBADMSG;
6064 
6065 		skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
6066 		skb_reset_network_header(skb);
6067 		skb_reset_transport_header(skb);
6068 		skb->encapsulation = 0;
6069 
6070 		bpf_compute_data_pointers(skb);
6071 		bpf_update_srh_state(skb);
6072 		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
6073 	case SEG6_LOCAL_ACTION_END_B6:
6074 		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6075 			return -EBADMSG;
6076 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
6077 					  param, param_len);
6078 		if (!err)
6079 			bpf_update_srh_state(skb);
6080 
6081 		return err;
6082 	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
6083 		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6084 			return -EBADMSG;
6085 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
6086 					  param, param_len);
6087 		if (!err)
6088 			bpf_update_srh_state(skb);
6089 
6090 		return err;
6091 	default:
6092 		return -EINVAL;
6093 	}
6094 }
6095 
6096 static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
6097 	.func		= bpf_lwt_seg6_action,
6098 	.gpl_only	= false,
6099 	.ret_type	= RET_INTEGER,
6100 	.arg1_type	= ARG_PTR_TO_CTX,
6101 	.arg2_type	= ARG_ANYTHING,
6102 	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6103 	.arg4_type	= ARG_CONST_SIZE
6104 };
6105 
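/* Grow (len > 0) or shrink (len < 0) the SRH TLV area at @offset by @len
 * bytes, then fix up the IPv6 payload length and re-locate the SRH.  The
 * cached SRH state is invalidated since its layout changed.
 */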
6106 BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
6107 	   s32, len)
6108 {
6109 	struct seg6_bpf_srh_state *srh_state =
6110 		this_cpu_ptr(&seg6_bpf_srh_states);
6111 	struct ipv6_sr_hdr *srh = srh_state->srh;
6112 	void *srh_end, *srh_tlvs, *ptr;
6113 	struct ipv6hdr *hdr;
6114 	int srhoff = 0;
6115 	int ret;
6116 
6117 	if (unlikely(srh == NULL))
6118 		return -EINVAL;
6119 
6120 	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
6121 			((srh->first_segment + 1) << 4));
6122 	srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
6123 			srh_state->hdrlen);
6124 	ptr = skb->data + offset;
6125 
6126 	if (unlikely(ptr < srh_tlvs || ptr > srh_end))
6127 		return -EFAULT;
6128 	if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
6129 		return -EFAULT;
6130 
6131 	if (len > 0) {
6132 		ret = skb_cow_head(skb, len);
6133 		if (unlikely(ret < 0))
6134 			return ret;
6135 
6136 		ret = bpf_skb_net_hdr_push(skb, offset, len);
6137 	} else {
6138 		ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
6139 	}
6140 
6141 	bpf_compute_data_pointers(skb);
6142 	if (unlikely(ret < 0))
6143 		return ret;
6144 
6145 	hdr = (struct ipv6hdr *)skb->data;
6146 	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
6147 
6148 	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
6149 		return -EINVAL;
6150 	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6151 	srh_state->hdrlen += len;
6152 	srh_state->valid = false;
6153 	return 0;
6154 }
6155 
6156 static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
6157 	.func		= bpf_lwt_seg6_adjust_srh,
6158 	.gpl_only	= false,
6159 	.ret_type	= RET_INTEGER,
6160 	.arg1_type	= ARG_PTR_TO_CTX,
6161 	.arg2_type	= ARG_ANYTHING,
6162 	.arg3_type	= ARG_ANYTHING,
6163 };
6164 #endif /* CONFIG_IPV6_SEG6_BPF */
6165 
6166 #ifdef CONFIG_INET
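/* Core socket lookup used by the bpf_sk*_lookup_* helpers: resolve a TCP or
 * UDP socket matching @tuple in @net.  Sockets that are not RCU-freed are
 * returned with a reference held; a socket found without either property is
 * dropped with a one-time warning.
 */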
6167 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
6168 			      int dif, int sdif, u8 family, u8 proto)
6169 {
6170 	bool refcounted = false;
6171 	struct sock *sk = NULL;
6172 
6173 	if (family == AF_INET) {
6174 		__be32 src4 = tuple->ipv4.saddr;
6175 		__be32 dst4 = tuple->ipv4.daddr;
6176 
6177 		if (proto == IPPROTO_TCP)
6178 			sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
6179 					   src4, tuple->ipv4.sport,
6180 					   dst4, tuple->ipv4.dport,
6181 					   dif, sdif, &refcounted);
6182 		else
6183 			sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
6184 					       dst4, tuple->ipv4.dport,
6185 					       dif, sdif, &udp_table, NULL);
6186 #if IS_ENABLED(CONFIG_IPV6)
6187 	} else {
6188 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
6189 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
6190 
6191 		if (proto == IPPROTO_TCP)
6192 			sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
6193 					    src6, tuple->ipv6.sport,
6194 					    dst6, ntohs(tuple->ipv6.dport),
6195 					    dif, sdif, &refcounted);
6196 		else if (likely(ipv6_bpf_stub))
6197 			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
6198 							    src6, tuple->ipv6.sport,
6199 							    dst6, tuple->ipv6.dport,
6200 							    dif, sdif,
6201 							    &udp_table, NULL);
6202 #endif
6203 	}
6204 
6205 	if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
6206 		WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
6207 		sk = NULL;
6208 	}
6209 	return sk;
6210 }
6211 
6212 /* __bpf_skc_lookup performs the core lookup for different types of sockets,
6213  * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
6214  * The BPF_CALL_*() wrappers further below cast the returned pointer to an
6215  * 'unsigned long' to satisfy their BPF_CALL declarations.
6216  */
6217 static struct sock *
6218 __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6219 		 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6220 		 u64 flags)
6221 {
6222 	struct sock *sk = NULL;
6223 	u8 family = AF_UNSPEC;
6224 	struct net *net;
6225 	int sdif;
6226 
6227 	if (len == sizeof(tuple->ipv4))
6228 		family = AF_INET;
6229 	else if (len == sizeof(tuple->ipv6))
6230 		family = AF_INET6;
6231 	else
6232 		return NULL;
6233 
6234 	if (unlikely(family == AF_UNSPEC || flags ||
6235 		     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
6236 		goto out;
6237 
6238 	if (family == AF_INET)
6239 		sdif = inet_sdif(skb);
6240 	else
6241 		sdif = inet6_sdif(skb);
6242 
6243 	if ((s32)netns_id < 0) {
6244 		net = caller_net;
6245 		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6246 	} else {
6247 		net = get_net_ns_by_id(caller_net, netns_id);
6248 		if (unlikely(!net))
6249 			goto out;
6250 		sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6251 		put_net(net);
6252 	}
6253 
6254 out:
6255 	return sk;
6256 }
6257 
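/* Like __bpf_skc_lookup(), but only return full sockets: request and
 * timewait sockets are resolved to their full socket or dropped.
 */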
6258 static struct sock *
6259 __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6260 		struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6261 		u64 flags)
6262 {
6263 	struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
6264 					   ifindex, proto, netns_id, flags);
6265 
6266 	if (sk) {
6267 		sk = sk_to_full_sk(sk);
6268 		if (!sk_fullsock(sk)) {
6269 			sock_gen_put(sk);
6270 			return NULL;
6271 		}
6272 	}
6273 
6274 	return sk;
6275 }
6276 
6277 static struct sock *
6278 bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6279 	       u8 proto, u64 netns_id, u64 flags)
6280 {
6281 	struct net *caller_net;
6282 	int ifindex;
6283 
6284 	if (skb->dev) {
6285 		caller_net = dev_net(skb->dev);
6286 		ifindex = skb->dev->ifindex;
6287 	} else {
6288 		caller_net = sock_net(skb->sk);
6289 		ifindex = 0;
6290 	}
6291 
6292 	return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
6293 				netns_id, flags);
6294 }
6295 
6296 static struct sock *
6297 bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6298 	      u8 proto, u64 netns_id, u64 flags)
6299 {
6300 	struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
6301 					 flags);
6302 
6303 	if (sk) {
6304 		sk = sk_to_full_sk(sk);
6305 		if (!sk_fullsock(sk)) {
6306 			sock_gen_put(sk);
6307 			return NULL;
6308 		}
6309 	}
6310 
6311 	return sk;
6312 }
6313 
6314 BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
6315 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6316 {
6317 	return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
6318 					     netns_id, flags);
6319 }
6320 
6321 static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
6322 	.func		= bpf_skc_lookup_tcp,
6323 	.gpl_only	= false,
6324 	.pkt_access	= true,
6325 	.ret_type	= RET_PTR_TO_SOCK_COMMON_OR_NULL,
6326 	.arg1_type	= ARG_PTR_TO_CTX,
6327 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6328 	.arg3_type	= ARG_CONST_SIZE,
6329 	.arg4_type	= ARG_ANYTHING,
6330 	.arg5_type	= ARG_ANYTHING,
6331 };
6332 
6333 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
6334 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6335 {
6336 	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
6337 					    netns_id, flags);
6338 }
6339 
6340 static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
6341 	.func		= bpf_sk_lookup_tcp,
6342 	.gpl_only	= false,
6343 	.pkt_access	= true,
6344 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
6345 	.arg1_type	= ARG_PTR_TO_CTX,
6346 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6347 	.arg3_type	= ARG_CONST_SIZE,
6348 	.arg4_type	= ARG_ANYTHING,
6349 	.arg5_type	= ARG_ANYTHING,
6350 };
6351 
6352 BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
6353 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6354 {
6355 	return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
6356 					    netns_id, flags);
6357 }
6358 
6359 static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
6360 	.func		= bpf_sk_lookup_udp,
6361 	.gpl_only	= false,
6362 	.pkt_access	= true,
6363 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
6364 	.arg1_type	= ARG_PTR_TO_CTX,
6365 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6366 	.arg3_type	= ARG_CONST_SIZE,
6367 	.arg4_type	= ARG_ANYTHING,
6368 	.arg5_type	= ARG_ANYTHING,
6369 };
6370 
6371 BPF_CALL_1(bpf_sk_release, struct sock *, sk)
6372 {
6373 	if (sk && sk_is_refcounted(sk))
6374 		sock_gen_put(sk);
6375 	return 0;
6376 }
6377 
6378 static const struct bpf_func_proto bpf_sk_release_proto = {
6379 	.func		= bpf_sk_release,
6380 	.gpl_only	= false,
6381 	.ret_type	= RET_INTEGER,
6382 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6383 };
6384 
6385 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
6386 	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6387 {
6388 	struct net *caller_net = dev_net(ctx->rxq->dev);
6389 	int ifindex = ctx->rxq->dev->ifindex;
6390 
6391 	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6392 					      ifindex, IPPROTO_UDP, netns_id,
6393 					      flags);
6394 }
6395 
6396 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
6397 	.func           = bpf_xdp_sk_lookup_udp,
6398 	.gpl_only       = false,
6399 	.pkt_access     = true,
6400 	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6401 	.arg1_type      = ARG_PTR_TO_CTX,
6402 	.arg2_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
6403 	.arg3_type      = ARG_CONST_SIZE,
6404 	.arg4_type      = ARG_ANYTHING,
6405 	.arg5_type      = ARG_ANYTHING,
6406 };
6407 
6408 BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
6409 	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6410 {
6411 	struct net *caller_net = dev_net(ctx->rxq->dev);
6412 	int ifindex = ctx->rxq->dev->ifindex;
6413 
6414 	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
6415 					       ifindex, IPPROTO_TCP, netns_id,
6416 					       flags);
6417 }
6418 
6419 static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
6420 	.func           = bpf_xdp_skc_lookup_tcp,
6421 	.gpl_only       = false,
6422 	.pkt_access     = true,
6423 	.ret_type       = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6424 	.arg1_type      = ARG_PTR_TO_CTX,
6425 	.arg2_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
6426 	.arg3_type      = ARG_CONST_SIZE,
6427 	.arg4_type      = ARG_ANYTHING,
6428 	.arg5_type      = ARG_ANYTHING,
6429 };
6430 
6431 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
6432 	   struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6433 {
6434 	struct net *caller_net = dev_net(ctx->rxq->dev);
6435 	int ifindex = ctx->rxq->dev->ifindex;
6436 
6437 	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6438 					      ifindex, IPPROTO_TCP, netns_id,
6439 					      flags);
6440 }
6441 
6442 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
6443 	.func           = bpf_xdp_sk_lookup_tcp,
6444 	.gpl_only       = false,
6445 	.pkt_access     = true,
6446 	.ret_type       = RET_PTR_TO_SOCKET_OR_NULL,
6447 	.arg1_type      = ARG_PTR_TO_CTX,
6448 	.arg2_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
6449 	.arg3_type      = ARG_CONST_SIZE,
6450 	.arg4_type      = ARG_ANYTHING,
6451 	.arg5_type      = ARG_ANYTHING,
6452 };
6453 
6454 BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6455 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6456 {
6457 	return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
6458 					       sock_net(ctx->sk), 0,
6459 					       IPPROTO_TCP, netns_id, flags);
6460 }
6461 
6462 static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
6463 	.func		= bpf_sock_addr_skc_lookup_tcp,
6464 	.gpl_only	= false,
6465 	.ret_type	= RET_PTR_TO_SOCK_COMMON_OR_NULL,
6466 	.arg1_type	= ARG_PTR_TO_CTX,
6467 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6468 	.arg3_type	= ARG_CONST_SIZE,
6469 	.arg4_type	= ARG_ANYTHING,
6470 	.arg5_type	= ARG_ANYTHING,
6471 };
6472 
6473 BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6474 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6475 {
6476 	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6477 					      sock_net(ctx->sk), 0, IPPROTO_TCP,
6478 					      netns_id, flags);
6479 }
6480 
6481 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
6482 	.func		= bpf_sock_addr_sk_lookup_tcp,
6483 	.gpl_only	= false,
6484 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
6485 	.arg1_type	= ARG_PTR_TO_CTX,
6486 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6487 	.arg3_type	= ARG_CONST_SIZE,
6488 	.arg4_type	= ARG_ANYTHING,
6489 	.arg5_type	= ARG_ANYTHING,
6490 };
6491 
6492 BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
6493 	   struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6494 {
6495 	return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6496 					      sock_net(ctx->sk), 0, IPPROTO_UDP,
6497 					      netns_id, flags);
6498 }
6499 
6500 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
6501 	.func		= bpf_sock_addr_sk_lookup_udp,
6502 	.gpl_only	= false,
6503 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
6504 	.arg1_type	= ARG_PTR_TO_CTX,
6505 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6506 	.arg3_type	= ARG_CONST_SIZE,
6507 	.arg4_type	= ARG_ANYTHING,
6508 	.arg5_type	= ARG_ANYTHING,
6509 };
6510 
6511 bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6512 				  struct bpf_insn_access_aux *info)
6513 {
6514 	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
6515 					  icsk_retransmits))
6516 		return false;
6517 
6518 	if (off % size != 0)
6519 		return false;
6520 
6521 	switch (off) {
6522 	case offsetof(struct bpf_tcp_sock, bytes_received):
6523 	case offsetof(struct bpf_tcp_sock, bytes_acked):
6524 		return size == sizeof(__u64);
6525 	default:
6526 		return size == sizeof(__u32);
6527 	}
6528 }
6529 
6530 u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
6531 				    const struct bpf_insn *si,
6532 				    struct bpf_insn *insn_buf,
6533 				    struct bpf_prog *prog, u32 *target_size)
6534 {
6535 	struct bpf_insn *insn = insn_buf;
6536 
6537 #define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
6538 	do {								\
6539 		BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) >	\
6540 			     sizeof_field(struct bpf_tcp_sock, FIELD));	\
6541 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
6542 				      si->dst_reg, si->src_reg,		\
6543 				      offsetof(struct tcp_sock, FIELD)); \
6544 	} while (0)
6545 
6546 #define BPF_INET_SOCK_GET_COMMON(FIELD)					\
6547 	do {								\
6548 		BUILD_BUG_ON(sizeof_field(struct inet_connection_sock,	\
6549 					  FIELD) >			\
6550 			     sizeof_field(struct bpf_tcp_sock, FIELD));	\
6551 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
6552 					struct inet_connection_sock,	\
6553 					FIELD),				\
6554 				      si->dst_reg, si->src_reg,		\
6555 				      offsetof(				\
6556 					struct inet_connection_sock,	\
6557 					FIELD));			\
6558 	} while (0)
6559 
6560 	if (insn > insn_buf)
6561 		return insn - insn_buf;
6562 
6563 	switch (si->off) {
6564 	case offsetof(struct bpf_tcp_sock, rtt_min):
6565 		BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
6566 			     sizeof(struct minmax));
6567 		BUILD_BUG_ON(sizeof(struct minmax) <
6568 			     sizeof(struct minmax_sample));
6569 
6570 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
6571 				      offsetof(struct tcp_sock, rtt_min) +
6572 				      offsetof(struct minmax_sample, v));
6573 		break;
6574 	case offsetof(struct bpf_tcp_sock, snd_cwnd):
6575 		BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
6576 		break;
6577 	case offsetof(struct bpf_tcp_sock, srtt_us):
6578 		BPF_TCP_SOCK_GET_COMMON(srtt_us);
6579 		break;
6580 	case offsetof(struct bpf_tcp_sock, snd_ssthresh):
6581 		BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
6582 		break;
6583 	case offsetof(struct bpf_tcp_sock, rcv_nxt):
6584 		BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
6585 		break;
6586 	case offsetof(struct bpf_tcp_sock, snd_nxt):
6587 		BPF_TCP_SOCK_GET_COMMON(snd_nxt);
6588 		break;
6589 	case offsetof(struct bpf_tcp_sock, snd_una):
6590 		BPF_TCP_SOCK_GET_COMMON(snd_una);
6591 		break;
6592 	case offsetof(struct bpf_tcp_sock, mss_cache):
6593 		BPF_TCP_SOCK_GET_COMMON(mss_cache);
6594 		break;
6595 	case offsetof(struct bpf_tcp_sock, ecn_flags):
6596 		BPF_TCP_SOCK_GET_COMMON(ecn_flags);
6597 		break;
6598 	case offsetof(struct bpf_tcp_sock, rate_delivered):
6599 		BPF_TCP_SOCK_GET_COMMON(rate_delivered);
6600 		break;
6601 	case offsetof(struct bpf_tcp_sock, rate_interval_us):
6602 		BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
6603 		break;
6604 	case offsetof(struct bpf_tcp_sock, packets_out):
6605 		BPF_TCP_SOCK_GET_COMMON(packets_out);
6606 		break;
6607 	case offsetof(struct bpf_tcp_sock, retrans_out):
6608 		BPF_TCP_SOCK_GET_COMMON(retrans_out);
6609 		break;
6610 	case offsetof(struct bpf_tcp_sock, total_retrans):
6611 		BPF_TCP_SOCK_GET_COMMON(total_retrans);
6612 		break;
6613 	case offsetof(struct bpf_tcp_sock, segs_in):
6614 		BPF_TCP_SOCK_GET_COMMON(segs_in);
6615 		break;
6616 	case offsetof(struct bpf_tcp_sock, data_segs_in):
6617 		BPF_TCP_SOCK_GET_COMMON(data_segs_in);
6618 		break;
6619 	case offsetof(struct bpf_tcp_sock, segs_out):
6620 		BPF_TCP_SOCK_GET_COMMON(segs_out);
6621 		break;
6622 	case offsetof(struct bpf_tcp_sock, data_segs_out):
6623 		BPF_TCP_SOCK_GET_COMMON(data_segs_out);
6624 		break;
6625 	case offsetof(struct bpf_tcp_sock, lost_out):
6626 		BPF_TCP_SOCK_GET_COMMON(lost_out);
6627 		break;
6628 	case offsetof(struct bpf_tcp_sock, sacked_out):
6629 		BPF_TCP_SOCK_GET_COMMON(sacked_out);
6630 		break;
6631 	case offsetof(struct bpf_tcp_sock, bytes_received):
6632 		BPF_TCP_SOCK_GET_COMMON(bytes_received);
6633 		break;
6634 	case offsetof(struct bpf_tcp_sock, bytes_acked):
6635 		BPF_TCP_SOCK_GET_COMMON(bytes_acked);
6636 		break;
6637 	case offsetof(struct bpf_tcp_sock, dsack_dups):
6638 		BPF_TCP_SOCK_GET_COMMON(dsack_dups);
6639 		break;
6640 	case offsetof(struct bpf_tcp_sock, delivered):
6641 		BPF_TCP_SOCK_GET_COMMON(delivered);
6642 		break;
6643 	case offsetof(struct bpf_tcp_sock, delivered_ce):
6644 		BPF_TCP_SOCK_GET_COMMON(delivered_ce);
6645 		break;
6646 	case offsetof(struct bpf_tcp_sock, icsk_retransmits):
6647 		BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
6648 		break;
6649 	}
6650 
6651 	return insn - insn_buf;
6652 }
6653 
6654 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
6655 {
6656 	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
6657 		return (unsigned long)sk;
6658 
6659 	return (unsigned long)NULL;
6660 }
6661 
6662 const struct bpf_func_proto bpf_tcp_sock_proto = {
6663 	.func		= bpf_tcp_sock,
6664 	.gpl_only	= false,
6665 	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
6666 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
6667 };
6668 
6669 BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
6670 {
6671 	sk = sk_to_full_sk(sk);
6672 
6673 	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
6674 		return (unsigned long)sk;
6675 
6676 	return (unsigned long)NULL;
6677 }
6678 
6679 static const struct bpf_func_proto bpf_get_listener_sock_proto = {
6680 	.func		= bpf_get_listener_sock,
6681 	.gpl_only	= false,
6682 	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
6683 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
6684 };
6685 
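/* Set the ECN CE codepoint on an IPv4 or IPv6 skb, provided the IP header
 * is in the linear area and writable.  Returns the INET_ECN_set_ce() result,
 * or 0 when the packet cannot be marked.
 */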
6686 BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
6687 {
6688 	unsigned int iphdr_len;
6689 
6690 	switch (skb_protocol(skb, true)) {
6691 	case cpu_to_be16(ETH_P_IP):
6692 		iphdr_len = sizeof(struct iphdr);
6693 		break;
6694 	case cpu_to_be16(ETH_P_IPV6):
6695 		iphdr_len = sizeof(struct ipv6hdr);
6696 		break;
6697 	default:
6698 		return 0;
6699 	}
6700 
6701 	if (skb_headlen(skb) < iphdr_len)
6702 		return 0;
6703 
6704 	if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
6705 		return 0;
6706 
6707 	return INET_ECN_set_ce(skb);
6708 }
6709 
6710 bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6711 				  struct bpf_insn_access_aux *info)
6712 {
6713 	if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
6714 		return false;
6715 
6716 	if (off % size != 0)
6717 		return false;
6718 
6719 	switch (off) {
6720 	default:
6721 		return size == sizeof(__u32);
6722 	}
6723 }
6724 
6725 u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
6726 				    const struct bpf_insn *si,
6727 				    struct bpf_insn *insn_buf,
6728 				    struct bpf_prog *prog, u32 *target_size)
6729 {
6730 	struct bpf_insn *insn = insn_buf;
6731 
6732 #define BPF_XDP_SOCK_GET(FIELD)						\
6733 	do {								\
6734 		BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) >	\
6735 			     sizeof_field(struct bpf_xdp_sock, FIELD));	\
6736 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
6737 				      si->dst_reg, si->src_reg,		\
6738 				      offsetof(struct xdp_sock, FIELD)); \
6739 	} while (0)
6740 
6741 	switch (si->off) {
6742 	case offsetof(struct bpf_xdp_sock, queue_id):
6743 		BPF_XDP_SOCK_GET(queue_id);
6744 		break;
6745 	}
6746 
6747 	return insn - insn_buf;
6748 }
6749 
6750 static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
6751 	.func           = bpf_skb_ecn_set_ce,
6752 	.gpl_only       = false,
6753 	.ret_type       = RET_INTEGER,
6754 	.arg1_type      = ARG_PTR_TO_CTX,
6755 };
6756 
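/* Check whether the ACK in @th carries a valid SYN cookie for listener @sk.
 * Returns 0 when the cookie is valid, -ENOENT when it is not (or no recent
 * syncookie overflow happened), and -EINVAL for malformed input or when
 * syncookies are disabled.
 */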
6757 BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6758 	   struct tcphdr *, th, u32, th_len)
6759 {
6760 #ifdef CONFIG_SYN_COOKIES
6761 	u32 cookie;
6762 	int ret;
6763 
6764 	if (unlikely(!sk || th_len < sizeof(*th)))
6765 		return -EINVAL;
6766 
6767 	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
6768 	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6769 		return -EINVAL;
6770 
6771 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6772 		return -EINVAL;
6773 
6774 	if (!th->ack || th->rst || th->syn)
6775 		return -ENOENT;
6776 
6777 	if (tcp_synq_no_recent_overflow(sk))
6778 		return -ENOENT;
6779 
6780 	cookie = ntohl(th->ack_seq) - 1;
6781 
6782 	switch (sk->sk_family) {
6783 	case AF_INET:
6784 		if (unlikely(iph_len < sizeof(struct iphdr)))
6785 			return -EINVAL;
6786 
6787 		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
6788 		break;
6789 
6790 #if IS_BUILTIN(CONFIG_IPV6)
6791 	case AF_INET6:
6792 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6793 			return -EINVAL;
6794 
6795 		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
6796 		break;
6797 #endif /* CONFIG_IPV6 */
6798 
6799 	default:
6800 		return -EPROTONOSUPPORT;
6801 	}
6802 
6803 	if (ret > 0)
6804 		return 0;
6805 
6806 	return -ENOENT;
6807 #else
6808 	return -ENOTSUPP;
6809 #endif
6810 }
6811 
6812 static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
6813 	.func		= bpf_tcp_check_syncookie,
6814 	.gpl_only	= true,
6815 	.pkt_access	= true,
6816 	.ret_type	= RET_INTEGER,
6817 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6818 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6819 	.arg3_type	= ARG_CONST_SIZE,
6820 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6821 	.arg5_type	= ARG_CONST_SIZE,
6822 };
6823 
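/* Generate a SYN cookie for the SYN described by @iph/@th on behalf of
 * listener @sk.  On success, the low 32 bits of the return value hold the
 * cookie and the high 32 bits the MSS value to use.
 */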
6824 BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6825 	   struct tcphdr *, th, u32, th_len)
6826 {
6827 #ifdef CONFIG_SYN_COOKIES
6828 	u32 cookie;
6829 	u16 mss;
6830 
6831 	if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
6832 		return -EINVAL;
6833 
6834 	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6835 		return -EINVAL;
6836 
6837 	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6838 		return -ENOENT;
6839 
6840 	if (!th->syn || th->ack || th->fin || th->rst)
6841 		return -EINVAL;
6842 
6843 	if (unlikely(iph_len < sizeof(struct iphdr)))
6844 		return -EINVAL;
6845 
6846 	/* Both struct iphdr and struct ipv6hdr have the version field at the
6847 	 * same offset so we can cast to the shorter header (struct iphdr).
6848 	 */
6849 	switch (((struct iphdr *)iph)->version) {
6850 	case 4:
6851 		if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
6852 			return -EINVAL;
6853 
6854 		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
6855 		break;
6856 
6857 #if IS_BUILTIN(CONFIG_IPV6)
6858 	case 6:
6859 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6860 			return -EINVAL;
6861 
6862 		if (sk->sk_family != AF_INET6)
6863 			return -EINVAL;
6864 
6865 		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
6866 		break;
6867 #endif /* CONFIG_IPV6 */
6868 
6869 	default:
6870 		return -EPROTONOSUPPORT;
6871 	}
6872 	if (mss == 0)
6873 		return -ENOENT;
6874 
6875 	return cookie | ((u64)mss << 32);
6876 #else
6877 	return -EOPNOTSUPP;
6878 #endif /* CONFIG_SYN_COOKIES */
6879 }
6880 
6881 static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
6882 	.func		= bpf_tcp_gen_syncookie,
6883 	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
6884 	.pkt_access	= true,
6885 	.ret_type	= RET_INTEGER,
6886 	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6887 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6888 	.arg3_type	= ARG_CONST_SIZE,
6889 	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
6890 	.arg5_type	= ARG_CONST_SIZE,
6891 };
6892 
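/* Assign @sk as the prospective socket for @skb at TC ingress: the skb is
 * orphaned first, and sock_pfree is installed as the destructor so that a
 * reference taken on a refcounted socket is dropped correctly later on.
 */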
6893 BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
6894 {
6895 	if (!sk || flags != 0)
6896 		return -EINVAL;
6897 	if (!skb_at_tc_ingress(skb))
6898 		return -EOPNOTSUPP;
6899 	if (unlikely(dev_net(skb->dev) != sock_net(sk)))
6900 		return -ENETUNREACH;
6901 	if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
6902 		return -ESOCKTNOSUPPORT;
6903 	if (sk_is_refcounted(sk) &&
6904 	    unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
6905 		return -ENOENT;
6906 
6907 	skb_orphan(skb);
6908 	skb->sk = sk;
6909 	skb->destructor = sock_pfree;
6910 
6911 	return 0;
6912 }
6913 
6914 static const struct bpf_func_proto bpf_sk_assign_proto = {
6915 	.func		= bpf_sk_assign,
6916 	.gpl_only	= false,
6917 	.ret_type	= RET_INTEGER,
6918 	.arg1_type      = ARG_PTR_TO_CTX,
6919 	.arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6920 	.arg3_type	= ARG_ANYTHING,
6921 };
6922 
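/* Walk the TCP options in [op, opend) looking for @search_kind, optionally
 * also matching @magic_len bytes of magic for experimental options.  Returns
 * a pointer to the option, ERR_PTR(-ENOMSG) if it is not found (setting
 * *eol when the option list ended), or ERR_PTR(-EFAULT) on a malformed
 * option list.
 */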
6923 static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
6924 				    u8 search_kind, const u8 *magic,
6925 				    u8 magic_len, bool *eol)
6926 {
6927 	u8 kind, kind_len;
6928 
6929 	*eol = false;
6930 
6931 	while (op < opend) {
6932 		kind = op[0];
6933 
6934 		if (kind == TCPOPT_EOL) {
6935 			*eol = true;
6936 			return ERR_PTR(-ENOMSG);
6937 		} else if (kind == TCPOPT_NOP) {
6938 			op++;
6939 			continue;
6940 		}
6941 
6942 		if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
6943 			/* Something is wrong in the received header.
6944 			 * Follow the TCP stack's tcp_parse_options()
6945 			 * and just bail here.
6946 			 */
6947 			return ERR_PTR(-EFAULT);
6948 
6949 		kind_len = op[1];
6950 		if (search_kind == kind) {
6951 			if (!magic_len)
6952 				return op;
6953 
6954 			if (magic_len > kind_len - 2)
6955 				return ERR_PTR(-ENOMSG);
6956 
6957 			if (!memcmp(&op[2], magic, magic_len))
6958 				return op;
6959 		}
6960 
6961 		op += kind_len;
6962 	}
6963 
6964 	return ERR_PTR(-ENOMSG);
6965 }
6966 
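/* Load a TCP header option into @search_res for a sock_ops program.  The
 * first bytes of @search_res describe the option to look for: the kind, and
 * for experimental kinds also the length and magic.  Returns the full
 * option length on success, or -ENOSPC when it does not fit into @len bytes.
 */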
6967 BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6968 	   void *, search_res, u32, len, u64, flags)
6969 {
6970 	bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
6971 	const u8 *op, *opend, *magic, *search = search_res;
6972 	u8 search_kind, search_len, copy_len, magic_len;
6973 	int ret;
6974 
6975 	/* 2 bytes is the minimal option length, except for TCPOPT_NOP and
6976 	 * TCPOPT_EOL, which are useless for the bpf prog to learn and which
6977 	 * this helper also disallows loading.
6978 	 */
6979 	if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
6980 		return -EINVAL;
6981 
6982 	search_kind = search[0];
6983 	search_len = search[1];
6984 
6985 	if (search_len > len || search_kind == TCPOPT_NOP ||
6986 	    search_kind == TCPOPT_EOL)
6987 		return -EINVAL;
6988 
6989 	if (search_kind == TCPOPT_EXP || search_kind == 253) {
6990 		/* 16 or 32 bit magic.  +2 for kind and kind length */
6991 		if (search_len != 4 && search_len != 6)
6992 			return -EINVAL;
6993 		magic = &search[2];
6994 		magic_len = search_len - 2;
6995 	} else {
6996 		if (search_len)
6997 			return -EINVAL;
6998 		magic = NULL;
6999 		magic_len = 0;
7000 	}
7001 
7002 	if (load_syn) {
7003 		ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
7004 		if (ret < 0)
7005 			return ret;
7006 
7007 		opend = op + ret;
7008 		op += sizeof(struct tcphdr);
7009 	} else {
7010 		if (!bpf_sock->skb ||
7011 		    bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7012 			/* This bpf_sock->op cannot call this helper */
7013 			return -EPERM;
7014 
7015 		opend = bpf_sock->skb_data_end;
7016 		op = bpf_sock->skb->data + sizeof(struct tcphdr);
7017 	}
7018 
7019 	op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
7020 				&eol);
7021 	if (IS_ERR(op))
7022 		return PTR_ERR(op);
7023 
7024 	copy_len = op[1];
7025 	ret = copy_len;
7026 	if (copy_len > len) {
7027 		ret = -ENOSPC;
7028 		copy_len = len;
7029 	}
7030 
7031 	memcpy(search_res, op, copy_len);
7032 	return ret;
7033 }
7034 
7035 static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
7036 	.func		= bpf_sock_ops_load_hdr_opt,
7037 	.gpl_only	= false,
7038 	.ret_type	= RET_INTEGER,
7039 	.arg1_type	= ARG_PTR_TO_CTX,
7040 	.arg2_type	= ARG_PTR_TO_MEM,
7041 	.arg3_type	= ARG_CONST_SIZE,
7042 	.arg4_type	= ARG_ANYTHING,
7043 };
7044 
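/* Write a new TCP header option from @from during the
 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB callback.  The option must fit into the
 * remaining option space (see bpf_sock_ops_reserve_hdr_opt() below) and
 * must not duplicate an option already present in the header being built.
 */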
7045 BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7046 	   const void *, from, u32, len, u64, flags)
7047 {
7048 	u8 new_kind, new_kind_len, magic_len = 0, *opend;
7049 	const u8 *op, *new_op, *magic = NULL;
7050 	struct sk_buff *skb;
7051 	bool eol;
7052 
7053 	if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
7054 		return -EPERM;
7055 
7056 	if (len < 2 || flags)
7057 		return -EINVAL;
7058 
7059 	new_op = from;
7060 	new_kind = new_op[0];
7061 	new_kind_len = new_op[1];
7062 
7063 	if (new_kind_len > len || new_kind == TCPOPT_NOP ||
7064 	    new_kind == TCPOPT_EOL)
7065 		return -EINVAL;
7066 
7067 	if (new_kind_len > bpf_sock->remaining_opt_len)
7068 		return -ENOSPC;
7069 
7070 	/* 253 is another experimental kind */
7071 	if (new_kind == TCPOPT_EXP || new_kind == 253)  {
7072 		if (new_kind_len < 4)
7073 			return -EINVAL;
7074 		/* Match on the 2-byte magic as well.
7075 		 * RFC 6994: the magic could be 2 or 4 bytes.
7076 		 * Hence, matching by 2 bytes only is on the
7077 		 * conservative side, but it is the right
7078 		 * thing to do for the 'search-for-duplication'
7079 		 * purpose.
7080 		 */
7081 		magic = &new_op[2];
7082 		magic_len = 2;
7083 	}
7084 
7085 	/* Check for duplication */
7086 	skb = bpf_sock->skb;
7087 	op = skb->data + sizeof(struct tcphdr);
7088 	opend = bpf_sock->skb_data_end;
7089 
7090 	op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
7091 				&eol);
7092 	if (!IS_ERR(op))
7093 		return -EEXIST;
7094 
7095 	if (PTR_ERR(op) != -ENOMSG)
7096 		return PTR_ERR(op);
7097 
7098 	if (eol)
7099 		/* The option list has been ended.  Treat it as if no more
7100 		 * header options can be written.
7101 		 */
7102 		return -ENOSPC;
7103 
7104 	/* No duplication found.  Store the header option. */
7105 	memcpy(opend, from, new_kind_len);
7106 
7107 	bpf_sock->remaining_opt_len -= new_kind_len;
7108 	bpf_sock->skb_data_end += new_kind_len;
7109 
7110 	return 0;
7111 }
7112 
7113 static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
7114 	.func		= bpf_sock_ops_store_hdr_opt,
7115 	.gpl_only	= false,
7116 	.ret_type	= RET_INTEGER,
7117 	.arg1_type	= ARG_PTR_TO_CTX,
7118 	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
7119 	.arg3_type	= ARG_CONST_SIZE,
7120 	.arg4_type	= ARG_ANYTHING,
7121 };
7122 
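/* Reserve @len bytes of TCP header option space during the
 * BPF_SOCK_OPS_HDR_OPT_LEN_CB callback, to be written later from the
 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB callback via bpf_sock_ops_store_hdr_opt().
 */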
7123 BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7124 	   u32, len, u64, flags)
7125 {
7126 	if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7127 		return -EPERM;
7128 
7129 	if (flags || len < 2)
7130 		return -EINVAL;
7131 
7132 	if (len > bpf_sock->remaining_opt_len)
7133 		return -ENOSPC;
7134 
7135 	bpf_sock->remaining_opt_len -= len;
7136 
7137 	return 0;
7138 }
7139 
7140 static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
7141 	.func		= bpf_sock_ops_reserve_hdr_opt,
7142 	.gpl_only	= false,
7143 	.ret_type	= RET_INTEGER,
7144 	.arg1_type	= ARG_PTR_TO_CTX,
7145 	.arg2_type	= ARG_ANYTHING,
7146 	.arg3_type	= ARG_ANYTHING,
7147 };
7148 
7149 #endif /* CONFIG_INET */
7150 
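/* Helpers listed here may move or invalidate packet data; the verifier uses
 * this to know when packet pointers held by the program must be reloaded
 * after a helper call.
 */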
7151 bool bpf_helper_changes_pkt_data(void *func)
7152 {
7153 	if (func == bpf_skb_vlan_push ||
7154 	    func == bpf_skb_vlan_pop ||
7155 	    func == bpf_skb_store_bytes ||
7156 	    func == bpf_skb_change_proto ||
7157 	    func == bpf_skb_change_head ||
7158 	    func == sk_skb_change_head ||
7159 	    func == bpf_skb_change_tail ||
7160 	    func == sk_skb_change_tail ||
7161 	    func == bpf_skb_adjust_room ||
7162 	    func == sk_skb_adjust_room ||
7163 	    func == bpf_skb_pull_data ||
7164 	    func == sk_skb_pull_data ||
7165 	    func == bpf_clone_redirect ||
7166 	    func == bpf_l3_csum_replace ||
7167 	    func == bpf_l4_csum_replace ||
7168 	    func == bpf_xdp_adjust_head ||
7169 	    func == bpf_xdp_adjust_meta ||
7170 	    func == bpf_msg_pull_data ||
7171 	    func == bpf_msg_push_data ||
7172 	    func == bpf_msg_pop_data ||
7173 	    func == bpf_xdp_adjust_tail ||
7174 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7175 	    func == bpf_lwt_seg6_store_bytes ||
7176 	    func == bpf_lwt_seg6_adjust_srh ||
7177 	    func == bpf_lwt_seg6_action ||
7178 #endif
7179 #ifdef CONFIG_INET
7180 	    func == bpf_sock_ops_store_hdr_opt ||
7181 #endif
7182 	    func == bpf_lwt_in_push_encap ||
7183 	    func == bpf_lwt_xmit_push_encap)
7184 		return true;
7185 
7186 	return false;
7187 }
7188 
7189 const struct bpf_func_proto bpf_event_output_data_proto __weak;
7190 const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
7191 
7192 static const struct bpf_func_proto *
7193 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7194 {
7195 	switch (func_id) {
7196 	/* inet and inet6 sockets are created in a process
7197 	 * context so there is always a valid uid/gid
7198 	 */
7199 	case BPF_FUNC_get_current_uid_gid:
7200 		return &bpf_get_current_uid_gid_proto;
7201 	case BPF_FUNC_get_local_storage:
7202 		return &bpf_get_local_storage_proto;
7203 	case BPF_FUNC_get_socket_cookie:
7204 		return &bpf_get_socket_cookie_sock_proto;
7205 	case BPF_FUNC_get_netns_cookie:
7206 		return &bpf_get_netns_cookie_sock_proto;
7207 	case BPF_FUNC_perf_event_output:
7208 		return &bpf_event_output_data_proto;
7209 	case BPF_FUNC_get_current_pid_tgid:
7210 		return &bpf_get_current_pid_tgid_proto;
7211 	case BPF_FUNC_get_current_comm:
7212 		return &bpf_get_current_comm_proto;
7213 #ifdef CONFIG_CGROUPS
7214 	case BPF_FUNC_get_current_cgroup_id:
7215 		return &bpf_get_current_cgroup_id_proto;
7216 	case BPF_FUNC_get_current_ancestor_cgroup_id:
7217 		return &bpf_get_current_ancestor_cgroup_id_proto;
7218 #endif
7219 #ifdef CONFIG_CGROUP_NET_CLASSID
7220 	case BPF_FUNC_get_cgroup_classid:
7221 		return &bpf_get_cgroup_classid_curr_proto;
7222 #endif
7223 	case BPF_FUNC_sk_storage_get:
7224 		return &bpf_sk_storage_get_cg_sock_proto;
7225 	case BPF_FUNC_ktime_get_coarse_ns:
7226 		return &bpf_ktime_get_coarse_ns_proto;
7227 	default:
7228 		return bpf_base_func_proto(func_id);
7229 	}
7230 }
7231 
7232 static const struct bpf_func_proto *
7233 sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7234 {
7235 	switch (func_id) {
7236 	/* inet and inet6 sockets are created in a process
7237 	 * context so there is always a valid uid/gid
7238 	 */
7239 	case BPF_FUNC_get_current_uid_gid:
7240 		return &bpf_get_current_uid_gid_proto;
7241 	case BPF_FUNC_bind:
7242 		switch (prog->expected_attach_type) {
7243 		case BPF_CGROUP_INET4_CONNECT:
7244 		case BPF_CGROUP_INET6_CONNECT:
7245 			return &bpf_bind_proto;
7246 		default:
7247 			return NULL;
7248 		}
7249 	case BPF_FUNC_get_socket_cookie:
7250 		return &bpf_get_socket_cookie_sock_addr_proto;
7251 	case BPF_FUNC_get_netns_cookie:
7252 		return &bpf_get_netns_cookie_sock_addr_proto;
7253 	case BPF_FUNC_get_local_storage:
7254 		return &bpf_get_local_storage_proto;
7255 	case BPF_FUNC_perf_event_output:
7256 		return &bpf_event_output_data_proto;
7257 	case BPF_FUNC_get_current_pid_tgid:
7258 		return &bpf_get_current_pid_tgid_proto;
7259 	case BPF_FUNC_get_current_comm:
7260 		return &bpf_get_current_comm_proto;
7261 #ifdef CONFIG_CGROUPS
7262 	case BPF_FUNC_get_current_cgroup_id:
7263 		return &bpf_get_current_cgroup_id_proto;
7264 	case BPF_FUNC_get_current_ancestor_cgroup_id:
7265 		return &bpf_get_current_ancestor_cgroup_id_proto;
7266 #endif
7267 #ifdef CONFIG_CGROUP_NET_CLASSID
7268 	case BPF_FUNC_get_cgroup_classid:
7269 		return &bpf_get_cgroup_classid_curr_proto;
7270 #endif
7271 #ifdef CONFIG_INET
7272 	case BPF_FUNC_sk_lookup_tcp:
7273 		return &bpf_sock_addr_sk_lookup_tcp_proto;
7274 	case BPF_FUNC_sk_lookup_udp:
7275 		return &bpf_sock_addr_sk_lookup_udp_proto;
7276 	case BPF_FUNC_sk_release:
7277 		return &bpf_sk_release_proto;
7278 	case BPF_FUNC_skc_lookup_tcp:
7279 		return &bpf_sock_addr_skc_lookup_tcp_proto;
7280 #endif /* CONFIG_INET */
7281 	case BPF_FUNC_sk_storage_get:
7282 		return &bpf_sk_storage_get_proto;
7283 	case BPF_FUNC_sk_storage_delete:
7284 		return &bpf_sk_storage_delete_proto;
7285 	case BPF_FUNC_setsockopt:
7286 		switch (prog->expected_attach_type) {
7287 		case BPF_CGROUP_INET4_BIND:
7288 		case BPF_CGROUP_INET6_BIND:
7289 		case BPF_CGROUP_INET4_CONNECT:
7290 		case BPF_CGROUP_INET6_CONNECT:
7291 		case BPF_CGROUP_UDP4_RECVMSG:
7292 		case BPF_CGROUP_UDP6_RECVMSG:
7293 		case BPF_CGROUP_UDP4_SENDMSG:
7294 		case BPF_CGROUP_UDP6_SENDMSG:
7295 		case BPF_CGROUP_INET4_GETPEERNAME:
7296 		case BPF_CGROUP_INET6_GETPEERNAME:
7297 		case BPF_CGROUP_INET4_GETSOCKNAME:
7298 		case BPF_CGROUP_INET6_GETSOCKNAME:
7299 			return &bpf_sock_addr_setsockopt_proto;
7300 		default:
7301 			return NULL;
7302 		}
7303 	case BPF_FUNC_getsockopt:
7304 		switch (prog->expected_attach_type) {
7305 		case BPF_CGROUP_INET4_BIND:
7306 		case BPF_CGROUP_INET6_BIND:
7307 		case BPF_CGROUP_INET4_CONNECT:
7308 		case BPF_CGROUP_INET6_CONNECT:
7309 		case BPF_CGROUP_UDP4_RECVMSG:
7310 		case BPF_CGROUP_UDP6_RECVMSG:
7311 		case BPF_CGROUP_UDP4_SENDMSG:
7312 		case BPF_CGROUP_UDP6_SENDMSG:
7313 		case BPF_CGROUP_INET4_GETPEERNAME:
7314 		case BPF_CGROUP_INET6_GETPEERNAME:
7315 		case BPF_CGROUP_INET4_GETSOCKNAME:
7316 		case BPF_CGROUP_INET6_GETSOCKNAME:
7317 			return &bpf_sock_addr_getsockopt_proto;
7318 		default:
7319 			return NULL;
7320 		}
7321 	default:
7322 		return bpf_sk_base_func_proto(func_id);
7323 	}
7324 }
7325 
7326 static const struct bpf_func_proto *
7327 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7328 {
7329 	switch (func_id) {
7330 	case BPF_FUNC_skb_load_bytes:
7331 		return &bpf_skb_load_bytes_proto;
7332 	case BPF_FUNC_skb_load_bytes_relative:
7333 		return &bpf_skb_load_bytes_relative_proto;
7334 	case BPF_FUNC_get_socket_cookie:
7335 		return &bpf_get_socket_cookie_proto;
7336 	case BPF_FUNC_get_socket_uid:
7337 		return &bpf_get_socket_uid_proto;
7338 	case BPF_FUNC_perf_event_output:
7339 		return &bpf_skb_event_output_proto;
7340 	default:
7341 		return bpf_sk_base_func_proto(func_id);
7342 	}
7343 }
7344 
7345 const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
7346 const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
7347 
7348 static const struct bpf_func_proto *
7349 cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7350 {
7351 	switch (func_id) {
7352 	case BPF_FUNC_get_local_storage:
7353 		return &bpf_get_local_storage_proto;
7354 	case BPF_FUNC_sk_fullsock:
7355 		return &bpf_sk_fullsock_proto;
7356 	case BPF_FUNC_sk_storage_get:
7357 		return &bpf_sk_storage_get_proto;
7358 	case BPF_FUNC_sk_storage_delete:
7359 		return &bpf_sk_storage_delete_proto;
7360 	case BPF_FUNC_perf_event_output:
7361 		return &bpf_skb_event_output_proto;
7362 #ifdef CONFIG_SOCK_CGROUP_DATA
7363 	case BPF_FUNC_skb_cgroup_id:
7364 		return &bpf_skb_cgroup_id_proto;
7365 	case BPF_FUNC_skb_ancestor_cgroup_id:
7366 		return &bpf_skb_ancestor_cgroup_id_proto;
7367 	case BPF_FUNC_sk_cgroup_id:
7368 		return &bpf_sk_cgroup_id_proto;
7369 	case BPF_FUNC_sk_ancestor_cgroup_id:
7370 		return &bpf_sk_ancestor_cgroup_id_proto;
7371 #endif
7372 #ifdef CONFIG_INET
7373 	case BPF_FUNC_sk_lookup_tcp:
7374 		return &bpf_sk_lookup_tcp_proto;
7375 	case BPF_FUNC_sk_lookup_udp:
7376 		return &bpf_sk_lookup_udp_proto;
7377 	case BPF_FUNC_sk_release:
7378 		return &bpf_sk_release_proto;
7379 	case BPF_FUNC_skc_lookup_tcp:
7380 		return &bpf_skc_lookup_tcp_proto;
7381 	case BPF_FUNC_tcp_sock:
7382 		return &bpf_tcp_sock_proto;
7383 	case BPF_FUNC_get_listener_sock:
7384 		return &bpf_get_listener_sock_proto;
7385 	case BPF_FUNC_skb_ecn_set_ce:
7386 		return &bpf_skb_ecn_set_ce_proto;
7387 #endif
7388 	default:
7389 		return sk_filter_func_proto(func_id, prog);
7390 	}
7391 }
7392 
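/* Helper set for tc classifier/action programs: the broadest skb set,
 * including packet rewriting, checksum, tunnel metadata and redirect
 * helpers. Unhandled IDs fall back to bpf_sk_base_func_proto().
 */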
7393 static const struct bpf_func_proto *
7394 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7395 {
7396 	switch (func_id) {
7397 	case BPF_FUNC_skb_store_bytes:
7398 		return &bpf_skb_store_bytes_proto;
7399 	case BPF_FUNC_skb_load_bytes:
7400 		return &bpf_skb_load_bytes_proto;
7401 	case BPF_FUNC_skb_load_bytes_relative:
7402 		return &bpf_skb_load_bytes_relative_proto;
7403 	case BPF_FUNC_skb_pull_data:
7404 		return &bpf_skb_pull_data_proto;
7405 	case BPF_FUNC_csum_diff:
7406 		return &bpf_csum_diff_proto;
7407 	case BPF_FUNC_csum_update:
7408 		return &bpf_csum_update_proto;
7409 	case BPF_FUNC_csum_level:
7410 		return &bpf_csum_level_proto;
7411 	case BPF_FUNC_l3_csum_replace:
7412 		return &bpf_l3_csum_replace_proto;
7413 	case BPF_FUNC_l4_csum_replace:
7414 		return &bpf_l4_csum_replace_proto;
7415 	case BPF_FUNC_clone_redirect:
7416 		return &bpf_clone_redirect_proto;
7417 	case BPF_FUNC_get_cgroup_classid:
7418 		return &bpf_get_cgroup_classid_proto;
7419 	case BPF_FUNC_skb_vlan_push:
7420 		return &bpf_skb_vlan_push_proto;
7421 	case BPF_FUNC_skb_vlan_pop:
7422 		return &bpf_skb_vlan_pop_proto;
7423 	case BPF_FUNC_skb_change_proto:
7424 		return &bpf_skb_change_proto_proto;
7425 	case BPF_FUNC_skb_change_type:
7426 		return &bpf_skb_change_type_proto;
7427 	case BPF_FUNC_skb_adjust_room:
7428 		return &bpf_skb_adjust_room_proto;
7429 	case BPF_FUNC_skb_change_tail:
7430 		return &bpf_skb_change_tail_proto;
7431 	case BPF_FUNC_skb_change_head:
7432 		return &bpf_skb_change_head_proto;
7433 	case BPF_FUNC_skb_get_tunnel_key:
7434 		return &bpf_skb_get_tunnel_key_proto;
7435 	case BPF_FUNC_skb_set_tunnel_key:
7436 		return bpf_get_skb_set_tunnel_proto(func_id);
7437 	case BPF_FUNC_skb_get_tunnel_opt:
7438 		return &bpf_skb_get_tunnel_opt_proto;
7439 	case BPF_FUNC_skb_set_tunnel_opt:
7440 		return bpf_get_skb_set_tunnel_proto(func_id);
7441 	case BPF_FUNC_redirect:
7442 		return &bpf_redirect_proto;
7443 	case BPF_FUNC_redirect_neigh:
7444 		return &bpf_redirect_neigh_proto;
7445 	case BPF_FUNC_redirect_peer:
7446 		return &bpf_redirect_peer_proto;
7447 	case BPF_FUNC_get_route_realm:
7448 		return &bpf_get_route_realm_proto;
7449 	case BPF_FUNC_get_hash_recalc:
7450 		return &bpf_get_hash_recalc_proto;
7451 	case BPF_FUNC_set_hash_invalid:
7452 		return &bpf_set_hash_invalid_proto;
7453 	case BPF_FUNC_set_hash:
7454 		return &bpf_set_hash_proto;
7455 	case BPF_FUNC_perf_event_output:
7456 		return &bpf_skb_event_output_proto;
7457 	case BPF_FUNC_get_smp_processor_id:
7458 		return &bpf_get_smp_processor_id_proto;
7459 	case BPF_FUNC_skb_under_cgroup:
7460 		return &bpf_skb_under_cgroup_proto;
7461 	case BPF_FUNC_get_socket_cookie:
7462 		return &bpf_get_socket_cookie_proto;
7463 	case BPF_FUNC_get_socket_uid:
7464 		return &bpf_get_socket_uid_proto;
7465 	case BPF_FUNC_fib_lookup:
7466 		return &bpf_skb_fib_lookup_proto;
7467 	case BPF_FUNC_check_mtu:
7468 		return &bpf_skb_check_mtu_proto;
7469 	case BPF_FUNC_sk_fullsock:
7470 		return &bpf_sk_fullsock_proto;
7471 	case BPF_FUNC_sk_storage_get:
7472 		return &bpf_sk_storage_get_proto;
7473 	case BPF_FUNC_sk_storage_delete:
7474 		return &bpf_sk_storage_delete_proto;
7475 #ifdef CONFIG_XFRM
7476 	case BPF_FUNC_skb_get_xfrm_state:
7477 		return &bpf_skb_get_xfrm_state_proto;
7478 #endif
7479 #ifdef CONFIG_CGROUP_NET_CLASSID
7480 	case BPF_FUNC_skb_cgroup_classid:
7481 		return &bpf_skb_cgroup_classid_proto;
7482 #endif
7483 #ifdef CONFIG_SOCK_CGROUP_DATA
7484 	case BPF_FUNC_skb_cgroup_id:
7485 		return &bpf_skb_cgroup_id_proto;
7486 	case BPF_FUNC_skb_ancestor_cgroup_id:
7487 		return &bpf_skb_ancestor_cgroup_id_proto;
7488 #endif
7489 #ifdef CONFIG_INET
7490 	case BPF_FUNC_sk_lookup_tcp:
7491 		return &bpf_sk_lookup_tcp_proto;
7492 	case BPF_FUNC_sk_lookup_udp:
7493 		return &bpf_sk_lookup_udp_proto;
7494 	case BPF_FUNC_sk_release:
7495 		return &bpf_sk_release_proto;
7496 	case BPF_FUNC_tcp_sock:
7497 		return &bpf_tcp_sock_proto;
7498 	case BPF_FUNC_get_listener_sock:
7499 		return &bpf_get_listener_sock_proto;
7500 	case BPF_FUNC_skc_lookup_tcp:
7501 		return &bpf_skc_lookup_tcp_proto;
7502 	case BPF_FUNC_tcp_check_syncookie:
7503 		return &bpf_tcp_check_syncookie_proto;
7504 	case BPF_FUNC_skb_ecn_set_ce:
7505 		return &bpf_skb_ecn_set_ce_proto;
7506 	case BPF_FUNC_tcp_gen_syncookie:
7507 		return &bpf_tcp_gen_syncookie_proto;
7508 	case BPF_FUNC_sk_assign:
7509 		return &bpf_sk_assign_proto;
7510 #endif
7511 	default:
7512 		return bpf_sk_base_func_proto(func_id);
7513 	}
7514 }
7515 
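/* Helper set for XDP programs, including head/tail/meta adjustment,
 * redirect, FIB lookup and (under CONFIG_INET) socket lookup helpers.
 */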
7516 static const struct bpf_func_proto *
7517 xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7518 {
7519 	switch (func_id) {
7520 	case BPF_FUNC_perf_event_output:
7521 		return &bpf_xdp_event_output_proto;
7522 	case BPF_FUNC_get_smp_processor_id:
7523 		return &bpf_get_smp_processor_id_proto;
7524 	case BPF_FUNC_csum_diff:
7525 		return &bpf_csum_diff_proto;
7526 	case BPF_FUNC_xdp_adjust_head:
7527 		return &bpf_xdp_adjust_head_proto;
7528 	case BPF_FUNC_xdp_adjust_meta:
7529 		return &bpf_xdp_adjust_meta_proto;
7530 	case BPF_FUNC_redirect:
7531 		return &bpf_xdp_redirect_proto;
7532 	case BPF_FUNC_redirect_map:
7533 		return &bpf_xdp_redirect_map_proto;
7534 	case BPF_FUNC_xdp_adjust_tail:
7535 		return &bpf_xdp_adjust_tail_proto;
7536 	case BPF_FUNC_fib_lookup:
7537 		return &bpf_xdp_fib_lookup_proto;
7538 	case BPF_FUNC_check_mtu:
7539 		return &bpf_xdp_check_mtu_proto;
7540 #ifdef CONFIG_INET
7541 	case BPF_FUNC_sk_lookup_udp:
7542 		return &bpf_xdp_sk_lookup_udp_proto;
7543 	case BPF_FUNC_sk_lookup_tcp:
7544 		return &bpf_xdp_sk_lookup_tcp_proto;
7545 	case BPF_FUNC_sk_release:
7546 		return &bpf_sk_release_proto;
7547 	case BPF_FUNC_skc_lookup_tcp:
7548 		return &bpf_xdp_skc_lookup_tcp_proto;
7549 	case BPF_FUNC_tcp_check_syncookie:
7550 		return &bpf_tcp_check_syncookie_proto;
7551 	case BPF_FUNC_tcp_gen_syncookie:
7552 		return &bpf_tcp_gen_syncookie_proto;
7553 #endif
7554 	default:
7555 		return bpf_sk_base_func_proto(func_id);
7556 	}
7557 }
7558 
7559 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
7560 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
7561 
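/* Helper set for sock_ops programs, including setsockopt/getsockopt,
 * sockmap/sockhash update and (under CONFIG_INET) the TCP header option
 * helpers.
 */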
7562 static const struct bpf_func_proto *
7563 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7564 {
7565 	switch (func_id) {
7566 	case BPF_FUNC_setsockopt:
7567 		return &bpf_sock_ops_setsockopt_proto;
7568 	case BPF_FUNC_getsockopt:
7569 		return &bpf_sock_ops_getsockopt_proto;
7570 	case BPF_FUNC_sock_ops_cb_flags_set:
7571 		return &bpf_sock_ops_cb_flags_set_proto;
7572 	case BPF_FUNC_sock_map_update:
7573 		return &bpf_sock_map_update_proto;
7574 	case BPF_FUNC_sock_hash_update:
7575 		return &bpf_sock_hash_update_proto;
7576 	case BPF_FUNC_get_socket_cookie:
7577 		return &bpf_get_socket_cookie_sock_ops_proto;
7578 	case BPF_FUNC_get_local_storage:
7579 		return &bpf_get_local_storage_proto;
7580 	case BPF_FUNC_perf_event_output:
7581 		return &bpf_event_output_data_proto;
7582 	case BPF_FUNC_sk_storage_get:
7583 		return &bpf_sk_storage_get_proto;
7584 	case BPF_FUNC_sk_storage_delete:
7585 		return &bpf_sk_storage_delete_proto;
7586 	case BPF_FUNC_get_netns_cookie:
7587 		return &bpf_get_netns_cookie_sock_ops_proto;
7588 #ifdef CONFIG_INET
7589 	case BPF_FUNC_load_hdr_opt:
7590 		return &bpf_sock_ops_load_hdr_opt_proto;
7591 	case BPF_FUNC_store_hdr_opt:
7592 		return &bpf_sock_ops_store_hdr_opt_proto;
7593 	case BPF_FUNC_reserve_hdr_opt:
7594 		return &bpf_sock_ops_reserve_hdr_opt_proto;
7595 	case BPF_FUNC_tcp_sock:
7596 		return &bpf_tcp_sock_proto;
7597 #endif /* CONFIG_INET */
7598 	default:
7599 		return bpf_sk_base_func_proto(func_id);
7600 	}
7601 }
7602 
7603 const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
7604 const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
7605 
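/* Helper set for sk_msg (sockmap msg verdict) programs. */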
7606 static const struct bpf_func_proto *
7607 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7608 {
7609 	switch (func_id) {
7610 	case BPF_FUNC_msg_redirect_map:
7611 		return &bpf_msg_redirect_map_proto;
7612 	case BPF_FUNC_msg_redirect_hash:
7613 		return &bpf_msg_redirect_hash_proto;
7614 	case BPF_FUNC_msg_apply_bytes:
7615 		return &bpf_msg_apply_bytes_proto;
7616 	case BPF_FUNC_msg_cork_bytes:
7617 		return &bpf_msg_cork_bytes_proto;
7618 	case BPF_FUNC_msg_pull_data:
7619 		return &bpf_msg_pull_data_proto;
7620 	case BPF_FUNC_msg_push_data:
7621 		return &bpf_msg_push_data_proto;
7622 	case BPF_FUNC_msg_pop_data:
7623 		return &bpf_msg_pop_data_proto;
7624 	case BPF_FUNC_perf_event_output:
7625 		return &bpf_event_output_data_proto;
7626 	case BPF_FUNC_get_current_uid_gid:
7627 		return &bpf_get_current_uid_gid_proto;
7628 	case BPF_FUNC_get_current_pid_tgid:
7629 		return &bpf_get_current_pid_tgid_proto;
7630 	case BPF_FUNC_sk_storage_get:
7631 		return &bpf_sk_storage_get_proto;
7632 	case BPF_FUNC_sk_storage_delete:
7633 		return &bpf_sk_storage_delete_proto;
7634 	case BPF_FUNC_get_netns_cookie:
7635 		return &bpf_get_netns_cookie_sk_msg_proto;
7636 #ifdef CONFIG_CGROUPS
7637 	case BPF_FUNC_get_current_cgroup_id:
7638 		return &bpf_get_current_cgroup_id_proto;
7639 	case BPF_FUNC_get_current_ancestor_cgroup_id:
7640 		return &bpf_get_current_ancestor_cgroup_id_proto;
7641 #endif
7642 #ifdef CONFIG_CGROUP_NET_CLASSID
7643 	case BPF_FUNC_get_cgroup_classid:
7644 		return &bpf_get_cgroup_classid_curr_proto;
7645 #endif
7646 	default:
7647 		return bpf_sk_base_func_proto(func_id);
7648 	}
7649 }
7650 
7651 const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
7652 const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
7653 
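/* Helper set for sk_skb (sockmap stream parser/verdict) programs; note
 * the sk_skb-specific variants of the pull/change_tail/change_head/
 * adjust_room helpers.
 */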
7654 static const struct bpf_func_proto *
7655 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7656 {
7657 	switch (func_id) {
7658 	case BPF_FUNC_skb_store_bytes:
7659 		return &bpf_skb_store_bytes_proto;
7660 	case BPF_FUNC_skb_load_bytes:
7661 		return &bpf_skb_load_bytes_proto;
7662 	case BPF_FUNC_skb_pull_data:
7663 		return &sk_skb_pull_data_proto;
7664 	case BPF_FUNC_skb_change_tail:
7665 		return &sk_skb_change_tail_proto;
7666 	case BPF_FUNC_skb_change_head:
7667 		return &sk_skb_change_head_proto;
7668 	case BPF_FUNC_skb_adjust_room:
7669 		return &sk_skb_adjust_room_proto;
7670 	case BPF_FUNC_get_socket_cookie:
7671 		return &bpf_get_socket_cookie_proto;
7672 	case BPF_FUNC_get_socket_uid:
7673 		return &bpf_get_socket_uid_proto;
7674 	case BPF_FUNC_sk_redirect_map:
7675 		return &bpf_sk_redirect_map_proto;
7676 	case BPF_FUNC_sk_redirect_hash:
7677 		return &bpf_sk_redirect_hash_proto;
7678 	case BPF_FUNC_perf_event_output:
7679 		return &bpf_skb_event_output_proto;
7680 #ifdef CONFIG_INET
7681 	case BPF_FUNC_sk_lookup_tcp:
7682 		return &bpf_sk_lookup_tcp_proto;
7683 	case BPF_FUNC_sk_lookup_udp:
7684 		return &bpf_sk_lookup_udp_proto;
7685 	case BPF_FUNC_sk_release:
7686 		return &bpf_sk_release_proto;
7687 	case BPF_FUNC_skc_lookup_tcp:
7688 		return &bpf_skc_lookup_tcp_proto;
7689 #endif
7690 	default:
7691 		return bpf_sk_base_func_proto(func_id);
7692 	}
7693 }
7694 
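/* Flow dissector programs only get a dedicated skb_load_bytes variant
 * plus the base set.
 */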
7695 static const struct bpf_func_proto *
7696 flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7697 {
7698 	switch (func_id) {
7699 	case BPF_FUNC_skb_load_bytes:
7700 		return &bpf_flow_dissector_load_bytes_proto;
7701 	default:
7702 		return bpf_sk_base_func_proto(func_id);
7703 	}
7704 }
7705 
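/* Base helper set for lightweight tunnel (lwt) programs; the lwt_in,
 * lwt_xmit and lwt_seg6local sets below extend it and use it as their
 * fallback.
 */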
7706 static const struct bpf_func_proto *
7707 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7708 {
7709 	switch (func_id) {
7710 	case BPF_FUNC_skb_load_bytes:
7711 		return &bpf_skb_load_bytes_proto;
7712 	case BPF_FUNC_skb_pull_data:
7713 		return &bpf_skb_pull_data_proto;
7714 	case BPF_FUNC_csum_diff:
7715 		return &bpf_csum_diff_proto;
7716 	case BPF_FUNC_get_cgroup_classid:
7717 		return &bpf_get_cgroup_classid_proto;
7718 	case BPF_FUNC_get_route_realm:
7719 		return &bpf_get_route_realm_proto;
7720 	case BPF_FUNC_get_hash_recalc:
7721 		return &bpf_get_hash_recalc_proto;
7722 	case BPF_FUNC_perf_event_output:
7723 		return &bpf_skb_event_output_proto;
7724 	case BPF_FUNC_get_smp_processor_id:
7725 		return &bpf_get_smp_processor_id_proto;
7726 	case BPF_FUNC_skb_under_cgroup:
7727 		return &bpf_skb_under_cgroup_proto;
7728 	default:
7729 		return bpf_sk_base_func_proto(func_id);
7730 	}
7731 }
7732 
7733 static const struct bpf_func_proto *
7734 lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7735 {
7736 	switch (func_id) {
7737 	case BPF_FUNC_lwt_push_encap:
7738 		return &bpf_lwt_in_push_encap_proto;
7739 	default:
7740 		return lwt_out_func_proto(func_id, prog);
7741 	}
7742 }
7743 
7744 static const struct bpf_func_proto *
7745 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7746 {
7747 	switch (func_id) {
7748 	case BPF_FUNC_skb_get_tunnel_key:
7749 		return &bpf_skb_get_tunnel_key_proto;
7750 	case BPF_FUNC_skb_set_tunnel_key:
7751 		return bpf_get_skb_set_tunnel_proto(func_id);
7752 	case BPF_FUNC_skb_get_tunnel_opt:
7753 		return &bpf_skb_get_tunnel_opt_proto;
7754 	case BPF_FUNC_skb_set_tunnel_opt:
7755 		return bpf_get_skb_set_tunnel_proto(func_id);
7756 	case BPF_FUNC_redirect:
7757 		return &bpf_redirect_proto;
7758 	case BPF_FUNC_clone_redirect:
7759 		return &bpf_clone_redirect_proto;
7760 	case BPF_FUNC_skb_change_tail:
7761 		return &bpf_skb_change_tail_proto;
7762 	case BPF_FUNC_skb_change_head:
7763 		return &bpf_skb_change_head_proto;
7764 	case BPF_FUNC_skb_store_bytes:
7765 		return &bpf_skb_store_bytes_proto;
7766 	case BPF_FUNC_csum_update:
7767 		return &bpf_csum_update_proto;
7768 	case BPF_FUNC_csum_level:
7769 		return &bpf_csum_level_proto;
7770 	case BPF_FUNC_l3_csum_replace:
7771 		return &bpf_l3_csum_replace_proto;
7772 	case BPF_FUNC_l4_csum_replace:
7773 		return &bpf_l4_csum_replace_proto;
7774 	case BPF_FUNC_set_hash_invalid:
7775 		return &bpf_set_hash_invalid_proto;
7776 	case BPF_FUNC_lwt_push_encap:
7777 		return &bpf_lwt_xmit_push_encap_proto;
7778 	default:
7779 		return lwt_out_func_proto(func_id, prog);
7780 	}
7781 }
7782 
7783 static const struct bpf_func_proto *
7784 lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7785 {
7786 	switch (func_id) {
7787 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7788 	case BPF_FUNC_lwt_seg6_store_bytes:
7789 		return &bpf_lwt_seg6_store_bytes_proto;
7790 	case BPF_FUNC_lwt_seg6_action:
7791 		return &bpf_lwt_seg6_action_proto;
7792 	case BPF_FUNC_lwt_seg6_adjust_srh:
7793 		return &bpf_lwt_seg6_adjust_srh_proto;
7794 #endif
7795 	default:
7796 		return lwt_out_func_proto(func_id, prog);
7797 	}
7798 }
7799 
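/* Common __sk_buff context access checks shared by the skb based program
 * types: enforce alignment and per-field size rules and reject access to
 * the padding between gso_size and hwtstamp.
 */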
7800 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
7801 				    const struct bpf_prog *prog,
7802 				    struct bpf_insn_access_aux *info)
7803 {
7804 	const int size_default = sizeof(__u32);
7805 
7806 	if (off < 0 || off >= sizeof(struct __sk_buff))
7807 		return false;
7808 
7809 	/* The verifier guarantees that size > 0. */
7810 	if (off % size != 0)
7811 		return false;
7812 
7813 	switch (off) {
7814 	case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7815 		if (off + size > offsetofend(struct __sk_buff, cb[4]))
7816 			return false;
7817 		break;
7818 	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
7819 	case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
7820 	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
7821 	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
7822 	case bpf_ctx_range(struct __sk_buff, data):
7823 	case bpf_ctx_range(struct __sk_buff, data_meta):
7824 	case bpf_ctx_range(struct __sk_buff, data_end):
7825 		if (size != size_default)
7826 			return false;
7827 		break;
7828 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
7829 		return false;
7830 	case bpf_ctx_range(struct __sk_buff, hwtstamp):
7831 		if (type == BPF_WRITE || size != sizeof(__u64))
7832 			return false;
7833 		break;
7834 	case bpf_ctx_range(struct __sk_buff, tstamp):
7835 		if (size != sizeof(__u64))
7836 			return false;
7837 		break;
7838 	case offsetof(struct __sk_buff, sk):
7839 		if (type == BPF_WRITE || size != sizeof(__u64))
7840 			return false;
7841 		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
7842 		break;
7843 	case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
7844 		/* Explicitly prohibit access to padding in __sk_buff. */
7845 		return false;
7846 	default:
7847 		/* Only narrow read access allowed for now. */
7848 		if (type == BPF_WRITE) {
7849 			if (size != size_default)
7850 				return false;
7851 		} else {
7852 			bpf_ctx_record_field_size(info, size_default);
7853 			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
7854 				return false;
7855 		}
7856 	}
7857 
7858 	return true;
7859 }
7860 
7861 static bool sk_filter_is_valid_access(int off, int size,
7862 				      enum bpf_access_type type,
7863 				      const struct bpf_prog *prog,
7864 				      struct bpf_insn_access_aux *info)
7865 {
7866 	switch (off) {
7867 	case bpf_ctx_range(struct __sk_buff, tc_classid):
7868 	case bpf_ctx_range(struct __sk_buff, data):
7869 	case bpf_ctx_range(struct __sk_buff, data_meta):
7870 	case bpf_ctx_range(struct __sk_buff, data_end):
7871 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
7872 	case bpf_ctx_range(struct __sk_buff, tstamp):
7873 	case bpf_ctx_range(struct __sk_buff, wire_len):
7874 	case bpf_ctx_range(struct __sk_buff, hwtstamp):
7875 		return false;
7876 	}
7877 
7878 	if (type == BPF_WRITE) {
7879 		switch (off) {
7880 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7881 			break;
7882 		default:
7883 			return false;
7884 		}
7885 	}
7886 
7887 	return bpf_skb_is_valid_access(off, size, type, prog, info);
7888 }
7889 
7890 static bool cg_skb_is_valid_access(int off, int size,
7891 				   enum bpf_access_type type,
7892 				   const struct bpf_prog *prog,
7893 				   struct bpf_insn_access_aux *info)
7894 {
7895 	switch (off) {
7896 	case bpf_ctx_range(struct __sk_buff, tc_classid):
7897 	case bpf_ctx_range(struct __sk_buff, data_meta):
7898 	case bpf_ctx_range(struct __sk_buff, wire_len):
7899 		return false;
7900 	case bpf_ctx_range(struct __sk_buff, data):
7901 	case bpf_ctx_range(struct __sk_buff, data_end):
7902 		if (!bpf_capable())
7903 			return false;
7904 		break;
7905 	}
7906 
7907 	if (type == BPF_WRITE) {
7908 		switch (off) {
7909 		case bpf_ctx_range(struct __sk_buff, mark):
7910 		case bpf_ctx_range(struct __sk_buff, priority):
7911 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7912 			break;
7913 		case bpf_ctx_range(struct __sk_buff, tstamp):
7914 			if (!bpf_capable())
7915 				return false;
7916 			break;
7917 		default:
7918 			return false;
7919 		}
7920 	}
7921 
7922 	switch (off) {
7923 	case bpf_ctx_range(struct __sk_buff, data):
7924 		info->reg_type = PTR_TO_PACKET;
7925 		break;
7926 	case bpf_ctx_range(struct __sk_buff, data_end):
7927 		info->reg_type = PTR_TO_PACKET_END;
7928 		break;
7929 	}
7930 
7931 	return bpf_skb_is_valid_access(off, size, type, prog, info);
7932 }
7933 
7934 static bool lwt_is_valid_access(int off, int size,
7935 				enum bpf_access_type type,
7936 				const struct bpf_prog *prog,
7937 				struct bpf_insn_access_aux *info)
7938 {
7939 	switch (off) {
7940 	case bpf_ctx_range(struct __sk_buff, tc_classid):
7941 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
7942 	case bpf_ctx_range(struct __sk_buff, data_meta):
7943 	case bpf_ctx_range(struct __sk_buff, tstamp):
7944 	case bpf_ctx_range(struct __sk_buff, wire_len):
7945 	case bpf_ctx_range(struct __sk_buff, hwtstamp):
7946 		return false;
7947 	}
7948 
7949 	if (type == BPF_WRITE) {
7950 		switch (off) {
7951 		case bpf_ctx_range(struct __sk_buff, mark):
7952 		case bpf_ctx_range(struct __sk_buff, priority):
7953 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7954 			break;
7955 		default:
7956 			return false;
7957 		}
7958 	}
7959 
7960 	switch (off) {
7961 	case bpf_ctx_range(struct __sk_buff, data):
7962 		info->reg_type = PTR_TO_PACKET;
7963 		break;
7964 	case bpf_ctx_range(struct __sk_buff, data_end):
7965 		info->reg_type = PTR_TO_PACKET_END;
7966 		break;
7967 	}
7968 
7969 	return bpf_skb_is_valid_access(off, size, type, prog, info);
7970 }
7971 
7972 /* Attach-type-specific accesses */
7973 static bool __sock_filter_check_attach_type(int off,
7974 					    enum bpf_access_type access_type,
7975 					    enum bpf_attach_type attach_type)
7976 {
7977 	switch (off) {
7978 	case offsetof(struct bpf_sock, bound_dev_if):
7979 	case offsetof(struct bpf_sock, mark):
7980 	case offsetof(struct bpf_sock, priority):
7981 		switch (attach_type) {
7982 		case BPF_CGROUP_INET_SOCK_CREATE:
7983 		case BPF_CGROUP_INET_SOCK_RELEASE:
7984 			goto full_access;
7985 		default:
7986 			return false;
7987 		}
7988 	case bpf_ctx_range(struct bpf_sock, src_ip4):
7989 		switch (attach_type) {
7990 		case BPF_CGROUP_INET4_POST_BIND:
7991 			goto read_only;
7992 		default:
7993 			return false;
7994 		}
7995 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7996 		switch (attach_type) {
7997 		case BPF_CGROUP_INET6_POST_BIND:
7998 			goto read_only;
7999 		default:
8000 			return false;
8001 		}
8002 	case bpf_ctx_range(struct bpf_sock, src_port):
8003 		switch (attach_type) {
8004 		case BPF_CGROUP_INET4_POST_BIND:
8005 		case BPF_CGROUP_INET6_POST_BIND:
8006 			goto read_only;
8007 		default:
8008 			return false;
8009 		}
8010 	}
8011 read_only:
8012 	return access_type == BPF_READ;
8013 full_access:
8014 	return true;
8015 }
8016 
8017 bool bpf_sock_common_is_valid_access(int off, int size,
8018 				     enum bpf_access_type type,
8019 				     struct bpf_insn_access_aux *info)
8020 {
8021 	switch (off) {
8022 	case bpf_ctx_range_till(struct bpf_sock, type, priority):
8023 		return false;
8024 	default:
8025 		return bpf_sock_is_valid_access(off, size, type, info);
8026 	}
8027 }
8028 
8029 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
8030 			      struct bpf_insn_access_aux *info)
8031 {
8032 	const int size_default = sizeof(__u32);
8033 
8034 	if (off < 0 || off >= sizeof(struct bpf_sock))
8035 		return false;
8036 	if (off % size != 0)
8037 		return false;
8038 
8039 	switch (off) {
8040 	case offsetof(struct bpf_sock, state):
8041 	case offsetof(struct bpf_sock, family):
8042 	case offsetof(struct bpf_sock, type):
8043 	case offsetof(struct bpf_sock, protocol):
8044 	case offsetof(struct bpf_sock, dst_port):
8045 	case offsetof(struct bpf_sock, src_port):
8046 	case offsetof(struct bpf_sock, rx_queue_mapping):
8047 	case bpf_ctx_range(struct bpf_sock, src_ip4):
8048 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
8049 	case bpf_ctx_range(struct bpf_sock, dst_ip4):
8050 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
8051 		bpf_ctx_record_field_size(info, size_default);
8052 		return bpf_ctx_narrow_access_ok(off, size, size_default);
8053 	}
8054 
8055 	return size == size_default;
8056 }
8057 
8058 static bool sock_filter_is_valid_access(int off, int size,
8059 					enum bpf_access_type type,
8060 					const struct bpf_prog *prog,
8061 					struct bpf_insn_access_aux *info)
8062 {
8063 	if (!bpf_sock_is_valid_access(off, size, type, info))
8064 		return false;
8065 	return __sock_filter_check_attach_type(off, type,
8066 					       prog->expected_attach_type);
8067 }
8068 
8069 static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
8070 			     const struct bpf_prog *prog)
8071 {
8072 	/* Neither direct read nor direct write requires any preliminary
8073 	 * action.
8074 	 */
8075 	return 0;
8076 }
8077 
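/* Prologue emitted for programs that write the packet directly: if the
 * skb might be cloned, call bpf_skb_pull_data(skb, 0) first and return
 * @drop_verdict when that fails.
 */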
8078 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
8079 				const struct bpf_prog *prog, int drop_verdict)
8080 {
8081 	struct bpf_insn *insn = insn_buf;
8082 
8083 	if (!direct_write)
8084 		return 0;
8085 
8086 	/* if (!skb->cloned)
8087 	 *       goto start;
8088 	 *
8089 	 * (Fast path; otherwise we conservatively assume the skb may be
8090 	 *  a clone and let the helper handle the rest.)
8091 	 */
8092 	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET);
8093 	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
8094 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
8095 
8096 	/* ret = bpf_skb_pull_data(skb, 0); */
8097 	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
8098 	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
8099 	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8100 			       BPF_FUNC_skb_pull_data);
8101 	/* if (!ret)
8102 	 *      goto restore;
8103 	 * return TC_ACT_SHOT;
8104 	 */
8105 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
8106 	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
8107 	*insn++ = BPF_EXIT_INSN();
8108 
8109 	/* restore: */
8110 	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
8111 	/* start: */
8112 	*insn++ = prog->insnsi[0];
8113 
8114 	return insn - insn_buf;
8115 }
8116 
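/* Translate classic BPF LD_ABS/LD_IND loads into calls to the
 * bpf_skb_load_helper_{8,16,32}_no_cache() helpers; a negative helper
 * return makes the program exit with return value 0.
 */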
8117 static int bpf_gen_ld_abs(const struct bpf_insn *orig,
8118 			  struct bpf_insn *insn_buf)
8119 {
8120 	bool indirect = BPF_MODE(orig->code) == BPF_IND;
8121 	struct bpf_insn *insn = insn_buf;
8122 
8123 	if (!indirect) {
8124 		*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
8125 	} else {
8126 		*insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
8127 		if (orig->imm)
8128 			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
8129 	}
8130 	/* We're guaranteed here that CTX is in R6. */
8131 	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
8132 
8133 	switch (BPF_SIZE(orig->code)) {
8134 	case BPF_B:
8135 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
8136 		break;
8137 	case BPF_H:
8138 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
8139 		break;
8140 	case BPF_W:
8141 		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
8142 		break;
8143 	}
8144 
8145 	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
8146 	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
8147 	*insn++ = BPF_EXIT_INSN();
8148 
8149 	return insn - insn_buf;
8150 }
8151 
8152 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
8153 			       const struct bpf_prog *prog)
8154 {
8155 	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
8156 }
8157 
8158 static bool tc_cls_act_is_valid_access(int off, int size,
8159 				       enum bpf_access_type type,
8160 				       const struct bpf_prog *prog,
8161 				       struct bpf_insn_access_aux *info)
8162 {
8163 	if (type == BPF_WRITE) {
8164 		switch (off) {
8165 		case bpf_ctx_range(struct __sk_buff, mark):
8166 		case bpf_ctx_range(struct __sk_buff, tc_index):
8167 		case bpf_ctx_range(struct __sk_buff, priority):
8168 		case bpf_ctx_range(struct __sk_buff, tc_classid):
8169 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
8170 		case bpf_ctx_range(struct __sk_buff, tstamp):
8171 		case bpf_ctx_range(struct __sk_buff, queue_mapping):
8172 			break;
8173 		default:
8174 			return false;
8175 		}
8176 	}
8177 
8178 	switch (off) {
8179 	case bpf_ctx_range(struct __sk_buff, data):
8180 		info->reg_type = PTR_TO_PACKET;
8181 		break;
8182 	case bpf_ctx_range(struct __sk_buff, data_meta):
8183 		info->reg_type = PTR_TO_PACKET_META;
8184 		break;
8185 	case bpf_ctx_range(struct __sk_buff, data_end):
8186 		info->reg_type = PTR_TO_PACKET_END;
8187 		break;
8188 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8189 		return false;
8190 	}
8191 
8192 	return bpf_skb_is_valid_access(off, size, type, prog, info);
8193 }
8194 
8195 static bool __is_valid_xdp_access(int off, int size)
8196 {
8197 	if (off < 0 || off >= sizeof(struct xdp_md))
8198 		return false;
8199 	if (off % size != 0)
8200 		return false;
8201 	if (size != sizeof(__u32))
8202 		return false;
8203 
8204 	return true;
8205 }
8206 
8207 static bool xdp_is_valid_access(int off, int size,
8208 				enum bpf_access_type type,
8209 				const struct bpf_prog *prog,
8210 				struct bpf_insn_access_aux *info)
8211 {
8212 	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
8213 		switch (off) {
8214 		case offsetof(struct xdp_md, egress_ifindex):
8215 			return false;
8216 		}
8217 	}
8218 
8219 	if (type == BPF_WRITE) {
8220 		if (bpf_prog_is_dev_bound(prog->aux)) {
8221 			switch (off) {
8222 			case offsetof(struct xdp_md, rx_queue_index):
8223 				return __is_valid_xdp_access(off, size);
8224 			}
8225 		}
8226 		return false;
8227 	}
8228 
8229 	switch (off) {
8230 	case offsetof(struct xdp_md, data):
8231 		info->reg_type = PTR_TO_PACKET;
8232 		break;
8233 	case offsetof(struct xdp_md, data_meta):
8234 		info->reg_type = PTR_TO_PACKET_META;
8235 		break;
8236 	case offsetof(struct xdp_md, data_end):
8237 		info->reg_type = PTR_TO_PACKET_END;
8238 		break;
8239 	}
8240 
8241 	return __is_valid_xdp_access(off, size);
8242 }
8243 
8244 void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act)
8245 {
8246 	const u32 act_max = XDP_REDIRECT;
8247 
8248 	pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n",
8249 		     act > act_max ? "Illegal" : "Driver unsupported",
8250 		     act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A");
8251 }
8252 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
8253 
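/* Context access checks for sock_addr programs: the IPv4/IPv6 address
 * fields are only usable from the matching attach types, and wide (u64)
 * loads/stores are allowed on the IPv6 address arrays.
 */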
8254 static bool sock_addr_is_valid_access(int off, int size,
8255 				      enum bpf_access_type type,
8256 				      const struct bpf_prog *prog,
8257 				      struct bpf_insn_access_aux *info)
8258 {
8259 	const int size_default = sizeof(__u32);
8260 
8261 	if (off < 0 || off >= sizeof(struct bpf_sock_addr))
8262 		return false;
8263 	if (off % size != 0)
8264 		return false;
8265 
8266 	/* Disallow access to IPv6 fields from IPv4 context and vice
8267 	 * versa.
8268 	 */
8269 	switch (off) {
8270 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8271 		switch (prog->expected_attach_type) {
8272 		case BPF_CGROUP_INET4_BIND:
8273 		case BPF_CGROUP_INET4_CONNECT:
8274 		case BPF_CGROUP_INET4_GETPEERNAME:
8275 		case BPF_CGROUP_INET4_GETSOCKNAME:
8276 		case BPF_CGROUP_UDP4_SENDMSG:
8277 		case BPF_CGROUP_UDP4_RECVMSG:
8278 			break;
8279 		default:
8280 			return false;
8281 		}
8282 		break;
8283 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8284 		switch (prog->expected_attach_type) {
8285 		case BPF_CGROUP_INET6_BIND:
8286 		case BPF_CGROUP_INET6_CONNECT:
8287 		case BPF_CGROUP_INET6_GETPEERNAME:
8288 		case BPF_CGROUP_INET6_GETSOCKNAME:
8289 		case BPF_CGROUP_UDP6_SENDMSG:
8290 		case BPF_CGROUP_UDP6_RECVMSG:
8291 			break;
8292 		default:
8293 			return false;
8294 		}
8295 		break;
8296 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8297 		switch (prog->expected_attach_type) {
8298 		case BPF_CGROUP_UDP4_SENDMSG:
8299 			break;
8300 		default:
8301 			return false;
8302 		}
8303 		break;
8304 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8305 				msg_src_ip6[3]):
8306 		switch (prog->expected_attach_type) {
8307 		case BPF_CGROUP_UDP6_SENDMSG:
8308 			break;
8309 		default:
8310 			return false;
8311 		}
8312 		break;
8313 	}
8314 
8315 	switch (off) {
8316 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8317 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8318 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8319 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8320 				msg_src_ip6[3]):
8321 	case bpf_ctx_range(struct bpf_sock_addr, user_port):
8322 		if (type == BPF_READ) {
8323 			bpf_ctx_record_field_size(info, size_default);
8324 
8325 			if (bpf_ctx_wide_access_ok(off, size,
8326 						   struct bpf_sock_addr,
8327 						   user_ip6))
8328 				return true;
8329 
8330 			if (bpf_ctx_wide_access_ok(off, size,
8331 						   struct bpf_sock_addr,
8332 						   msg_src_ip6))
8333 				return true;
8334 
8335 			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
8336 				return false;
8337 		} else {
8338 			if (bpf_ctx_wide_access_ok(off, size,
8339 						   struct bpf_sock_addr,
8340 						   user_ip6))
8341 				return true;
8342 
8343 			if (bpf_ctx_wide_access_ok(off, size,
8344 						   struct bpf_sock_addr,
8345 						   msg_src_ip6))
8346 				return true;
8347 
8348 			if (size != size_default)
8349 				return false;
8350 		}
8351 		break;
8352 	case offsetof(struct bpf_sock_addr, sk):
8353 		if (type != BPF_READ)
8354 			return false;
8355 		if (size != sizeof(__u64))
8356 			return false;
8357 		info->reg_type = PTR_TO_SOCKET;
8358 		break;
8359 	default:
8360 		if (type == BPF_READ) {
8361 			if (size != size_default)
8362 				return false;
8363 		} else {
8364 			return false;
8365 		}
8366 	}
8367 
8368 	return true;
8369 }
8370 
8371 static bool sock_ops_is_valid_access(int off, int size,
8372 				     enum bpf_access_type type,
8373 				     const struct bpf_prog *prog,
8374 				     struct bpf_insn_access_aux *info)
8375 {
8376 	const int size_default = sizeof(__u32);
8377 
8378 	if (off < 0 || off >= sizeof(struct bpf_sock_ops))
8379 		return false;
8380 
8381 	/* The verifier guarantees that size > 0. */
8382 	if (off % size != 0)
8383 		return false;
8384 
8385 	if (type == BPF_WRITE) {
8386 		switch (off) {
8387 		case offsetof(struct bpf_sock_ops, reply):
8388 		case offsetof(struct bpf_sock_ops, sk_txhash):
8389 			if (size != size_default)
8390 				return false;
8391 			break;
8392 		default:
8393 			return false;
8394 		}
8395 	} else {
8396 		switch (off) {
8397 		case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
8398 					bytes_acked):
8399 			if (size != sizeof(__u64))
8400 				return false;
8401 			break;
8402 		case offsetof(struct bpf_sock_ops, sk):
8403 			if (size != sizeof(__u64))
8404 				return false;
8405 			info->reg_type = PTR_TO_SOCKET_OR_NULL;
8406 			break;
8407 		case offsetof(struct bpf_sock_ops, skb_data):
8408 			if (size != sizeof(__u64))
8409 				return false;
8410 			info->reg_type = PTR_TO_PACKET;
8411 			break;
8412 		case offsetof(struct bpf_sock_ops, skb_data_end):
8413 			if (size != sizeof(__u64))
8414 				return false;
8415 			info->reg_type = PTR_TO_PACKET_END;
8416 			break;
8417 		case offsetof(struct bpf_sock_ops, skb_tcp_flags):
8418 			bpf_ctx_record_field_size(info, size_default);
8419 			return bpf_ctx_narrow_access_ok(off, size,
8420 							size_default);
8421 		default:
8422 			if (size != size_default)
8423 				return false;
8424 			break;
8425 		}
8426 	}
8427 
8428 	return true;
8429 }
8430 
8431 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
8432 			   const struct bpf_prog *prog)
8433 {
8434 	return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8435 }
8436 
8437 static bool sk_skb_is_valid_access(int off, int size,
8438 				   enum bpf_access_type type,
8439 				   const struct bpf_prog *prog,
8440 				   struct bpf_insn_access_aux *info)
8441 {
8442 	switch (off) {
8443 	case bpf_ctx_range(struct __sk_buff, tc_classid):
8444 	case bpf_ctx_range(struct __sk_buff, data_meta):
8445 	case bpf_ctx_range(struct __sk_buff, tstamp):
8446 	case bpf_ctx_range(struct __sk_buff, wire_len):
8447 	case bpf_ctx_range(struct __sk_buff, hwtstamp):
8448 		return false;
8449 	}
8450 
8451 	if (type == BPF_WRITE) {
8452 		switch (off) {
8453 		case bpf_ctx_range(struct __sk_buff, tc_index):
8454 		case bpf_ctx_range(struct __sk_buff, priority):
8455 			break;
8456 		default:
8457 			return false;
8458 		}
8459 	}
8460 
8461 	switch (off) {
8462 	case bpf_ctx_range(struct __sk_buff, mark):
8463 		return false;
8464 	case bpf_ctx_range(struct __sk_buff, data):
8465 		info->reg_type = PTR_TO_PACKET;
8466 		break;
8467 	case bpf_ctx_range(struct __sk_buff, data_end):
8468 		info->reg_type = PTR_TO_PACKET_END;
8469 		break;
8470 	}
8471 
8472 	return bpf_skb_is_valid_access(off, size, type, prog, info);
8473 }
8474 
8475 static bool sk_msg_is_valid_access(int off, int size,
8476 				   enum bpf_access_type type,
8477 				   const struct bpf_prog *prog,
8478 				   struct bpf_insn_access_aux *info)
8479 {
8480 	if (type == BPF_WRITE)
8481 		return false;
8482 
8483 	if (off % size != 0)
8484 		return false;
8485 
8486 	switch (off) {
8487 	case offsetof(struct sk_msg_md, data):
8488 		info->reg_type = PTR_TO_PACKET;
8489 		if (size != sizeof(__u64))
8490 			return false;
8491 		break;
8492 	case offsetof(struct sk_msg_md, data_end):
8493 		info->reg_type = PTR_TO_PACKET_END;
8494 		if (size != sizeof(__u64))
8495 			return false;
8496 		break;
8497 	case offsetof(struct sk_msg_md, sk):
8498 		if (size != sizeof(__u64))
8499 			return false;
8500 		info->reg_type = PTR_TO_SOCKET;
8501 		break;
8502 	case bpf_ctx_range(struct sk_msg_md, family):
8503 	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
8504 	case bpf_ctx_range(struct sk_msg_md, local_ip4):
8505 	case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
8506 	case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
8507 	case bpf_ctx_range(struct sk_msg_md, remote_port):
8508 	case bpf_ctx_range(struct sk_msg_md, local_port):
8509 	case bpf_ctx_range(struct sk_msg_md, size):
8510 		if (size != sizeof(__u32))
8511 			return false;
8512 		break;
8513 	default:
8514 		return false;
8515 	}
8516 	return true;
8517 }
8518 
8519 static bool flow_dissector_is_valid_access(int off, int size,
8520 					   enum bpf_access_type type,
8521 					   const struct bpf_prog *prog,
8522 					   struct bpf_insn_access_aux *info)
8523 {
8524 	const int size_default = sizeof(__u32);
8525 
8526 	if (off < 0 || off >= sizeof(struct __sk_buff))
8527 		return false;
8528 
8529 	if (type == BPF_WRITE)
8530 		return false;
8531 
8532 	switch (off) {
8533 	case bpf_ctx_range(struct __sk_buff, data):
8534 		if (size != size_default)
8535 			return false;
8536 		info->reg_type = PTR_TO_PACKET;
8537 		return true;
8538 	case bpf_ctx_range(struct __sk_buff, data_end):
8539 		if (size != size_default)
8540 			return false;
8541 		info->reg_type = PTR_TO_PACKET_END;
8542 		return true;
8543 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8544 		if (size != sizeof(__u64))
8545 			return false;
8546 		info->reg_type = PTR_TO_FLOW_KEYS;
8547 		return true;
8548 	default:
8549 		return false;
8550 	}
8551 }
8552 
8553 static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
8554 					     const struct bpf_insn *si,
8555 					     struct bpf_insn *insn_buf,
8556 					     struct bpf_prog *prog,
8557 					     u32 *target_size)
8558 
8559 {
8560 	struct bpf_insn *insn = insn_buf;
8561 
8562 	switch (si->off) {
8563 	case offsetof(struct __sk_buff, data):
8564 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
8565 				      si->dst_reg, si->src_reg,
8566 				      offsetof(struct bpf_flow_dissector, data));
8567 		break;
8568 
8569 	case offsetof(struct __sk_buff, data_end):
8570 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
8571 				      si->dst_reg, si->src_reg,
8572 				      offsetof(struct bpf_flow_dissector, data_end));
8573 		break;
8574 
8575 	case offsetof(struct __sk_buff, flow_keys):
8576 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
8577 				      si->dst_reg, si->src_reg,
8578 				      offsetof(struct bpf_flow_dissector, flow_keys));
8579 		break;
8580 	}
8581 
8582 	return insn - insn_buf;
8583 }
8584 
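/* Emit instructions that leave skb_shinfo(skb) in si->dst_reg (skb->head
 * + skb->end when NET_SKBUFF_DATA_USES_OFFSET, otherwise skb->end).
 */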
8585 static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
8586 						  struct bpf_insn *insn)
8587 {
8588 	/* si->dst_reg = skb_shinfo(SKB); */
8589 #ifdef NET_SKBUFF_DATA_USES_OFFSET
8590 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
8591 			      BPF_REG_AX, si->src_reg,
8592 			      offsetof(struct sk_buff, end));
8593 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
8594 			      si->dst_reg, si->src_reg,
8595 			      offsetof(struct sk_buff, head));
8596 	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
8597 #else
8598 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
8599 			      si->dst_reg, si->src_reg,
8600 			      offsetof(struct sk_buff, end));
8601 #endif
8602 
8603 	return insn;
8604 }
8605 
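/* Rewrite __sk_buff context accesses into loads/stores on the underlying
 * struct sk_buff (and its sock_common, qdisc_skb_cb and skb_shared_info
 * sub-objects) at verification time.
 */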
8606 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
8607 				  const struct bpf_insn *si,
8608 				  struct bpf_insn *insn_buf,
8609 				  struct bpf_prog *prog, u32 *target_size)
8610 {
8611 	struct bpf_insn *insn = insn_buf;
8612 	int off;
8613 
8614 	switch (si->off) {
8615 	case offsetof(struct __sk_buff, len):
8616 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8617 				      bpf_target_off(struct sk_buff, len, 4,
8618 						     target_size));
8619 		break;
8620 
8621 	case offsetof(struct __sk_buff, protocol):
8622 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8623 				      bpf_target_off(struct sk_buff, protocol, 2,
8624 						     target_size));
8625 		break;
8626 
8627 	case offsetof(struct __sk_buff, vlan_proto):
8628 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8629 				      bpf_target_off(struct sk_buff, vlan_proto, 2,
8630 						     target_size));
8631 		break;
8632 
8633 	case offsetof(struct __sk_buff, priority):
8634 		if (type == BPF_WRITE)
8635 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8636 					      bpf_target_off(struct sk_buff, priority, 4,
8637 							     target_size));
8638 		else
8639 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8640 					      bpf_target_off(struct sk_buff, priority, 4,
8641 							     target_size));
8642 		break;
8643 
8644 	case offsetof(struct __sk_buff, ingress_ifindex):
8645 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8646 				      bpf_target_off(struct sk_buff, skb_iif, 4,
8647 						     target_size));
8648 		break;
8649 
8650 	case offsetof(struct __sk_buff, ifindex):
8651 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
8652 				      si->dst_reg, si->src_reg,
8653 				      offsetof(struct sk_buff, dev));
8654 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
8655 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8656 				      bpf_target_off(struct net_device, ifindex, 4,
8657 						     target_size));
8658 		break;
8659 
8660 	case offsetof(struct __sk_buff, hash):
8661 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8662 				      bpf_target_off(struct sk_buff, hash, 4,
8663 						     target_size));
8664 		break;
8665 
8666 	case offsetof(struct __sk_buff, mark):
8667 		if (type == BPF_WRITE)
8668 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8669 					      bpf_target_off(struct sk_buff, mark, 4,
8670 							     target_size));
8671 		else
8672 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8673 					      bpf_target_off(struct sk_buff, mark, 4,
8674 							     target_size));
8675 		break;
8676 
8677 	case offsetof(struct __sk_buff, pkt_type):
8678 		*target_size = 1;
8679 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
8680 				      PKT_TYPE_OFFSET);
8681 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
8682 #ifdef __BIG_ENDIAN_BITFIELD
8683 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
8684 #endif
8685 		break;
8686 
8687 	case offsetof(struct __sk_buff, queue_mapping):
8688 		if (type == BPF_WRITE) {
8689 			*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
8690 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
8691 					      bpf_target_off(struct sk_buff,
8692 							     queue_mapping,
8693 							     2, target_size));
8694 		} else {
8695 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8696 					      bpf_target_off(struct sk_buff,
8697 							     queue_mapping,
8698 							     2, target_size));
8699 		}
8700 		break;
8701 
8702 	case offsetof(struct __sk_buff, vlan_present):
8703 		*target_size = 1;
8704 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
8705 				      PKT_VLAN_PRESENT_OFFSET);
8706 		if (PKT_VLAN_PRESENT_BIT)
8707 			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
8708 		if (PKT_VLAN_PRESENT_BIT < 7)
8709 			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
8710 		break;
8711 
8712 	case offsetof(struct __sk_buff, vlan_tci):
8713 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8714 				      bpf_target_off(struct sk_buff, vlan_tci, 2,
8715 						     target_size));
8716 		break;
8717 
8718 	case offsetof(struct __sk_buff, cb[0]) ...
8719 	     offsetofend(struct __sk_buff, cb[4]) - 1:
8720 		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
8721 		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
8722 			      offsetof(struct qdisc_skb_cb, data)) %
8723 			     sizeof(__u64));
8724 
8725 		prog->cb_access = 1;
8726 		off  = si->off;
8727 		off -= offsetof(struct __sk_buff, cb[0]);
8728 		off += offsetof(struct sk_buff, cb);
8729 		off += offsetof(struct qdisc_skb_cb, data);
8730 		if (type == BPF_WRITE)
8731 			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
8732 					      si->src_reg, off);
8733 		else
8734 			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
8735 					      si->src_reg, off);
8736 		break;
8737 
8738 	case offsetof(struct __sk_buff, tc_classid):
8739 		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
8740 
8741 		off  = si->off;
8742 		off -= offsetof(struct __sk_buff, tc_classid);
8743 		off += offsetof(struct sk_buff, cb);
8744 		off += offsetof(struct qdisc_skb_cb, tc_classid);
8745 		*target_size = 2;
8746 		if (type == BPF_WRITE)
8747 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
8748 					      si->src_reg, off);
8749 		else
8750 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
8751 					      si->src_reg, off);
8752 		break;
8753 
8754 	case offsetof(struct __sk_buff, data):
8755 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
8756 				      si->dst_reg, si->src_reg,
8757 				      offsetof(struct sk_buff, data));
8758 		break;
8759 
8760 	case offsetof(struct __sk_buff, data_meta):
8761 		off  = si->off;
8762 		off -= offsetof(struct __sk_buff, data_meta);
8763 		off += offsetof(struct sk_buff, cb);
8764 		off += offsetof(struct bpf_skb_data_end, data_meta);
8765 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8766 				      si->src_reg, off);
8767 		break;
8768 
8769 	case offsetof(struct __sk_buff, data_end):
8770 		off  = si->off;
8771 		off -= offsetof(struct __sk_buff, data_end);
8772 		off += offsetof(struct sk_buff, cb);
8773 		off += offsetof(struct bpf_skb_data_end, data_end);
8774 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8775 				      si->src_reg, off);
8776 		break;
8777 
8778 	case offsetof(struct __sk_buff, tc_index):
8779 #ifdef CONFIG_NET_SCHED
8780 		if (type == BPF_WRITE)
8781 			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
8782 					      bpf_target_off(struct sk_buff, tc_index, 2,
8783 							     target_size));
8784 		else
8785 			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
8786 					      bpf_target_off(struct sk_buff, tc_index, 2,
8787 							     target_size));
8788 #else
8789 		*target_size = 2;
8790 		if (type == BPF_WRITE)
8791 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
8792 		else
8793 			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8794 #endif
8795 		break;
8796 
8797 	case offsetof(struct __sk_buff, napi_id):
8798 #if defined(CONFIG_NET_RX_BUSY_POLL)
8799 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8800 				      bpf_target_off(struct sk_buff, napi_id, 4,
8801 						     target_size));
8802 		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
8803 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8804 #else
8805 		*target_size = 4;
8806 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8807 #endif
8808 		break;
8809 	case offsetof(struct __sk_buff, family):
8810 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
8811 
8812 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8813 				      si->dst_reg, si->src_reg,
8814 				      offsetof(struct sk_buff, sk));
8815 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8816 				      bpf_target_off(struct sock_common,
8817 						     skc_family,
8818 						     2, target_size));
8819 		break;
8820 	case offsetof(struct __sk_buff, remote_ip4):
8821 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
8822 
8823 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8824 				      si->dst_reg, si->src_reg,
8825 				      offsetof(struct sk_buff, sk));
8826 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8827 				      bpf_target_off(struct sock_common,
8828 						     skc_daddr,
8829 						     4, target_size));
8830 		break;
8831 	case offsetof(struct __sk_buff, local_ip4):
8832 		BUILD_BUG_ON(sizeof_field(struct sock_common,
8833 					  skc_rcv_saddr) != 4);
8834 
8835 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8836 				      si->dst_reg, si->src_reg,
8837 				      offsetof(struct sk_buff, sk));
8838 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8839 				      bpf_target_off(struct sock_common,
8840 						     skc_rcv_saddr,
8841 						     4, target_size));
8842 		break;
8843 	case offsetof(struct __sk_buff, remote_ip6[0]) ...
8844 	     offsetof(struct __sk_buff, remote_ip6[3]):
8845 #if IS_ENABLED(CONFIG_IPV6)
8846 		BUILD_BUG_ON(sizeof_field(struct sock_common,
8847 					  skc_v6_daddr.s6_addr32[0]) != 4);
8848 
8849 		off = si->off;
8850 		off -= offsetof(struct __sk_buff, remote_ip6[0]);
8851 
8852 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8853 				      si->dst_reg, si->src_reg,
8854 				      offsetof(struct sk_buff, sk));
8855 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8856 				      offsetof(struct sock_common,
8857 					       skc_v6_daddr.s6_addr32[0]) +
8858 				      off);
8859 #else
8860 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8861 #endif
8862 		break;
8863 	case offsetof(struct __sk_buff, local_ip6[0]) ...
8864 	     offsetof(struct __sk_buff, local_ip6[3]):
8865 #if IS_ENABLED(CONFIG_IPV6)
8866 		BUILD_BUG_ON(sizeof_field(struct sock_common,
8867 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
8868 
8869 		off = si->off;
8870 		off -= offsetof(struct __sk_buff, local_ip6[0]);
8871 
8872 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8873 				      si->dst_reg, si->src_reg,
8874 				      offsetof(struct sk_buff, sk));
8875 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8876 				      offsetof(struct sock_common,
8877 					       skc_v6_rcv_saddr.s6_addr32[0]) +
8878 				      off);
8879 #else
8880 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8881 #endif
8882 		break;
8883 
8884 	case offsetof(struct __sk_buff, remote_port):
8885 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
8886 
8887 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8888 				      si->dst_reg, si->src_reg,
8889 				      offsetof(struct sk_buff, sk));
8890 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8891 				      bpf_target_off(struct sock_common,
8892 						     skc_dport,
8893 						     2, target_size));
8894 #ifndef __BIG_ENDIAN_BITFIELD
8895 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
8896 #endif
8897 		break;
8898 
8899 	case offsetof(struct __sk_buff, local_port):
8900 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
8901 
8902 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8903 				      si->dst_reg, si->src_reg,
8904 				      offsetof(struct sk_buff, sk));
8905 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8906 				      bpf_target_off(struct sock_common,
8907 						     skc_num, 2, target_size));
8908 		break;
8909 
8910 	case offsetof(struct __sk_buff, tstamp):
8911 		BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
8912 
8913 		if (type == BPF_WRITE)
8914 			*insn++ = BPF_STX_MEM(BPF_DW,
8915 					      si->dst_reg, si->src_reg,
8916 					      bpf_target_off(struct sk_buff,
8917 							     tstamp, 8,
8918 							     target_size));
8919 		else
8920 			*insn++ = BPF_LDX_MEM(BPF_DW,
8921 					      si->dst_reg, si->src_reg,
8922 					      bpf_target_off(struct sk_buff,
8923 							     tstamp, 8,
8924 							     target_size));
8925 		break;
8926 
8927 	case offsetof(struct __sk_buff, gso_segs):
8928 		insn = bpf_convert_shinfo_access(si, insn);
8929 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
8930 				      si->dst_reg, si->dst_reg,
8931 				      bpf_target_off(struct skb_shared_info,
8932 						     gso_segs, 2,
8933 						     target_size));
8934 		break;
8935 	case offsetof(struct __sk_buff, gso_size):
8936 		insn = bpf_convert_shinfo_access(si, insn);
8937 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
8938 				      si->dst_reg, si->dst_reg,
8939 				      bpf_target_off(struct skb_shared_info,
8940 						     gso_size, 2,
8941 						     target_size));
8942 		break;
8943 	case offsetof(struct __sk_buff, wire_len):
8944 		BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
8945 
8946 		off = si->off;
8947 		off -= offsetof(struct __sk_buff, wire_len);
8948 		off += offsetof(struct sk_buff, cb);
8949 		off += offsetof(struct qdisc_skb_cb, pkt_len);
8950 		*target_size = 4;
8951 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
8952 		break;
8953 
8954 	case offsetof(struct __sk_buff, sk):
8955 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8956 				      si->dst_reg, si->src_reg,
8957 				      offsetof(struct sk_buff, sk));
8958 		break;
8959 	case offsetof(struct __sk_buff, hwtstamp):
8960 		BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
8961 		BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
8962 
8963 		insn = bpf_convert_shinfo_access(si, insn);
8964 		*insn++ = BPF_LDX_MEM(BPF_DW,
8965 				      si->dst_reg, si->dst_reg,
8966 				      bpf_target_off(struct skb_shared_info,
8967 						     hwtstamps, 8,
8968 						     target_size));
8969 		break;
8970 	}
8971 
8972 	return insn - insn_buf;
8973 }
8974 
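/* Rewrite struct bpf_sock field accesses into loads/stores on struct sock
 * and its embedded struct sock_common.
 */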
8975 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
8976 				const struct bpf_insn *si,
8977 				struct bpf_insn *insn_buf,
8978 				struct bpf_prog *prog, u32 *target_size)
8979 {
8980 	struct bpf_insn *insn = insn_buf;
8981 	int off;
8982 
8983 	switch (si->off) {
8984 	case offsetof(struct bpf_sock, bound_dev_if):
8985 		BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
8986 
8987 		if (type == BPF_WRITE)
8988 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8989 					offsetof(struct sock, sk_bound_dev_if));
8990 		else
8991 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8992 				      offsetof(struct sock, sk_bound_dev_if));
8993 		break;
8994 
8995 	case offsetof(struct bpf_sock, mark):
8996 		BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
8997 
8998 		if (type == BPF_WRITE)
8999 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9000 					offsetof(struct sock, sk_mark));
9001 		else
9002 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9003 				      offsetof(struct sock, sk_mark));
9004 		break;
9005 
9006 	case offsetof(struct bpf_sock, priority):
9007 		BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
9008 
9009 		if (type == BPF_WRITE)
9010 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9011 					offsetof(struct sock, sk_priority));
9012 		else
9013 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9014 				      offsetof(struct sock, sk_priority));
9015 		break;
9016 
9017 	case offsetof(struct bpf_sock, family):
9018 		*insn++ = BPF_LDX_MEM(
9019 			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
9020 			si->dst_reg, si->src_reg,
9021 			bpf_target_off(struct sock_common,
9022 				       skc_family,
9023 				       sizeof_field(struct sock_common,
9024 						    skc_family),
9025 				       target_size));
9026 		break;
9027 
9028 	case offsetof(struct bpf_sock, type):
9029 		*insn++ = BPF_LDX_MEM(
9030 			BPF_FIELD_SIZEOF(struct sock, sk_type),
9031 			si->dst_reg, si->src_reg,
9032 			bpf_target_off(struct sock, sk_type,
9033 				       sizeof_field(struct sock, sk_type),
9034 				       target_size));
9035 		break;
9036 
9037 	case offsetof(struct bpf_sock, protocol):
9038 		*insn++ = BPF_LDX_MEM(
9039 			BPF_FIELD_SIZEOF(struct sock, sk_protocol),
9040 			si->dst_reg, si->src_reg,
9041 			bpf_target_off(struct sock, sk_protocol,
9042 				       sizeof_field(struct sock, sk_protocol),
9043 				       target_size));
9044 		break;
9045 
9046 	case offsetof(struct bpf_sock, src_ip4):
9047 		*insn++ = BPF_LDX_MEM(
9048 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9049 			bpf_target_off(struct sock_common, skc_rcv_saddr,
9050 				       sizeof_field(struct sock_common,
9051 						    skc_rcv_saddr),
9052 				       target_size));
9053 		break;
9054 
9055 	case offsetof(struct bpf_sock, dst_ip4):
9056 		*insn++ = BPF_LDX_MEM(
9057 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9058 			bpf_target_off(struct sock_common, skc_daddr,
9059 				       sizeof_field(struct sock_common,
9060 						    skc_daddr),
9061 				       target_size));
9062 		break;
9063 
9064 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
9065 #if IS_ENABLED(CONFIG_IPV6)
9066 		off = si->off;
9067 		off -= offsetof(struct bpf_sock, src_ip6[0]);
9068 		*insn++ = BPF_LDX_MEM(
9069 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9070 			bpf_target_off(
9071 				struct sock_common,
9072 				skc_v6_rcv_saddr.s6_addr32[0],
9073 				sizeof_field(struct sock_common,
9074 					     skc_v6_rcv_saddr.s6_addr32[0]),
9075 				target_size) + off);
9076 #else
9077 		(void)off;
9078 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9079 #endif
9080 		break;
9081 
9082 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
9083 #if IS_ENABLED(CONFIG_IPV6)
9084 		off = si->off;
9085 		off -= offsetof(struct bpf_sock, dst_ip6[0]);
9086 		*insn++ = BPF_LDX_MEM(
9087 			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9088 			bpf_target_off(struct sock_common,
9089 				       skc_v6_daddr.s6_addr32[0],
9090 				       sizeof_field(struct sock_common,
9091 						    skc_v6_daddr.s6_addr32[0]),
9092 				       target_size) + off);
9093 #else
9094 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9095 		*target_size = 4;
9096 #endif
9097 		break;
9098 
9099 	case offsetof(struct bpf_sock, src_port):
9100 		*insn++ = BPF_LDX_MEM(
9101 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
9102 			si->dst_reg, si->src_reg,
9103 			bpf_target_off(struct sock_common, skc_num,
9104 				       sizeof_field(struct sock_common,
9105 						    skc_num),
9106 				       target_size));
9107 		break;
9108 
9109 	case offsetof(struct bpf_sock, dst_port):
9110 		*insn++ = BPF_LDX_MEM(
9111 			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
9112 			si->dst_reg, si->src_reg,
9113 			bpf_target_off(struct sock_common, skc_dport,
9114 				       sizeof_field(struct sock_common,
9115 						    skc_dport),
9116 				       target_size));
9117 		break;
9118 
9119 	case offsetof(struct bpf_sock, state):
9120 		*insn++ = BPF_LDX_MEM(
9121 			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
9122 			si->dst_reg, si->src_reg,
9123 			bpf_target_off(struct sock_common, skc_state,
9124 				       sizeof_field(struct sock_common,
9125 						    skc_state),
9126 				       target_size));
9127 		break;
9128 	case offsetof(struct bpf_sock, rx_queue_mapping):
9129 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
9130 		*insn++ = BPF_LDX_MEM(
9131 			BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
9132 			si->dst_reg, si->src_reg,
9133 			bpf_target_off(struct sock, sk_rx_queue_mapping,
9134 				       sizeof_field(struct sock,
9135 						    sk_rx_queue_mapping),
9136 				       target_size));
9137 		*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
9138 				      1);
9139 		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9140 #else
9141 		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9142 		*target_size = 2;
9143 #endif
9144 		break;
9145 	}
9146 
9147 	return insn - insn_buf;
9148 }
9149 
9150 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
9151 					 const struct bpf_insn *si,
9152 					 struct bpf_insn *insn_buf,
9153 					 struct bpf_prog *prog, u32 *target_size)
9154 {
9155 	struct bpf_insn *insn = insn_buf;
9156 
9157 	switch (si->off) {
9158 	case offsetof(struct __sk_buff, ifindex):
9159 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
9160 				      si->dst_reg, si->src_reg,
9161 				      offsetof(struct sk_buff, dev));
9162 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9163 				      bpf_target_off(struct net_device, ifindex, 4,
9164 						     target_size));
9165 		break;
9166 	default:
9167 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
9168 					      target_size);
9169 	}
9170 
9171 	return insn - insn_buf;
9172 }
9173 
9174 static u32 xdp_convert_ctx_access(enum bpf_access_type type,
9175 				  const struct bpf_insn *si,
9176 				  struct bpf_insn *insn_buf,
9177 				  struct bpf_prog *prog, u32 *target_size)
9178 {
9179 	struct bpf_insn *insn = insn_buf;
9180 
9181 	switch (si->off) {
9182 	case offsetof(struct xdp_md, data):
9183 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
9184 				      si->dst_reg, si->src_reg,
9185 				      offsetof(struct xdp_buff, data));
9186 		break;
9187 	case offsetof(struct xdp_md, data_meta):
9188 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
9189 				      si->dst_reg, si->src_reg,
9190 				      offsetof(struct xdp_buff, data_meta));
9191 		break;
9192 	case offsetof(struct xdp_md, data_end):
9193 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
9194 				      si->dst_reg, si->src_reg,
9195 				      offsetof(struct xdp_buff, data_end));
9196 		break;
9197 	case offsetof(struct xdp_md, ingress_ifindex):
9198 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9199 				      si->dst_reg, si->src_reg,
9200 				      offsetof(struct xdp_buff, rxq));
9201 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
9202 				      si->dst_reg, si->dst_reg,
9203 				      offsetof(struct xdp_rxq_info, dev));
9204 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9205 				      offsetof(struct net_device, ifindex));
9206 		break;
9207 	case offsetof(struct xdp_md, rx_queue_index):
9208 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9209 				      si->dst_reg, si->src_reg,
9210 				      offsetof(struct xdp_buff, rxq));
9211 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9212 				      offsetof(struct xdp_rxq_info,
9213 					       queue_index));
9214 		break;
9215 	case offsetof(struct xdp_md, egress_ifindex):
9216 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
9217 				      si->dst_reg, si->src_reg,
9218 				      offsetof(struct xdp_buff, txq));
9219 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
9220 				      si->dst_reg, si->dst_reg,
9221 				      offsetof(struct xdp_txq_info, dev));
9222 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9223 				      offsetof(struct net_device, ifindex));
9224 		break;
9225 	}
9226 
9227 	return insn - insn_buf;
9228 }
9229 
9230 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is the type
9231  * of the context Structure, F is the Field in the context structure that
9232  * contains a pointer to the Nested Structure of type NS that has the field NF.
9233  *
9234  * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to
9235  * make sure that SIZE is not greater than the actual size of S.F.NF.
9236  *
9237  * If offset OFF is provided, the load happens from that offset relative to
9238  * the offset of NF.
9239  */
9240 #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)	       \
9241 	do {								       \
9242 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
9243 				      si->src_reg, offsetof(S, F));	       \
9244 		*insn++ = BPF_LDX_MEM(					       \
9245 			SIZE, si->dst_reg, si->dst_reg,			       \
9246 			bpf_target_off(NS, NF, sizeof_field(NS, NF),	       \
9247 				       target_size)			       \
9248 				+ OFF);					       \
9249 	} while (0)
9250 
9251 #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)			       \
9252 	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,		       \
9253 					     BPF_FIELD_SIZEOF(NS, NF), 0)
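
/*
 * Illustration only: a rough sketch (pseudo-C, not code that is compiled
 * here) of what SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
 * struct sockaddr, uaddr, sa_family) emits, as used for the user_family
 * field further below:
 *
 *	dst_reg = *(u64 *)(src_reg + offsetof(struct bpf_sock_addr_kern, uaddr));
 *	dst_reg = *(u16 *)(dst_reg + offsetof(struct sockaddr, sa_family));
 *
 * i.e. the pointer to the nested structure is loaded first and the requested
 * field is then loaded through it.
 */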
9254 
9255 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
9256  * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation.
9257  *
9258  * In addition it uses the Temporary Field TF (a member of struct S) as the 3rd
9259  * "register", since the two registers available in convert_ctx_access are not
9260  * enough: we can't overwrite SRC, since it contains the value to store, nor
9261  * DST, since it contains the pointer to the context that may be used by later
9262  * instructions. But we need a temporary place to save the pointer to the
9263  * nested structure whose field we want to store to.
9264  */
9265 #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF)	       \
9266 	do {								       \
9267 		int tmp_reg = BPF_REG_9;				       \
9268 		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
9269 			--tmp_reg;					       \
9270 		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
9271 			--tmp_reg;					       \
9272 		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,	       \
9273 				      offsetof(S, TF));			       \
9274 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
9275 				      si->dst_reg, offsetof(S, F));	       \
9276 		*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg,	       \
9277 			bpf_target_off(NS, NF, sizeof_field(NS, NF),	       \
9278 				       target_size)			       \
9279 				+ OFF);					       \
9280 		*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,	       \
9281 				      offsetof(S, TF));			       \
9282 	} while (0)
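
/*
 * Illustration only: a rough sketch of the store sequence the macro above
 * emits (pseudo-C, not compiled code); tmp_reg is BPF_REG_9, or R8/R7 if R9
 * collides with src_reg/dst_reg:
 *
 *	*(u64 *)(dst_reg + offsetof(S, TF)) = tmp_reg;		// spill borrowed reg
 *	tmp_reg = *(u64 *)(dst_reg + offsetof(S, F));		// nested struct ptr
 *	*(SIZE *)(tmp_reg + offsetof(NS, NF) + OFF) = src_reg;	// the actual store
 *	tmp_reg = *(u64 *)(dst_reg + offsetof(S, TF));		// restore borrowed reg
 */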
9283 
9284 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
9285 						      TF)		       \
9286 	do {								       \
9287 		if (type == BPF_WRITE) {				       \
9288 			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE,   \
9289 							 OFF, TF);	       \
9290 		} else {						       \
9291 			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
9292 				S, NS, F, NF, SIZE, OFF);  \
9293 		}							       \
9294 	} while (0)
9295 
9296 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)		       \
9297 	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(			       \
9298 		S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
9299 
9300 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
9301 					const struct bpf_insn *si,
9302 					struct bpf_insn *insn_buf,
9303 					struct bpf_prog *prog, u32 *target_size)
9304 {
9305 	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
9306 	struct bpf_insn *insn = insn_buf;
9307 
9308 	switch (si->off) {
9309 	case offsetof(struct bpf_sock_addr, user_family):
9310 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9311 					    struct sockaddr, uaddr, sa_family);
9312 		break;
9313 
9314 	case offsetof(struct bpf_sock_addr, user_ip4):
9315 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9316 			struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
9317 			sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
9318 		break;
9319 
9320 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
9321 		off = si->off;
9322 		off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
9323 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9324 			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9325 			sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
9326 			tmp_reg);
9327 		break;
9328 
9329 	case offsetof(struct bpf_sock_addr, user_port):
9330 		/* To get the port we'd need to know sa_family first and then
9331 		 * treat sockaddr as either sockaddr_in or sockaddr_in6.
9332 		 * We can simplify, though, since the port field has the same
9333 		 * offset and size in both structures.
9334 		 * The checks below verify this invariant, so just one of the
9335 		 * structures is used.
9336 		 */
9337 		BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
9338 			     offsetof(struct sockaddr_in6, sin6_port));
9339 		BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
9340 			     sizeof_field(struct sockaddr_in6, sin6_port));
9341 		/* Account for sin6_port being smaller than user_port. */
9342 		port_size = min(port_size, BPF_LDST_BYTES(si));
9343 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9344 			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9345 			sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
9346 		break;
9347 
9348 	case offsetof(struct bpf_sock_addr, family):
9349 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9350 					    struct sock, sk, sk_family);
9351 		break;
9352 
9353 	case offsetof(struct bpf_sock_addr, type):
9354 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9355 					    struct sock, sk, sk_type);
9356 		break;
9357 
9358 	case offsetof(struct bpf_sock_addr, protocol):
9359 		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9360 					    struct sock, sk, sk_protocol);
9361 		break;
9362 
9363 	case offsetof(struct bpf_sock_addr, msg_src_ip4):
9364 		/* Treat t_ctx as struct in_addr for msg_src_ip4. */
9365 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9366 			struct bpf_sock_addr_kern, struct in_addr, t_ctx,
9367 			s_addr, BPF_SIZE(si->code), 0, tmp_reg);
9368 		break;
9369 
9370 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
9371 				msg_src_ip6[3]):
9372 		off = si->off;
9373 		off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
9374 		/* Treat t_ctx as struct in6_addr for msg_src_ip6. */
9375 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9376 			struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
9377 			s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
9378 		break;
9379 	case offsetof(struct bpf_sock_addr, sk):
9380 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
9381 				      si->dst_reg, si->src_reg,
9382 				      offsetof(struct bpf_sock_addr_kern, sk));
9383 		break;
9384 	}
9385 
9386 	return insn - insn_buf;
9387 }
9388 
9389 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
9390 				       const struct bpf_insn *si,
9391 				       struct bpf_insn *insn_buf,
9392 				       struct bpf_prog *prog,
9393 				       u32 *target_size)
9394 {
9395 	struct bpf_insn *insn = insn_buf;
9396 	int off;
9397 
9398 /* Helper macro for adding read access to tcp_sock or sock fields. */
9399 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
9400 	do {								      \
9401 		int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2;     \
9402 		BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >		      \
9403 			     sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
9404 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9405 			reg--;						      \
9406 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9407 			reg--;						      \
9408 		if (si->dst_reg == si->src_reg) {			      \
9409 			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,	      \
9410 					  offsetof(struct bpf_sock_ops_kern,  \
9411 					  temp));			      \
9412 			fullsock_reg = reg;				      \
9413 			jmp += 2;					      \
9414 		}							      \
9415 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9416 						struct bpf_sock_ops_kern,     \
9417 						is_fullsock),		      \
9418 				      fullsock_reg, si->src_reg,	      \
9419 				      offsetof(struct bpf_sock_ops_kern,      \
9420 					       is_fullsock));		      \
9421 		*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);	      \
9422 		if (si->dst_reg == si->src_reg)				      \
9423 			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
9424 				      offsetof(struct bpf_sock_ops_kern,      \
9425 				      temp));				      \
9426 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9427 						struct bpf_sock_ops_kern, sk),\
9428 				      si->dst_reg, si->src_reg,		      \
9429 				      offsetof(struct bpf_sock_ops_kern, sk));\
9430 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		      \
9431 						       OBJ_FIELD),	      \
9432 				      si->dst_reg, si->dst_reg,		      \
9433 				      offsetof(OBJ, OBJ_FIELD));	      \
9434 		if (si->dst_reg == si->src_reg)	{			      \
9435 			*insn++ = BPF_JMP_A(1);				      \
9436 			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
9437 				      offsetof(struct bpf_sock_ops_kern,      \
9438 				      temp));				      \
9439 		}							      \
9440 	} while (0)
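
/*
 * Illustration only: in the common case (si->dst_reg != si->src_reg) the
 * SOCK_OPS_GET_FIELD() macro above emits, roughly (pseudo-C, not compiled
 * code):
 *
 *	dst_reg = ops_kern->is_fullsock;
 *	if (dst_reg == 0)			// not a full socket:
 *		goto out;			// the read result stays 0
 *	dst_reg = ops_kern->sk;
 *	dst_reg = ((OBJ *)dst_reg)->OBJ_FIELD;
 * out:
 *
 * When dst_reg == src_reg, BPF_REG_9 (or a lower free register) is first
 * spilled to the temp slot in bpf_sock_ops_kern, used for the is_fullsock
 * check, and restored afterwards.
 */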
9441 
9442 #define SOCK_OPS_GET_SK()							      \
9443 	do {								      \
9444 		int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1;     \
9445 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9446 			reg--;						      \
9447 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9448 			reg--;						      \
9449 		if (si->dst_reg == si->src_reg) {			      \
9450 			*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg,	      \
9451 					  offsetof(struct bpf_sock_ops_kern,  \
9452 					  temp));			      \
9453 			fullsock_reg = reg;				      \
9454 			jmp += 2;					      \
9455 		}							      \
9456 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9457 						struct bpf_sock_ops_kern,     \
9458 						is_fullsock),		      \
9459 				      fullsock_reg, si->src_reg,	      \
9460 				      offsetof(struct bpf_sock_ops_kern,      \
9461 					       is_fullsock));		      \
9462 		*insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp);	      \
9463 		if (si->dst_reg == si->src_reg)				      \
9464 			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
9465 				      offsetof(struct bpf_sock_ops_kern,      \
9466 				      temp));				      \
9467 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9468 						struct bpf_sock_ops_kern, sk),\
9469 				      si->dst_reg, si->src_reg,		      \
9470 				      offsetof(struct bpf_sock_ops_kern, sk));\
9471 		if (si->dst_reg == si->src_reg)	{			      \
9472 			*insn++ = BPF_JMP_A(1);				      \
9473 			*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg,	      \
9474 				      offsetof(struct bpf_sock_ops_kern,      \
9475 				      temp));				      \
9476 		}							      \
9477 	} while (0)
9478 
9479 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
9480 		SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
9481 
9482 /* Helper macro for adding write access to tcp_sock or sock fields.
9483  * The macro is called with two registers, dst_reg which contains a pointer
9484  * to ctx (context) and src_reg which contains the value that should be
9485  * stored. However, we need an additional register since we cannot overwrite
9486  * dst_reg because it may be used later in the program.
9487  * Instead we "borrow" one of the other registers. We first save its value
9488  * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
9489  * it at the end of the macro.
9490  */
9491 #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
9492 	do {								      \
9493 		int reg = BPF_REG_9;					      \
9494 		BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) >		      \
9495 			     sizeof_field(struct bpf_sock_ops, BPF_FIELD));   \
9496 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9497 			reg--;						      \
9498 		if (si->dst_reg == reg || si->src_reg == reg)		      \
9499 			reg--;						      \
9500 		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
9501 				      offsetof(struct bpf_sock_ops_kern,      \
9502 					       temp));			      \
9503 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9504 						struct bpf_sock_ops_kern,     \
9505 						is_fullsock),		      \
9506 				      reg, si->dst_reg,			      \
9507 				      offsetof(struct bpf_sock_ops_kern,      \
9508 					       is_fullsock));		      \
9509 		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
9510 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
9511 						struct bpf_sock_ops_kern, sk),\
9512 				      reg, si->dst_reg,			      \
9513 				      offsetof(struct bpf_sock_ops_kern, sk));\
9514 		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	      \
9515 				      reg, si->src_reg,			      \
9516 				      offsetof(OBJ, OBJ_FIELD));	      \
9517 		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
9518 				      offsetof(struct bpf_sock_ops_kern,      \
9519 					       temp));			      \
9520 	} while (0)
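
/*
 * Illustration only: a rough sketch of the write sequence the macro above
 * emits (pseudo-C, not compiled code); reg is BPF_REG_9, or R8/R7 if R9
 * collides with src_reg/dst_reg:
 *
 *	ops_kern->temp = reg;			// spill the borrowed reg
 *	reg = ops_kern->is_fullsock;
 *	if (reg == 0)
 *		goto restore;			// skip the store for non-fullsocks
 *	reg = ops_kern->sk;
 *	((OBJ *)reg)->OBJ_FIELD = src_reg;
 * restore:
 *	reg = ops_kern->temp;			// restore the borrowed reg
 */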
9521 
9522 #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
9523 	do {								      \
9524 		if (TYPE == BPF_WRITE)					      \
9525 			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
9526 		else							      \
9527 			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
9528 	} while (0)
9529 
9533 	switch (si->off) {
9534 	case offsetof(struct bpf_sock_ops, op):
9535 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9536 						       op),
9537 				      si->dst_reg, si->src_reg,
9538 				      offsetof(struct bpf_sock_ops_kern, op));
9539 		break;
9540 
9541 	case offsetof(struct bpf_sock_ops, replylong[0]) ...
9542 	     offsetof(struct bpf_sock_ops, replylong[3]):
9543 		BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
9544 			     sizeof_field(struct bpf_sock_ops_kern, reply));
9545 		BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
9546 			     sizeof_field(struct bpf_sock_ops_kern, replylong));
9547 		off = si->off;
9548 		off -= offsetof(struct bpf_sock_ops, replylong[0]);
9549 		off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
9550 		if (type == BPF_WRITE)
9551 			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9552 					      off);
9553 		else
9554 			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9555 					      off);
9556 		break;
9557 
9558 	case offsetof(struct bpf_sock_ops, family):
9559 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
9560 
9561 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9562 					      struct bpf_sock_ops_kern, sk),
9563 				      si->dst_reg, si->src_reg,
9564 				      offsetof(struct bpf_sock_ops_kern, sk));
9565 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9566 				      offsetof(struct sock_common, skc_family));
9567 		break;
9568 
9569 	case offsetof(struct bpf_sock_ops, remote_ip4):
9570 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
9571 
9572 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9573 						struct bpf_sock_ops_kern, sk),
9574 				      si->dst_reg, si->src_reg,
9575 				      offsetof(struct bpf_sock_ops_kern, sk));
9576 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9577 				      offsetof(struct sock_common, skc_daddr));
9578 		break;
9579 
9580 	case offsetof(struct bpf_sock_ops, local_ip4):
9581 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9582 					  skc_rcv_saddr) != 4);
9583 
9584 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9585 					      struct bpf_sock_ops_kern, sk),
9586 				      si->dst_reg, si->src_reg,
9587 				      offsetof(struct bpf_sock_ops_kern, sk));
9588 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9589 				      offsetof(struct sock_common,
9590 					       skc_rcv_saddr));
9591 		break;
9592 
9593 	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
9594 	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
9595 #if IS_ENABLED(CONFIG_IPV6)
9596 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9597 					  skc_v6_daddr.s6_addr32[0]) != 4);
9598 
9599 		off = si->off;
9600 		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
9601 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9602 						struct bpf_sock_ops_kern, sk),
9603 				      si->dst_reg, si->src_reg,
9604 				      offsetof(struct bpf_sock_ops_kern, sk));
9605 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9606 				      offsetof(struct sock_common,
9607 					       skc_v6_daddr.s6_addr32[0]) +
9608 				      off);
9609 #else
9610 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9611 #endif
9612 		break;
9613 
9614 	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
9615 	     offsetof(struct bpf_sock_ops, local_ip6[3]):
9616 #if IS_ENABLED(CONFIG_IPV6)
9617 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9618 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9619 
9620 		off = si->off;
9621 		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
9622 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9623 						struct bpf_sock_ops_kern, sk),
9624 				      si->dst_reg, si->src_reg,
9625 				      offsetof(struct bpf_sock_ops_kern, sk));
9626 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9627 				      offsetof(struct sock_common,
9628 					       skc_v6_rcv_saddr.s6_addr32[0]) +
9629 				      off);
9630 #else
9631 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9632 #endif
9633 		break;
9634 
9635 	case offsetof(struct bpf_sock_ops, remote_port):
9636 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
9637 
9638 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9639 						struct bpf_sock_ops_kern, sk),
9640 				      si->dst_reg, si->src_reg,
9641 				      offsetof(struct bpf_sock_ops_kern, sk));
9642 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9643 				      offsetof(struct sock_common, skc_dport));
9644 #ifndef __BIG_ENDIAN_BITFIELD
9645 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
9646 #endif
9647 		break;
9648 
9649 	case offsetof(struct bpf_sock_ops, local_port):
9650 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
9651 
9652 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9653 						struct bpf_sock_ops_kern, sk),
9654 				      si->dst_reg, si->src_reg,
9655 				      offsetof(struct bpf_sock_ops_kern, sk));
9656 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9657 				      offsetof(struct sock_common, skc_num));
9658 		break;
9659 
9660 	case offsetof(struct bpf_sock_ops, is_fullsock):
9661 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9662 						struct bpf_sock_ops_kern,
9663 						is_fullsock),
9664 				      si->dst_reg, si->src_reg,
9665 				      offsetof(struct bpf_sock_ops_kern,
9666 					       is_fullsock));
9667 		break;
9668 
9669 	case offsetof(struct bpf_sock_ops, state):
9670 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
9671 
9672 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9673 						struct bpf_sock_ops_kern, sk),
9674 				      si->dst_reg, si->src_reg,
9675 				      offsetof(struct bpf_sock_ops_kern, sk));
9676 		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
9677 				      offsetof(struct sock_common, skc_state));
9678 		break;
9679 
9680 	case offsetof(struct bpf_sock_ops, rtt_min):
9681 		BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
9682 			     sizeof(struct minmax));
9683 		BUILD_BUG_ON(sizeof(struct minmax) <
9684 			     sizeof(struct minmax_sample));
9685 
9686 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9687 						struct bpf_sock_ops_kern, sk),
9688 				      si->dst_reg, si->src_reg,
9689 				      offsetof(struct bpf_sock_ops_kern, sk));
9690 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9691 				      offsetof(struct tcp_sock, rtt_min) +
9692 				      sizeof_field(struct minmax_sample, t));
9693 		break;
9694 
9695 	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
9696 		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
9697 				   struct tcp_sock);
9698 		break;
9699 
9700 	case offsetof(struct bpf_sock_ops, sk_txhash):
9701 		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
9702 					  struct sock, type);
9703 		break;
9704 	case offsetof(struct bpf_sock_ops, snd_cwnd):
9705 		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
9706 		break;
9707 	case offsetof(struct bpf_sock_ops, srtt_us):
9708 		SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
9709 		break;
9710 	case offsetof(struct bpf_sock_ops, snd_ssthresh):
9711 		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
9712 		break;
9713 	case offsetof(struct bpf_sock_ops, rcv_nxt):
9714 		SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
9715 		break;
9716 	case offsetof(struct bpf_sock_ops, snd_nxt):
9717 		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
9718 		break;
9719 	case offsetof(struct bpf_sock_ops, snd_una):
9720 		SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
9721 		break;
9722 	case offsetof(struct bpf_sock_ops, mss_cache):
9723 		SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
9724 		break;
9725 	case offsetof(struct bpf_sock_ops, ecn_flags):
9726 		SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
9727 		break;
9728 	case offsetof(struct bpf_sock_ops, rate_delivered):
9729 		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
9730 		break;
9731 	case offsetof(struct bpf_sock_ops, rate_interval_us):
9732 		SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
9733 		break;
9734 	case offsetof(struct bpf_sock_ops, packets_out):
9735 		SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
9736 		break;
9737 	case offsetof(struct bpf_sock_ops, retrans_out):
9738 		SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
9739 		break;
9740 	case offsetof(struct bpf_sock_ops, total_retrans):
9741 		SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
9742 		break;
9743 	case offsetof(struct bpf_sock_ops, segs_in):
9744 		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
9745 		break;
9746 	case offsetof(struct bpf_sock_ops, data_segs_in):
9747 		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
9748 		break;
9749 	case offsetof(struct bpf_sock_ops, segs_out):
9750 		SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
9751 		break;
9752 	case offsetof(struct bpf_sock_ops, data_segs_out):
9753 		SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
9754 		break;
9755 	case offsetof(struct bpf_sock_ops, lost_out):
9756 		SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
9757 		break;
9758 	case offsetof(struct bpf_sock_ops, sacked_out):
9759 		SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
9760 		break;
9761 	case offsetof(struct bpf_sock_ops, bytes_received):
9762 		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
9763 		break;
9764 	case offsetof(struct bpf_sock_ops, bytes_acked):
9765 		SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
9766 		break;
9767 	case offsetof(struct bpf_sock_ops, sk):
9768 		SOCK_OPS_GET_SK();
9769 		break;
9770 	case offsetof(struct bpf_sock_ops, skb_data_end):
9771 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9772 						       skb_data_end),
9773 				      si->dst_reg, si->src_reg,
9774 				      offsetof(struct bpf_sock_ops_kern,
9775 					       skb_data_end));
9776 		break;
9777 	case offsetof(struct bpf_sock_ops, skb_data):
9778 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9779 						       skb),
9780 				      si->dst_reg, si->src_reg,
9781 				      offsetof(struct bpf_sock_ops_kern,
9782 					       skb));
9783 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9784 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
9785 				      si->dst_reg, si->dst_reg,
9786 				      offsetof(struct sk_buff, data));
9787 		break;
9788 	case offsetof(struct bpf_sock_ops, skb_len):
9789 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9790 						       skb),
9791 				      si->dst_reg, si->src_reg,
9792 				      offsetof(struct bpf_sock_ops_kern,
9793 					       skb));
9794 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9795 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
9796 				      si->dst_reg, si->dst_reg,
9797 				      offsetof(struct sk_buff, len));
9798 		break;
9799 	case offsetof(struct bpf_sock_ops, skb_tcp_flags):
9800 		off = offsetof(struct sk_buff, cb);
9801 		off += offsetof(struct tcp_skb_cb, tcp_flags);
9802 		*target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
9803 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9804 						       skb),
9805 				      si->dst_reg, si->src_reg,
9806 				      offsetof(struct bpf_sock_ops_kern,
9807 					       skb));
9808 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9809 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
9810 						       tcp_flags),
9811 				      si->dst_reg, si->dst_reg, off);
9812 		break;
9813 	}
9814 	return insn - insn_buf;
9815 }
9816 
9817 /* data_end = skb->data + skb_headlen() */
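/* skb_headlen(skb) is skb->len - skb->data_len, which is why the sequence
 * below loads both fields and subtracts them instead of calling a helper.
 */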
9818 static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
9819 						    struct bpf_insn *insn)
9820 {
9821 	int reg;
9822 	int temp_reg_off = offsetof(struct sk_buff, cb) +
9823 			   offsetof(struct sk_skb_cb, temp_reg);
9824 
9825 	if (si->src_reg == si->dst_reg) {
9826 		/* We need an extra register: choose one and save its value. */
9827 		reg = BPF_REG_9;
9828 		if (si->src_reg == reg || si->dst_reg == reg)
9829 			reg--;
9830 		if (si->src_reg == reg || si->dst_reg == reg)
9831 			reg--;
9832 		*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
9833 	} else {
9834 		reg = si->dst_reg;
9835 	}
9836 
9837 	/* reg = skb->data */
9838 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
9839 			      reg, si->src_reg,
9840 			      offsetof(struct sk_buff, data));
9841 	/* AX = skb->len */
9842 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
9843 			      BPF_REG_AX, si->src_reg,
9844 			      offsetof(struct sk_buff, len));
9845 	/* reg = skb->data + skb->len */
9846 	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
9847 	/* AX = skb->data_len */
9848 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
9849 			      BPF_REG_AX, si->src_reg,
9850 			      offsetof(struct sk_buff, data_len));
9851 
9852 	/* reg = skb->data + skb->len - skb->data_len */
9853 	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
9854 
9855 	if (si->src_reg == si->dst_reg) {
9856 		/* Restore the saved register */
9857 		*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
9858 		*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
9859 		*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
9860 	}
9861 
9862 	return insn;
9863 }
9864 
9865 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
9866 				     const struct bpf_insn *si,
9867 				     struct bpf_insn *insn_buf,
9868 				     struct bpf_prog *prog, u32 *target_size)
9869 {
9870 	struct bpf_insn *insn = insn_buf;
9871 	int off;
9872 
9873 	switch (si->off) {
9874 	case offsetof(struct __sk_buff, data_end):
9875 		insn = bpf_convert_data_end_access(si, insn);
9876 		break;
9877 	case offsetof(struct __sk_buff, cb[0]) ...
9878 	     offsetofend(struct __sk_buff, cb[4]) - 1:
9879 		BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
9880 		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
9881 			      offsetof(struct sk_skb_cb, data)) %
9882 			     sizeof(__u64));
9883 
9884 		prog->cb_access = 1;
9885 		off  = si->off;
9886 		off -= offsetof(struct __sk_buff, cb[0]);
9887 		off += offsetof(struct sk_buff, cb);
9888 		off += offsetof(struct sk_skb_cb, data);
9889 		if (type == BPF_WRITE)
9890 			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
9891 					      si->src_reg, off);
9892 		else
9893 			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
9894 					      si->src_reg, off);
9895 		break;
9896 
9897 
9898 	default:
9899 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
9900 					      target_size);
9901 	}
9902 
9903 	return insn - insn_buf;
9904 }
9905 
9906 static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
9907 				     const struct bpf_insn *si,
9908 				     struct bpf_insn *insn_buf,
9909 				     struct bpf_prog *prog, u32 *target_size)
9910 {
9911 	struct bpf_insn *insn = insn_buf;
9912 #if IS_ENABLED(CONFIG_IPV6)
9913 	int off;
9914 #endif
9915 
9916 	/* convert ctx uses the fact that the sg element is first in struct sk_msg */
9917 	BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
9918 
9919 	switch (si->off) {
9920 	case offsetof(struct sk_msg_md, data):
9921 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
9922 				      si->dst_reg, si->src_reg,
9923 				      offsetof(struct sk_msg, data));
9924 		break;
9925 	case offsetof(struct sk_msg_md, data_end):
9926 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
9927 				      si->dst_reg, si->src_reg,
9928 				      offsetof(struct sk_msg, data_end));
9929 		break;
9930 	case offsetof(struct sk_msg_md, family):
9931 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
9932 
9933 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9934 					      struct sk_msg, sk),
9935 				      si->dst_reg, si->src_reg,
9936 				      offsetof(struct sk_msg, sk));
9937 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9938 				      offsetof(struct sock_common, skc_family));
9939 		break;
9940 
9941 	case offsetof(struct sk_msg_md, remote_ip4):
9942 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
9943 
9944 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9945 						struct sk_msg, sk),
9946 				      si->dst_reg, si->src_reg,
9947 				      offsetof(struct sk_msg, sk));
9948 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9949 				      offsetof(struct sock_common, skc_daddr));
9950 		break;
9951 
9952 	case offsetof(struct sk_msg_md, local_ip4):
9953 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9954 					  skc_rcv_saddr) != 4);
9955 
9956 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9957 					      struct sk_msg, sk),
9958 				      si->dst_reg, si->src_reg,
9959 				      offsetof(struct sk_msg, sk));
9960 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9961 				      offsetof(struct sock_common,
9962 					       skc_rcv_saddr));
9963 		break;
9964 
9965 	case offsetof(struct sk_msg_md, remote_ip6[0]) ...
9966 	     offsetof(struct sk_msg_md, remote_ip6[3]):
9967 #if IS_ENABLED(CONFIG_IPV6)
9968 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9969 					  skc_v6_daddr.s6_addr32[0]) != 4);
9970 
9971 		off = si->off;
9972 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
9973 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9974 						struct sk_msg, sk),
9975 				      si->dst_reg, si->src_reg,
9976 				      offsetof(struct sk_msg, sk));
9977 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9978 				      offsetof(struct sock_common,
9979 					       skc_v6_daddr.s6_addr32[0]) +
9980 				      off);
9981 #else
9982 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9983 #endif
9984 		break;
9985 
9986 	case offsetof(struct sk_msg_md, local_ip6[0]) ...
9987 	     offsetof(struct sk_msg_md, local_ip6[3]):
9988 #if IS_ENABLED(CONFIG_IPV6)
9989 		BUILD_BUG_ON(sizeof_field(struct sock_common,
9990 					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9991 
9992 		off = si->off;
9993 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
9994 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
9995 						struct sk_msg, sk),
9996 				      si->dst_reg, si->src_reg,
9997 				      offsetof(struct sk_msg, sk));
9998 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9999 				      offsetof(struct sock_common,
10000 					       skc_v6_rcv_saddr.s6_addr32[0]) +
10001 				      off);
10002 #else
10003 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10004 #endif
10005 		break;
10006 
10007 	case offsetof(struct sk_msg_md, remote_port):
10008 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
10009 
10010 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10011 						struct sk_msg, sk),
10012 				      si->dst_reg, si->src_reg,
10013 				      offsetof(struct sk_msg, sk));
10014 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10015 				      offsetof(struct sock_common, skc_dport));
10016 #ifndef __BIG_ENDIAN_BITFIELD
10017 		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
10018 #endif
10019 		break;
10020 
10021 	case offsetof(struct sk_msg_md, local_port):
10022 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
10023 
10024 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10025 						struct sk_msg, sk),
10026 				      si->dst_reg, si->src_reg,
10027 				      offsetof(struct sk_msg, sk));
10028 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10029 				      offsetof(struct sock_common, skc_num));
10030 		break;
10031 
10032 	case offsetof(struct sk_msg_md, size):
10033 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
10034 				      si->dst_reg, si->src_reg,
10035 				      offsetof(struct sk_msg_sg, size));
10036 		break;
10037 
10038 	case offsetof(struct sk_msg_md, sk):
10039 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
10040 				      si->dst_reg, si->src_reg,
10041 				      offsetof(struct sk_msg, sk));
10042 		break;
10043 	}
10044 
10045 	return insn - insn_buf;
10046 }
10047 
10048 const struct bpf_verifier_ops sk_filter_verifier_ops = {
10049 	.get_func_proto		= sk_filter_func_proto,
10050 	.is_valid_access	= sk_filter_is_valid_access,
10051 	.convert_ctx_access	= bpf_convert_ctx_access,
10052 	.gen_ld_abs		= bpf_gen_ld_abs,
10053 };
10054 
10055 const struct bpf_prog_ops sk_filter_prog_ops = {
10056 	.test_run		= bpf_prog_test_run_skb,
10057 };
10058 
10059 const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
10060 	.get_func_proto		= tc_cls_act_func_proto,
10061 	.is_valid_access	= tc_cls_act_is_valid_access,
10062 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
10063 	.gen_prologue		= tc_cls_act_prologue,
10064 	.gen_ld_abs		= bpf_gen_ld_abs,
10065 	.check_kfunc_call	= bpf_prog_test_check_kfunc_call,
10066 };
10067 
10068 const struct bpf_prog_ops tc_cls_act_prog_ops = {
10069 	.test_run		= bpf_prog_test_run_skb,
10070 };
10071 
10072 const struct bpf_verifier_ops xdp_verifier_ops = {
10073 	.get_func_proto		= xdp_func_proto,
10074 	.is_valid_access	= xdp_is_valid_access,
10075 	.convert_ctx_access	= xdp_convert_ctx_access,
10076 	.gen_prologue		= bpf_noop_prologue,
10077 };
10078 
10079 const struct bpf_prog_ops xdp_prog_ops = {
10080 	.test_run		= bpf_prog_test_run_xdp,
10081 };
10082 
10083 const struct bpf_verifier_ops cg_skb_verifier_ops = {
10084 	.get_func_proto		= cg_skb_func_proto,
10085 	.is_valid_access	= cg_skb_is_valid_access,
10086 	.convert_ctx_access	= bpf_convert_ctx_access,
10087 };
10088 
10089 const struct bpf_prog_ops cg_skb_prog_ops = {
10090 	.test_run		= bpf_prog_test_run_skb,
10091 };
10092 
10093 const struct bpf_verifier_ops lwt_in_verifier_ops = {
10094 	.get_func_proto		= lwt_in_func_proto,
10095 	.is_valid_access	= lwt_is_valid_access,
10096 	.convert_ctx_access	= bpf_convert_ctx_access,
10097 };
10098 
10099 const struct bpf_prog_ops lwt_in_prog_ops = {
10100 	.test_run		= bpf_prog_test_run_skb,
10101 };
10102 
10103 const struct bpf_verifier_ops lwt_out_verifier_ops = {
10104 	.get_func_proto		= lwt_out_func_proto,
10105 	.is_valid_access	= lwt_is_valid_access,
10106 	.convert_ctx_access	= bpf_convert_ctx_access,
10107 };
10108 
10109 const struct bpf_prog_ops lwt_out_prog_ops = {
10110 	.test_run		= bpf_prog_test_run_skb,
10111 };
10112 
10113 const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
10114 	.get_func_proto		= lwt_xmit_func_proto,
10115 	.is_valid_access	= lwt_is_valid_access,
10116 	.convert_ctx_access	= bpf_convert_ctx_access,
10117 	.gen_prologue		= tc_cls_act_prologue,
10118 };
10119 
10120 const struct bpf_prog_ops lwt_xmit_prog_ops = {
10121 	.test_run		= bpf_prog_test_run_skb,
10122 };
10123 
10124 const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
10125 	.get_func_proto		= lwt_seg6local_func_proto,
10126 	.is_valid_access	= lwt_is_valid_access,
10127 	.convert_ctx_access	= bpf_convert_ctx_access,
10128 };
10129 
10130 const struct bpf_prog_ops lwt_seg6local_prog_ops = {
10131 	.test_run		= bpf_prog_test_run_skb,
10132 };
10133 
10134 const struct bpf_verifier_ops cg_sock_verifier_ops = {
10135 	.get_func_proto		= sock_filter_func_proto,
10136 	.is_valid_access	= sock_filter_is_valid_access,
10137 	.convert_ctx_access	= bpf_sock_convert_ctx_access,
10138 };
10139 
10140 const struct bpf_prog_ops cg_sock_prog_ops = {
10141 };
10142 
10143 const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
10144 	.get_func_proto		= sock_addr_func_proto,
10145 	.is_valid_access	= sock_addr_is_valid_access,
10146 	.convert_ctx_access	= sock_addr_convert_ctx_access,
10147 };
10148 
10149 const struct bpf_prog_ops cg_sock_addr_prog_ops = {
10150 };
10151 
10152 const struct bpf_verifier_ops sock_ops_verifier_ops = {
10153 	.get_func_proto		= sock_ops_func_proto,
10154 	.is_valid_access	= sock_ops_is_valid_access,
10155 	.convert_ctx_access	= sock_ops_convert_ctx_access,
10156 };
10157 
10158 const struct bpf_prog_ops sock_ops_prog_ops = {
10159 };
10160 
10161 const struct bpf_verifier_ops sk_skb_verifier_ops = {
10162 	.get_func_proto		= sk_skb_func_proto,
10163 	.is_valid_access	= sk_skb_is_valid_access,
10164 	.convert_ctx_access	= sk_skb_convert_ctx_access,
10165 	.gen_prologue		= sk_skb_prologue,
10166 };
10167 
10168 const struct bpf_prog_ops sk_skb_prog_ops = {
10169 };
10170 
10171 const struct bpf_verifier_ops sk_msg_verifier_ops = {
10172 	.get_func_proto		= sk_msg_func_proto,
10173 	.is_valid_access	= sk_msg_is_valid_access,
10174 	.convert_ctx_access	= sk_msg_convert_ctx_access,
10175 	.gen_prologue		= bpf_noop_prologue,
10176 };
10177 
10178 const struct bpf_prog_ops sk_msg_prog_ops = {
10179 };
10180 
10181 const struct bpf_verifier_ops flow_dissector_verifier_ops = {
10182 	.get_func_proto		= flow_dissector_func_proto,
10183 	.is_valid_access	= flow_dissector_is_valid_access,
10184 	.convert_ctx_access	= flow_dissector_convert_ctx_access,
10185 };
10186 
10187 const struct bpf_prog_ops flow_dissector_prog_ops = {
10188 	.test_run		= bpf_prog_test_run_flow_dissector,
10189 };
10190 
10191 int sk_detach_filter(struct sock *sk)
10192 {
10193 	int ret = -ENOENT;
10194 	struct sk_filter *filter;
10195 
10196 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
10197 		return -EPERM;
10198 
10199 	filter = rcu_dereference_protected(sk->sk_filter,
10200 					   lockdep_sock_is_held(sk));
10201 	if (filter) {
10202 		RCU_INIT_POINTER(sk->sk_filter, NULL);
10203 		sk_filter_uncharge(sk, filter);
10204 		ret = 0;
10205 	}
10206 
10207 	return ret;
10208 }
10209 EXPORT_SYMBOL_GPL(sk_detach_filter);
10210 
10211 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
10212 		  unsigned int len)
10213 {
10214 	struct sock_fprog_kern *fprog;
10215 	struct sk_filter *filter;
10216 	int ret = 0;
10217 
10218 	lock_sock(sk);
10219 	filter = rcu_dereference_protected(sk->sk_filter,
10220 					   lockdep_sock_is_held(sk));
10221 	if (!filter)
10222 		goto out;
10223 
10224 	/* We're copying the filter that was originally attached,
10225 	 * so no conversion/decode is needed anymore. eBPF programs that
10226 	 * have no original program cannot be dumped through this.
10227 	 */
10228 	ret = -EACCES;
10229 	fprog = filter->prog->orig_prog;
10230 	if (!fprog)
10231 		goto out;
10232 
10233 	ret = fprog->len;
10234 	if (!len)
10235 		/* User space only asks for the number of filter blocks. */
10236 		goto out;
10237 
10238 	ret = -EINVAL;
10239 	if (len < fprog->len)
10240 		goto out;
10241 
10242 	ret = -EFAULT;
10243 	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
10244 		goto out;
10245 
10246 	/* Per the API, return the number of filter blocks rather than the
10247 	 * number of bytes copied.
10248 	 */
10249 	ret = fprog->len;
10250 out:
10251 	release_sock(sk);
10252 	return ret;
10253 }
10254 
10255 #ifdef CONFIG_INET
10256 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
10257 				    struct sock_reuseport *reuse,
10258 				    struct sock *sk, struct sk_buff *skb,
10259 				    struct sock *migrating_sk,
10260 				    u32 hash)
10261 {
10262 	reuse_kern->skb = skb;
10263 	reuse_kern->sk = sk;
10264 	reuse_kern->selected_sk = NULL;
10265 	reuse_kern->migrating_sk = migrating_sk;
10266 	reuse_kern->data_end = skb->data + skb_headlen(skb);
10267 	reuse_kern->hash = hash;
10268 	reuse_kern->reuseport_id = reuse->reuseport_id;
10269 	reuse_kern->bind_inany = reuse->bind_inany;
10270 }
10271 
10272 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
10273 				  struct bpf_prog *prog, struct sk_buff *skb,
10274 				  struct sock *migrating_sk,
10275 				  u32 hash)
10276 {
10277 	struct sk_reuseport_kern reuse_kern;
10278 	enum sk_action action;
10279 
10280 	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
10281 	action = bpf_prog_run(prog, &reuse_kern);
10282 
10283 	if (action == SK_PASS)
10284 		return reuse_kern.selected_sk;
10285 	else
10286 		return ERR_PTR(-ECONNREFUSED);
10287 }
10288 
10289 BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
10290 	   struct bpf_map *, map, void *, key, u32, flags)
10291 {
10292 	bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
10293 	struct sock_reuseport *reuse;
10294 	struct sock *selected_sk;
10295 
10296 	selected_sk = map->ops->map_lookup_elem(map, key);
10297 	if (!selected_sk)
10298 		return -ENOENT;
10299 
10300 	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
10301 	if (!reuse) {
10302 		/* Lookup in sock_map can return TCP ESTABLISHED sockets. */
10303 		if (sk_is_refcounted(selected_sk))
10304 			sock_put(selected_sk);
10305 
10306 		/* reuseport_array only holds sks with a non-NULL sk_reuseport_cb.
10307 		 * The only (!reuse) case here is that the sk has already been
10308 		 * unhashed (e.g. by close()), so treat it as -ENOENT.
10309 		 *
10310 		 * Other maps (e.g. sock_map) do not provide this guarantee and
10311 		 * the sk may never be in the reuseport group to begin with.
10312 		 */
10313 		return is_sockarray ? -ENOENT : -EINVAL;
10314 	}
10315 
10316 	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
10317 		struct sock *sk = reuse_kern->sk;
10318 
10319 		if (sk->sk_protocol != selected_sk->sk_protocol)
10320 			return -EPROTOTYPE;
10321 		else if (sk->sk_family != selected_sk->sk_family)
10322 			return -EAFNOSUPPORT;
10323 
10324 		/* Catch all. Likely bound to a different sockaddr. */
10325 		return -EBADFD;
10326 	}
10327 
10328 	reuse_kern->selected_sk = selected_sk;
10329 
10330 	return 0;
10331 }
10332 
10333 static const struct bpf_func_proto sk_select_reuseport_proto = {
10334 	.func           = sk_select_reuseport,
10335 	.gpl_only       = false,
10336 	.ret_type       = RET_INTEGER,
10337 	.arg1_type	= ARG_PTR_TO_CTX,
10338 	.arg2_type      = ARG_CONST_MAP_PTR,
10339 	.arg3_type      = ARG_PTR_TO_MAP_KEY,
10340 	.arg4_type	= ARG_ANYTHING,
10341 };
10342 
10343 BPF_CALL_4(sk_reuseport_load_bytes,
10344 	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10345 	   void *, to, u32, len)
10346 {
10347 	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
10348 }
10349 
10350 static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
10351 	.func		= sk_reuseport_load_bytes,
10352 	.gpl_only	= false,
10353 	.ret_type	= RET_INTEGER,
10354 	.arg1_type	= ARG_PTR_TO_CTX,
10355 	.arg2_type	= ARG_ANYTHING,
10356 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
10357 	.arg4_type	= ARG_CONST_SIZE,
10358 };
10359 
10360 BPF_CALL_5(sk_reuseport_load_bytes_relative,
10361 	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10362 	   void *, to, u32, len, u32, start_header)
10363 {
10364 	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
10365 					       len, start_header);
10366 }
10367 
10368 static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
10369 	.func		= sk_reuseport_load_bytes_relative,
10370 	.gpl_only	= false,
10371 	.ret_type	= RET_INTEGER,
10372 	.arg1_type	= ARG_PTR_TO_CTX,
10373 	.arg2_type	= ARG_ANYTHING,
10374 	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
10375 	.arg4_type	= ARG_CONST_SIZE,
10376 	.arg5_type	= ARG_ANYTHING,
10377 };
10378 
10379 static const struct bpf_func_proto *
10380 sk_reuseport_func_proto(enum bpf_func_id func_id,
10381 			const struct bpf_prog *prog)
10382 {
10383 	switch (func_id) {
10384 	case BPF_FUNC_sk_select_reuseport:
10385 		return &sk_select_reuseport_proto;
10386 	case BPF_FUNC_skb_load_bytes:
10387 		return &sk_reuseport_load_bytes_proto;
10388 	case BPF_FUNC_skb_load_bytes_relative:
10389 		return &sk_reuseport_load_bytes_relative_proto;
10390 	case BPF_FUNC_get_socket_cookie:
10391 		return &bpf_get_socket_ptr_cookie_proto;
10392 	case BPF_FUNC_ktime_get_coarse_ns:
10393 		return &bpf_ktime_get_coarse_ns_proto;
10394 	default:
10395 		return bpf_base_func_proto(func_id);
10396 	}
10397 }
10398 
10399 static bool
10400 sk_reuseport_is_valid_access(int off, int size,
10401 			     enum bpf_access_type type,
10402 			     const struct bpf_prog *prog,
10403 			     struct bpf_insn_access_aux *info)
10404 {
10405 	const u32 size_default = sizeof(__u32);
10406 
10407 	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
10408 	    off % size || type != BPF_READ)
10409 		return false;
10410 
10411 	switch (off) {
10412 	case offsetof(struct sk_reuseport_md, data):
10413 		info->reg_type = PTR_TO_PACKET;
10414 		return size == sizeof(__u64);
10415 
10416 	case offsetof(struct sk_reuseport_md, data_end):
10417 		info->reg_type = PTR_TO_PACKET_END;
10418 		return size == sizeof(__u64);
10419 
10420 	case offsetof(struct sk_reuseport_md, hash):
10421 		return size == size_default;
10422 
10423 	case offsetof(struct sk_reuseport_md, sk):
10424 		info->reg_type = PTR_TO_SOCKET;
10425 		return size == sizeof(__u64);
10426 
10427 	case offsetof(struct sk_reuseport_md, migrating_sk):
10428 		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
10429 		return size == sizeof(__u64);
10430 
10431 	/* Fields that allow narrowing */
10432 	case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
10433 		if (size < sizeof_field(struct sk_buff, protocol))
10434 			return false;
10435 		fallthrough;
10436 	case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
10437 	case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
10438 	case bpf_ctx_range(struct sk_reuseport_md, len):
10439 		bpf_ctx_record_field_size(info, size_default);
10440 		return bpf_ctx_narrow_access_ok(off, size, size_default);
10441 
10442 	default:
10443 		return false;
10444 	}
10445 }
10446 
10447 #define SK_REUSEPORT_LOAD_FIELD(F) ({					\
10448 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
10449 			      si->dst_reg, si->src_reg,			\
10450 			      bpf_target_off(struct sk_reuseport_kern, F, \
10451 					     sizeof_field(struct sk_reuseport_kern, F), \
10452 					     target_size));		\
10453 	})
10454 
10455 #define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
10456 	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
10457 				    struct sk_buff,			\
10458 				    skb,				\
10459 				    SKB_FIELD)
10460 
10461 #define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)				\
10462 	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
10463 				    struct sock,			\
10464 				    sk,					\
10465 				    SK_FIELD)
10466 
10467 static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
10468 					   const struct bpf_insn *si,
10469 					   struct bpf_insn *insn_buf,
10470 					   struct bpf_prog *prog,
10471 					   u32 *target_size)
10472 {
10473 	struct bpf_insn *insn = insn_buf;
10474 
10475 	switch (si->off) {
10476 	case offsetof(struct sk_reuseport_md, data):
10477 		SK_REUSEPORT_LOAD_SKB_FIELD(data);
10478 		break;
10479 
10480 	case offsetof(struct sk_reuseport_md, len):
10481 		SK_REUSEPORT_LOAD_SKB_FIELD(len);
10482 		break;
10483 
10484 	case offsetof(struct sk_reuseport_md, eth_protocol):
10485 		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
10486 		break;
10487 
10488 	case offsetof(struct sk_reuseport_md, ip_protocol):
10489 		SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
10490 		break;
10491 
10492 	case offsetof(struct sk_reuseport_md, data_end):
10493 		SK_REUSEPORT_LOAD_FIELD(data_end);
10494 		break;
10495 
10496 	case offsetof(struct sk_reuseport_md, hash):
10497 		SK_REUSEPORT_LOAD_FIELD(hash);
10498 		break;
10499 
10500 	case offsetof(struct sk_reuseport_md, bind_inany):
10501 		SK_REUSEPORT_LOAD_FIELD(bind_inany);
10502 		break;
10503 
10504 	case offsetof(struct sk_reuseport_md, sk):
10505 		SK_REUSEPORT_LOAD_FIELD(sk);
10506 		break;
10507 
10508 	case offsetof(struct sk_reuseport_md, migrating_sk):
10509 		SK_REUSEPORT_LOAD_FIELD(migrating_sk);
10510 		break;
10511 	}
10512 
10513 	return insn - insn_buf;
10514 }
10515 
10516 const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
10517 	.get_func_proto		= sk_reuseport_func_proto,
10518 	.is_valid_access	= sk_reuseport_is_valid_access,
10519 	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
10520 };
10521 
10522 const struct bpf_prog_ops sk_reuseport_prog_ops = {
10523 };
10524 
10525 DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
10526 EXPORT_SYMBOL(bpf_sk_lookup_enabled);
10527 
10528 BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
10529 	   struct sock *, sk, u64, flags)
10530 {
10531 	if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
10532 			       BPF_SK_LOOKUP_F_NO_REUSEPORT)))
10533 		return -EINVAL;
10534 	if (unlikely(sk && sk_is_refcounted(sk)))
10535 		return -ESOCKTNOSUPPORT; /* reject sockets that are not RCU-freed */
10536 	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
10537 		return -ESOCKTNOSUPPORT; /* only accept TCP sockets in TCP_LISTEN state */
10538 	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
10539 		return -ESOCKTNOSUPPORT; /* only accept UDP sockets in TCP_CLOSE state */
10540 
10541 	/* Check that the socket matches the packet's L3/L4 protocol */
10542 	if (sk && sk->sk_protocol != ctx->protocol)
10543 		return -EPROTOTYPE;
10544 	if (sk && sk->sk_family != ctx->family &&
10545 	    (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
10546 		return -EAFNOSUPPORT;
10547 
10548 	if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
10549 		return -EEXIST;
10550 
10551 	/* Select socket as lookup result */
10552 	ctx->selected_sk = sk;
10553 	ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
10554 	return 0;
10555 }
10556 
10557 static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
10558 	.func		= bpf_sk_lookup_assign,
10559 	.gpl_only	= false,
10560 	.ret_type	= RET_INTEGER,
10561 	.arg1_type	= ARG_PTR_TO_CTX,
10562 	.arg2_type	= ARG_PTR_TO_SOCKET_OR_NULL,
10563 	.arg3_type	= ARG_ANYTHING,
10564 };
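
/* Example (illustrative sketch, not part of this file): an SK_LOOKUP
 * program that uses bpf_sk_assign() (wired to bpf_sk_lookup_assign()
 * above) to steer new TCP connections on a given port to one listening
 * socket stored in a sockmap.  The map name "echo_socket" and port
 * 7777 are made up; the overall pattern is the usual one: look the
 * socket up, assign it, release the reference.  Builds with the same
 * includes as the sk_reuseport sketch earlier in this file.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SOCKMAP);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} echo_socket SEC(".maps");
 *
 *	SEC("sk_lookup")
 *	int redirect_to_echo(struct bpf_sk_lookup *ctx)
 *	{
 *		struct bpf_sock *sk;
 *		__u32 key = 0;
 *		long err;
 *
 *		if (ctx->protocol != IPPROTO_TCP || ctx->local_port != 7777)
 *			return SK_PASS;
 *
 *		sk = bpf_map_lookup_elem(&echo_socket, &key);
 *		if (!sk)
 *			return SK_PASS;
 *
 *		err = bpf_sk_assign(ctx, sk, 0);
 *		bpf_sk_release(sk);
 *		return err ? SK_DROP : SK_PASS;
 *	}
 */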
10565 
10566 static const struct bpf_func_proto *
10567 sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
10568 {
10569 	switch (func_id) {
10570 	case BPF_FUNC_perf_event_output:
10571 		return &bpf_event_output_data_proto;
10572 	case BPF_FUNC_sk_assign:
10573 		return &bpf_sk_lookup_assign_proto;
10574 	case BPF_FUNC_sk_release:
10575 		return &bpf_sk_release_proto;
10576 	default:
10577 		return bpf_sk_base_func_proto(func_id);
10578 	}
10579 }
10580 
10581 static bool sk_lookup_is_valid_access(int off, int size,
10582 				      enum bpf_access_type type,
10583 				      const struct bpf_prog *prog,
10584 				      struct bpf_insn_access_aux *info)
10585 {
10586 	if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
10587 		return false;
10588 	if (off % size != 0)
10589 		return false;
10590 	if (type != BPF_READ)
10591 		return false;
10592 
10593 	switch (off) {
10594 	case offsetof(struct bpf_sk_lookup, sk):
10595 		info->reg_type = PTR_TO_SOCKET_OR_NULL;
10596 		return size == sizeof(__u64);
10597 
10598 	case bpf_ctx_range(struct bpf_sk_lookup, family):
10599 	case bpf_ctx_range(struct bpf_sk_lookup, protocol):
10600 	case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
10601 	case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
10602 	case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
10603 	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
10604 	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
10605 	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
10606 	case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
10607 		bpf_ctx_record_field_size(info, sizeof(__u32));
10608 		return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
10609 
10610 	default:
10611 		return false;
10612 	}
10613 }
10614 
10615 static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
10616 					const struct bpf_insn *si,
10617 					struct bpf_insn *insn_buf,
10618 					struct bpf_prog *prog,
10619 					u32 *target_size)
10620 {
10621 	struct bpf_insn *insn = insn_buf;
10622 
10623 	switch (si->off) {
10624 	case offsetof(struct bpf_sk_lookup, sk):
10625 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10626 				      offsetof(struct bpf_sk_lookup_kern, selected_sk));
10627 		break;
10628 
10629 	case offsetof(struct bpf_sk_lookup, family):
10630 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10631 				      bpf_target_off(struct bpf_sk_lookup_kern,
10632 						     family, 2, target_size));
10633 		break;
10634 
10635 	case offsetof(struct bpf_sk_lookup, protocol):
10636 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10637 				      bpf_target_off(struct bpf_sk_lookup_kern,
10638 						     protocol, 2, target_size));
10639 		break;
10640 
10641 	case offsetof(struct bpf_sk_lookup, remote_ip4):
10642 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10643 				      bpf_target_off(struct bpf_sk_lookup_kern,
10644 						     v4.saddr, 4, target_size));
10645 		break;
10646 
10647 	case offsetof(struct bpf_sk_lookup, local_ip4):
10648 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10649 				      bpf_target_off(struct bpf_sk_lookup_kern,
10650 						     v4.daddr, 4, target_size));
10651 		break;
10652 
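	/* The kern context stores the IPv6 addresses as pointers
	 * (v6.saddr/v6.daddr).  The two cases below therefore load the
	 * pointer, skip the dereference when it is NULL, and otherwise
	 * load the requested 32-bit word, so a missing address reads
	 * back as 0 instead of faulting.
	 */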
10653 	case bpf_ctx_range_till(struct bpf_sk_lookup,
10654 				remote_ip6[0], remote_ip6[3]): {
10655 #if IS_ENABLED(CONFIG_IPV6)
10656 		int off = si->off;
10657 
10658 		off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
10659 		off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
10660 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10661 				      offsetof(struct bpf_sk_lookup_kern, v6.saddr));
10662 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10663 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
10664 #else
10665 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10666 #endif
10667 		break;
10668 	}
10669 	case bpf_ctx_range_till(struct bpf_sk_lookup,
10670 				local_ip6[0], local_ip6[3]): {
10671 #if IS_ENABLED(CONFIG_IPV6)
10672 		int off = si->off;
10673 
10674 		off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
10675 		off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
10676 		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
10677 				      offsetof(struct bpf_sk_lookup_kern, v6.daddr));
10678 		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10679 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
10680 #else
10681 		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10682 #endif
10683 		break;
10684 	}
10685 	case offsetof(struct bpf_sk_lookup, remote_port):
10686 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10687 				      bpf_target_off(struct bpf_sk_lookup_kern,
10688 						     sport, 2, target_size));
10689 		break;
10690 
10691 	case offsetof(struct bpf_sk_lookup, local_port):
10692 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
10693 				      bpf_target_off(struct bpf_sk_lookup_kern,
10694 						     dport, 2, target_size));
10695 		break;
10696 
10697 	case offsetof(struct bpf_sk_lookup, ingress_ifindex):
10698 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10699 				      bpf_target_off(struct bpf_sk_lookup_kern,
10700 						     ingress_ifindex, 4, target_size));
10701 		break;
10702 	}
10703 
10704 	return insn - insn_buf;
10705 }
10706 
10707 const struct bpf_prog_ops sk_lookup_prog_ops = {
10708 	.test_run = bpf_prog_test_run_sk_lookup,
10709 };
10710 
10711 const struct bpf_verifier_ops sk_lookup_verifier_ops = {
10712 	.get_func_proto		= sk_lookup_func_proto,
10713 	.is_valid_access	= sk_lookup_is_valid_access,
10714 	.convert_ctx_access	= sk_lookup_convert_ctx_access,
10715 };
10716 
10717 #endif /* CONFIG_INET */
10718 
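/* The BPF dispatcher lets XDP invoke the attached program through a
 * patched direct call instead of a retpolined indirect call.
 * bpf_prog_change_xdp() below re-targets the dispatcher trampoline
 * whenever an XDP program is attached, replaced or detached.
 */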
10719 DEFINE_BPF_DISPATCHER(xdp)
10720 
10721 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
10722 {
10723 	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
10724 }
10725 
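/* Resolve a BTF type ID for every socket type listed in
 * BTF_SOCK_TYPE_xxx (see linux/btf_ids.h) into the global
 * btf_sock_ids[] table.  The bpf_skc_to_*() casting helpers below
 * point their ret_btf_id at these entries so the verifier knows the
 * exact type behind the returned pointer.
 */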
10726 BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE)
10727 #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
10728 BTF_SOCK_TYPE_xxx
10729 #undef BTF_SOCK_TYPE
10730 
10731 BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
10732 {
10733 	/* The tcp6_sock type is not emitted in DWARF (and hence not in BTF);
10734 	 * trigger an explicit type generation here.
10735 	 */
10736 	BTF_TYPE_EMIT(struct tcp6_sock);
10737 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
10738 	    sk->sk_family == AF_INET6)
10739 		return (unsigned long)sk;
10740 
10741 	return (unsigned long)NULL;
10742 }
10743 
10744 const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
10745 	.func			= bpf_skc_to_tcp6_sock,
10746 	.gpl_only		= false,
10747 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10748 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10749 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
10750 };
10751 
10752 BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
10753 {
10754 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
10755 		return (unsigned long)sk;
10756 
10757 	return (unsigned long)NULL;
10758 }
10759 
10760 const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
10761 	.func			= bpf_skc_to_tcp_sock,
10762 	.gpl_only		= false,
10763 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10764 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10765 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP],
10766 };
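
/* Example (illustrative sketch, not part of this file): the
 * bpf_skc_to_*() casting helpers are typically used from tracing or
 * iterator programs to go from a generic socket pointer to a more
 * specific BTF-typed one.  The sketch assumes the usual libbpf
 * workflow (vmlinux.h for kernel types, bpf_helpers.h for SEC());
 * the program name and the snd_cwnd aggregation are made up.
 *
 *	__u64 total_cwnd;
 *
 *	SEC("iter/tcp")
 *	int sum_cwnd(struct bpf_iter__tcp *ctx)
 *	{
 *		struct sock_common *skc = ctx->sk_common;
 *		struct tcp_sock *tp;
 *
 *		if (!skc)
 *			return 0;
 *
 *		tp = bpf_skc_to_tcp_sock(skc);
 *		if (tp)
 *			total_cwnd += tp->snd_cwnd;
 *		return 0;
 *	}
 */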
10767 
10768 BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
10769 {
10770 	/* BTF types for tcp_timewait_sock and inet_timewait_sock are not
10771 	 * generated if CONFIG_INET=n. Trigger an explicit generation here.
10772 	 */
10773 	BTF_TYPE_EMIT(struct inet_timewait_sock);
10774 	BTF_TYPE_EMIT(struct tcp_timewait_sock);
10775 
10776 #ifdef CONFIG_INET
10777 	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
10778 		return (unsigned long)sk;
10779 #endif
10780 
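	/* tcpv6_prot can only be referenced directly when IPv6 is built
	 * in; with CONFIG_IPV6=m or =n the comparison below is compiled
	 * out.
	 */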
10781 #if IS_BUILTIN(CONFIG_IPV6)
10782 	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
10783 		return (unsigned long)sk;
10784 #endif
10785 
10786 	return (unsigned long)NULL;
10787 }
10788 
10789 const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
10790 	.func			= bpf_skc_to_tcp_timewait_sock,
10791 	.gpl_only		= false,
10792 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10793 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10794 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
10795 };
10796 
10797 BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
10798 {
10799 #ifdef CONFIG_INET
10800 	if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
10801 		return (unsigned long)sk;
10802 #endif
10803 
10804 #if IS_BUILTIN(CONFIG_IPV6)
10805 	if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
10806 		return (unsigned long)sk;
10807 #endif
10808 
10809 	return (unsigned long)NULL;
10810 }
10811 
10812 const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
10813 	.func			= bpf_skc_to_tcp_request_sock,
10814 	.gpl_only		= false,
10815 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10816 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10817 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
10818 };
10819 
10820 BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
10821 {
10822 	/* The udp6_sock type is not emitted in DWARF (and hence not in BTF);
10823 	 * trigger an explicit type generation here.
10824 	 */
10825 	BTF_TYPE_EMIT(struct udp6_sock);
10826 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
10827 	    sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
10828 		return (unsigned long)sk;
10829 
10830 	return (unsigned long)NULL;
10831 }
10832 
10833 const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
10834 	.func			= bpf_skc_to_udp6_sock,
10835 	.gpl_only		= false,
10836 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10837 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10838 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
10839 };
10840 
10841 BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
10842 {
10843 	/* The unix_sock type is not emitted in DWARF (and hence not in BTF);
10844 	 * trigger an explicit type generation here.
10845 	 */
10846 	BTF_TYPE_EMIT(struct unix_sock);
10847 	if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
10848 		return (unsigned long)sk;
10849 
10850 	return (unsigned long)NULL;
10851 }
10852 
10853 const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
10854 	.func			= bpf_skc_to_unix_sock,
10855 	.gpl_only		= false,
10856 	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
10857 	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
10858 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
10859 };
10860 
10861 BPF_CALL_1(bpf_sock_from_file, struct file *, file)
10862 {
10863 	return (unsigned long)sock_from_file(file);
10864 }
10865 
10866 BTF_ID_LIST(bpf_sock_from_file_btf_ids)
10867 BTF_ID(struct, socket)
10868 BTF_ID(struct, file)
10869 
10870 const struct bpf_func_proto bpf_sock_from_file_proto = {
10871 	.func		= bpf_sock_from_file,
10872 	.gpl_only	= false,
10873 	.ret_type	= RET_PTR_TO_BTF_ID_OR_NULL,
10874 	.ret_btf_id	= &bpf_sock_from_file_btf_ids[0],
10875 	.arg1_type	= ARG_PTR_TO_BTF_ID,
10876 	.arg1_btf_id	= &bpf_sock_from_file_btf_ids[1],
10877 };
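
/* Example (illustrative sketch, not part of this file): a task_file
 * iterator that uses bpf_sock_from_file() to tell socket files apart
 * from everything else.  The global counter and program name are made
 * up; struct bpf_iter__task_file and the helper itself are real.
 *
 *	__u64 nr_sockets;
 *
 *	SEC("iter/task_file")
 *	int count_sockets(struct bpf_iter__task_file *ctx)
 *	{
 *		struct file *file = ctx->file;
 *		struct socket *sock;
 *
 *		if (!file)
 *			return 0;
 *
 *		sock = bpf_sock_from_file(file);
 *		if (sock)
 *			nr_sockets++;
 *		return 0;
 *	}
 */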
10878 
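/* Shared tail of the socket program types' get_func_proto hooks: the
 * bpf_skc_to_*() casting helpers are only handed out when the loading
 * task is perfmon_capable() (CAP_PERFMON or CAP_SYS_ADMIN); unhandled
 * func IDs fall back to bpf_base_func_proto().
 */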
10879 static const struct bpf_func_proto *
10880 bpf_sk_base_func_proto(enum bpf_func_id func_id)
10881 {
10882 	const struct bpf_func_proto *func;
10883 
10884 	switch (func_id) {
10885 	case BPF_FUNC_skc_to_tcp6_sock:
10886 		func = &bpf_skc_to_tcp6_sock_proto;
10887 		break;
10888 	case BPF_FUNC_skc_to_tcp_sock:
10889 		func = &bpf_skc_to_tcp_sock_proto;
10890 		break;
10891 	case BPF_FUNC_skc_to_tcp_timewait_sock:
10892 		func = &bpf_skc_to_tcp_timewait_sock_proto;
10893 		break;
10894 	case BPF_FUNC_skc_to_tcp_request_sock:
10895 		func = &bpf_skc_to_tcp_request_sock_proto;
10896 		break;
10897 	case BPF_FUNC_skc_to_udp6_sock:
10898 		func = &bpf_skc_to_udp6_sock_proto;
10899 		break;
10900 	case BPF_FUNC_skc_to_unix_sock:
10901 		func = &bpf_skc_to_unix_sock_proto;
10902 		break;
10903 	case BPF_FUNC_ktime_get_coarse_ns:
10904 		return &bpf_ktime_get_coarse_ns_proto;
10905 	default:
10906 		return bpf_base_func_proto(func_id);
10907 	}
10908 
10909 	if (!perfmon_capable())
10910 		return NULL;
10911 
10912 	return func;
10913 }
10914