// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
#include <net/mptcp.h>

static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
	if (in_compat_syscall()) {
		struct compat_sock_fprog f32;

		if (len != sizeof(f32))
			return -EINVAL;
		if (copy_from_sockptr(&f32, src, sizeof(f32)))
			return -EFAULT;
		memset(dst, 0, sizeof(*dst));
		dst->len = f32.len;
		dst->filter = compat_ptr(f32.filter);
	} else {
		if (len != sizeof(*dst))
			return -EINVAL;
		if (copy_from_sockptr(dst, src, sizeof(*dst)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

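/* Usage sketch (illustrative, not a definitive call site): the setsockopt()
 * path for SO_ATTACH_FILTER is expected to look roughly like this, with the
 * compat layout differences hidden behind copy_bpf_fprog_from_user():
 *
 *	struct sock_fprog fprog;
 *	int err;
 *
 *	err = copy_bpf_fprog_from_user(&fprog, optval, optlen);
 *	if (!err)
 *		err = sk_attach_filter(&fprog, sk);
 */
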
/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to bpf_prog_run. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

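/* For reference (a sketch of the common wrapper, assuming the usual
 * include/linux/filter.h definition): most callers go through sk_filter(),
 * which simply caps the trim at the minimal non-zero length:
 *
 *	static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 */
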
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
					 offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		break;
	case SKF_AD_VLAN_TAG_PRESENT:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET);
		if (PKT_VLAN_PRESENT_BIT)
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
		if (PKT_VLAN_PRESENT_BIT < 7)
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		break;
	}

	return insn - insn_buf;
}

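/* Illustrative example (not emitted verbatim anywhere): a classic filter
 * instruction such as
 *
 *	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_MARK)
 *
 * is handled by convert_skb_access(SKF_AD_MARK, ...) above and becomes a
 * single eBPF load of skb->mark:
 *
 *	BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
 *		    offsetof(struct sk_buff, mark))
 */
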
static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
{
	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
	bool endian = BPF_SIZE(fp->code) == BPF_H ||
		      BPF_SIZE(fp->code) == BPF_W;
	bool indirect = BPF_MODE(fp->code) == BPF_IND;
	const int ip_align = NET_IP_ALIGN;
	struct bpf_insn *insn = *insnp;
	int offset = fp->k;

	if (!indirect &&
	    ((unaligned_ok && offset >= 0) ||
	     (!unaligned_ok && offset >= 0 &&
	      offset + ip_align >= 0 &&
	      offset + ip_align % size == 0))) {
		bool ldx_off_ok = offset <= S16_MAX;

		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
		if (offset)
			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
				      size, 2 + endian + (!ldx_off_ok * 2));
		if (ldx_off_ok) {
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_D, offset);
		} else {
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_TMP, 0);
		}
		if (endian)
			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
		*insn++ = BPF_JMP_A(8);
	}

	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
		if (fp->k)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
	}

	switch (BPF_SIZE(fp->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
		break;
	default:
		return false;
	}

	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
	*insn = BPF_EXIT_INSN();

	*insnp = insn;
	return true;
}

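/* Worked example (illustrative only): the classic EtherType load
 *
 *	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12)
 *
 * takes the fast path above when the linear head is long enough: the
 * emitted eBPF loads 2 bytes at offset 12 from the cached skb->data in
 * BPF_REG_D and byte-swaps the result into A. If the bounds check against
 * the cached headlen in BPF_REG_H fails, it falls through to the
 * bpf_skb_load_helper_16() call, which handles non-linear data via
 * skb_copy_bits() and negative (SKF_LL_OFF/SKF_NET_OFF) offsets via
 * bpf_internal_load_pointer_neg_helper().
 */
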
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len,
			      bool *seen_ld_abs)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
		if (*seen_ld_abs) {
			/* For packet access in classic BPF, cache skb->data
			 * in callee-saved BPF R8 and skb->len - skb->data_len
			 * (headlen) in BPF R9. Since classic BPF is read-only
			 * on CTX, we only need to cache it once.
			 */
			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
						  BPF_REG_D, BPF_REG_CTX,
						  offsetof(struct sk_buff, data));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
						  offsetof(struct sk_buff, len));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
						  offsetof(struct sk_buff, data_len));
			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
		}
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[32] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    convert_bpf_ld_abs(fp, &insn)) {
				*seen_ld_abs = true;
				break;
			}

			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
				/* Error with exception code on div/mod by 0.
				 * For cBPF programs, this was always return 0.
				 */
				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
				*insn++ = BPF_EXIT_INSN();
			}

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
		s32 off;						\
									\
		if (target >= len || target < 0)			\
			goto err;					\
		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		off -= insn - tmp_insns;				\
		/* Reject anything not fitting into insn->off. */	\
		if (off < off_min || off > off_max)			\
			goto err;					\
		insn->off = off;					\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B: {
			struct sock_filter tmp = {
				.code	= BPF_LD | BPF_ABS | BPF_B,
				.k	= fp->k,
			};

			*seen_ld_abs = true;

			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			convert_bpf_ld_abs(&tmp, &insn);
			insn++;
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* tmp = X */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;
		}
		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		if (*seen_ld_abs)
			*new_len += 4; /* Prologue bits. */
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

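/* Calling pattern sketch (it mirrors bpf_migrate_filter() further down,
 * shown here only to make the two-phase workflow concrete):
 *
 *	int new_len;
 *	bool seen_ld_abs = false;
 *
 *	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
 *				 &seen_ld_abs);
 *	if (!err) {
 *		fp = bpf_prog_realloc(fp, bpf_prog_size(new_len), 0);
 *		...
 *		err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
 *					 &seen_ld_abs);
 *	}
 */
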
/* Security:
 *
 * As we don't want to clear mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by the user never tries
 * to read a cell if not previously written, and we check all branches to
 * be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

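/* Example of what the check above rejects (a hypothetical two-insn filter
 * that reads scratch cell M[0] before anything has been stored there):
 *
 *	struct sock_filter bad[] = {
 *		BPF_STMT(BPF_LD | BPF_MEM, 0),	// A = M[0], never written
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *
 * check_load_and_stores() returns -EINVAL here, since bit 0 of memvalid is
 * still clear when the load is reached.
 */
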
static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

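/* A minimal program that passes bpf_check_classic() (shown for orientation,
 * not used anywhere in this file): the classic "accept everything" filter,
 * a single RET that returns a non-zero trim length:
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 */
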
static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;
	bool seen_ld_abs = false;

	/* We are free to overwrite insns et al right here as it won't be used at
	 * this point in time anymore internally after the migration to the eBPF
	 * instruction representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
				 &seen_ld_abs);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
				 &seen_ld_abs);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the eBPF translation
	 * for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);

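/* Usage sketch for in-kernel callers (illustrative; the variable names are
 * made up): build a classic "accept all" program and let bpf_prog_create()
 * verify and translate it:
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &fprog);
 *
 * The resulting prog is later freed with bpf_prog_destroy().
 */
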
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		err = -ENOMEM;
	else
		err = reuseport_attach_prog(sk, prog);

	if (err)
		__bpf_prog_release(prog);

	return err;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (PTR_ERR(prog) == -EINVAL)
		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
		 * bpf prog (e.g. sockmap). It depends on the
		 * limitation imposed by bpf_prog_load().
		 * Hence, sysctl_optmem_max is not checked.
		 */
		if ((sk->sk_type != SOCK_STREAM &&
		     sk->sk_type != SOCK_DGRAM) ||
		    (sk->sk_protocol != IPPROTO_UDP &&
		     sk->sk_protocol != IPPROTO_TCP) ||
		    (sk->sk_family != AF_INET &&
		     sk->sk_family != AF_INET6)) {
			err = -ENOTSUPP;
			goto err_prog_put;
		}
	} else {
		/* BPF_PROG_TYPE_SOCKET_FILTER */
		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
			err = -ENOMEM;
			goto err_prog_put;
		}
	}

	err = reuseport_attach_prog(sk, prog);
err_prog_put:
	if (err)
		bpf_prog_put(prog);

	return err;
}

void sk_reuseport_prog_free(struct bpf_prog *prog)
{
	if (!prog)
		return;

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
		bpf_prog_put(prog);
	else
		bpf_prog_destroy(prog);
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > INT_MAX))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

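/* From the BPF program side (a sketch, not kernel code; assumes the usual
 * bpf_helpers.h declaration of the helper): a tc classifier could rewrite
 * the destination MAC at offset 0 of the frame like this:
 *
 *	char new_dmac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	bpf_skb_store_bytes(skb, 0, new_dmac, sizeof(new_dmac), 0);
 *
 * BPF_F_RECOMPUTE_CSUM is meant for writes that touch data covered by a
 * CHECKSUM_COMPLETE checksum, BPF_F_INVALIDATE_HASH for writes that change
 * the flow key fields skb->hash was derived from.
 */
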
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > INT_MAX))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_4(bpf_flow_dissector_load_bytes,
	   const struct bpf_flow_dissector *, ctx, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	if (unlikely(!ctx->skb))
		goto err_clear;

	ptr = skb_header_pointer(ctx->skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
	.func		= bpf_flow_dissector_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
	   u32, offset, void *, to, u32, len, u32, start_header)
{
	u8 *end = skb_tail_pointer(skb);
	u8 *start, *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	switch (start_header) {
	case BPF_HDR_START_MAC:
		if (unlikely(!skb_mac_header_was_set(skb)))
			goto err_clear;
		start = skb_mac_header(skb);
		break;
	case BPF_HDR_START_NET:
		start = skb_network_header(skb);
		break;
	default:
		goto err_clear;
	}

	ptr = start + offset;

	if (likely(ptr + len <= end)) {
		memcpy(to, ptr, len);
		return 0;
	}

err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
	.func		= bpf_skb_load_bytes_relative,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

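/* Program-side sketch (illustrative, not kernel code): read the IPv4 header
 * regardless of how much encapsulation precedes it, by anchoring the offset
 * at the network header rather than at the start of the frame:
 *
 *	struct iphdr iph;
 *
 *	if (!bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
 *					 BPF_HDR_START_NET))
 *		... iph.saddr / iph.daddr are now usable ...
 */
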
BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.func		= bpf_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
{
	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
}

static const struct bpf_func_proto bpf_sk_fullsock_proto = {
	.func		= bpf_sk_fullsock,
	.gpl_only	= false,
	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
};

static inline int sk_skb_try_make_writable(struct sk_buff *skb,
					   unsigned int write_len)
{
	return __bpf_try_make_writable(skb, write_len);
}

BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto sk_skb_pull_data_proto = {
	.func		= sk_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		csum_replace_by_diff(ptr, to);
		break;
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
	.func		= bpf_l3_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	if (is_mmzero && !do_mforce && !*ptr)
		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	if (is_mmzero && !*ptr)
		*ptr = CSUM_MANGLED_0;
	return 0;
}

static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.func		= bpf_l4_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
	   __be32 *, to, u32, to_size, __wsum, seed)
{
	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	u32 diff_size = from_size + to_size;
	int i, j = 0;

	/* This is quite flexible, some examples:
	 *
	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i <   to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};

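/* Program-side sketch of the "diffing data" case above (illustrative; the
 * exact offsets depend on the packet layout the program has validated): when
 * a program rewrites the IPv4 destination address, it can feed the old and
 * new values to bpf_csum_diff() and apply the result to both checksums:
 *
 *	__be32 old_ip = iph->daddr, new_ip = ...;
 *	__wsum diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
 *
 *	bpf_l3_csum_replace(skb, ip_csum_off, 0, diff, 0);
 *	bpf_l4_csum_replace(skb, tcp_csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 *
 * The "from == 0" form of the two replace helpers above is exactly the
 * csum_replace_by_diff() path that consumes such a diff.
 */
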
BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		return (skb->csum = csum_add(skb->csum, csum));

	return -ENOTSUPP;
}

static const struct bpf_func_proto bpf_csum_update_proto = {
	.func		= bpf_csum_update,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
{
	/* The interface is to be used in combination with bpf_skb_adjust_room()
	 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
	 * is passed as flags, for example.
	 */
	switch (level) {
	case BPF_CSUM_LEVEL_INC:
		__skb_incr_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_DEC:
		__skb_decr_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_RESET:
		__skb_reset_checksum_unnecessary(skb);
		break;
	case BPF_CSUM_LEVEL_QUERY:
		return skb->ip_summed == CHECKSUM_UNNECESSARY ?
		       skb->csum_level : -EACCES;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_csum_level_proto = {
	.func		= bpf_csum_level,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

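/* Program-side sketch (illustrative only; helper names and flags are the
 * documented UAPI ones, the surrounding logic is made up): a decap program
 * that pops an outer header with BPF_F_ADJ_ROOM_NO_CSUM_RESET would then
 * drop one CHECKSUM_UNNECESSARY level itself, so the inner checksum
 * validation state is preserved:
 *
 *	bpf_skb_adjust_room(skb, -outer_len, BPF_ADJ_ROOM_MAC,
 *			    BPF_F_ADJ_ROOM_NO_CSUM_RESET);
 *	bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC);
 */
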
2066 skb->csum_level : -EACCES;
2067 default:
2068 return -EINVAL;
2069 }
2070
2071 return 0;
2072 }
2073
2074 static const struct bpf_func_proto bpf_csum_level_proto = {
2075 .func = bpf_csum_level,
2076 .gpl_only = false,
2077 .ret_type = RET_INTEGER,
2078 .arg1_type = ARG_PTR_TO_CTX,
2079 .arg2_type = ARG_ANYTHING,
2080 };
2081
2082 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2083 {
2084 return dev_forward_skb_nomtu(dev, skb);
2085 }
2086
2087 static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2088 struct sk_buff *skb)
2089 {
2090 int ret = ____dev_forward_skb(dev, skb, false);
2091
2092 if (likely(!ret)) {
2093 skb->dev = dev;
2094 ret = netif_rx(skb);
2095 }
2096
2097 return ret;
2098 }
2099
2100 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2101 {
2102 int ret;
2103
2104 if (dev_xmit_recursion()) {
2105 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2106 kfree_skb(skb);
2107 return -ENETDOWN;
2108 }
2109
2110 skb->dev = dev;
2111 skb_clear_tstamp(skb);
2112
2113 dev_xmit_recursion_inc();
2114 ret = dev_queue_xmit(skb);
2115 dev_xmit_recursion_dec();
2116
2117 return ret;
2118 }
2119
2120 static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2121 u32 flags)
2122 {
2123 unsigned int mlen = skb_network_offset(skb);
2124
2125 if (mlen) {
2126 __skb_pull(skb, mlen);
2127
2128 /* At ingress, the mac header has already been pulled once.
2129 * At egress, skb_postpull_rcsum() has to be done in case the
2130 * skb originated from ingress (i.e. a forwarded skb) to
2131 * ensure that rcsum starts at the net header.
2132 */
2133 if (!skb_at_tc_ingress(skb))
2134 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2135 }
2136 skb_pop_mac_header(skb);
2137 skb_reset_mac_len(skb);
2138 return flags & BPF_F_INGRESS ?
2139 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2140 }
2141
2142 static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2143 u32 flags)
2144 {
2145 /* Verify that a link layer header is carried */
2146 if (unlikely(skb->mac_header >= skb->network_header)) {
2147 kfree_skb(skb);
2148 return -ERANGE;
2149 }
2150
2151 bpf_push_mac_rcsum(skb);
2152 return flags & BPF_F_INGRESS ?
2153 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); 2154 } 2155 2156 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, 2157 u32 flags) 2158 { 2159 if (dev_is_mac_header_xmit(dev)) 2160 return __bpf_redirect_common(skb, dev, flags); 2161 else 2162 return __bpf_redirect_no_mac(skb, dev, flags); 2163 } 2164 2165 #if IS_ENABLED(CONFIG_IPV6) 2166 static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, 2167 struct net_device *dev, struct bpf_nh_params *nh) 2168 { 2169 u32 hh_len = LL_RESERVED_SPACE(dev); 2170 const struct in6_addr *nexthop; 2171 struct dst_entry *dst = NULL; 2172 struct neighbour *neigh; 2173 2174 if (dev_xmit_recursion()) { 2175 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); 2176 goto out_drop; 2177 } 2178 2179 skb->dev = dev; 2180 skb_clear_tstamp(skb); 2181 2182 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 2183 skb = skb_expand_head(skb, hh_len); 2184 if (!skb) 2185 return -ENOMEM; 2186 } 2187 2188 rcu_read_lock_bh(); 2189 if (!nh) { 2190 dst = skb_dst(skb); 2191 nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), 2192 &ipv6_hdr(skb)->daddr); 2193 } else { 2194 nexthop = &nh->ipv6_nh; 2195 } 2196 neigh = ip_neigh_gw6(dev, nexthop); 2197 if (likely(!IS_ERR(neigh))) { 2198 int ret; 2199 2200 sock_confirm_neigh(skb, neigh); 2201 dev_xmit_recursion_inc(); 2202 ret = neigh_output(neigh, skb, false); 2203 dev_xmit_recursion_dec(); 2204 rcu_read_unlock_bh(); 2205 return ret; 2206 } 2207 rcu_read_unlock_bh(); 2208 if (dst) 2209 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 2210 out_drop: 2211 kfree_skb(skb); 2212 return -ENETDOWN; 2213 } 2214 2215 static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, 2216 struct bpf_nh_params *nh) 2217 { 2218 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 2219 struct net *net = dev_net(dev); 2220 int err, ret = NET_XMIT_DROP; 2221 2222 if (!nh) { 2223 struct dst_entry *dst; 2224 struct flowi6 fl6 = { 2225 .flowi6_flags = FLOWI_FLAG_ANYSRC, 2226 .flowi6_mark = skb->mark, 2227 .flowlabel = ip6_flowinfo(ip6h), 2228 .flowi6_oif = dev->ifindex, 2229 .flowi6_proto = ip6h->nexthdr, 2230 .daddr = ip6h->daddr, 2231 .saddr = ip6h->saddr, 2232 }; 2233 2234 dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); 2235 if (IS_ERR(dst)) 2236 goto out_drop; 2237 2238 skb_dst_set(skb, dst); 2239 } else if (nh->nh_family != AF_INET6) { 2240 goto out_drop; 2241 } 2242 2243 err = bpf_out_neigh_v6(net, skb, dev, nh); 2244 if (unlikely(net_xmit_eval(err))) 2245 dev->stats.tx_errors++; 2246 else 2247 ret = NET_XMIT_SUCCESS; 2248 goto out_xmit; 2249 out_drop: 2250 dev->stats.tx_errors++; 2251 kfree_skb(skb); 2252 out_xmit: 2253 return ret; 2254 } 2255 #else 2256 static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, 2257 struct bpf_nh_params *nh) 2258 { 2259 kfree_skb(skb); 2260 return NET_XMIT_DROP; 2261 } 2262 #endif /* CONFIG_IPV6 */ 2263 2264 #if IS_ENABLED(CONFIG_INET) 2265 static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, 2266 struct net_device *dev, struct bpf_nh_params *nh) 2267 { 2268 u32 hh_len = LL_RESERVED_SPACE(dev); 2269 struct neighbour *neigh; 2270 bool is_v6gw = false; 2271 2272 if (dev_xmit_recursion()) { 2273 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); 2274 goto out_drop; 2275 } 2276 2277 skb->dev = dev; 2278 skb_clear_tstamp(skb); 2279 2280 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 2281 skb = 
skb_expand_head(skb, hh_len); 2282 if (!skb) 2283 return -ENOMEM; 2284 } 2285 2286 rcu_read_lock_bh(); 2287 if (!nh) { 2288 struct dst_entry *dst = skb_dst(skb); 2289 struct rtable *rt = container_of(dst, struct rtable, dst); 2290 2291 neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); 2292 } else if (nh->nh_family == AF_INET6) { 2293 neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); 2294 is_v6gw = true; 2295 } else if (nh->nh_family == AF_INET) { 2296 neigh = ip_neigh_gw4(dev, nh->ipv4_nh); 2297 } else { 2298 rcu_read_unlock_bh(); 2299 goto out_drop; 2300 } 2301 2302 if (likely(!IS_ERR(neigh))) { 2303 int ret; 2304 2305 sock_confirm_neigh(skb, neigh); 2306 dev_xmit_recursion_inc(); 2307 ret = neigh_output(neigh, skb, is_v6gw); 2308 dev_xmit_recursion_dec(); 2309 rcu_read_unlock_bh(); 2310 return ret; 2311 } 2312 rcu_read_unlock_bh(); 2313 out_drop: 2314 kfree_skb(skb); 2315 return -ENETDOWN; 2316 } 2317 2318 static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, 2319 struct bpf_nh_params *nh) 2320 { 2321 const struct iphdr *ip4h = ip_hdr(skb); 2322 struct net *net = dev_net(dev); 2323 int err, ret = NET_XMIT_DROP; 2324 2325 if (!nh) { 2326 struct flowi4 fl4 = { 2327 .flowi4_flags = FLOWI_FLAG_ANYSRC, 2328 .flowi4_mark = skb->mark, 2329 .flowi4_tos = RT_TOS(ip4h->tos), 2330 .flowi4_oif = dev->ifindex, 2331 .flowi4_proto = ip4h->protocol, 2332 .daddr = ip4h->daddr, 2333 .saddr = ip4h->saddr, 2334 }; 2335 struct rtable *rt; 2336 2337 rt = ip_route_output_flow(net, &fl4, NULL); 2338 if (IS_ERR(rt)) 2339 goto out_drop; 2340 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { 2341 ip_rt_put(rt); 2342 goto out_drop; 2343 } 2344 2345 skb_dst_set(skb, &rt->dst); 2346 } 2347 2348 err = bpf_out_neigh_v4(net, skb, dev, nh); 2349 if (unlikely(net_xmit_eval(err))) 2350 dev->stats.tx_errors++; 2351 else 2352 ret = NET_XMIT_SUCCESS; 2353 goto out_xmit; 2354 out_drop: 2355 dev->stats.tx_errors++; 2356 kfree_skb(skb); 2357 out_xmit: 2358 return ret; 2359 } 2360 #else 2361 static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, 2362 struct bpf_nh_params *nh) 2363 { 2364 kfree_skb(skb); 2365 return NET_XMIT_DROP; 2366 } 2367 #endif /* CONFIG_INET */ 2368 2369 static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, 2370 struct bpf_nh_params *nh) 2371 { 2372 struct ethhdr *ethh = eth_hdr(skb); 2373 2374 if (unlikely(skb->mac_header >= skb->network_header)) 2375 goto out; 2376 bpf_push_mac_rcsum(skb); 2377 if (is_multicast_ether_addr(ethh->h_dest)) 2378 goto out; 2379 2380 skb_pull(skb, sizeof(*ethh)); 2381 skb_unset_mac_header(skb); 2382 skb_reset_network_header(skb); 2383 2384 if (skb->protocol == htons(ETH_P_IP)) 2385 return __bpf_redirect_neigh_v4(skb, dev, nh); 2386 else if (skb->protocol == htons(ETH_P_IPV6)) 2387 return __bpf_redirect_neigh_v6(skb, dev, nh); 2388 out: 2389 kfree_skb(skb); 2390 return -ENOTSUPP; 2391 } 2392 2393 /* Internal, non-exposed redirect flags. 
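 * They are kept out of the uapi flag range: BPF_F_INGRESS is bit 0, the
 * bits below start at bit 1, and helpers that take user-supplied flags
 * (bpf_redirect(), bpf_clone_redirect()) reject anything in
 * BPF_F_REDIRECT_INTERNAL, so these bits can only ever be set internally,
 * e.g. by bpf_redirect_peer() and bpf_redirect_neigh().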
*/ 2394 enum { 2395 BPF_F_NEIGH = (1ULL << 1), 2396 BPF_F_PEER = (1ULL << 2), 2397 BPF_F_NEXTHOP = (1ULL << 3), 2398 #define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) 2399 }; 2400 2401 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) 2402 { 2403 struct net_device *dev; 2404 struct sk_buff *clone; 2405 int ret; 2406 2407 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) 2408 return -EINVAL; 2409 2410 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); 2411 if (unlikely(!dev)) 2412 return -EINVAL; 2413 2414 clone = skb_clone(skb, GFP_ATOMIC); 2415 if (unlikely(!clone)) 2416 return -ENOMEM; 2417 2418 /* For direct write, we need to keep the invariant that the skbs 2419 * we're dealing with need to be uncloned. Should uncloning fail 2420 * here, we need to free the just generated clone to unclone once 2421 * again. 2422 */ 2423 ret = bpf_try_make_head_writable(skb); 2424 if (unlikely(ret)) { 2425 kfree_skb(clone); 2426 return -ENOMEM; 2427 } 2428 2429 return __bpf_redirect(clone, dev, flags); 2430 } 2431 2432 static const struct bpf_func_proto bpf_clone_redirect_proto = { 2433 .func = bpf_clone_redirect, 2434 .gpl_only = false, 2435 .ret_type = RET_INTEGER, 2436 .arg1_type = ARG_PTR_TO_CTX, 2437 .arg2_type = ARG_ANYTHING, 2438 .arg3_type = ARG_ANYTHING, 2439 }; 2440 2441 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); 2442 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); 2443 2444 int skb_do_redirect(struct sk_buff *skb) 2445 { 2446 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2447 struct net *net = dev_net(skb->dev); 2448 struct net_device *dev; 2449 u32 flags = ri->flags; 2450 2451 dev = dev_get_by_index_rcu(net, ri->tgt_index); 2452 ri->tgt_index = 0; 2453 ri->flags = 0; 2454 if (unlikely(!dev)) 2455 goto out_drop; 2456 if (flags & BPF_F_PEER) { 2457 const struct net_device_ops *ops = dev->netdev_ops; 2458 2459 if (unlikely(!ops->ndo_get_peer_dev || 2460 !skb_at_tc_ingress(skb))) 2461 goto out_drop; 2462 dev = ops->ndo_get_peer_dev(dev); 2463 if (unlikely(!dev || 2464 !(dev->flags & IFF_UP) || 2465 net_eq(net, dev_net(dev)))) 2466 goto out_drop; 2467 skb->dev = dev; 2468 return -EAGAIN; 2469 } 2470 return flags & BPF_F_NEIGH ? 2471 __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? 
2472 &ri->nh : NULL) : 2473 __bpf_redirect(skb, dev, flags); 2474 out_drop: 2475 kfree_skb(skb); 2476 return -EINVAL; 2477 } 2478 2479 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) 2480 { 2481 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2482 2483 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) 2484 return TC_ACT_SHOT; 2485 2486 ri->flags = flags; 2487 ri->tgt_index = ifindex; 2488 2489 return TC_ACT_REDIRECT; 2490 } 2491 2492 static const struct bpf_func_proto bpf_redirect_proto = { 2493 .func = bpf_redirect, 2494 .gpl_only = false, 2495 .ret_type = RET_INTEGER, 2496 .arg1_type = ARG_ANYTHING, 2497 .arg2_type = ARG_ANYTHING, 2498 }; 2499 2500 BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags) 2501 { 2502 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2503 2504 if (unlikely(flags)) 2505 return TC_ACT_SHOT; 2506 2507 ri->flags = BPF_F_PEER; 2508 ri->tgt_index = ifindex; 2509 2510 return TC_ACT_REDIRECT; 2511 } 2512 2513 static const struct bpf_func_proto bpf_redirect_peer_proto = { 2514 .func = bpf_redirect_peer, 2515 .gpl_only = false, 2516 .ret_type = RET_INTEGER, 2517 .arg1_type = ARG_ANYTHING, 2518 .arg2_type = ARG_ANYTHING, 2519 }; 2520 2521 BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, 2522 int, plen, u64, flags) 2523 { 2524 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2525 2526 if (unlikely((plen && plen < sizeof(*params)) || flags)) 2527 return TC_ACT_SHOT; 2528 2529 ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0); 2530 ri->tgt_index = ifindex; 2531 2532 BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); 2533 if (plen) 2534 memcpy(&ri->nh, params, sizeof(ri->nh)); 2535 2536 return TC_ACT_REDIRECT; 2537 } 2538 2539 static const struct bpf_func_proto bpf_redirect_neigh_proto = { 2540 .func = bpf_redirect_neigh, 2541 .gpl_only = false, 2542 .ret_type = RET_INTEGER, 2543 .arg1_type = ARG_ANYTHING, 2544 .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, 2545 .arg3_type = ARG_CONST_SIZE_OR_ZERO, 2546 .arg4_type = ARG_ANYTHING, 2547 }; 2548 2549 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) 2550 { 2551 msg->apply_bytes = bytes; 2552 return 0; 2553 } 2554 2555 static const struct bpf_func_proto bpf_msg_apply_bytes_proto = { 2556 .func = bpf_msg_apply_bytes, 2557 .gpl_only = false, 2558 .ret_type = RET_INTEGER, 2559 .arg1_type = ARG_PTR_TO_CTX, 2560 .arg2_type = ARG_ANYTHING, 2561 }; 2562 2563 BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) 2564 { 2565 msg->cork_bytes = bytes; 2566 return 0; 2567 } 2568 2569 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { 2570 .func = bpf_msg_cork_bytes, 2571 .gpl_only = false, 2572 .ret_type = RET_INTEGER, 2573 .arg1_type = ARG_PTR_TO_CTX, 2574 .arg2_type = ARG_ANYTHING, 2575 }; 2576 2577 BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, 2578 u32, end, u64, flags) 2579 { 2580 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start; 2581 u32 first_sge, last_sge, i, shift, bytes_sg_total; 2582 struct scatterlist *sge; 2583 u8 *raw, *to, *from; 2584 struct page *page; 2585 2586 if (unlikely(flags || end <= start)) 2587 return -EINVAL; 2588 2589 /* First find the starting scatterlist element */ 2590 i = msg->sg.start; 2591 do { 2592 offset += len; 2593 len = sk_msg_elem(msg, i)->length; 2594 if (start < offset + len) 2595 break; 2596 sk_msg_iter_var_next(i); 2597 } while (i != msg->sg.end); 2598 2599 if (unlikely(start >= 
offset + len)) 2600 return -EINVAL; 2601 2602 first_sge = i; 2603 /* The start may point into the sg element so we need to also 2604 * account for the headroom. 2605 */ 2606 bytes_sg_total = start - offset + bytes; 2607 if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len) 2608 goto out; 2609 2610 /* At this point we need to linearize multiple scatterlist 2611 * elements or a single shared page. Either way we need to 2612 * copy into a linear buffer exclusively owned by BPF. Then 2613 * place the buffer in the scatterlist and fixup the original 2614 * entries by removing the entries now in the linear buffer 2615 * and shifting the remaining entries. For now we do not try 2616 * to copy partial entries to avoid complexity of running out 2617 * of sg_entry slots. The downside is reading a single byte 2618 * will copy the entire sg entry. 2619 */ 2620 do { 2621 copy += sk_msg_elem(msg, i)->length; 2622 sk_msg_iter_var_next(i); 2623 if (bytes_sg_total <= copy) 2624 break; 2625 } while (i != msg->sg.end); 2626 last_sge = i; 2627 2628 if (unlikely(bytes_sg_total > copy)) 2629 return -EINVAL; 2630 2631 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2632 get_order(copy)); 2633 if (unlikely(!page)) 2634 return -ENOMEM; 2635 2636 raw = page_address(page); 2637 i = first_sge; 2638 do { 2639 sge = sk_msg_elem(msg, i); 2640 from = sg_virt(sge); 2641 len = sge->length; 2642 to = raw + poffset; 2643 2644 memcpy(to, from, len); 2645 poffset += len; 2646 sge->length = 0; 2647 put_page(sg_page(sge)); 2648 2649 sk_msg_iter_var_next(i); 2650 } while (i != last_sge); 2651 2652 sg_set_page(&msg->sg.data[first_sge], page, copy, 0); 2653 2654 /* To repair sg ring we need to shift entries. If we only 2655 * had a single entry though we can just replace it and 2656 * be done. Otherwise walk the ring and shift the entries. 2657 */ 2658 WARN_ON_ONCE(last_sge == first_sge); 2659 shift = last_sge > first_sge ? 2660 last_sge - first_sge - 1 : 2661 NR_MSG_FRAG_IDS - first_sge + last_sge - 1; 2662 if (!shift) 2663 goto out; 2664 2665 i = first_sge; 2666 sk_msg_iter_var_next(i); 2667 do { 2668 u32 move_from; 2669 2670 if (i + shift >= NR_MSG_FRAG_IDS) 2671 move_from = i + shift - NR_MSG_FRAG_IDS; 2672 else 2673 move_from = i + shift; 2674 if (move_from == msg->sg.end) 2675 break; 2676 2677 msg->sg.data[i] = msg->sg.data[move_from]; 2678 msg->sg.data[move_from].length = 0; 2679 msg->sg.data[move_from].page_link = 0; 2680 msg->sg.data[move_from].offset = 0; 2681 sk_msg_iter_var_next(i); 2682 } while (1); 2683 2684 msg->sg.end = msg->sg.end - shift > msg->sg.end ? 
2685 msg->sg.end - shift + NR_MSG_FRAG_IDS : 2686 msg->sg.end - shift; 2687 out: 2688 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; 2689 msg->data_end = msg->data + bytes; 2690 return 0; 2691 } 2692 2693 static const struct bpf_func_proto bpf_msg_pull_data_proto = { 2694 .func = bpf_msg_pull_data, 2695 .gpl_only = false, 2696 .ret_type = RET_INTEGER, 2697 .arg1_type = ARG_PTR_TO_CTX, 2698 .arg2_type = ARG_ANYTHING, 2699 .arg3_type = ARG_ANYTHING, 2700 .arg4_type = ARG_ANYTHING, 2701 }; 2702 2703 BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, 2704 u32, len, u64, flags) 2705 { 2706 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; 2707 u32 new, i = 0, l = 0, space, copy = 0, offset = 0; 2708 u8 *raw, *to, *from; 2709 struct page *page; 2710 2711 if (unlikely(flags)) 2712 return -EINVAL; 2713 2714 if (unlikely(len == 0)) 2715 return 0; 2716 2717 /* First find the starting scatterlist element */ 2718 i = msg->sg.start; 2719 do { 2720 offset += l; 2721 l = sk_msg_elem(msg, i)->length; 2722 2723 if (start < offset + l) 2724 break; 2725 sk_msg_iter_var_next(i); 2726 } while (i != msg->sg.end); 2727 2728 if (start >= offset + l) 2729 return -EINVAL; 2730 2731 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2732 2733 /* If no space available will fallback to copy, we need at 2734 * least one scatterlist elem available to push data into 2735 * when start aligns to the beginning of an element or two 2736 * when it falls inside an element. We handle the start equals 2737 * offset case because its the common case for inserting a 2738 * header. 2739 */ 2740 if (!space || (space == 1 && start != offset)) 2741 copy = msg->sg.data[i].length; 2742 2743 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2744 get_order(copy + len)); 2745 if (unlikely(!page)) 2746 return -ENOMEM; 2747 2748 if (copy) { 2749 int front, back; 2750 2751 raw = page_address(page); 2752 2753 psge = sk_msg_elem(msg, i); 2754 front = start - offset; 2755 back = psge->length - front; 2756 from = sg_virt(psge); 2757 2758 if (front) 2759 memcpy(raw, from, front); 2760 2761 if (back) { 2762 from += front; 2763 to = raw + front + len; 2764 2765 memcpy(to, from, back); 2766 } 2767 2768 put_page(sg_page(psge)); 2769 } else if (start - offset) { 2770 psge = sk_msg_elem(msg, i); 2771 rsge = sk_msg_elem_cpy(msg, i); 2772 2773 psge->length = start - offset; 2774 rsge.length -= psge->length; 2775 rsge.offset += start; 2776 2777 sk_msg_iter_var_next(i); 2778 sg_unmark_end(psge); 2779 sg_unmark_end(&rsge); 2780 sk_msg_iter_next(msg, end); 2781 } 2782 2783 /* Slot(s) to place newly allocated data */ 2784 new = i; 2785 2786 /* Shift one or two slots as needed */ 2787 if (!copy) { 2788 sge = sk_msg_elem_cpy(msg, i); 2789 2790 sk_msg_iter_var_next(i); 2791 sg_unmark_end(&sge); 2792 sk_msg_iter_next(msg, end); 2793 2794 nsge = sk_msg_elem_cpy(msg, i); 2795 if (rsge.length) { 2796 sk_msg_iter_var_next(i); 2797 nnsge = sk_msg_elem_cpy(msg, i); 2798 } 2799 2800 while (i != msg->sg.end) { 2801 msg->sg.data[i] = sge; 2802 sge = nsge; 2803 sk_msg_iter_var_next(i); 2804 if (rsge.length) { 2805 nsge = nnsge; 2806 nnsge = sk_msg_elem_cpy(msg, i); 2807 } else { 2808 nsge = sk_msg_elem_cpy(msg, i); 2809 } 2810 } 2811 } 2812 2813 /* Place newly allocated data buffer */ 2814 sk_mem_charge(msg->sk, len); 2815 msg->sg.size += len; 2816 __clear_bit(new, msg->sg.copy); 2817 sg_set_page(&msg->sg.data[new], page, len + copy, 0); 2818 if (rsge.length) { 2819 get_page(sg_page(&rsge)); 2820 sk_msg_iter_var_next(new); 2821 
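/* Park the tail remainder of the split element right behind the new buffer. */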
msg->sg.data[new] = rsge; 2822 } 2823 2824 sk_msg_compute_data_pointers(msg); 2825 return 0; 2826 } 2827 2828 static const struct bpf_func_proto bpf_msg_push_data_proto = { 2829 .func = bpf_msg_push_data, 2830 .gpl_only = false, 2831 .ret_type = RET_INTEGER, 2832 .arg1_type = ARG_PTR_TO_CTX, 2833 .arg2_type = ARG_ANYTHING, 2834 .arg3_type = ARG_ANYTHING, 2835 .arg4_type = ARG_ANYTHING, 2836 }; 2837 2838 static void sk_msg_shift_left(struct sk_msg *msg, int i) 2839 { 2840 int prev; 2841 2842 do { 2843 prev = i; 2844 sk_msg_iter_var_next(i); 2845 msg->sg.data[prev] = msg->sg.data[i]; 2846 } while (i != msg->sg.end); 2847 2848 sk_msg_iter_prev(msg, end); 2849 } 2850 2851 static void sk_msg_shift_right(struct sk_msg *msg, int i) 2852 { 2853 struct scatterlist tmp, sge; 2854 2855 sk_msg_iter_next(msg, end); 2856 sge = sk_msg_elem_cpy(msg, i); 2857 sk_msg_iter_var_next(i); 2858 tmp = sk_msg_elem_cpy(msg, i); 2859 2860 while (i != msg->sg.end) { 2861 msg->sg.data[i] = sge; 2862 sk_msg_iter_var_next(i); 2863 sge = tmp; 2864 tmp = sk_msg_elem_cpy(msg, i); 2865 } 2866 } 2867 2868 BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, 2869 u32, len, u64, flags) 2870 { 2871 u32 i = 0, l = 0, space, offset = 0; 2872 u64 last = start + len; 2873 int pop; 2874 2875 if (unlikely(flags)) 2876 return -EINVAL; 2877 2878 /* First find the starting scatterlist element */ 2879 i = msg->sg.start; 2880 do { 2881 offset += l; 2882 l = sk_msg_elem(msg, i)->length; 2883 2884 if (start < offset + l) 2885 break; 2886 sk_msg_iter_var_next(i); 2887 } while (i != msg->sg.end); 2888 2889 /* Bounds checks: start and pop must be inside message */ 2890 if (start >= offset + l || last >= msg->sg.size) 2891 return -EINVAL; 2892 2893 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2894 2895 pop = len; 2896 /* --------------| offset 2897 * -| start |-------- len -------| 2898 * 2899 * |----- a ----|-------- pop -------|----- b ----| 2900 * |______________________________________________| length 2901 * 2902 * 2903 * a: region at front of scatter element to save 2904 * b: region at back of scatter element to save when length > A + pop 2905 * pop: region to pop from element, same as input 'pop' here will be 2906 * decremented below per iteration. 2907 * 2908 * Two top-level cases to handle when start != offset, first B is non 2909 * zero and second B is zero corresponding to when a pop includes more 2910 * than one element. 2911 * 2912 * Then if B is non-zero AND there is no space allocate space and 2913 * compact A, B regions into page. If there is space shift ring to 2914 * the rigth free'ing the next element in ring to place B, leaving 2915 * A untouched except to reduce length. 
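 *
 * A worked example with made-up numbers: a single 100 byte element at
 * msg offset 0 and a pop request of start = 20, len = 30 gives a = 20
 * (bytes 0-19 stay put), pop = 30 (bytes 20-49 are removed) and b = 50
 * (bytes 50-99 are kept). With a spare slot the element is split and b
 * moves into the next slot; without one, a fresh page of a + b = 70
 * bytes is allocated and both saved regions are copied into it.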
2916 */ 2917 if (start != offset) { 2918 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); 2919 int a = start; 2920 int b = sge->length - pop - a; 2921 2922 sk_msg_iter_var_next(i); 2923 2924 if (pop < sge->length - a) { 2925 if (space) { 2926 sge->length = a; 2927 sk_msg_shift_right(msg, i); 2928 nsge = sk_msg_elem(msg, i); 2929 get_page(sg_page(sge)); 2930 sg_set_page(nsge, 2931 sg_page(sge), 2932 b, sge->offset + pop + a); 2933 } else { 2934 struct page *page, *orig; 2935 u8 *to, *from; 2936 2937 page = alloc_pages(__GFP_NOWARN | 2938 __GFP_COMP | GFP_ATOMIC, 2939 get_order(a + b)); 2940 if (unlikely(!page)) 2941 return -ENOMEM; 2942 2943 sge->length = a; 2944 orig = sg_page(sge); 2945 from = sg_virt(sge); 2946 to = page_address(page); 2947 memcpy(to, from, a); 2948 memcpy(to + a, from + a + pop, b); 2949 sg_set_page(sge, page, a + b, 0); 2950 put_page(orig); 2951 } 2952 pop = 0; 2953 } else if (pop >= sge->length - a) { 2954 pop -= (sge->length - a); 2955 sge->length = a; 2956 } 2957 } 2958 2959 /* From above the current layout _must_ be as follows, 2960 * 2961 * -| offset 2962 * -| start 2963 * 2964 * |---- pop ---|---------------- b ------------| 2965 * |____________________________________________| length 2966 * 2967 * Offset and start of the current msg elem are equal because in the 2968 * previous case we handled offset != start and either consumed the 2969 * entire element and advanced to the next element OR pop == 0. 2970 * 2971 * Two cases to handle here are first pop is less than the length 2972 * leaving some remainder b above. Simply adjust the element's layout 2973 * in this case. Or pop >= length of the element so that b = 0. In this 2974 * case advance to next element decrementing pop. 2975 */ 2976 while (pop) { 2977 struct scatterlist *sge = sk_msg_elem(msg, i); 2978 2979 if (pop < sge->length) { 2980 sge->length -= pop; 2981 sge->offset += pop; 2982 pop = 0; 2983 } else { 2984 pop -= sge->length; 2985 sk_msg_shift_left(msg, i); 2986 } 2987 sk_msg_iter_var_next(i); 2988 } 2989 2990 sk_mem_uncharge(msg->sk, len - pop); 2991 msg->sg.size -= (len - pop); 2992 sk_msg_compute_data_pointers(msg); 2993 return 0; 2994 } 2995 2996 static const struct bpf_func_proto bpf_msg_pop_data_proto = { 2997 .func = bpf_msg_pop_data, 2998 .gpl_only = false, 2999 .ret_type = RET_INTEGER, 3000 .arg1_type = ARG_PTR_TO_CTX, 3001 .arg2_type = ARG_ANYTHING, 3002 .arg3_type = ARG_ANYTHING, 3003 .arg4_type = ARG_ANYTHING, 3004 }; 3005 3006 #ifdef CONFIG_CGROUP_NET_CLASSID 3007 BPF_CALL_0(bpf_get_cgroup_classid_curr) 3008 { 3009 return __task_get_classid(current); 3010 } 3011 3012 static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { 3013 .func = bpf_get_cgroup_classid_curr, 3014 .gpl_only = false, 3015 .ret_type = RET_INTEGER, 3016 }; 3017 3018 BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb) 3019 { 3020 struct sock *sk = skb_to_full_sk(skb); 3021 3022 if (!sk || !sk_fullsock(sk)) 3023 return 0; 3024 3025 return sock_cgroup_classid(&sk->sk_cgrp_data); 3026 } 3027 3028 static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = { 3029 .func = bpf_skb_cgroup_classid, 3030 .gpl_only = false, 3031 .ret_type = RET_INTEGER, 3032 .arg1_type = ARG_PTR_TO_CTX, 3033 }; 3034 #endif 3035 3036 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 3037 { 3038 return task_get_classid(skb); 3039 } 3040 3041 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { 3042 .func = bpf_get_cgroup_classid, 3043 .gpl_only = false, 3044 .ret_type = 
RET_INTEGER, 3045 .arg1_type = ARG_PTR_TO_CTX, 3046 }; 3047 3048 BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) 3049 { 3050 return dst_tclassid(skb); 3051 } 3052 3053 static const struct bpf_func_proto bpf_get_route_realm_proto = { 3054 .func = bpf_get_route_realm, 3055 .gpl_only = false, 3056 .ret_type = RET_INTEGER, 3057 .arg1_type = ARG_PTR_TO_CTX, 3058 }; 3059 3060 BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) 3061 { 3062 /* If skb_clear_hash() was called due to mangling, we can 3063 * trigger SW recalculation here. Later access to hash 3064 * can then use the inline skb->hash via context directly 3065 * instead of calling this helper again. 3066 */ 3067 return skb_get_hash(skb); 3068 } 3069 3070 static const struct bpf_func_proto bpf_get_hash_recalc_proto = { 3071 .func = bpf_get_hash_recalc, 3072 .gpl_only = false, 3073 .ret_type = RET_INTEGER, 3074 .arg1_type = ARG_PTR_TO_CTX, 3075 }; 3076 3077 BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) 3078 { 3079 /* After all direct packet write, this can be used once for 3080 * triggering a lazy recalc on next skb_get_hash() invocation. 3081 */ 3082 skb_clear_hash(skb); 3083 return 0; 3084 } 3085 3086 static const struct bpf_func_proto bpf_set_hash_invalid_proto = { 3087 .func = bpf_set_hash_invalid, 3088 .gpl_only = false, 3089 .ret_type = RET_INTEGER, 3090 .arg1_type = ARG_PTR_TO_CTX, 3091 }; 3092 3093 BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) 3094 { 3095 /* Set user specified hash as L4(+), so that it gets returned 3096 * on skb_get_hash() call unless BPF prog later on triggers a 3097 * skb_clear_hash(). 3098 */ 3099 __skb_set_sw_hash(skb, hash, true); 3100 return 0; 3101 } 3102 3103 static const struct bpf_func_proto bpf_set_hash_proto = { 3104 .func = bpf_set_hash, 3105 .gpl_only = false, 3106 .ret_type = RET_INTEGER, 3107 .arg1_type = ARG_PTR_TO_CTX, 3108 .arg2_type = ARG_ANYTHING, 3109 }; 3110 3111 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, 3112 u16, vlan_tci) 3113 { 3114 int ret; 3115 3116 if (unlikely(vlan_proto != htons(ETH_P_8021Q) && 3117 vlan_proto != htons(ETH_P_8021AD))) 3118 vlan_proto = htons(ETH_P_8021Q); 3119 3120 bpf_push_mac_rcsum(skb); 3121 ret = skb_vlan_push(skb, vlan_proto, vlan_tci); 3122 bpf_pull_mac_rcsum(skb); 3123 3124 bpf_compute_data_pointers(skb); 3125 return ret; 3126 } 3127 3128 static const struct bpf_func_proto bpf_skb_vlan_push_proto = { 3129 .func = bpf_skb_vlan_push, 3130 .gpl_only = false, 3131 .ret_type = RET_INTEGER, 3132 .arg1_type = ARG_PTR_TO_CTX, 3133 .arg2_type = ARG_ANYTHING, 3134 .arg3_type = ARG_ANYTHING, 3135 }; 3136 3137 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) 3138 { 3139 int ret; 3140 3141 bpf_push_mac_rcsum(skb); 3142 ret = skb_vlan_pop(skb); 3143 bpf_pull_mac_rcsum(skb); 3144 3145 bpf_compute_data_pointers(skb); 3146 return ret; 3147 } 3148 3149 static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { 3150 .func = bpf_skb_vlan_pop, 3151 .gpl_only = false, 3152 .ret_type = RET_INTEGER, 3153 .arg1_type = ARG_PTR_TO_CTX, 3154 }; 3155 3156 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) 3157 { 3158 /* Caller already did skb_cow() with len as headroom, 3159 * so no need to do it here. 
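 * The net effect is to open a zeroed gap of 'len' bytes at offset 'off'
 * from skb->data while keeping the first 'off' bytes (typically the mac
 * header) at the front of the buffer.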
3160 */ 3161 skb_push(skb, len); 3162 memmove(skb->data, skb->data + len, off); 3163 memset(skb->data + off, 0, len); 3164 3165 /* No skb_postpush_rcsum(skb, skb->data + off, len) 3166 * needed here as it does not change the skb->csum 3167 * result for checksum complete when summing over 3168 * zeroed blocks. 3169 */ 3170 return 0; 3171 } 3172 3173 static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) 3174 { 3175 /* skb_ensure_writable() is not needed here, as we're 3176 * already working on an uncloned skb. 3177 */ 3178 if (unlikely(!pskb_may_pull(skb, off + len))) 3179 return -ENOMEM; 3180 3181 skb_postpull_rcsum(skb, skb->data + off, len); 3182 memmove(skb->data + len, skb->data, off); 3183 __skb_pull(skb, len); 3184 3185 return 0; 3186 } 3187 3188 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) 3189 { 3190 bool trans_same = skb->transport_header == skb->network_header; 3191 int ret; 3192 3193 /* There's no need for __skb_push()/__skb_pull() pair to 3194 * get to the start of the mac header as we're guaranteed 3195 * to always start from here under eBPF. 3196 */ 3197 ret = bpf_skb_generic_push(skb, off, len); 3198 if (likely(!ret)) { 3199 skb->mac_header -= len; 3200 skb->network_header -= len; 3201 if (trans_same) 3202 skb->transport_header = skb->network_header; 3203 } 3204 3205 return ret; 3206 } 3207 3208 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) 3209 { 3210 bool trans_same = skb->transport_header == skb->network_header; 3211 int ret; 3212 3213 /* Same here, __skb_push()/__skb_pull() pair not needed. */ 3214 ret = bpf_skb_generic_pop(skb, off, len); 3215 if (likely(!ret)) { 3216 skb->mac_header += len; 3217 skb->network_header += len; 3218 if (trans_same) 3219 skb->transport_header = skb->network_header; 3220 } 3221 3222 return ret; 3223 } 3224 3225 static int bpf_skb_proto_4_to_6(struct sk_buff *skb) 3226 { 3227 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 3228 u32 off = skb_mac_header_len(skb); 3229 int ret; 3230 3231 ret = skb_cow(skb, len_diff); 3232 if (unlikely(ret < 0)) 3233 return ret; 3234 3235 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 3236 if (unlikely(ret < 0)) 3237 return ret; 3238 3239 if (skb_is_gso(skb)) { 3240 struct skb_shared_info *shinfo = skb_shinfo(skb); 3241 3242 /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */ 3243 if (shinfo->gso_type & SKB_GSO_TCPV4) { 3244 shinfo->gso_type &= ~SKB_GSO_TCPV4; 3245 shinfo->gso_type |= SKB_GSO_TCPV6; 3246 } 3247 } 3248 3249 skb->protocol = htons(ETH_P_IPV6); 3250 skb_clear_hash(skb); 3251 3252 return 0; 3253 } 3254 3255 static int bpf_skb_proto_6_to_4(struct sk_buff *skb) 3256 { 3257 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 3258 u32 off = skb_mac_header_len(skb); 3259 int ret; 3260 3261 ret = skb_unclone(skb, GFP_ATOMIC); 3262 if (unlikely(ret < 0)) 3263 return ret; 3264 3265 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 3266 if (unlikely(ret < 0)) 3267 return ret; 3268 3269 if (skb_is_gso(skb)) { 3270 struct skb_shared_info *shinfo = skb_shinfo(skb); 3271 3272 /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. 
*/ 3273 if (shinfo->gso_type & SKB_GSO_TCPV6) { 3274 shinfo->gso_type &= ~SKB_GSO_TCPV6; 3275 shinfo->gso_type |= SKB_GSO_TCPV4; 3276 } 3277 } 3278 3279 skb->protocol = htons(ETH_P_IP); 3280 skb_clear_hash(skb); 3281 3282 return 0; 3283 } 3284 3285 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) 3286 { 3287 __be16 from_proto = skb->protocol; 3288 3289 if (from_proto == htons(ETH_P_IP) && 3290 to_proto == htons(ETH_P_IPV6)) 3291 return bpf_skb_proto_4_to_6(skb); 3292 3293 if (from_proto == htons(ETH_P_IPV6) && 3294 to_proto == htons(ETH_P_IP)) 3295 return bpf_skb_proto_6_to_4(skb); 3296 3297 return -ENOTSUPP; 3298 } 3299 3300 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, 3301 u64, flags) 3302 { 3303 int ret; 3304 3305 if (unlikely(flags)) 3306 return -EINVAL; 3307 3308 /* General idea is that this helper does the basic groundwork 3309 * needed for changing the protocol, and eBPF program fills the 3310 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() 3311 * and other helpers, rather than passing a raw buffer here. 3312 * 3313 * The rationale is to keep this minimal and without a need to 3314 * deal with raw packet data. F.e. even if we would pass buffers 3315 * here, the program still needs to call the bpf_lX_csum_replace() 3316 * helpers anyway. Plus, this way we keep also separation of 3317 * concerns, since f.e. bpf_skb_store_bytes() should only take 3318 * care of stores. 3319 * 3320 * Currently, additional options and extension header space are 3321 * not supported, but flags register is reserved so we can adapt 3322 * that. For offloads, we mark packet as dodgy, so that headers 3323 * need to be verified first. 3324 */ 3325 ret = bpf_skb_proto_xlat(skb, proto); 3326 bpf_compute_data_pointers(skb); 3327 return ret; 3328 } 3329 3330 static const struct bpf_func_proto bpf_skb_change_proto_proto = { 3331 .func = bpf_skb_change_proto, 3332 .gpl_only = false, 3333 .ret_type = RET_INTEGER, 3334 .arg1_type = ARG_PTR_TO_CTX, 3335 .arg2_type = ARG_ANYTHING, 3336 .arg3_type = ARG_ANYTHING, 3337 }; 3338 3339 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) 3340 { 3341 /* We only allow a restricted subset to be changed for now. 
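 * skb_pkt_type_ok() caps both the current and the requested value at
 * PACKET_OTHERHOST, so e.g. PACKET_OUTGOING or PACKET_LOOPBACK cannot
 * be set from here.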
*/ 3342 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || 3343 !skb_pkt_type_ok(pkt_type))) 3344 return -EINVAL; 3345 3346 skb->pkt_type = pkt_type; 3347 return 0; 3348 } 3349 3350 static const struct bpf_func_proto bpf_skb_change_type_proto = { 3351 .func = bpf_skb_change_type, 3352 .gpl_only = false, 3353 .ret_type = RET_INTEGER, 3354 .arg1_type = ARG_PTR_TO_CTX, 3355 .arg2_type = ARG_ANYTHING, 3356 }; 3357 3358 static u32 bpf_skb_net_base_len(const struct sk_buff *skb) 3359 { 3360 switch (skb->protocol) { 3361 case htons(ETH_P_IP): 3362 return sizeof(struct iphdr); 3363 case htons(ETH_P_IPV6): 3364 return sizeof(struct ipv6hdr); 3365 default: 3366 return ~0U; 3367 } 3368 } 3369 3370 #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ 3371 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) 3372 3373 #define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ 3374 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ 3375 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ 3376 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ 3377 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ 3378 BPF_F_ADJ_ROOM_ENCAP_L2( \ 3379 BPF_ADJ_ROOM_ENCAP_L2_MASK)) 3380 3381 static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, 3382 u64 flags) 3383 { 3384 u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT; 3385 bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; 3386 u16 mac_len = 0, inner_net = 0, inner_trans = 0; 3387 unsigned int gso_type = SKB_GSO_DODGY; 3388 int ret; 3389 3390 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { 3391 /* udp gso_size delineates datagrams, only allow if fixed */ 3392 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || 3393 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) 3394 return -ENOTSUPP; 3395 } 3396 3397 ret = skb_cow_head(skb, len_diff); 3398 if (unlikely(ret < 0)) 3399 return ret; 3400 3401 if (encap) { 3402 if (skb->protocol != htons(ETH_P_IP) && 3403 skb->protocol != htons(ETH_P_IPV6)) 3404 return -ENOTSUPP; 3405 3406 if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 && 3407 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) 3408 return -EINVAL; 3409 3410 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE && 3411 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) 3412 return -EINVAL; 3413 3414 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && 3415 inner_mac_len < ETH_HLEN) 3416 return -EINVAL; 3417 3418 if (skb->encapsulation) 3419 return -EALREADY; 3420 3421 mac_len = skb->network_header - skb->mac_header; 3422 inner_net = skb->network_header; 3423 if (inner_mac_len > len_diff) 3424 return -EINVAL; 3425 inner_trans = skb->transport_header; 3426 } 3427 3428 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 3429 if (unlikely(ret < 0)) 3430 return ret; 3431 3432 if (encap) { 3433 skb->inner_mac_header = inner_net - inner_mac_len; 3434 skb->inner_network_header = inner_net; 3435 skb->inner_transport_header = inner_trans; 3436 3437 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) 3438 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 3439 else 3440 skb_set_inner_protocol(skb, skb->protocol); 3441 3442 skb->encapsulation = 1; 3443 skb_set_network_header(skb, mac_len); 3444 3445 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) 3446 gso_type |= SKB_GSO_UDP_TUNNEL; 3447 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE) 3448 gso_type |= SKB_GSO_GRE; 3449 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) 3450 gso_type |= SKB_GSO_IPXIP6; 3451 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) 3452 gso_type |= SKB_GSO_IPXIP4; 3453 3454 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE || 3455 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) { 3456 int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ? 
3457 sizeof(struct ipv6hdr) : 3458 sizeof(struct iphdr); 3459 3460 skb_set_transport_header(skb, mac_len + nh_len); 3461 } 3462 3463 /* Match skb->protocol to new outer l3 protocol */ 3464 if (skb->protocol == htons(ETH_P_IP) && 3465 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) 3466 skb->protocol = htons(ETH_P_IPV6); 3467 else if (skb->protocol == htons(ETH_P_IPV6) && 3468 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) 3469 skb->protocol = htons(ETH_P_IP); 3470 } 3471 3472 if (skb_is_gso(skb)) { 3473 struct skb_shared_info *shinfo = skb_shinfo(skb); 3474 3475 /* Due to header grow, MSS needs to be downgraded. */ 3476 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) 3477 skb_decrease_gso_size(shinfo, len_diff); 3478 3479 /* Header must be checked, and gso_segs recomputed. */ 3480 shinfo->gso_type |= gso_type; 3481 shinfo->gso_segs = 0; 3482 } 3483 3484 return 0; 3485 } 3486 3487 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, 3488 u64 flags) 3489 { 3490 int ret; 3491 3492 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | 3493 BPF_F_ADJ_ROOM_NO_CSUM_RESET))) 3494 return -EINVAL; 3495 3496 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { 3497 /* udp gso_size delineates datagrams, only allow if fixed */ 3498 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || 3499 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) 3500 return -ENOTSUPP; 3501 } 3502 3503 ret = skb_unclone(skb, GFP_ATOMIC); 3504 if (unlikely(ret < 0)) 3505 return ret; 3506 3507 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 3508 if (unlikely(ret < 0)) 3509 return ret; 3510 3511 if (skb_is_gso(skb)) { 3512 struct skb_shared_info *shinfo = skb_shinfo(skb); 3513 3514 /* Due to header shrink, MSS can be upgraded. */ 3515 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) 3516 skb_increase_gso_size(shinfo, len_diff); 3517 3518 /* Header must be checked, and gso_segs recomputed. 
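 * SKB_GSO_DODGY makes the stack re-validate the headers before
 * segmenting, and gso_segs = 0 forces the segment count to be
 * recomputed from gso_size.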
*/ 3519 shinfo->gso_type |= SKB_GSO_DODGY; 3520 shinfo->gso_segs = 0; 3521 } 3522 3523 return 0; 3524 } 3525 3526 #define BPF_SKB_MAX_LEN SKB_MAX_ALLOC 3527 3528 BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, 3529 u32, mode, u64, flags) 3530 { 3531 u32 len_diff_abs = abs(len_diff); 3532 bool shrink = len_diff < 0; 3533 int ret = 0; 3534 3535 if (unlikely(flags || mode)) 3536 return -EINVAL; 3537 if (unlikely(len_diff_abs > 0xfffU)) 3538 return -EFAULT; 3539 3540 if (!shrink) { 3541 ret = skb_cow(skb, len_diff); 3542 if (unlikely(ret < 0)) 3543 return ret; 3544 __skb_push(skb, len_diff_abs); 3545 memset(skb->data, 0, len_diff_abs); 3546 } else { 3547 if (unlikely(!pskb_may_pull(skb, len_diff_abs))) 3548 return -ENOMEM; 3549 __skb_pull(skb, len_diff_abs); 3550 } 3551 if (tls_sw_has_ctx_rx(skb->sk)) { 3552 struct strp_msg *rxm = strp_msg(skb); 3553 3554 rxm->full_len += len_diff; 3555 } 3556 return ret; 3557 } 3558 3559 static const struct bpf_func_proto sk_skb_adjust_room_proto = { 3560 .func = sk_skb_adjust_room, 3561 .gpl_only = false, 3562 .ret_type = RET_INTEGER, 3563 .arg1_type = ARG_PTR_TO_CTX, 3564 .arg2_type = ARG_ANYTHING, 3565 .arg3_type = ARG_ANYTHING, 3566 .arg4_type = ARG_ANYTHING, 3567 }; 3568 3569 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, 3570 u32, mode, u64, flags) 3571 { 3572 u32 len_cur, len_diff_abs = abs(len_diff); 3573 u32 len_min = bpf_skb_net_base_len(skb); 3574 u32 len_max = BPF_SKB_MAX_LEN; 3575 __be16 proto = skb->protocol; 3576 bool shrink = len_diff < 0; 3577 u32 off; 3578 int ret; 3579 3580 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK | 3581 BPF_F_ADJ_ROOM_NO_CSUM_RESET))) 3582 return -EINVAL; 3583 if (unlikely(len_diff_abs > 0xfffU)) 3584 return -EFAULT; 3585 if (unlikely(proto != htons(ETH_P_IP) && 3586 proto != htons(ETH_P_IPV6))) 3587 return -ENOTSUPP; 3588 3589 off = skb_mac_header_len(skb); 3590 switch (mode) { 3591 case BPF_ADJ_ROOM_NET: 3592 off += bpf_skb_net_base_len(skb); 3593 break; 3594 case BPF_ADJ_ROOM_MAC: 3595 break; 3596 default: 3597 return -ENOTSUPP; 3598 } 3599 3600 len_cur = skb->len - skb_network_offset(skb); 3601 if ((shrink && (len_diff_abs >= len_cur || 3602 len_cur - len_diff_abs < len_min)) || 3603 (!shrink && (skb->len + len_diff_abs > len_max && 3604 !skb_is_gso(skb)))) 3605 return -ENOTSUPP; 3606 3607 ret = shrink ? 
bpf_skb_net_shrink(skb, off, len_diff_abs, flags) : 3608 bpf_skb_net_grow(skb, off, len_diff_abs, flags); 3609 if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET)) 3610 __skb_reset_checksum_unnecessary(skb); 3611 3612 bpf_compute_data_pointers(skb); 3613 return ret; 3614 } 3615 3616 static const struct bpf_func_proto bpf_skb_adjust_room_proto = { 3617 .func = bpf_skb_adjust_room, 3618 .gpl_only = false, 3619 .ret_type = RET_INTEGER, 3620 .arg1_type = ARG_PTR_TO_CTX, 3621 .arg2_type = ARG_ANYTHING, 3622 .arg3_type = ARG_ANYTHING, 3623 .arg4_type = ARG_ANYTHING, 3624 }; 3625 3626 static u32 __bpf_skb_min_len(const struct sk_buff *skb) 3627 { 3628 u32 min_len = skb_network_offset(skb); 3629 3630 if (skb_transport_header_was_set(skb)) 3631 min_len = skb_transport_offset(skb); 3632 if (skb->ip_summed == CHECKSUM_PARTIAL) 3633 min_len = skb_checksum_start_offset(skb) + 3634 skb->csum_offset + sizeof(__sum16); 3635 return min_len; 3636 } 3637 3638 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) 3639 { 3640 unsigned int old_len = skb->len; 3641 int ret; 3642 3643 ret = __skb_grow_rcsum(skb, new_len); 3644 if (!ret) 3645 memset(skb->data + old_len, 0, new_len - old_len); 3646 return ret; 3647 } 3648 3649 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) 3650 { 3651 return __skb_trim_rcsum(skb, new_len); 3652 } 3653 3654 static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, 3655 u64 flags) 3656 { 3657 u32 max_len = BPF_SKB_MAX_LEN; 3658 u32 min_len = __bpf_skb_min_len(skb); 3659 int ret; 3660 3661 if (unlikely(flags || new_len > max_len || new_len < min_len)) 3662 return -EINVAL; 3663 if (skb->encapsulation) 3664 return -ENOTSUPP; 3665 3666 /* The basic idea of this helper is that it's performing the 3667 * needed work to either grow or trim an skb, and eBPF program 3668 * rewrites the rest via helpers like bpf_skb_store_bytes(), 3669 * bpf_lX_csum_replace() and others rather than passing a raw 3670 * buffer here. This one is a slow path helper and intended 3671 * for replies with control messages. 3672 * 3673 * Like in bpf_skb_change_proto(), we want to keep this rather 3674 * minimal and without protocol specifics so that we are able 3675 * to separate concerns as in bpf_skb_store_bytes() should only 3676 * be the one responsible for writing buffers. 3677 * 3678 * It's really expected to be a slow path operation here for 3679 * control message replies, so we're implicitly linearizing, 3680 * uncloning and drop offloads from the skb by this. 
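 *
 * A rough sketch of the intended use from a tc BPF program (the sizes
 * below are illustrative, not taken from a real program):
 *
 *	if (bpf_skb_change_tail(skb, 64, 0))
 *		return TC_ACT_SHOT;
 *	// rebuild the payload via bpf_skb_store_bytes() and fix the
 *	// checksums with bpf_l4_csum_replace() before returning TC_ACT_OK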
3681 */ 3682 ret = __bpf_try_make_writable(skb, skb->len); 3683 if (!ret) { 3684 if (new_len > skb->len) 3685 ret = bpf_skb_grow_rcsum(skb, new_len); 3686 else if (new_len < skb->len) 3687 ret = bpf_skb_trim_rcsum(skb, new_len); 3688 if (!ret && skb_is_gso(skb)) 3689 skb_gso_reset(skb); 3690 } 3691 return ret; 3692 } 3693 3694 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, 3695 u64, flags) 3696 { 3697 int ret = __bpf_skb_change_tail(skb, new_len, flags); 3698 3699 bpf_compute_data_pointers(skb); 3700 return ret; 3701 } 3702 3703 static const struct bpf_func_proto bpf_skb_change_tail_proto = { 3704 .func = bpf_skb_change_tail, 3705 .gpl_only = false, 3706 .ret_type = RET_INTEGER, 3707 .arg1_type = ARG_PTR_TO_CTX, 3708 .arg2_type = ARG_ANYTHING, 3709 .arg3_type = ARG_ANYTHING, 3710 }; 3711 3712 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, 3713 u64, flags) 3714 { 3715 return __bpf_skb_change_tail(skb, new_len, flags); 3716 } 3717 3718 static const struct bpf_func_proto sk_skb_change_tail_proto = { 3719 .func = sk_skb_change_tail, 3720 .gpl_only = false, 3721 .ret_type = RET_INTEGER, 3722 .arg1_type = ARG_PTR_TO_CTX, 3723 .arg2_type = ARG_ANYTHING, 3724 .arg3_type = ARG_ANYTHING, 3725 }; 3726 3727 static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, 3728 u64 flags) 3729 { 3730 u32 max_len = BPF_SKB_MAX_LEN; 3731 u32 new_len = skb->len + head_room; 3732 int ret; 3733 3734 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || 3735 new_len < skb->len)) 3736 return -EINVAL; 3737 3738 ret = skb_cow(skb, head_room); 3739 if (likely(!ret)) { 3740 /* Idea for this helper is that we currently only 3741 * allow to expand on mac header. This means that 3742 * skb->protocol network header, etc, stay as is. 3743 * Compared to bpf_skb_change_tail(), we're more 3744 * flexible due to not needing to linearize or 3745 * reset GSO. Intention for this helper is to be 3746 * used by an L3 skb that needs to push mac header 3747 * for redirection into L2 device. 
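 *
 * For example (illustrative only), a program on an L3 device might do
 * bpf_skb_change_head(skb, ETH_HLEN, 0), fill in the Ethernet header
 * with bpf_skb_store_bytes() and then bpf_redirect() the result to an
 * L2 device.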
3748 */ 3749 __skb_push(skb, head_room); 3750 memset(skb->data, 0, head_room); 3751 skb_reset_mac_header(skb); 3752 skb_reset_mac_len(skb); 3753 } 3754 3755 return ret; 3756 } 3757 3758 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, 3759 u64, flags) 3760 { 3761 int ret = __bpf_skb_change_head(skb, head_room, flags); 3762 3763 bpf_compute_data_pointers(skb); 3764 return ret; 3765 } 3766 3767 static const struct bpf_func_proto bpf_skb_change_head_proto = { 3768 .func = bpf_skb_change_head, 3769 .gpl_only = false, 3770 .ret_type = RET_INTEGER, 3771 .arg1_type = ARG_PTR_TO_CTX, 3772 .arg2_type = ARG_ANYTHING, 3773 .arg3_type = ARG_ANYTHING, 3774 }; 3775 3776 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, 3777 u64, flags) 3778 { 3779 return __bpf_skb_change_head(skb, head_room, flags); 3780 } 3781 3782 static const struct bpf_func_proto sk_skb_change_head_proto = { 3783 .func = sk_skb_change_head, 3784 .gpl_only = false, 3785 .ret_type = RET_INTEGER, 3786 .arg1_type = ARG_PTR_TO_CTX, 3787 .arg2_type = ARG_ANYTHING, 3788 .arg3_type = ARG_ANYTHING, 3789 }; 3790 3791 BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) 3792 { 3793 return xdp_get_buff_len(xdp); 3794 } 3795 3796 static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { 3797 .func = bpf_xdp_get_buff_len, 3798 .gpl_only = false, 3799 .ret_type = RET_INTEGER, 3800 .arg1_type = ARG_PTR_TO_CTX, 3801 }; 3802 3803 BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) 3804 3805 const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { 3806 .func = bpf_xdp_get_buff_len, 3807 .gpl_only = false, 3808 .arg1_type = ARG_PTR_TO_BTF_ID, 3809 .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], 3810 }; 3811 3812 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) 3813 { 3814 return xdp_data_meta_unsupported(xdp) ? 0 : 3815 xdp->data - xdp->data_meta; 3816 } 3817 3818 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) 3819 { 3820 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); 3821 unsigned long metalen = xdp_get_metalen(xdp); 3822 void *data_start = xdp_frame_end + metalen; 3823 void *data = xdp->data + offset; 3824 3825 if (unlikely(data < data_start || 3826 data > xdp->data_end - ETH_HLEN)) 3827 return -EINVAL; 3828 3829 if (metalen) 3830 memmove(xdp->data_meta + offset, 3831 xdp->data_meta, metalen); 3832 xdp->data_meta += offset; 3833 xdp->data = data; 3834 3835 return 0; 3836 } 3837 3838 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { 3839 .func = bpf_xdp_adjust_head, 3840 .gpl_only = false, 3841 .ret_type = RET_INTEGER, 3842 .arg1_type = ARG_PTR_TO_CTX, 3843 .arg2_type = ARG_ANYTHING, 3844 }; 3845 3846 static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, 3847 void *buf, unsigned long len, bool flush) 3848 { 3849 unsigned long ptr_len, ptr_off = 0; 3850 skb_frag_t *next_frag, *end_frag; 3851 struct skb_shared_info *sinfo; 3852 void *src, *dst; 3853 u8 *ptr_buf; 3854 3855 if (likely(xdp->data_end - xdp->data >= off + len)) { 3856 src = flush ? buf : xdp->data + off; 3857 dst = flush ? 
xdp->data + off : buf; 3858 memcpy(dst, src, len); 3859 return; 3860 } 3861 3862 sinfo = xdp_get_shared_info_from_buff(xdp); 3863 end_frag = &sinfo->frags[sinfo->nr_frags]; 3864 next_frag = &sinfo->frags[0]; 3865 3866 ptr_len = xdp->data_end - xdp->data; 3867 ptr_buf = xdp->data; 3868 3869 while (true) { 3870 if (off < ptr_off + ptr_len) { 3871 unsigned long copy_off = off - ptr_off; 3872 unsigned long copy_len = min(len, ptr_len - copy_off); 3873 3874 src = flush ? buf : ptr_buf + copy_off; 3875 dst = flush ? ptr_buf + copy_off : buf; 3876 memcpy(dst, src, copy_len); 3877 3878 off += copy_len; 3879 len -= copy_len; 3880 buf += copy_len; 3881 } 3882 3883 if (!len || next_frag == end_frag) 3884 break; 3885 3886 ptr_off += ptr_len; 3887 ptr_buf = skb_frag_address(next_frag); 3888 ptr_len = skb_frag_size(next_frag); 3889 next_frag++; 3890 } 3891 } 3892 3893 static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) 3894 { 3895 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3896 u32 size = xdp->data_end - xdp->data; 3897 void *addr = xdp->data; 3898 int i; 3899 3900 if (unlikely(offset > 0xffff || len > 0xffff)) 3901 return ERR_PTR(-EFAULT); 3902 3903 if (offset + len > xdp_get_buff_len(xdp)) 3904 return ERR_PTR(-EINVAL); 3905 3906 if (offset < size) /* linear area */ 3907 goto out; 3908 3909 offset -= size; 3910 for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ 3911 u32 frag_size = skb_frag_size(&sinfo->frags[i]); 3912 3913 if (offset < frag_size) { 3914 addr = skb_frag_address(&sinfo->frags[i]); 3915 size = frag_size; 3916 break; 3917 } 3918 offset -= frag_size; 3919 } 3920 out: 3921 return offset + len <= size ? addr + offset : NULL; 3922 } 3923 3924 BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, 3925 void *, buf, u32, len) 3926 { 3927 void *ptr; 3928 3929 ptr = bpf_xdp_pointer(xdp, offset, len); 3930 if (IS_ERR(ptr)) 3931 return PTR_ERR(ptr); 3932 3933 if (!ptr) 3934 bpf_xdp_copy_buf(xdp, offset, buf, len, false); 3935 else 3936 memcpy(buf, ptr, len); 3937 3938 return 0; 3939 } 3940 3941 static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { 3942 .func = bpf_xdp_load_bytes, 3943 .gpl_only = false, 3944 .ret_type = RET_INTEGER, 3945 .arg1_type = ARG_PTR_TO_CTX, 3946 .arg2_type = ARG_ANYTHING, 3947 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 3948 .arg4_type = ARG_CONST_SIZE, 3949 }; 3950 3951 BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, 3952 void *, buf, u32, len) 3953 { 3954 void *ptr; 3955 3956 ptr = bpf_xdp_pointer(xdp, offset, len); 3957 if (IS_ERR(ptr)) 3958 return PTR_ERR(ptr); 3959 3960 if (!ptr) 3961 bpf_xdp_copy_buf(xdp, offset, buf, len, true); 3962 else 3963 memcpy(ptr, buf, len); 3964 3965 return 0; 3966 } 3967 3968 static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { 3969 .func = bpf_xdp_store_bytes, 3970 .gpl_only = false, 3971 .ret_type = RET_INTEGER, 3972 .arg1_type = ARG_PTR_TO_CTX, 3973 .arg2_type = ARG_ANYTHING, 3974 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 3975 .arg4_type = ARG_CONST_SIZE, 3976 }; 3977 3978 static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) 3979 { 3980 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3981 skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; 3982 struct xdp_rxq_info *rxq = xdp->rxq; 3983 unsigned int tailroom; 3984 3985 if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) 3986 return -EOPNOTSUPP; 3987 3988 tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); 3989 if (unlikely(offset > 
tailroom)) 3990 return -EINVAL; 3991 3992 memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); 3993 skb_frag_size_add(frag, offset); 3994 sinfo->xdp_frags_size += offset; 3995 3996 return 0; 3997 } 3998 3999 static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) 4000 { 4001 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 4002 int i, n_frags_free = 0, len_free = 0; 4003 4004 if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN)) 4005 return -EINVAL; 4006 4007 for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { 4008 skb_frag_t *frag = &sinfo->frags[i]; 4009 int shrink = min_t(int, offset, skb_frag_size(frag)); 4010 4011 len_free += shrink; 4012 offset -= shrink; 4013 4014 if (skb_frag_size(frag) == shrink) { 4015 struct page *page = skb_frag_page(frag); 4016 4017 __xdp_return(page_address(page), &xdp->rxq->mem, 4018 false, NULL); 4019 n_frags_free++; 4020 } else { 4021 skb_frag_size_sub(frag, shrink); 4022 break; 4023 } 4024 } 4025 sinfo->nr_frags -= n_frags_free; 4026 sinfo->xdp_frags_size -= len_free; 4027 4028 if (unlikely(!sinfo->nr_frags)) { 4029 xdp_buff_clear_frags_flag(xdp); 4030 xdp->data_end -= offset; 4031 } 4032 4033 return 0; 4034 } 4035 4036 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) 4037 { 4038 void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ 4039 void *data_end = xdp->data_end + offset; 4040 4041 if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */ 4042 if (offset < 0) 4043 return bpf_xdp_frags_shrink_tail(xdp, -offset); 4044 4045 return bpf_xdp_frags_increase_tail(xdp, offset); 4046 } 4047 4048 /* Notice that xdp_data_hard_end have reserved some tailroom */ 4049 if (unlikely(data_end > data_hard_end)) 4050 return -EINVAL; 4051 4052 /* ALL drivers MUST init xdp->frame_sz, chicken check below */ 4053 if (unlikely(xdp->frame_sz > PAGE_SIZE)) { 4054 WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz); 4055 return -EINVAL; 4056 } 4057 4058 if (unlikely(data_end < xdp->data + ETH_HLEN)) 4059 return -EINVAL; 4060 4061 /* Clear memory area on grow, can contain uninit kernel memory */ 4062 if (offset > 0) 4063 memset(xdp->data_end, 0, offset); 4064 4065 xdp->data_end = data_end; 4066 4067 return 0; 4068 } 4069 4070 static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { 4071 .func = bpf_xdp_adjust_tail, 4072 .gpl_only = false, 4073 .ret_type = RET_INTEGER, 4074 .arg1_type = ARG_PTR_TO_CTX, 4075 .arg2_type = ARG_ANYTHING, 4076 }; 4077 4078 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) 4079 { 4080 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); 4081 void *meta = xdp->data_meta + offset; 4082 unsigned long metalen = xdp->data - meta; 4083 4084 if (xdp_data_meta_unsupported(xdp)) 4085 return -ENOTSUPP; 4086 if (unlikely(meta < xdp_frame_end || 4087 meta > xdp->data)) 4088 return -EINVAL; 4089 if (unlikely(xdp_metalen_invalid(metalen))) 4090 return -EACCES; 4091 4092 xdp->data_meta = meta; 4093 4094 return 0; 4095 } 4096 4097 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { 4098 .func = bpf_xdp_adjust_meta, 4099 .gpl_only = false, 4100 .ret_type = RET_INTEGER, 4101 .arg1_type = ARG_PTR_TO_CTX, 4102 .arg2_type = ARG_ANYTHING, 4103 }; 4104 4105 /* XDP_REDIRECT works by a three-step process, implemented in the functions 4106 * below: 4107 * 4108 * 1. 
The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
4109 * of the redirect and store it (along with some other metadata) in a per-CPU
4110 * struct bpf_redirect_info.
4111 *
4112 * 2. When the program returns the XDP_REDIRECT return code, the driver will
4113 * call xdp_do_redirect() which will use the information in struct
4114 * bpf_redirect_info to actually enqueue the frame into a map type-specific
4115 * bulk queue structure.
4116 *
4117 * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
4118 * which will flush all the different bulk queues, thus completing the
4119 * redirect.
4120 *
4121 * Pointers to the map entries will be kept around for this whole sequence of
4122 * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
4123 * the core code; instead, the RCU protection relies on everything happening
4124 * inside a single NAPI poll sequence, which means it's between a pair of calls
4125 * to local_bh_disable()/local_bh_enable().
4126 *
4127 * The map entries are marked as __rcu and the map code makes sure to
4128 * dereference those pointers with rcu_dereference_check() in a way that works
4129 * for both sections that hold an rcu_read_lock() and sections that are
4130 * called from NAPI without a separate rcu_read_lock(). The code below does not
4131 * use RCU annotations, but relies on those in the map code.
4132 */
4133 void xdp_do_flush(void)
4134 {
4135 __dev_flush();
4136 __cpu_map_flush();
4137 __xsk_map_flush();
4138 }
4139 EXPORT_SYMBOL_GPL(xdp_do_flush);
4140
4141 void bpf_clear_redirect_map(struct bpf_map *map)
4142 {
4143 struct bpf_redirect_info *ri;
4144 int cpu;
4145
4146 for_each_possible_cpu(cpu) {
4147 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
4148 /* Avoid polluting remote cacheline due to writes if
4149 * not needed. Once we pass this test, we need the
4150 * cmpxchg() to make sure it hasn't been changed in
4151 * the meantime by a remote CPU.
4152 */
4153 if (unlikely(READ_ONCE(ri->map) == map))
4154 cmpxchg(&ri->map, map, NULL);
4155 }
4156 }
4157
4158 DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
4159 EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
4160
4161 u32 xdp_master_redirect(struct xdp_buff *xdp)
4162 {
4163 struct net_device *master, *slave;
4164 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4165
4166 master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
4167 slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
4168 if (slave && slave != xdp->rxq->dev) {
4169 /* The target device is different from the receiving device, so
4170 * redirect it to the new device.
4171 * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
4172 * drivers to unmap the packet from their rx ring.
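* (If ndo_xdp_get_xmit_slave() selects no slave, or the selected slave is
* the receiving device itself, the code below falls through and returns
* XDP_TX instead.)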
4173 */ 4174 ri->tgt_index = slave->ifindex; 4175 ri->map_id = INT_MAX; 4176 ri->map_type = BPF_MAP_TYPE_UNSPEC; 4177 return XDP_REDIRECT; 4178 } 4179 return XDP_TX; 4180 } 4181 EXPORT_SYMBOL_GPL(xdp_master_redirect); 4182 4183 static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, 4184 struct net_device *dev, 4185 struct xdp_buff *xdp, 4186 struct bpf_prog *xdp_prog) 4187 { 4188 enum bpf_map_type map_type = ri->map_type; 4189 void *fwd = ri->tgt_value; 4190 u32 map_id = ri->map_id; 4191 int err; 4192 4193 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ 4194 ri->map_type = BPF_MAP_TYPE_UNSPEC; 4195 4196 err = __xsk_map_redirect(fwd, xdp); 4197 if (unlikely(err)) 4198 goto err; 4199 4200 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); 4201 return 0; 4202 err: 4203 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); 4204 return err; 4205 } 4206 4207 static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, 4208 struct net_device *dev, 4209 struct xdp_frame *xdpf, 4210 struct bpf_prog *xdp_prog) 4211 { 4212 enum bpf_map_type map_type = ri->map_type; 4213 void *fwd = ri->tgt_value; 4214 u32 map_id = ri->map_id; 4215 struct bpf_map *map; 4216 int err; 4217 4218 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ 4219 ri->map_type = BPF_MAP_TYPE_UNSPEC; 4220 4221 if (unlikely(!xdpf)) { 4222 err = -EOVERFLOW; 4223 goto err; 4224 } 4225 4226 switch (map_type) { 4227 case BPF_MAP_TYPE_DEVMAP: 4228 fallthrough; 4229 case BPF_MAP_TYPE_DEVMAP_HASH: 4230 map = READ_ONCE(ri->map); 4231 if (unlikely(map)) { 4232 WRITE_ONCE(ri->map, NULL); 4233 err = dev_map_enqueue_multi(xdpf, dev, map, 4234 ri->flags & BPF_F_EXCLUDE_INGRESS); 4235 } else { 4236 err = dev_map_enqueue(fwd, xdpf, dev); 4237 } 4238 break; 4239 case BPF_MAP_TYPE_CPUMAP: 4240 err = cpu_map_enqueue(fwd, xdpf, dev); 4241 break; 4242 case BPF_MAP_TYPE_UNSPEC: 4243 if (map_id == INT_MAX) { 4244 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); 4245 if (unlikely(!fwd)) { 4246 err = -EINVAL; 4247 break; 4248 } 4249 err = dev_xdp_enqueue(fwd, xdpf, dev); 4250 break; 4251 } 4252 fallthrough; 4253 default: 4254 err = -EBADRQC; 4255 } 4256 4257 if (unlikely(err)) 4258 goto err; 4259 4260 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); 4261 return 0; 4262 err: 4263 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); 4264 return err; 4265 } 4266 4267 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 4268 struct bpf_prog *xdp_prog) 4269 { 4270 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 4271 enum bpf_map_type map_type = ri->map_type; 4272 4273 /* XDP_REDIRECT is not fully supported yet for xdp frags since 4274 * not all XDP capable drivers can map non-linear xdp_frame in 4275 * ndo_xdp_xmit. 
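* Redirects to a BPF_MAP_TYPE_CPUMAP are the exception and are allowed
* through by the frags check below.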
4276 */ 4277 if (unlikely(xdp_buff_has_frags(xdp) && 4278 map_type != BPF_MAP_TYPE_CPUMAP)) 4279 return -EOPNOTSUPP; 4280 4281 if (map_type == BPF_MAP_TYPE_XSKMAP) 4282 return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); 4283 4284 return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), 4285 xdp_prog); 4286 } 4287 EXPORT_SYMBOL_GPL(xdp_do_redirect); 4288 4289 int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, 4290 struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) 4291 { 4292 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 4293 enum bpf_map_type map_type = ri->map_type; 4294 4295 if (map_type == BPF_MAP_TYPE_XSKMAP) 4296 return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); 4297 4298 return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); 4299 } 4300 EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); 4301 4302 static int xdp_do_generic_redirect_map(struct net_device *dev, 4303 struct sk_buff *skb, 4304 struct xdp_buff *xdp, 4305 struct bpf_prog *xdp_prog, 4306 void *fwd, 4307 enum bpf_map_type map_type, u32 map_id) 4308 { 4309 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 4310 struct bpf_map *map; 4311 int err; 4312 4313 switch (map_type) { 4314 case BPF_MAP_TYPE_DEVMAP: 4315 fallthrough; 4316 case BPF_MAP_TYPE_DEVMAP_HASH: 4317 map = READ_ONCE(ri->map); 4318 if (unlikely(map)) { 4319 WRITE_ONCE(ri->map, NULL); 4320 err = dev_map_redirect_multi(dev, skb, xdp_prog, map, 4321 ri->flags & BPF_F_EXCLUDE_INGRESS); 4322 } else { 4323 err = dev_map_generic_redirect(fwd, skb, xdp_prog); 4324 } 4325 if (unlikely(err)) 4326 goto err; 4327 break; 4328 case BPF_MAP_TYPE_XSKMAP: 4329 err = xsk_generic_rcv(fwd, xdp); 4330 if (err) 4331 goto err; 4332 consume_skb(skb); 4333 break; 4334 case BPF_MAP_TYPE_CPUMAP: 4335 err = cpu_map_generic_redirect(fwd, skb); 4336 if (unlikely(err)) 4337 goto err; 4338 break; 4339 default: 4340 err = -EBADRQC; 4341 goto err; 4342 } 4343 4344 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); 4345 return 0; 4346 err: 4347 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); 4348 return err; 4349 } 4350 4351 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 4352 struct xdp_buff *xdp, struct bpf_prog *xdp_prog) 4353 { 4354 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 4355 enum bpf_map_type map_type = ri->map_type; 4356 void *fwd = ri->tgt_value; 4357 u32 map_id = ri->map_id; 4358 int err; 4359 4360 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ 4361 ri->map_type = BPF_MAP_TYPE_UNSPEC; 4362 4363 if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { 4364 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); 4365 if (unlikely(!fwd)) { 4366 err = -EINVAL; 4367 goto err; 4368 } 4369 4370 err = xdp_ok_fwd_dev(fwd, skb->len); 4371 if (unlikely(err)) 4372 goto err; 4373 4374 skb->dev = fwd; 4375 _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index); 4376 generic_xdp_tx(skb, xdp_prog); 4377 return 0; 4378 } 4379 4380 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); 4381 err: 4382 _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); 4383 return err; 4384 } 4385 4386 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) 4387 { 4388 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 4389 4390 if (unlikely(flags)) 4391 return XDP_ABORTED; 4392 4393 /* NB! 
Map type UNSPEC and map_id == INT_MAX (never generated 4394 * by map_idr) is used for ifindex based XDP redirect. 4395 */ 4396 ri->tgt_index = ifindex; 4397 ri->map_id = INT_MAX; 4398 ri->map_type = BPF_MAP_TYPE_UNSPEC; 4399 4400 return XDP_REDIRECT; 4401 } 4402 4403 static const struct bpf_func_proto bpf_xdp_redirect_proto = { 4404 .func = bpf_xdp_redirect, 4405 .gpl_only = false, 4406 .ret_type = RET_INTEGER, 4407 .arg1_type = ARG_ANYTHING, 4408 .arg2_type = ARG_ANYTHING, 4409 }; 4410 4411 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, 4412 u64, flags) 4413 { 4414 return map->ops->map_redirect(map, ifindex, flags); 4415 } 4416 4417 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { 4418 .func = bpf_xdp_redirect_map, 4419 .gpl_only = false, 4420 .ret_type = RET_INTEGER, 4421 .arg1_type = ARG_CONST_MAP_PTR, 4422 .arg2_type = ARG_ANYTHING, 4423 .arg3_type = ARG_ANYTHING, 4424 }; 4425 4426 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, 4427 unsigned long off, unsigned long len) 4428 { 4429 void *ptr = skb_header_pointer(skb, off, len, dst_buff); 4430 4431 if (unlikely(!ptr)) 4432 return len; 4433 if (ptr != dst_buff) 4434 memcpy(dst_buff, ptr, len); 4435 4436 return 0; 4437 } 4438 4439 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, 4440 u64, flags, void *, meta, u64, meta_size) 4441 { 4442 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 4443 4444 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 4445 return -EINVAL; 4446 if (unlikely(!skb || skb_size > skb->len)) 4447 return -EFAULT; 4448 4449 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, 4450 bpf_skb_copy); 4451 } 4452 4453 static const struct bpf_func_proto bpf_skb_event_output_proto = { 4454 .func = bpf_skb_event_output, 4455 .gpl_only = true, 4456 .ret_type = RET_INTEGER, 4457 .arg1_type = ARG_PTR_TO_CTX, 4458 .arg2_type = ARG_CONST_MAP_PTR, 4459 .arg3_type = ARG_ANYTHING, 4460 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4461 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4462 }; 4463 4464 BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff) 4465 4466 const struct bpf_func_proto bpf_skb_output_proto = { 4467 .func = bpf_skb_event_output, 4468 .gpl_only = true, 4469 .ret_type = RET_INTEGER, 4470 .arg1_type = ARG_PTR_TO_BTF_ID, 4471 .arg1_btf_id = &bpf_skb_output_btf_ids[0], 4472 .arg2_type = ARG_CONST_MAP_PTR, 4473 .arg3_type = ARG_ANYTHING, 4474 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4475 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4476 }; 4477 4478 static unsigned short bpf_tunnel_key_af(u64 flags) 4479 { 4480 return flags & BPF_F_TUNINFO_IPV6 ? 
AF_INET6 : AF_INET; 4481 } 4482 4483 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, 4484 u32, size, u64, flags) 4485 { 4486 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 4487 u8 compat[sizeof(struct bpf_tunnel_key)]; 4488 void *to_orig = to; 4489 int err; 4490 4491 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { 4492 err = -EINVAL; 4493 goto err_clear; 4494 } 4495 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { 4496 err = -EPROTO; 4497 goto err_clear; 4498 } 4499 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 4500 err = -EINVAL; 4501 switch (size) { 4502 case offsetof(struct bpf_tunnel_key, local_ipv6[0]): 4503 case offsetof(struct bpf_tunnel_key, tunnel_label): 4504 case offsetof(struct bpf_tunnel_key, tunnel_ext): 4505 goto set_compat; 4506 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 4507 /* Fixup deprecated structure layouts here, so we have 4508 * a common path later on. 4509 */ 4510 if (ip_tunnel_info_af(info) != AF_INET) 4511 goto err_clear; 4512 set_compat: 4513 to = (struct bpf_tunnel_key *)compat; 4514 break; 4515 default: 4516 goto err_clear; 4517 } 4518 } 4519 4520 to->tunnel_id = be64_to_cpu(info->key.tun_id); 4521 to->tunnel_tos = info->key.tos; 4522 to->tunnel_ttl = info->key.ttl; 4523 to->tunnel_ext = 0; 4524 4525 if (flags & BPF_F_TUNINFO_IPV6) { 4526 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 4527 sizeof(to->remote_ipv6)); 4528 memcpy(to->local_ipv6, &info->key.u.ipv6.dst, 4529 sizeof(to->local_ipv6)); 4530 to->tunnel_label = be32_to_cpu(info->key.label); 4531 } else { 4532 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 4533 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); 4534 to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst); 4535 memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3); 4536 to->tunnel_label = 0; 4537 } 4538 4539 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 4540 memcpy(to_orig, to, size); 4541 4542 return 0; 4543 err_clear: 4544 memset(to_orig, 0, size); 4545 return err; 4546 } 4547 4548 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 4549 .func = bpf_skb_get_tunnel_key, 4550 .gpl_only = false, 4551 .ret_type = RET_INTEGER, 4552 .arg1_type = ARG_PTR_TO_CTX, 4553 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 4554 .arg3_type = ARG_CONST_SIZE, 4555 .arg4_type = ARG_ANYTHING, 4556 }; 4557 4558 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) 4559 { 4560 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 4561 int err; 4562 4563 if (unlikely(!info || 4564 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { 4565 err = -ENOENT; 4566 goto err_clear; 4567 } 4568 if (unlikely(size < info->options_len)) { 4569 err = -ENOMEM; 4570 goto err_clear; 4571 } 4572 4573 ip_tunnel_info_opts_get(to, info); 4574 if (size > info->options_len) 4575 memset(to + info->options_len, 0, size - info->options_len); 4576 4577 return info->options_len; 4578 err_clear: 4579 memset(to, 0, size); 4580 return err; 4581 } 4582 4583 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { 4584 .func = bpf_skb_get_tunnel_opt, 4585 .gpl_only = false, 4586 .ret_type = RET_INTEGER, 4587 .arg1_type = ARG_PTR_TO_CTX, 4588 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 4589 .arg3_type = ARG_CONST_SIZE, 4590 }; 4591 4592 static struct metadata_dst __percpu *md_dst; 4593 4594 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, 4595 const struct bpf_tunnel_key *, from, u32, size, u64, flags) 4596 { 4597 struct metadata_dst *md = 
this_cpu_ptr(md_dst); 4598 u8 compat[sizeof(struct bpf_tunnel_key)]; 4599 struct ip_tunnel_info *info; 4600 4601 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | 4602 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER))) 4603 return -EINVAL; 4604 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 4605 switch (size) { 4606 case offsetof(struct bpf_tunnel_key, local_ipv6[0]): 4607 case offsetof(struct bpf_tunnel_key, tunnel_label): 4608 case offsetof(struct bpf_tunnel_key, tunnel_ext): 4609 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 4610 /* Fixup deprecated structure layouts here, so we have 4611 * a common path later on. 4612 */ 4613 memcpy(compat, from, size); 4614 memset(compat + size, 0, sizeof(compat) - size); 4615 from = (const struct bpf_tunnel_key *) compat; 4616 break; 4617 default: 4618 return -EINVAL; 4619 } 4620 } 4621 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || 4622 from->tunnel_ext)) 4623 return -EINVAL; 4624 4625 skb_dst_drop(skb); 4626 dst_hold((struct dst_entry *) md); 4627 skb_dst_set(skb, (struct dst_entry *) md); 4628 4629 info = &md->u.tun_info; 4630 memset(info, 0, sizeof(*info)); 4631 info->mode = IP_TUNNEL_INFO_TX; 4632 4633 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; 4634 if (flags & BPF_F_DONT_FRAGMENT) 4635 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; 4636 if (flags & BPF_F_ZERO_CSUM_TX) 4637 info->key.tun_flags &= ~TUNNEL_CSUM; 4638 if (flags & BPF_F_SEQ_NUMBER) 4639 info->key.tun_flags |= TUNNEL_SEQ; 4640 4641 info->key.tun_id = cpu_to_be64(from->tunnel_id); 4642 info->key.tos = from->tunnel_tos; 4643 info->key.ttl = from->tunnel_ttl; 4644 4645 if (flags & BPF_F_TUNINFO_IPV6) { 4646 info->mode |= IP_TUNNEL_INFO_IPV6; 4647 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 4648 sizeof(from->remote_ipv6)); 4649 memcpy(&info->key.u.ipv6.src, from->local_ipv6, 4650 sizeof(from->local_ipv6)); 4651 info->key.label = cpu_to_be32(from->tunnel_label) & 4652 IPV6_FLOWLABEL_MASK; 4653 } else { 4654 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 4655 info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); 4656 info->key.flow_flags = FLOWI_FLAG_ANYSRC; 4657 } 4658 4659 return 0; 4660 } 4661 4662 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 4663 .func = bpf_skb_set_tunnel_key, 4664 .gpl_only = false, 4665 .ret_type = RET_INTEGER, 4666 .arg1_type = ARG_PTR_TO_CTX, 4667 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4668 .arg3_type = ARG_CONST_SIZE, 4669 .arg4_type = ARG_ANYTHING, 4670 }; 4671 4672 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, 4673 const u8 *, from, u32, size) 4674 { 4675 struct ip_tunnel_info *info = skb_tunnel_info(skb); 4676 const struct metadata_dst *md = this_cpu_ptr(md_dst); 4677 4678 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) 4679 return -EINVAL; 4680 if (unlikely(size > IP_TUNNEL_OPTS_MAX)) 4681 return -ENOMEM; 4682 4683 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); 4684 4685 return 0; 4686 } 4687 4688 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { 4689 .func = bpf_skb_set_tunnel_opt, 4690 .gpl_only = false, 4691 .ret_type = RET_INTEGER, 4692 .arg1_type = ARG_PTR_TO_CTX, 4693 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4694 .arg3_type = ARG_CONST_SIZE, 4695 }; 4696 4697 static const struct bpf_func_proto * 4698 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) 4699 { 4700 if (!md_dst) { 4701 struct metadata_dst __percpu *tmp; 4702 4703 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, 
4704 METADATA_IP_TUNNEL, 4705 GFP_KERNEL); 4706 if (!tmp) 4707 return NULL; 4708 if (cmpxchg(&md_dst, NULL, tmp)) 4709 metadata_dst_free_percpu(tmp); 4710 } 4711 4712 switch (which) { 4713 case BPF_FUNC_skb_set_tunnel_key: 4714 return &bpf_skb_set_tunnel_key_proto; 4715 case BPF_FUNC_skb_set_tunnel_opt: 4716 return &bpf_skb_set_tunnel_opt_proto; 4717 default: 4718 return NULL; 4719 } 4720 } 4721 4722 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, 4723 u32, idx) 4724 { 4725 struct bpf_array *array = container_of(map, struct bpf_array, map); 4726 struct cgroup *cgrp; 4727 struct sock *sk; 4728 4729 sk = skb_to_full_sk(skb); 4730 if (!sk || !sk_fullsock(sk)) 4731 return -ENOENT; 4732 if (unlikely(idx >= array->map.max_entries)) 4733 return -E2BIG; 4734 4735 cgrp = READ_ONCE(array->ptrs[idx]); 4736 if (unlikely(!cgrp)) 4737 return -EAGAIN; 4738 4739 return sk_under_cgroup_hierarchy(sk, cgrp); 4740 } 4741 4742 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { 4743 .func = bpf_skb_under_cgroup, 4744 .gpl_only = false, 4745 .ret_type = RET_INTEGER, 4746 .arg1_type = ARG_PTR_TO_CTX, 4747 .arg2_type = ARG_CONST_MAP_PTR, 4748 .arg3_type = ARG_ANYTHING, 4749 }; 4750 4751 #ifdef CONFIG_SOCK_CGROUP_DATA 4752 static inline u64 __bpf_sk_cgroup_id(struct sock *sk) 4753 { 4754 struct cgroup *cgrp; 4755 4756 sk = sk_to_full_sk(sk); 4757 if (!sk || !sk_fullsock(sk)) 4758 return 0; 4759 4760 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 4761 return cgroup_id(cgrp); 4762 } 4763 4764 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb) 4765 { 4766 return __bpf_sk_cgroup_id(skb->sk); 4767 } 4768 4769 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { 4770 .func = bpf_skb_cgroup_id, 4771 .gpl_only = false, 4772 .ret_type = RET_INTEGER, 4773 .arg1_type = ARG_PTR_TO_CTX, 4774 }; 4775 4776 static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk, 4777 int ancestor_level) 4778 { 4779 struct cgroup *ancestor; 4780 struct cgroup *cgrp; 4781 4782 sk = sk_to_full_sk(sk); 4783 if (!sk || !sk_fullsock(sk)) 4784 return 0; 4785 4786 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 4787 ancestor = cgroup_ancestor(cgrp, ancestor_level); 4788 if (!ancestor) 4789 return 0; 4790 4791 return cgroup_id(ancestor); 4792 } 4793 4794 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, 4795 ancestor_level) 4796 { 4797 return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level); 4798 } 4799 4800 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { 4801 .func = bpf_skb_ancestor_cgroup_id, 4802 .gpl_only = false, 4803 .ret_type = RET_INTEGER, 4804 .arg1_type = ARG_PTR_TO_CTX, 4805 .arg2_type = ARG_ANYTHING, 4806 }; 4807 4808 BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk) 4809 { 4810 return __bpf_sk_cgroup_id(sk); 4811 } 4812 4813 static const struct bpf_func_proto bpf_sk_cgroup_id_proto = { 4814 .func = bpf_sk_cgroup_id, 4815 .gpl_only = false, 4816 .ret_type = RET_INTEGER, 4817 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 4818 }; 4819 4820 BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level) 4821 { 4822 return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level); 4823 } 4824 4825 static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = { 4826 .func = bpf_sk_ancestor_cgroup_id, 4827 .gpl_only = false, 4828 .ret_type = RET_INTEGER, 4829 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 4830 .arg2_type = ARG_ANYTHING, 4831 }; 4832 #endif 4833 4834 static unsigned long bpf_xdp_copy(void *dst, const void *ctx, 4835 
unsigned long off, unsigned long len) 4836 { 4837 struct xdp_buff *xdp = (struct xdp_buff *)ctx; 4838 4839 bpf_xdp_copy_buf(xdp, off, dst, len, false); 4840 return 0; 4841 } 4842 4843 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, 4844 u64, flags, void *, meta, u64, meta_size) 4845 { 4846 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 4847 4848 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 4849 return -EINVAL; 4850 4851 if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp))) 4852 return -EFAULT; 4853 4854 return bpf_event_output(map, flags, meta, meta_size, xdp, 4855 xdp_size, bpf_xdp_copy); 4856 } 4857 4858 static const struct bpf_func_proto bpf_xdp_event_output_proto = { 4859 .func = bpf_xdp_event_output, 4860 .gpl_only = true, 4861 .ret_type = RET_INTEGER, 4862 .arg1_type = ARG_PTR_TO_CTX, 4863 .arg2_type = ARG_CONST_MAP_PTR, 4864 .arg3_type = ARG_ANYTHING, 4865 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4866 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4867 }; 4868 4869 BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff) 4870 4871 const struct bpf_func_proto bpf_xdp_output_proto = { 4872 .func = bpf_xdp_event_output, 4873 .gpl_only = true, 4874 .ret_type = RET_INTEGER, 4875 .arg1_type = ARG_PTR_TO_BTF_ID, 4876 .arg1_btf_id = &bpf_xdp_output_btf_ids[0], 4877 .arg2_type = ARG_CONST_MAP_PTR, 4878 .arg3_type = ARG_ANYTHING, 4879 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 4880 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4881 }; 4882 4883 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) 4884 { 4885 return skb->sk ? __sock_gen_cookie(skb->sk) : 0; 4886 } 4887 4888 static const struct bpf_func_proto bpf_get_socket_cookie_proto = { 4889 .func = bpf_get_socket_cookie, 4890 .gpl_only = false, 4891 .ret_type = RET_INTEGER, 4892 .arg1_type = ARG_PTR_TO_CTX, 4893 }; 4894 4895 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) 4896 { 4897 return __sock_gen_cookie(ctx->sk); 4898 } 4899 4900 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { 4901 .func = bpf_get_socket_cookie_sock_addr, 4902 .gpl_only = false, 4903 .ret_type = RET_INTEGER, 4904 .arg1_type = ARG_PTR_TO_CTX, 4905 }; 4906 4907 BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx) 4908 { 4909 return __sock_gen_cookie(ctx); 4910 } 4911 4912 static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = { 4913 .func = bpf_get_socket_cookie_sock, 4914 .gpl_only = false, 4915 .ret_type = RET_INTEGER, 4916 .arg1_type = ARG_PTR_TO_CTX, 4917 }; 4918 4919 BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk) 4920 { 4921 return sk ? sock_gen_cookie(sk) : 0; 4922 } 4923 4924 const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = { 4925 .func = bpf_get_socket_ptr_cookie, 4926 .gpl_only = false, 4927 .ret_type = RET_INTEGER, 4928 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 4929 }; 4930 4931 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) 4932 { 4933 return __sock_gen_cookie(ctx->sk); 4934 } 4935 4936 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { 4937 .func = bpf_get_socket_cookie_sock_ops, 4938 .gpl_only = false, 4939 .ret_type = RET_INTEGER, 4940 .arg1_type = ARG_PTR_TO_CTX, 4941 }; 4942 4943 static u64 __bpf_get_netns_cookie(struct sock *sk) 4944 { 4945 const struct net *net = sk ? 
sock_net(sk) : &init_net; 4946 4947 return net->net_cookie; 4948 } 4949 4950 BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) 4951 { 4952 return __bpf_get_netns_cookie(ctx); 4953 } 4954 4955 static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = { 4956 .func = bpf_get_netns_cookie_sock, 4957 .gpl_only = false, 4958 .ret_type = RET_INTEGER, 4959 .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 4960 }; 4961 4962 BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) 4963 { 4964 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); 4965 } 4966 4967 static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = { 4968 .func = bpf_get_netns_cookie_sock_addr, 4969 .gpl_only = false, 4970 .ret_type = RET_INTEGER, 4971 .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 4972 }; 4973 4974 BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) 4975 { 4976 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); 4977 } 4978 4979 static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = { 4980 .func = bpf_get_netns_cookie_sock_ops, 4981 .gpl_only = false, 4982 .ret_type = RET_INTEGER, 4983 .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 4984 }; 4985 4986 BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx) 4987 { 4988 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); 4989 } 4990 4991 static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = { 4992 .func = bpf_get_netns_cookie_sk_msg, 4993 .gpl_only = false, 4994 .ret_type = RET_INTEGER, 4995 .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 4996 }; 4997 4998 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) 4999 { 5000 struct sock *sk = sk_to_full_sk(skb->sk); 5001 kuid_t kuid; 5002 5003 if (!sk || !sk_fullsock(sk)) 5004 return overflowuid; 5005 kuid = sock_net_uid(sock_net(sk), sk); 5006 return from_kuid_munged(sock_net(sk)->user_ns, kuid); 5007 } 5008 5009 static const struct bpf_func_proto bpf_get_socket_uid_proto = { 5010 .func = bpf_get_socket_uid, 5011 .gpl_only = false, 5012 .ret_type = RET_INTEGER, 5013 .arg1_type = ARG_PTR_TO_CTX, 5014 }; 5015 5016 static int __bpf_setsockopt(struct sock *sk, int level, int optname, 5017 char *optval, int optlen) 5018 { 5019 char devname[IFNAMSIZ]; 5020 int val, valbool; 5021 struct net *net; 5022 int ifindex; 5023 int ret = 0; 5024 5025 if (!sk_fullsock(sk)) 5026 return -EINVAL; 5027 5028 if (level == SOL_SOCKET) { 5029 if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) 5030 return -EINVAL; 5031 val = *((int *)optval); 5032 valbool = val ? 1 : 0; 5033 5034 /* Only some socketops are supported */ 5035 switch (optname) { 5036 case SO_RCVBUF: 5037 val = min_t(u32, val, sysctl_rmem_max); 5038 val = min_t(int, val, INT_MAX / 2); 5039 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 5040 WRITE_ONCE(sk->sk_rcvbuf, 5041 max_t(int, val * 2, SOCK_MIN_RCVBUF)); 5042 break; 5043 case SO_SNDBUF: 5044 val = min_t(u32, val, sysctl_wmem_max); 5045 val = min_t(int, val, INT_MAX / 2); 5046 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 5047 WRITE_ONCE(sk->sk_sndbuf, 5048 max_t(int, val * 2, SOCK_MIN_SNDBUF)); 5049 break; 5050 case SO_MAX_PACING_RATE: /* 32bit version */ 5051 if (val != ~0U) 5052 cmpxchg(&sk->sk_pacing_status, 5053 SK_PACING_NONE, 5054 SK_PACING_NEEDED); 5055 sk->sk_max_pacing_rate = (val == ~0U) ? 
5056 ~0UL : (unsigned int)val; 5057 sk->sk_pacing_rate = min(sk->sk_pacing_rate, 5058 sk->sk_max_pacing_rate); 5059 break; 5060 case SO_PRIORITY: 5061 sk->sk_priority = val; 5062 break; 5063 case SO_RCVLOWAT: 5064 if (val < 0) 5065 val = INT_MAX; 5066 if (sk->sk_socket && sk->sk_socket->ops->set_rcvlowat) 5067 ret = sk->sk_socket->ops->set_rcvlowat(sk, val); 5068 else 5069 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 5070 break; 5071 case SO_MARK: 5072 if (sk->sk_mark != val) { 5073 sk->sk_mark = val; 5074 sk_dst_reset(sk); 5075 } 5076 break; 5077 case SO_BINDTODEVICE: 5078 optlen = min_t(long, optlen, IFNAMSIZ - 1); 5079 strncpy(devname, optval, optlen); 5080 devname[optlen] = 0; 5081 5082 ifindex = 0; 5083 if (devname[0] != '\0') { 5084 struct net_device *dev; 5085 5086 ret = -ENODEV; 5087 5088 net = sock_net(sk); 5089 dev = dev_get_by_name(net, devname); 5090 if (!dev) 5091 break; 5092 ifindex = dev->ifindex; 5093 dev_put(dev); 5094 } 5095 fallthrough; 5096 case SO_BINDTOIFINDEX: 5097 if (optname == SO_BINDTOIFINDEX) 5098 ifindex = val; 5099 ret = sock_bindtoindex(sk, ifindex, false); 5100 break; 5101 case SO_KEEPALIVE: 5102 if (sk->sk_prot->keepalive) 5103 sk->sk_prot->keepalive(sk, valbool); 5104 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 5105 break; 5106 case SO_REUSEPORT: 5107 sk->sk_reuseport = valbool; 5108 break; 5109 case SO_TXREHASH: 5110 if (val < -1 || val > 1) { 5111 ret = -EINVAL; 5112 break; 5113 } 5114 sk->sk_txrehash = (u8)val; 5115 break; 5116 default: 5117 ret = -EINVAL; 5118 } 5119 #ifdef CONFIG_INET 5120 } else if (level == SOL_IP) { 5121 if (optlen != sizeof(int) || sk->sk_family != AF_INET) 5122 return -EINVAL; 5123 5124 val = *((int *)optval); 5125 /* Only some options are supported */ 5126 switch (optname) { 5127 case IP_TOS: 5128 if (val < -1 || val > 0xff) { 5129 ret = -EINVAL; 5130 } else { 5131 struct inet_sock *inet = inet_sk(sk); 5132 5133 if (val == -1) 5134 val = 0; 5135 inet->tos = val; 5136 } 5137 break; 5138 default: 5139 ret = -EINVAL; 5140 } 5141 #if IS_ENABLED(CONFIG_IPV6) 5142 } else if (level == SOL_IPV6) { 5143 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 5144 return -EINVAL; 5145 5146 val = *((int *)optval); 5147 /* Only some options are supported */ 5148 switch (optname) { 5149 case IPV6_TCLASS: 5150 if (val < -1 || val > 0xff) { 5151 ret = -EINVAL; 5152 } else { 5153 struct ipv6_pinfo *np = inet6_sk(sk); 5154 5155 if (val == -1) 5156 val = 0; 5157 np->tclass = val; 5158 } 5159 break; 5160 default: 5161 ret = -EINVAL; 5162 } 5163 #endif 5164 } else if (level == SOL_TCP && 5165 sk->sk_prot->setsockopt == tcp_setsockopt) { 5166 if (optname == TCP_CONGESTION) { 5167 char name[TCP_CA_NAME_MAX]; 5168 5169 strncpy(name, optval, min_t(long, optlen, 5170 TCP_CA_NAME_MAX-1)); 5171 name[TCP_CA_NAME_MAX-1] = 0; 5172 ret = tcp_set_congestion_control(sk, name, false, true); 5173 } else { 5174 struct inet_connection_sock *icsk = inet_csk(sk); 5175 struct tcp_sock *tp = tcp_sk(sk); 5176 unsigned long timeout; 5177 5178 if (optlen != sizeof(int)) 5179 return -EINVAL; 5180 5181 val = *((int *)optval); 5182 /* Only some options are supported */ 5183 switch (optname) { 5184 case TCP_BPF_IW: 5185 if (val <= 0 || tp->data_segs_out > tp->syn_data) 5186 ret = -EINVAL; 5187 else 5188 tcp_snd_cwnd_set(tp, val); 5189 break; 5190 case TCP_BPF_SNDCWND_CLAMP: 5191 if (val <= 0) { 5192 ret = -EINVAL; 5193 } else { 5194 tp->snd_cwnd_clamp = val; 5195 tp->snd_ssthresh = val; 5196 } 5197 break; 5198 case TCP_BPF_DELACK_MAX: 5199 timeout = usecs_to_jiffies(val); 5200 
if (timeout > TCP_DELACK_MAX || 5201 timeout < TCP_TIMEOUT_MIN) 5202 return -EINVAL; 5203 inet_csk(sk)->icsk_delack_max = timeout; 5204 break; 5205 case TCP_BPF_RTO_MIN: 5206 timeout = usecs_to_jiffies(val); 5207 if (timeout > TCP_RTO_MIN || 5208 timeout < TCP_TIMEOUT_MIN) 5209 return -EINVAL; 5210 inet_csk(sk)->icsk_rto_min = timeout; 5211 break; 5212 case TCP_SAVE_SYN: 5213 if (val < 0 || val > 1) 5214 ret = -EINVAL; 5215 else 5216 tp->save_syn = val; 5217 break; 5218 case TCP_KEEPIDLE: 5219 ret = tcp_sock_set_keepidle_locked(sk, val); 5220 break; 5221 case TCP_KEEPINTVL: 5222 if (val < 1 || val > MAX_TCP_KEEPINTVL) 5223 ret = -EINVAL; 5224 else 5225 tp->keepalive_intvl = val * HZ; 5226 break; 5227 case TCP_KEEPCNT: 5228 if (val < 1 || val > MAX_TCP_KEEPCNT) 5229 ret = -EINVAL; 5230 else 5231 tp->keepalive_probes = val; 5232 break; 5233 case TCP_SYNCNT: 5234 if (val < 1 || val > MAX_TCP_SYNCNT) 5235 ret = -EINVAL; 5236 else 5237 icsk->icsk_syn_retries = val; 5238 break; 5239 case TCP_USER_TIMEOUT: 5240 if (val < 0) 5241 ret = -EINVAL; 5242 else 5243 icsk->icsk_user_timeout = val; 5244 break; 5245 case TCP_NOTSENT_LOWAT: 5246 tp->notsent_lowat = val; 5247 sk->sk_write_space(sk); 5248 break; 5249 case TCP_WINDOW_CLAMP: 5250 ret = tcp_set_window_clamp(sk, val); 5251 break; 5252 default: 5253 ret = -EINVAL; 5254 } 5255 } 5256 #endif 5257 } else { 5258 ret = -EINVAL; 5259 } 5260 return ret; 5261 } 5262 5263 static int _bpf_setsockopt(struct sock *sk, int level, int optname, 5264 char *optval, int optlen) 5265 { 5266 if (sk_fullsock(sk)) 5267 sock_owned_by_me(sk); 5268 return __bpf_setsockopt(sk, level, optname, optval, optlen); 5269 } 5270 5271 static int __bpf_getsockopt(struct sock *sk, int level, int optname, 5272 char *optval, int optlen) 5273 { 5274 if (!sk_fullsock(sk)) 5275 goto err_clear; 5276 5277 if (level == SOL_SOCKET) { 5278 if (optlen != sizeof(int)) 5279 goto err_clear; 5280 5281 switch (optname) { 5282 case SO_RCVBUF: 5283 *((int *)optval) = sk->sk_rcvbuf; 5284 break; 5285 case SO_SNDBUF: 5286 *((int *)optval) = sk->sk_sndbuf; 5287 break; 5288 case SO_MARK: 5289 *((int *)optval) = sk->sk_mark; 5290 break; 5291 case SO_PRIORITY: 5292 *((int *)optval) = sk->sk_priority; 5293 break; 5294 case SO_BINDTOIFINDEX: 5295 *((int *)optval) = sk->sk_bound_dev_if; 5296 break; 5297 case SO_REUSEPORT: 5298 *((int *)optval) = sk->sk_reuseport; 5299 break; 5300 case SO_TXREHASH: 5301 *((int *)optval) = sk->sk_txrehash; 5302 break; 5303 default: 5304 goto err_clear; 5305 } 5306 #ifdef CONFIG_INET 5307 } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { 5308 struct inet_connection_sock *icsk; 5309 struct tcp_sock *tp; 5310 5311 switch (optname) { 5312 case TCP_CONGESTION: 5313 icsk = inet_csk(sk); 5314 5315 if (!icsk->icsk_ca_ops || optlen <= 1) 5316 goto err_clear; 5317 strncpy(optval, icsk->icsk_ca_ops->name, optlen); 5318 optval[optlen - 1] = 0; 5319 break; 5320 case TCP_SAVED_SYN: 5321 tp = tcp_sk(sk); 5322 5323 if (optlen <= 0 || !tp->saved_syn || 5324 optlen > tcp_saved_syn_len(tp->saved_syn)) 5325 goto err_clear; 5326 memcpy(optval, tp->saved_syn->data, optlen); 5327 break; 5328 default: 5329 goto err_clear; 5330 } 5331 } else if (level == SOL_IP) { 5332 struct inet_sock *inet = inet_sk(sk); 5333 5334 if (optlen != sizeof(int) || sk->sk_family != AF_INET) 5335 goto err_clear; 5336 5337 /* Only some options are supported */ 5338 switch (optname) { 5339 case IP_TOS: 5340 *((int *)optval) = (int)inet->tos; 5341 break; 5342 default: 5343 goto err_clear; 5344 } 5345 #if 
IS_ENABLED(CONFIG_IPV6) 5346 } else if (level == SOL_IPV6) { 5347 struct ipv6_pinfo *np = inet6_sk(sk); 5348 5349 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 5350 goto err_clear; 5351 5352 /* Only some options are supported */ 5353 switch (optname) { 5354 case IPV6_TCLASS: 5355 *((int *)optval) = (int)np->tclass; 5356 break; 5357 default: 5358 goto err_clear; 5359 } 5360 #endif 5361 #endif 5362 } else { 5363 goto err_clear; 5364 } 5365 return 0; 5366 err_clear: 5367 memset(optval, 0, optlen); 5368 return -EINVAL; 5369 } 5370 5371 static int _bpf_getsockopt(struct sock *sk, int level, int optname, 5372 char *optval, int optlen) 5373 { 5374 if (sk_fullsock(sk)) 5375 sock_owned_by_me(sk); 5376 return __bpf_getsockopt(sk, level, optname, optval, optlen); 5377 } 5378 5379 BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, 5380 int, optname, char *, optval, int, optlen) 5381 { 5382 if (level == SOL_TCP && optname == TCP_CONGESTION) { 5383 if (optlen >= sizeof("cdg") - 1 && 5384 !strncmp("cdg", optval, optlen)) 5385 return -ENOTSUPP; 5386 } 5387 5388 return _bpf_setsockopt(sk, level, optname, optval, optlen); 5389 } 5390 5391 const struct bpf_func_proto bpf_sk_setsockopt_proto = { 5392 .func = bpf_sk_setsockopt, 5393 .gpl_only = false, 5394 .ret_type = RET_INTEGER, 5395 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5396 .arg2_type = ARG_ANYTHING, 5397 .arg3_type = ARG_ANYTHING, 5398 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5399 .arg5_type = ARG_CONST_SIZE, 5400 }; 5401 5402 BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level, 5403 int, optname, char *, optval, int, optlen) 5404 { 5405 return _bpf_getsockopt(sk, level, optname, optval, optlen); 5406 } 5407 5408 const struct bpf_func_proto bpf_sk_getsockopt_proto = { 5409 .func = bpf_sk_getsockopt, 5410 .gpl_only = false, 5411 .ret_type = RET_INTEGER, 5412 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5413 .arg2_type = ARG_ANYTHING, 5414 .arg3_type = ARG_ANYTHING, 5415 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 5416 .arg5_type = ARG_CONST_SIZE, 5417 }; 5418 5419 BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level, 5420 int, optname, char *, optval, int, optlen) 5421 { 5422 return __bpf_setsockopt(sk, level, optname, optval, optlen); 5423 } 5424 5425 const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = { 5426 .func = bpf_unlocked_sk_setsockopt, 5427 .gpl_only = false, 5428 .ret_type = RET_INTEGER, 5429 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5430 .arg2_type = ARG_ANYTHING, 5431 .arg3_type = ARG_ANYTHING, 5432 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5433 .arg5_type = ARG_CONST_SIZE, 5434 }; 5435 5436 BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level, 5437 int, optname, char *, optval, int, optlen) 5438 { 5439 return __bpf_getsockopt(sk, level, optname, optval, optlen); 5440 } 5441 5442 const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = { 5443 .func = bpf_unlocked_sk_getsockopt, 5444 .gpl_only = false, 5445 .ret_type = RET_INTEGER, 5446 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 5447 .arg2_type = ARG_ANYTHING, 5448 .arg3_type = ARG_ANYTHING, 5449 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 5450 .arg5_type = ARG_CONST_SIZE, 5451 }; 5452 5453 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, 5454 int, level, int, optname, char *, optval, int, optlen) 5455 { 5456 return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen); 5457 } 5458 5459 static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { 5460 .func = bpf_sock_addr_setsockopt, 5461 
.gpl_only = false, 5462 .ret_type = RET_INTEGER, 5463 .arg1_type = ARG_PTR_TO_CTX, 5464 .arg2_type = ARG_ANYTHING, 5465 .arg3_type = ARG_ANYTHING, 5466 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5467 .arg5_type = ARG_CONST_SIZE, 5468 }; 5469 5470 BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx, 5471 int, level, int, optname, char *, optval, int, optlen) 5472 { 5473 return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen); 5474 } 5475 5476 static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { 5477 .func = bpf_sock_addr_getsockopt, 5478 .gpl_only = false, 5479 .ret_type = RET_INTEGER, 5480 .arg1_type = ARG_PTR_TO_CTX, 5481 .arg2_type = ARG_ANYTHING, 5482 .arg3_type = ARG_ANYTHING, 5483 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 5484 .arg5_type = ARG_CONST_SIZE, 5485 }; 5486 5487 BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, 5488 int, level, int, optname, char *, optval, int, optlen) 5489 { 5490 return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen); 5491 } 5492 5493 static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { 5494 .func = bpf_sock_ops_setsockopt, 5495 .gpl_only = false, 5496 .ret_type = RET_INTEGER, 5497 .arg1_type = ARG_PTR_TO_CTX, 5498 .arg2_type = ARG_ANYTHING, 5499 .arg3_type = ARG_ANYTHING, 5500 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5501 .arg5_type = ARG_CONST_SIZE, 5502 }; 5503 5504 static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, 5505 int optname, const u8 **start) 5506 { 5507 struct sk_buff *syn_skb = bpf_sock->syn_skb; 5508 const u8 *hdr_start; 5509 int ret; 5510 5511 if (syn_skb) { 5512 /* sk is a request_sock here */ 5513 5514 if (optname == TCP_BPF_SYN) { 5515 hdr_start = syn_skb->data; 5516 ret = tcp_hdrlen(syn_skb); 5517 } else if (optname == TCP_BPF_SYN_IP) { 5518 hdr_start = skb_network_header(syn_skb); 5519 ret = skb_network_header_len(syn_skb) + 5520 tcp_hdrlen(syn_skb); 5521 } else { 5522 /* optname == TCP_BPF_SYN_MAC */ 5523 hdr_start = skb_mac_header(syn_skb); 5524 ret = skb_mac_header_len(syn_skb) + 5525 skb_network_header_len(syn_skb) + 5526 tcp_hdrlen(syn_skb); 5527 } 5528 } else { 5529 struct sock *sk = bpf_sock->sk; 5530 struct saved_syn *saved_syn; 5531 5532 if (sk->sk_state == TCP_NEW_SYN_RECV) 5533 /* synack retransmit. bpf_sock->syn_skb will 5534 * not be available. It has to resort to 5535 * saved_syn (if it is saved). 
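* (The request's saved_syn is only present if the listener had
* TCP_SAVE_SYN enabled.)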
5536 */ 5537 saved_syn = inet_reqsk(sk)->saved_syn; 5538 else 5539 saved_syn = tcp_sk(sk)->saved_syn; 5540 5541 if (!saved_syn) 5542 return -ENOENT; 5543 5544 if (optname == TCP_BPF_SYN) { 5545 hdr_start = saved_syn->data + 5546 saved_syn->mac_hdrlen + 5547 saved_syn->network_hdrlen; 5548 ret = saved_syn->tcp_hdrlen; 5549 } else if (optname == TCP_BPF_SYN_IP) { 5550 hdr_start = saved_syn->data + 5551 saved_syn->mac_hdrlen; 5552 ret = saved_syn->network_hdrlen + 5553 saved_syn->tcp_hdrlen; 5554 } else { 5555 /* optname == TCP_BPF_SYN_MAC */ 5556 5557 /* TCP_SAVE_SYN may not have saved the mac hdr */ 5558 if (!saved_syn->mac_hdrlen) 5559 return -ENOENT; 5560 5561 hdr_start = saved_syn->data; 5562 ret = saved_syn->mac_hdrlen + 5563 saved_syn->network_hdrlen + 5564 saved_syn->tcp_hdrlen; 5565 } 5566 } 5567 5568 *start = hdr_start; 5569 return ret; 5570 } 5571 5572 BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, 5573 int, level, int, optname, char *, optval, int, optlen) 5574 { 5575 if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP && 5576 optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) { 5577 int ret, copy_len = 0; 5578 const u8 *start; 5579 5580 ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start); 5581 if (ret > 0) { 5582 copy_len = ret; 5583 if (optlen < copy_len) { 5584 copy_len = optlen; 5585 ret = -ENOSPC; 5586 } 5587 5588 memcpy(optval, start, copy_len); 5589 } 5590 5591 /* Zero out unused buffer at the end */ 5592 memset(optval + copy_len, 0, optlen - copy_len); 5593 5594 return ret; 5595 } 5596 5597 return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); 5598 } 5599 5600 static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = { 5601 .func = bpf_sock_ops_getsockopt, 5602 .gpl_only = false, 5603 .ret_type = RET_INTEGER, 5604 .arg1_type = ARG_PTR_TO_CTX, 5605 .arg2_type = ARG_ANYTHING, 5606 .arg3_type = ARG_ANYTHING, 5607 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 5608 .arg5_type = ARG_CONST_SIZE, 5609 }; 5610 5611 BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, 5612 int, argval) 5613 { 5614 struct sock *sk = bpf_sock->sk; 5615 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; 5616 5617 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) 5618 return -EINVAL; 5619 5620 tcp_sk(sk)->bpf_sock_ops_cb_flags = val; 5621 5622 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); 5623 } 5624 5625 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { 5626 .func = bpf_sock_ops_cb_flags_set, 5627 .gpl_only = false, 5628 .ret_type = RET_INTEGER, 5629 .arg1_type = ARG_PTR_TO_CTX, 5630 .arg2_type = ARG_ANYTHING, 5631 }; 5632 5633 const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; 5634 EXPORT_SYMBOL_GPL(ipv6_bpf_stub); 5635 5636 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, 5637 int, addr_len) 5638 { 5639 #ifdef CONFIG_INET 5640 struct sock *sk = ctx->sk; 5641 u32 flags = BIND_FROM_BPF; 5642 int err; 5643 5644 err = -EINVAL; 5645 if (addr_len < offsetofend(struct sockaddr, sa_family)) 5646 return err; 5647 if (addr->sa_family == AF_INET) { 5648 if (addr_len < sizeof(struct sockaddr_in)) 5649 return err; 5650 if (((struct sockaddr_in *)addr)->sin_port == htons(0)) 5651 flags |= BIND_FORCE_ADDRESS_NO_PORT; 5652 return __inet_bind(sk, addr, addr_len, flags); 5653 #if IS_ENABLED(CONFIG_IPV6) 5654 } else if (addr->sa_family == AF_INET6) { 5655 if (addr_len < SIN6_LEN_RFC2133) 5656 return err; 5657 if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0)) 5658 flags |= 
BIND_FORCE_ADDRESS_NO_PORT; 5659 /* ipv6_bpf_stub cannot be NULL, since it's called from 5660 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded 5661 */ 5662 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags); 5663 #endif /* CONFIG_IPV6 */ 5664 } 5665 #endif /* CONFIG_INET */ 5666 5667 return -EAFNOSUPPORT; 5668 } 5669 5670 static const struct bpf_func_proto bpf_bind_proto = { 5671 .func = bpf_bind, 5672 .gpl_only = false, 5673 .ret_type = RET_INTEGER, 5674 .arg1_type = ARG_PTR_TO_CTX, 5675 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 5676 .arg3_type = ARG_CONST_SIZE, 5677 }; 5678 5679 #ifdef CONFIG_XFRM 5680 BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, 5681 struct bpf_xfrm_state *, to, u32, size, u64, flags) 5682 { 5683 const struct sec_path *sp = skb_sec_path(skb); 5684 const struct xfrm_state *x; 5685 5686 if (!sp || unlikely(index >= sp->len || flags)) 5687 goto err_clear; 5688 5689 x = sp->xvec[index]; 5690 5691 if (unlikely(size != sizeof(struct bpf_xfrm_state))) 5692 goto err_clear; 5693 5694 to->reqid = x->props.reqid; 5695 to->spi = x->id.spi; 5696 to->family = x->props.family; 5697 to->ext = 0; 5698 5699 if (to->family == AF_INET6) { 5700 memcpy(to->remote_ipv6, x->props.saddr.a6, 5701 sizeof(to->remote_ipv6)); 5702 } else { 5703 to->remote_ipv4 = x->props.saddr.a4; 5704 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); 5705 } 5706 5707 return 0; 5708 err_clear: 5709 memset(to, 0, size); 5710 return -EINVAL; 5711 } 5712 5713 static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { 5714 .func = bpf_skb_get_xfrm_state, 5715 .gpl_only = false, 5716 .ret_type = RET_INTEGER, 5717 .arg1_type = ARG_PTR_TO_CTX, 5718 .arg2_type = ARG_ANYTHING, 5719 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 5720 .arg4_type = ARG_CONST_SIZE, 5721 .arg5_type = ARG_ANYTHING, 5722 }; 5723 #endif 5724 5725 #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) 5726 static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, 5727 const struct neighbour *neigh, 5728 const struct net_device *dev, u32 mtu) 5729 { 5730 memcpy(params->dmac, neigh->ha, ETH_ALEN); 5731 memcpy(params->smac, dev->dev_addr, ETH_ALEN); 5732 params->h_vlan_TCI = 0; 5733 params->h_vlan_proto = 0; 5734 if (mtu) 5735 params->mtu_result = mtu; /* union with tot_len */ 5736 5737 return 0; 5738 } 5739 #endif 5740 5741 #if IS_ENABLED(CONFIG_INET) 5742 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 5743 u32 flags, bool check_mtu) 5744 { 5745 struct fib_nh_common *nhc; 5746 struct in_device *in_dev; 5747 struct neighbour *neigh; 5748 struct net_device *dev; 5749 struct fib_result res; 5750 struct flowi4 fl4; 5751 u32 mtu = 0; 5752 int err; 5753 5754 dev = dev_get_by_index_rcu(net, params->ifindex); 5755 if (unlikely(!dev)) 5756 return -ENODEV; 5757 5758 /* verify forwarding is enabled on this interface */ 5759 in_dev = __in_dev_get_rcu(dev); 5760 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) 5761 return BPF_FIB_LKUP_RET_FWD_DISABLED; 5762 5763 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 5764 fl4.flowi4_iif = 1; 5765 fl4.flowi4_oif = params->ifindex; 5766 } else { 5767 fl4.flowi4_iif = params->ifindex; 5768 fl4.flowi4_oif = 0; 5769 } 5770 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; 5771 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 5772 fl4.flowi4_flags = 0; 5773 5774 fl4.flowi4_proto = params->l4_protocol; 5775 fl4.daddr = params->ipv4_dst; 5776 fl4.saddr = params->ipv4_src; 5777 fl4.fl4_sport = params->sport; 5778 fl4.fl4_dport = params->dport; 5779 fl4.flowi4_multipath_hash = 
0; 5780 5781 if (flags & BPF_FIB_LOOKUP_DIRECT) { 5782 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 5783 struct fib_table *tb; 5784 5785 tb = fib_get_table(net, tbid); 5786 if (unlikely(!tb)) 5787 return BPF_FIB_LKUP_RET_NOT_FWDED; 5788 5789 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); 5790 } else { 5791 fl4.flowi4_mark = 0; 5792 fl4.flowi4_secid = 0; 5793 fl4.flowi4_tun_key.tun_id = 0; 5794 fl4.flowi4_uid = sock_net_uid(net, NULL); 5795 5796 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); 5797 } 5798 5799 if (err) { 5800 /* map fib lookup errors to RTN_ type */ 5801 if (err == -EINVAL) 5802 return BPF_FIB_LKUP_RET_BLACKHOLE; 5803 if (err == -EHOSTUNREACH) 5804 return BPF_FIB_LKUP_RET_UNREACHABLE; 5805 if (err == -EACCES) 5806 return BPF_FIB_LKUP_RET_PROHIBIT; 5807 5808 return BPF_FIB_LKUP_RET_NOT_FWDED; 5809 } 5810 5811 if (res.type != RTN_UNICAST) 5812 return BPF_FIB_LKUP_RET_NOT_FWDED; 5813 5814 if (fib_info_num_path(res.fi) > 1) 5815 fib_select_path(net, &res, &fl4, NULL); 5816 5817 if (check_mtu) { 5818 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); 5819 if (params->tot_len > mtu) { 5820 params->mtu_result = mtu; /* union with tot_len */ 5821 return BPF_FIB_LKUP_RET_FRAG_NEEDED; 5822 } 5823 } 5824 5825 nhc = res.nhc; 5826 5827 /* do not handle lwt encaps right now */ 5828 if (nhc->nhc_lwtstate) 5829 return BPF_FIB_LKUP_RET_UNSUPP_LWT; 5830 5831 dev = nhc->nhc_dev; 5832 5833 params->rt_metric = res.fi->fib_priority; 5834 params->ifindex = dev->ifindex; 5835 5836 /* xdp and cls_bpf programs are run in RCU-bh so 5837 * rcu_read_lock_bh is not needed here 5838 */ 5839 if (likely(nhc->nhc_gw_family != AF_INET6)) { 5840 if (nhc->nhc_gw_family) 5841 params->ipv4_dst = nhc->nhc_gw.ipv4; 5842 5843 neigh = __ipv4_neigh_lookup_noref(dev, 5844 (__force u32)params->ipv4_dst); 5845 } else { 5846 struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst; 5847 5848 params->family = AF_INET6; 5849 *dst = nhc->nhc_gw.ipv6; 5850 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); 5851 } 5852 5853 if (!neigh) 5854 return BPF_FIB_LKUP_RET_NO_NEIGH; 5855 5856 return bpf_fib_set_fwd_params(params, neigh, dev, mtu); 5857 } 5858 #endif 5859 5860 #if IS_ENABLED(CONFIG_IPV6) 5861 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 5862 u32 flags, bool check_mtu) 5863 { 5864 struct in6_addr *src = (struct in6_addr *) params->ipv6_src; 5865 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst; 5866 struct fib6_result res = {}; 5867 struct neighbour *neigh; 5868 struct net_device *dev; 5869 struct inet6_dev *idev; 5870 struct flowi6 fl6; 5871 int strict = 0; 5872 int oif, err; 5873 u32 mtu = 0; 5874 5875 /* link local addresses are never forwarded */ 5876 if (rt6_need_strict(dst) || rt6_need_strict(src)) 5877 return BPF_FIB_LKUP_RET_NOT_FWDED; 5878 5879 dev = dev_get_by_index_rcu(net, params->ifindex); 5880 if (unlikely(!dev)) 5881 return -ENODEV; 5882 5883 idev = __in6_dev_get_safely(dev); 5884 if (unlikely(!idev || !idev->cnf.forwarding)) 5885 return BPF_FIB_LKUP_RET_FWD_DISABLED; 5886 5887 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 5888 fl6.flowi6_iif = 1; 5889 oif = fl6.flowi6_oif = params->ifindex; 5890 } else { 5891 oif = fl6.flowi6_iif = params->ifindex; 5892 fl6.flowi6_oif = 0; 5893 strict = RT6_LOOKUP_F_HAS_SADDR; 5894 } 5895 fl6.flowlabel = params->flowinfo; 5896 fl6.flowi6_scope = 0; 5897 fl6.flowi6_flags = 0; 5898 fl6.mp_hash = 0; 5899 5900 fl6.flowi6_proto = params->l4_protocol; 5901 fl6.daddr = *dst; 5902 fl6.saddr = *src; 5903 
fl6.fl6_sport = params->sport; 5904 fl6.fl6_dport = params->dport; 5905 5906 if (flags & BPF_FIB_LOOKUP_DIRECT) { 5907 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 5908 struct fib6_table *tb; 5909 5910 tb = ipv6_stub->fib6_get_table(net, tbid); 5911 if (unlikely(!tb)) 5912 return BPF_FIB_LKUP_RET_NOT_FWDED; 5913 5914 err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, 5915 strict); 5916 } else { 5917 fl6.flowi6_mark = 0; 5918 fl6.flowi6_secid = 0; 5919 fl6.flowi6_tun_key.tun_id = 0; 5920 fl6.flowi6_uid = sock_net_uid(net, NULL); 5921 5922 err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict); 5923 } 5924 5925 if (unlikely(err || IS_ERR_OR_NULL(res.f6i) || 5926 res.f6i == net->ipv6.fib6_null_entry)) 5927 return BPF_FIB_LKUP_RET_NOT_FWDED; 5928 5929 switch (res.fib6_type) { 5930 /* only unicast is forwarded */ 5931 case RTN_UNICAST: 5932 break; 5933 case RTN_BLACKHOLE: 5934 return BPF_FIB_LKUP_RET_BLACKHOLE; 5935 case RTN_UNREACHABLE: 5936 return BPF_FIB_LKUP_RET_UNREACHABLE; 5937 case RTN_PROHIBIT: 5938 return BPF_FIB_LKUP_RET_PROHIBIT; 5939 default: 5940 return BPF_FIB_LKUP_RET_NOT_FWDED; 5941 } 5942 5943 ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, 5944 fl6.flowi6_oif != 0, NULL, strict); 5945 5946 if (check_mtu) { 5947 mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); 5948 if (params->tot_len > mtu) { 5949 params->mtu_result = mtu; /* union with tot_len */ 5950 return BPF_FIB_LKUP_RET_FRAG_NEEDED; 5951 } 5952 } 5953 5954 if (res.nh->fib_nh_lws) 5955 return BPF_FIB_LKUP_RET_UNSUPP_LWT; 5956 5957 if (res.nh->fib_nh_gw_family) 5958 *dst = res.nh->fib_nh_gw6; 5959 5960 dev = res.nh->fib_nh_dev; 5961 params->rt_metric = res.f6i->fib6_metric; 5962 params->ifindex = dev->ifindex; 5963 5964 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is 5965 * not needed here. 
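* (Same reasoning as for the IPv4 path in bpf_ipv4_fib_lookup() above.)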
5966 */ 5967 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); 5968 if (!neigh) 5969 return BPF_FIB_LKUP_RET_NO_NEIGH; 5970 5971 return bpf_fib_set_fwd_params(params, neigh, dev, mtu); 5972 } 5973 #endif 5974 5975 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, 5976 struct bpf_fib_lookup *, params, int, plen, u32, flags) 5977 { 5978 if (plen < sizeof(*params)) 5979 return -EINVAL; 5980 5981 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) 5982 return -EINVAL; 5983 5984 switch (params->family) { 5985 #if IS_ENABLED(CONFIG_INET) 5986 case AF_INET: 5987 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params, 5988 flags, true); 5989 #endif 5990 #if IS_ENABLED(CONFIG_IPV6) 5991 case AF_INET6: 5992 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params, 5993 flags, true); 5994 #endif 5995 } 5996 return -EAFNOSUPPORT; 5997 } 5998 5999 static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = { 6000 .func = bpf_xdp_fib_lookup, 6001 .gpl_only = true, 6002 .ret_type = RET_INTEGER, 6003 .arg1_type = ARG_PTR_TO_CTX, 6004 .arg2_type = ARG_PTR_TO_MEM, 6005 .arg3_type = ARG_CONST_SIZE, 6006 .arg4_type = ARG_ANYTHING, 6007 }; 6008 6009 BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, 6010 struct bpf_fib_lookup *, params, int, plen, u32, flags) 6011 { 6012 struct net *net = dev_net(skb->dev); 6013 int rc = -EAFNOSUPPORT; 6014 bool check_mtu = false; 6015 6016 if (plen < sizeof(*params)) 6017 return -EINVAL; 6018 6019 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) 6020 return -EINVAL; 6021 6022 if (params->tot_len) 6023 check_mtu = true; 6024 6025 switch (params->family) { 6026 #if IS_ENABLED(CONFIG_INET) 6027 case AF_INET: 6028 rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu); 6029 break; 6030 #endif 6031 #if IS_ENABLED(CONFIG_IPV6) 6032 case AF_INET6: 6033 rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu); 6034 break; 6035 #endif 6036 } 6037 6038 if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) { 6039 struct net_device *dev; 6040 6041 /* When tot_len isn't provided by user, check skb 6042 * against MTU of FIB lookup resulting net_device 6043 */ 6044 dev = dev_get_by_index_rcu(net, params->ifindex); 6045 if (!is_skb_forwardable(dev, skb)) 6046 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; 6047 6048 params->mtu_result = dev->mtu; /* union with tot_len */ 6049 } 6050 6051 return rc; 6052 } 6053 6054 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { 6055 .func = bpf_skb_fib_lookup, 6056 .gpl_only = true, 6057 .ret_type = RET_INTEGER, 6058 .arg1_type = ARG_PTR_TO_CTX, 6059 .arg2_type = ARG_PTR_TO_MEM, 6060 .arg3_type = ARG_CONST_SIZE, 6061 .arg4_type = ARG_ANYTHING, 6062 }; 6063 6064 static struct net_device *__dev_via_ifindex(struct net_device *dev_curr, 6065 u32 ifindex) 6066 { 6067 struct net *netns = dev_net(dev_curr); 6068 6069 /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */ 6070 if (ifindex == 0) 6071 return dev_curr; 6072 6073 return dev_get_by_index_rcu(netns, ifindex); 6074 } 6075 6076 BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb, 6077 u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) 6078 { 6079 int ret = BPF_MTU_CHK_RET_FRAG_NEEDED; 6080 struct net_device *dev = skb->dev; 6081 int skb_len, dev_len; 6082 int mtu; 6083 6084 if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) 6085 return -EINVAL; 6086 6087 if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) 6088 return -EINVAL; 6089 6090 dev = __dev_via_ifindex(dev, ifindex); 6091 if (unlikely(!dev)) 6092 return -ENODEV; 6093 6094 mtu = 
READ_ONCE(dev->mtu); 6095 6096 dev_len = mtu + dev->hard_header_len; 6097 6098 /* If set, use *mtu_len as input; it is an L3 length like iph->tot_len (as in fib_lookup) */ 6099 skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len; 6100 6101 skb_len += len_diff; /* a negative len_diff can make the check pass */ 6102 if (skb_len <= dev_len) { 6103 ret = BPF_MTU_CHK_RET_SUCCESS; 6104 goto out; 6105 } 6106 /* At this point, skb->len exceeds the MTU, but as it includes the length 6107 * of all segments, it can still be below the MTU. The SKB can possibly get 6108 * re-segmented in the transmit path (see validate_xmit_skb). Thus, the user 6109 * must choose whether segs are to be MTU checked. 6110 */ 6111 if (skb_is_gso(skb)) { 6112 ret = BPF_MTU_CHK_RET_SUCCESS; 6113 6114 if (flags & BPF_MTU_CHK_SEGS && 6115 !skb_gso_validate_network_len(skb, mtu)) 6116 ret = BPF_MTU_CHK_RET_SEGS_TOOBIG; 6117 } 6118 out: 6119 /* The BPF verifier guarantees a valid pointer */ 6120 *mtu_len = mtu; 6121 6122 return ret; 6123 } 6124 6125 BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp, 6126 u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags) 6127 { 6128 struct net_device *dev = xdp->rxq->dev; 6129 int xdp_len = xdp->data_end - xdp->data; 6130 int ret = BPF_MTU_CHK_RET_SUCCESS; 6131 int mtu, dev_len; 6132 6133 /* XDP variant doesn't support multi-buffer segment check (yet) */ 6134 if (unlikely(flags)) 6135 return -EINVAL; 6136 6137 dev = __dev_via_ifindex(dev, ifindex); 6138 if (unlikely(!dev)) 6139 return -ENODEV; 6140 6141 mtu = READ_ONCE(dev->mtu); 6142 6143 /* Add the L2 header, as the dev MTU is an L3 size */ 6144 dev_len = mtu + dev->hard_header_len; 6145 6146 /* Use *mtu_len (if set) as input; it is an L3 length like iph->tot_len (as in fib_lookup) */ 6147 if (*mtu_len) 6148 xdp_len = *mtu_len + dev->hard_header_len; 6149 6150 xdp_len += len_diff; /* a negative len_diff can make the check pass */ 6151 if (xdp_len > dev_len) 6152 ret = BPF_MTU_CHK_RET_FRAG_NEEDED; 6153 6154 /* The BPF verifier guarantees a valid pointer */ 6155 *mtu_len = mtu; 6156 6157 return ret; 6158 } 6159 6160 static const struct bpf_func_proto bpf_skb_check_mtu_proto = { 6161 .func = bpf_skb_check_mtu, 6162 .gpl_only = true, 6163 .ret_type = RET_INTEGER, 6164 .arg1_type = ARG_PTR_TO_CTX, 6165 .arg2_type = ARG_ANYTHING, 6166 .arg3_type = ARG_PTR_TO_INT, 6167 .arg4_type = ARG_ANYTHING, 6168 .arg5_type = ARG_ANYTHING, 6169 }; 6170 6171 static const struct bpf_func_proto bpf_xdp_check_mtu_proto = { 6172 .func = bpf_xdp_check_mtu, 6173 .gpl_only = true, 6174 .ret_type = RET_INTEGER, 6175 .arg1_type = ARG_PTR_TO_CTX, 6176 .arg2_type = ARG_ANYTHING, 6177 .arg3_type = ARG_PTR_TO_INT, 6178 .arg4_type = ARG_ANYTHING, 6179 .arg5_type = ARG_ANYTHING, 6180 }; 6181 6182 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 6183 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) 6184 { 6185 int err; 6186 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr; 6187 6188 if (!seg6_validate_srh(srh, len, false)) 6189 return -EINVAL; 6190 6191 switch (type) { 6192 case BPF_LWT_ENCAP_SEG6_INLINE: 6193 if (skb->protocol != htons(ETH_P_IPV6)) 6194 return -EBADMSG; 6195 6196 err = seg6_do_srh_inline(skb, srh); 6197 break; 6198 case BPF_LWT_ENCAP_SEG6: 6199 skb_reset_inner_headers(skb); 6200 skb->encapsulation = 1; 6201 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6); 6202 break; 6203 default: 6204 return -EINVAL; 6205 } 6206 6207 bpf_compute_data_pointers(skb); 6208 if (err) 6209 return err; 6210 6211 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 6212 6213 return seg6_lookup_nexthop(skb, NULL, 0); 6214 } 6215 #endif /* CONFIG_IPV6_SEG6_BPF */ 6216 6217 #if
IS_ENABLED(CONFIG_LWTUNNEL_BPF) 6218 static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, 6219 bool ingress) 6220 { 6221 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); 6222 } 6223 #endif 6224 6225 BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, 6226 u32, len) 6227 { 6228 switch (type) { 6229 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 6230 case BPF_LWT_ENCAP_SEG6: 6231 case BPF_LWT_ENCAP_SEG6_INLINE: 6232 return bpf_push_seg6_encap(skb, type, hdr, len); 6233 #endif 6234 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) 6235 case BPF_LWT_ENCAP_IP: 6236 return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); 6237 #endif 6238 default: 6239 return -EINVAL; 6240 } 6241 } 6242 6243 BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, 6244 void *, hdr, u32, len) 6245 { 6246 switch (type) { 6247 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) 6248 case BPF_LWT_ENCAP_IP: 6249 return bpf_push_ip_encap(skb, hdr, len, false /* egress */); 6250 #endif 6251 default: 6252 return -EINVAL; 6253 } 6254 } 6255 6256 static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { 6257 .func = bpf_lwt_in_push_encap, 6258 .gpl_only = false, 6259 .ret_type = RET_INTEGER, 6260 .arg1_type = ARG_PTR_TO_CTX, 6261 .arg2_type = ARG_ANYTHING, 6262 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6263 .arg4_type = ARG_CONST_SIZE 6264 }; 6265 6266 static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { 6267 .func = bpf_lwt_xmit_push_encap, 6268 .gpl_only = false, 6269 .ret_type = RET_INTEGER, 6270 .arg1_type = ARG_PTR_TO_CTX, 6271 .arg2_type = ARG_ANYTHING, 6272 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6273 .arg4_type = ARG_CONST_SIZE 6274 }; 6275 6276 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 6277 BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, 6278 const void *, from, u32, len) 6279 { 6280 struct seg6_bpf_srh_state *srh_state = 6281 this_cpu_ptr(&seg6_bpf_srh_states); 6282 struct ipv6_sr_hdr *srh = srh_state->srh; 6283 void *srh_tlvs, *srh_end, *ptr; 6284 int srhoff = 0; 6285 6286 if (srh == NULL) 6287 return -EINVAL; 6288 6289 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); 6290 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); 6291 6292 ptr = skb->data + offset; 6293 if (ptr >= srh_tlvs && ptr + len <= srh_end) 6294 srh_state->valid = false; 6295 else if (ptr < (void *)&srh->flags || 6296 ptr + len > (void *)&srh->segments) 6297 return -EFAULT; 6298 6299 if (unlikely(bpf_try_make_writable(skb, offset + len))) 6300 return -EFAULT; 6301 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) 6302 return -EINVAL; 6303 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 6304 6305 memcpy(skb->data + offset, from, len); 6306 return 0; 6307 } 6308 6309 static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { 6310 .func = bpf_lwt_seg6_store_bytes, 6311 .gpl_only = false, 6312 .ret_type = RET_INTEGER, 6313 .arg1_type = ARG_PTR_TO_CTX, 6314 .arg2_type = ARG_ANYTHING, 6315 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6316 .arg4_type = ARG_CONST_SIZE 6317 }; 6318 6319 static void bpf_update_srh_state(struct sk_buff *skb) 6320 { 6321 struct seg6_bpf_srh_state *srh_state = 6322 this_cpu_ptr(&seg6_bpf_srh_states); 6323 int srhoff = 0; 6324 6325 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { 6326 srh_state->srh = NULL; 6327 } else { 6328 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 6329 srh_state->hdrlen = srh_state->srh->hdrlen << 3; 6330 srh_state->valid 
= true; 6331 } 6332 } 6333 6334 BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, 6335 u32, action, void *, param, u32, param_len) 6336 { 6337 struct seg6_bpf_srh_state *srh_state = 6338 this_cpu_ptr(&seg6_bpf_srh_states); 6339 int hdroff = 0; 6340 int err; 6341 6342 switch (action) { 6343 case SEG6_LOCAL_ACTION_END_X: 6344 if (!seg6_bpf_has_valid_srh(skb)) 6345 return -EBADMSG; 6346 if (param_len != sizeof(struct in6_addr)) 6347 return -EINVAL; 6348 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); 6349 case SEG6_LOCAL_ACTION_END_T: 6350 if (!seg6_bpf_has_valid_srh(skb)) 6351 return -EBADMSG; 6352 if (param_len != sizeof(int)) 6353 return -EINVAL; 6354 return seg6_lookup_nexthop(skb, NULL, *(int *)param); 6355 case SEG6_LOCAL_ACTION_END_DT6: 6356 if (!seg6_bpf_has_valid_srh(skb)) 6357 return -EBADMSG; 6358 if (param_len != sizeof(int)) 6359 return -EINVAL; 6360 6361 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) 6362 return -EBADMSG; 6363 if (!pskb_pull(skb, hdroff)) 6364 return -EBADMSG; 6365 6366 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); 6367 skb_reset_network_header(skb); 6368 skb_reset_transport_header(skb); 6369 skb->encapsulation = 0; 6370 6371 bpf_compute_data_pointers(skb); 6372 bpf_update_srh_state(skb); 6373 return seg6_lookup_nexthop(skb, NULL, *(int *)param); 6374 case SEG6_LOCAL_ACTION_END_B6: 6375 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 6376 return -EBADMSG; 6377 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, 6378 param, param_len); 6379 if (!err) 6380 bpf_update_srh_state(skb); 6381 6382 return err; 6383 case SEG6_LOCAL_ACTION_END_B6_ENCAP: 6384 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 6385 return -EBADMSG; 6386 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, 6387 param, param_len); 6388 if (!err) 6389 bpf_update_srh_state(skb); 6390 6391 return err; 6392 default: 6393 return -EINVAL; 6394 } 6395 } 6396 6397 static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { 6398 .func = bpf_lwt_seg6_action, 6399 .gpl_only = false, 6400 .ret_type = RET_INTEGER, 6401 .arg1_type = ARG_PTR_TO_CTX, 6402 .arg2_type = ARG_ANYTHING, 6403 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6404 .arg4_type = ARG_CONST_SIZE 6405 }; 6406 6407 BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, 6408 s32, len) 6409 { 6410 struct seg6_bpf_srh_state *srh_state = 6411 this_cpu_ptr(&seg6_bpf_srh_states); 6412 struct ipv6_sr_hdr *srh = srh_state->srh; 6413 void *srh_end, *srh_tlvs, *ptr; 6414 struct ipv6hdr *hdr; 6415 int srhoff = 0; 6416 int ret; 6417 6418 if (unlikely(srh == NULL)) 6419 return -EINVAL; 6420 6421 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + 6422 ((srh->first_segment + 1) << 4)); 6423 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) + 6424 srh_state->hdrlen); 6425 ptr = skb->data + offset; 6426 6427 if (unlikely(ptr < srh_tlvs || ptr > srh_end)) 6428 return -EFAULT; 6429 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end)) 6430 return -EFAULT; 6431 6432 if (len > 0) { 6433 ret = skb_cow_head(skb, len); 6434 if (unlikely(ret < 0)) 6435 return ret; 6436 6437 ret = bpf_skb_net_hdr_push(skb, offset, len); 6438 } else { 6439 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len); 6440 } 6441 6442 bpf_compute_data_pointers(skb); 6443 if (unlikely(ret < 0)) 6444 return ret; 6445 6446 hdr = (struct ipv6hdr *)skb->data; 6447 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 6448 6449 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) 6450 
return -EINVAL; 6451 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 6452 srh_state->hdrlen += len; 6453 srh_state->valid = false; 6454 return 0; 6455 } 6456 6457 static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { 6458 .func = bpf_lwt_seg6_adjust_srh, 6459 .gpl_only = false, 6460 .ret_type = RET_INTEGER, 6461 .arg1_type = ARG_PTR_TO_CTX, 6462 .arg2_type = ARG_ANYTHING, 6463 .arg3_type = ARG_ANYTHING, 6464 }; 6465 #endif /* CONFIG_IPV6_SEG6_BPF */ 6466 6467 #ifdef CONFIG_INET 6468 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, 6469 int dif, int sdif, u8 family, u8 proto) 6470 { 6471 bool refcounted = false; 6472 struct sock *sk = NULL; 6473 6474 if (family == AF_INET) { 6475 __be32 src4 = tuple->ipv4.saddr; 6476 __be32 dst4 = tuple->ipv4.daddr; 6477 6478 if (proto == IPPROTO_TCP) 6479 sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0, 6480 src4, tuple->ipv4.sport, 6481 dst4, tuple->ipv4.dport, 6482 dif, sdif, &refcounted); 6483 else 6484 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, 6485 dst4, tuple->ipv4.dport, 6486 dif, sdif, &udp_table, NULL); 6487 #if IS_ENABLED(CONFIG_IPV6) 6488 } else { 6489 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; 6490 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; 6491 6492 if (proto == IPPROTO_TCP) 6493 sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0, 6494 src6, tuple->ipv6.sport, 6495 dst6, ntohs(tuple->ipv6.dport), 6496 dif, sdif, &refcounted); 6497 else if (likely(ipv6_bpf_stub)) 6498 sk = ipv6_bpf_stub->udp6_lib_lookup(net, 6499 src6, tuple->ipv6.sport, 6500 dst6, tuple->ipv6.dport, 6501 dif, sdif, 6502 &udp_table, NULL); 6503 #endif 6504 } 6505 6506 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { 6507 WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); 6508 sk = NULL; 6509 } 6510 return sk; 6511 } 6512 6513 /* bpf_skc_lookup performs the core lookup for different types of sockets, 6514 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. 
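 *
 * For illustration only (not part of this file), a tc or XDP program pairs
 * these lookups with bpf_sk_release(); the helper names follow the UAPI and
 * the tuple is assumed to have been filled from the parsed packet:
 *
 *	struct bpf_sock_tuple tuple = {};	// fill ipv4.saddr/daddr/sport/dport
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
 *			       BPF_F_CURRENT_NETNS, 0);
 *	if (sk) {
 *		// e.g. check sk->state before acting on it
 *		bpf_sk_release(sk);
 *	}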
6515 */ 6516 static struct sock * 6517 __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6518 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, 6519 u64 flags) 6520 { 6521 struct sock *sk = NULL; 6522 struct net *net; 6523 u8 family; 6524 int sdif; 6525 6526 if (len == sizeof(tuple->ipv4)) 6527 family = AF_INET; 6528 else if (len == sizeof(tuple->ipv6)) 6529 family = AF_INET6; 6530 else 6531 return NULL; 6532 6533 if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) 6534 goto out; 6535 6536 if (family == AF_INET) 6537 sdif = inet_sdif(skb); 6538 else 6539 sdif = inet6_sdif(skb); 6540 6541 if ((s32)netns_id < 0) { 6542 net = caller_net; 6543 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); 6544 } else { 6545 net = get_net_ns_by_id(caller_net, netns_id); 6546 if (unlikely(!net)) 6547 goto out; 6548 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); 6549 put_net(net); 6550 } 6551 6552 out: 6553 return sk; 6554 } 6555 6556 static struct sock * 6557 __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6558 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, 6559 u64 flags) 6560 { 6561 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, 6562 ifindex, proto, netns_id, flags); 6563 6564 if (sk) { 6565 struct sock *sk2 = sk_to_full_sk(sk); 6566 6567 /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk 6568 * sock refcnt is decremented to prevent a request_sock leak. 6569 */ 6570 if (!sk_fullsock(sk2)) 6571 sk2 = NULL; 6572 if (sk2 != sk) { 6573 sock_gen_put(sk); 6574 /* Ensure there is no need to bump sk2 refcnt */ 6575 if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { 6576 WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); 6577 return NULL; 6578 } 6579 sk = sk2; 6580 } 6581 } 6582 6583 return sk; 6584 } 6585 6586 static struct sock * 6587 bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6588 u8 proto, u64 netns_id, u64 flags) 6589 { 6590 struct net *caller_net; 6591 int ifindex; 6592 6593 if (skb->dev) { 6594 caller_net = dev_net(skb->dev); 6595 ifindex = skb->dev->ifindex; 6596 } else { 6597 caller_net = sock_net(skb->sk); 6598 ifindex = 0; 6599 } 6600 6601 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, 6602 netns_id, flags); 6603 } 6604 6605 static struct sock * 6606 bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6607 u8 proto, u64 netns_id, u64 flags) 6608 { 6609 struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id, 6610 flags); 6611 6612 if (sk) { 6613 struct sock *sk2 = sk_to_full_sk(sk); 6614 6615 /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk 6616 * sock refcnt is decremented to prevent a request_sock leak. 
6617 */ 6618 if (!sk_fullsock(sk2)) 6619 sk2 = NULL; 6620 if (sk2 != sk) { 6621 sock_gen_put(sk); 6622 /* Ensure there is no need to bump sk2 refcnt */ 6623 if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) { 6624 WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); 6625 return NULL; 6626 } 6627 sk = sk2; 6628 } 6629 } 6630 6631 return sk; 6632 } 6633 6634 BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb, 6635 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6636 { 6637 return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP, 6638 netns_id, flags); 6639 } 6640 6641 static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { 6642 .func = bpf_skc_lookup_tcp, 6643 .gpl_only = false, 6644 .pkt_access = true, 6645 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, 6646 .arg1_type = ARG_PTR_TO_CTX, 6647 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6648 .arg3_type = ARG_CONST_SIZE, 6649 .arg4_type = ARG_ANYTHING, 6650 .arg5_type = ARG_ANYTHING, 6651 }; 6652 6653 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, 6654 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6655 { 6656 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, 6657 netns_id, flags); 6658 } 6659 6660 static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { 6661 .func = bpf_sk_lookup_tcp, 6662 .gpl_only = false, 6663 .pkt_access = true, 6664 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6665 .arg1_type = ARG_PTR_TO_CTX, 6666 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6667 .arg3_type = ARG_CONST_SIZE, 6668 .arg4_type = ARG_ANYTHING, 6669 .arg5_type = ARG_ANYTHING, 6670 }; 6671 6672 BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, 6673 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6674 { 6675 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, 6676 netns_id, flags); 6677 } 6678 6679 static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { 6680 .func = bpf_sk_lookup_udp, 6681 .gpl_only = false, 6682 .pkt_access = true, 6683 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6684 .arg1_type = ARG_PTR_TO_CTX, 6685 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6686 .arg3_type = ARG_CONST_SIZE, 6687 .arg4_type = ARG_ANYTHING, 6688 .arg5_type = ARG_ANYTHING, 6689 }; 6690 6691 BPF_CALL_1(bpf_sk_release, struct sock *, sk) 6692 { 6693 if (sk && sk_is_refcounted(sk)) 6694 sock_gen_put(sk); 6695 return 0; 6696 } 6697 6698 static const struct bpf_func_proto bpf_sk_release_proto = { 6699 .func = bpf_sk_release, 6700 .gpl_only = false, 6701 .ret_type = RET_INTEGER, 6702 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE, 6703 }; 6704 6705 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, 6706 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6707 { 6708 struct net *caller_net = dev_net(ctx->rxq->dev); 6709 int ifindex = ctx->rxq->dev->ifindex; 6710 6711 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, 6712 ifindex, IPPROTO_UDP, netns_id, 6713 flags); 6714 } 6715 6716 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { 6717 .func = bpf_xdp_sk_lookup_udp, 6718 .gpl_only = false, 6719 .pkt_access = true, 6720 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6721 .arg1_type = ARG_PTR_TO_CTX, 6722 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6723 .arg3_type = ARG_CONST_SIZE, 6724 .arg4_type = ARG_ANYTHING, 6725 .arg5_type = ARG_ANYTHING, 6726 }; 6727 6728 BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, 6729 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6730 { 6731 
struct net *caller_net = dev_net(ctx->rxq->dev); 6732 int ifindex = ctx->rxq->dev->ifindex; 6733 6734 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, 6735 ifindex, IPPROTO_TCP, netns_id, 6736 flags); 6737 } 6738 6739 static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { 6740 .func = bpf_xdp_skc_lookup_tcp, 6741 .gpl_only = false, 6742 .pkt_access = true, 6743 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, 6744 .arg1_type = ARG_PTR_TO_CTX, 6745 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6746 .arg3_type = ARG_CONST_SIZE, 6747 .arg4_type = ARG_ANYTHING, 6748 .arg5_type = ARG_ANYTHING, 6749 }; 6750 6751 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, 6752 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6753 { 6754 struct net *caller_net = dev_net(ctx->rxq->dev); 6755 int ifindex = ctx->rxq->dev->ifindex; 6756 6757 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, 6758 ifindex, IPPROTO_TCP, netns_id, 6759 flags); 6760 } 6761 6762 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { 6763 .func = bpf_xdp_sk_lookup_tcp, 6764 .gpl_only = false, 6765 .pkt_access = true, 6766 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6767 .arg1_type = ARG_PTR_TO_CTX, 6768 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6769 .arg3_type = ARG_CONST_SIZE, 6770 .arg4_type = ARG_ANYTHING, 6771 .arg5_type = ARG_ANYTHING, 6772 }; 6773 6774 BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, 6775 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6776 { 6777 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, 6778 sock_net(ctx->sk), 0, 6779 IPPROTO_TCP, netns_id, flags); 6780 } 6781 6782 static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { 6783 .func = bpf_sock_addr_skc_lookup_tcp, 6784 .gpl_only = false, 6785 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, 6786 .arg1_type = ARG_PTR_TO_CTX, 6787 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6788 .arg3_type = ARG_CONST_SIZE, 6789 .arg4_type = ARG_ANYTHING, 6790 .arg5_type = ARG_ANYTHING, 6791 }; 6792 6793 BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, 6794 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6795 { 6796 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, 6797 sock_net(ctx->sk), 0, IPPROTO_TCP, 6798 netns_id, flags); 6799 } 6800 6801 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { 6802 .func = bpf_sock_addr_sk_lookup_tcp, 6803 .gpl_only = false, 6804 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6805 .arg1_type = ARG_PTR_TO_CTX, 6806 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6807 .arg3_type = ARG_CONST_SIZE, 6808 .arg4_type = ARG_ANYTHING, 6809 .arg5_type = ARG_ANYTHING, 6810 }; 6811 6812 BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, 6813 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6814 { 6815 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, 6816 sock_net(ctx->sk), 0, IPPROTO_UDP, 6817 netns_id, flags); 6818 } 6819 6820 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { 6821 .func = bpf_sock_addr_sk_lookup_udp, 6822 .gpl_only = false, 6823 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6824 .arg1_type = ARG_PTR_TO_CTX, 6825 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6826 .arg3_type = ARG_CONST_SIZE, 6827 .arg4_type = ARG_ANYTHING, 6828 .arg5_type = ARG_ANYTHING, 6829 }; 6830 6831 bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, 6832 struct 
bpf_insn_access_aux *info) 6833 { 6834 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, 6835 icsk_retransmits)) 6836 return false; 6837 6838 if (off % size != 0) 6839 return false; 6840 6841 switch (off) { 6842 case offsetof(struct bpf_tcp_sock, bytes_received): 6843 case offsetof(struct bpf_tcp_sock, bytes_acked): 6844 return size == sizeof(__u64); 6845 default: 6846 return size == sizeof(__u32); 6847 } 6848 } 6849 6850 u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, 6851 const struct bpf_insn *si, 6852 struct bpf_insn *insn_buf, 6853 struct bpf_prog *prog, u32 *target_size) 6854 { 6855 struct bpf_insn *insn = insn_buf; 6856 6857 #define BPF_TCP_SOCK_GET_COMMON(FIELD) \ 6858 do { \ 6859 BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \ 6860 sizeof_field(struct bpf_tcp_sock, FIELD)); \ 6861 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ 6862 si->dst_reg, si->src_reg, \ 6863 offsetof(struct tcp_sock, FIELD)); \ 6864 } while (0) 6865 6866 #define BPF_INET_SOCK_GET_COMMON(FIELD) \ 6867 do { \ 6868 BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \ 6869 FIELD) > \ 6870 sizeof_field(struct bpf_tcp_sock, FIELD)); \ 6871 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 6872 struct inet_connection_sock, \ 6873 FIELD), \ 6874 si->dst_reg, si->src_reg, \ 6875 offsetof( \ 6876 struct inet_connection_sock, \ 6877 FIELD)); \ 6878 } while (0) 6879 6880 if (insn > insn_buf) 6881 return insn - insn_buf; 6882 6883 switch (si->off) { 6884 case offsetof(struct bpf_tcp_sock, rtt_min): 6885 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) != 6886 sizeof(struct minmax)); 6887 BUILD_BUG_ON(sizeof(struct minmax) < 6888 sizeof(struct minmax_sample)); 6889 6890 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6891 offsetof(struct tcp_sock, rtt_min) + 6892 offsetof(struct minmax_sample, v)); 6893 break; 6894 case offsetof(struct bpf_tcp_sock, snd_cwnd): 6895 BPF_TCP_SOCK_GET_COMMON(snd_cwnd); 6896 break; 6897 case offsetof(struct bpf_tcp_sock, srtt_us): 6898 BPF_TCP_SOCK_GET_COMMON(srtt_us); 6899 break; 6900 case offsetof(struct bpf_tcp_sock, snd_ssthresh): 6901 BPF_TCP_SOCK_GET_COMMON(snd_ssthresh); 6902 break; 6903 case offsetof(struct bpf_tcp_sock, rcv_nxt): 6904 BPF_TCP_SOCK_GET_COMMON(rcv_nxt); 6905 break; 6906 case offsetof(struct bpf_tcp_sock, snd_nxt): 6907 BPF_TCP_SOCK_GET_COMMON(snd_nxt); 6908 break; 6909 case offsetof(struct bpf_tcp_sock, snd_una): 6910 BPF_TCP_SOCK_GET_COMMON(snd_una); 6911 break; 6912 case offsetof(struct bpf_tcp_sock, mss_cache): 6913 BPF_TCP_SOCK_GET_COMMON(mss_cache); 6914 break; 6915 case offsetof(struct bpf_tcp_sock, ecn_flags): 6916 BPF_TCP_SOCK_GET_COMMON(ecn_flags); 6917 break; 6918 case offsetof(struct bpf_tcp_sock, rate_delivered): 6919 BPF_TCP_SOCK_GET_COMMON(rate_delivered); 6920 break; 6921 case offsetof(struct bpf_tcp_sock, rate_interval_us): 6922 BPF_TCP_SOCK_GET_COMMON(rate_interval_us); 6923 break; 6924 case offsetof(struct bpf_tcp_sock, packets_out): 6925 BPF_TCP_SOCK_GET_COMMON(packets_out); 6926 break; 6927 case offsetof(struct bpf_tcp_sock, retrans_out): 6928 BPF_TCP_SOCK_GET_COMMON(retrans_out); 6929 break; 6930 case offsetof(struct bpf_tcp_sock, total_retrans): 6931 BPF_TCP_SOCK_GET_COMMON(total_retrans); 6932 break; 6933 case offsetof(struct bpf_tcp_sock, segs_in): 6934 BPF_TCP_SOCK_GET_COMMON(segs_in); 6935 break; 6936 case offsetof(struct bpf_tcp_sock, data_segs_in): 6937 BPF_TCP_SOCK_GET_COMMON(data_segs_in); 6938 break; 6939 case offsetof(struct bpf_tcp_sock, segs_out): 6940 BPF_TCP_SOCK_GET_COMMON(segs_out); 
6941 break; 6942 case offsetof(struct bpf_tcp_sock, data_segs_out): 6943 BPF_TCP_SOCK_GET_COMMON(data_segs_out); 6944 break; 6945 case offsetof(struct bpf_tcp_sock, lost_out): 6946 BPF_TCP_SOCK_GET_COMMON(lost_out); 6947 break; 6948 case offsetof(struct bpf_tcp_sock, sacked_out): 6949 BPF_TCP_SOCK_GET_COMMON(sacked_out); 6950 break; 6951 case offsetof(struct bpf_tcp_sock, bytes_received): 6952 BPF_TCP_SOCK_GET_COMMON(bytes_received); 6953 break; 6954 case offsetof(struct bpf_tcp_sock, bytes_acked): 6955 BPF_TCP_SOCK_GET_COMMON(bytes_acked); 6956 break; 6957 case offsetof(struct bpf_tcp_sock, dsack_dups): 6958 BPF_TCP_SOCK_GET_COMMON(dsack_dups); 6959 break; 6960 case offsetof(struct bpf_tcp_sock, delivered): 6961 BPF_TCP_SOCK_GET_COMMON(delivered); 6962 break; 6963 case offsetof(struct bpf_tcp_sock, delivered_ce): 6964 BPF_TCP_SOCK_GET_COMMON(delivered_ce); 6965 break; 6966 case offsetof(struct bpf_tcp_sock, icsk_retransmits): 6967 BPF_INET_SOCK_GET_COMMON(icsk_retransmits); 6968 break; 6969 } 6970 6971 return insn - insn_buf; 6972 } 6973 6974 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) 6975 { 6976 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 6977 return (unsigned long)sk; 6978 6979 return (unsigned long)NULL; 6980 } 6981 6982 const struct bpf_func_proto bpf_tcp_sock_proto = { 6983 .func = bpf_tcp_sock, 6984 .gpl_only = false, 6985 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, 6986 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 6987 }; 6988 6989 BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) 6990 { 6991 sk = sk_to_full_sk(sk); 6992 6993 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) 6994 return (unsigned long)sk; 6995 6996 return (unsigned long)NULL; 6997 } 6998 6999 static const struct bpf_func_proto bpf_get_listener_sock_proto = { 7000 .func = bpf_get_listener_sock, 7001 .gpl_only = false, 7002 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 7003 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 7004 }; 7005 7006 BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) 7007 { 7008 unsigned int iphdr_len; 7009 7010 switch (skb_protocol(skb, true)) { 7011 case cpu_to_be16(ETH_P_IP): 7012 iphdr_len = sizeof(struct iphdr); 7013 break; 7014 case cpu_to_be16(ETH_P_IPV6): 7015 iphdr_len = sizeof(struct ipv6hdr); 7016 break; 7017 default: 7018 return 0; 7019 } 7020 7021 if (skb_headlen(skb) < iphdr_len) 7022 return 0; 7023 7024 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len)) 7025 return 0; 7026 7027 return INET_ECN_set_ce(skb); 7028 } 7029 7030 bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, 7031 struct bpf_insn_access_aux *info) 7032 { 7033 if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id)) 7034 return false; 7035 7036 if (off % size != 0) 7037 return false; 7038 7039 switch (off) { 7040 default: 7041 return size == sizeof(__u32); 7042 } 7043 } 7044 7045 u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, 7046 const struct bpf_insn *si, 7047 struct bpf_insn *insn_buf, 7048 struct bpf_prog *prog, u32 *target_size) 7049 { 7050 struct bpf_insn *insn = insn_buf; 7051 7052 #define BPF_XDP_SOCK_GET(FIELD) \ 7053 do { \ 7054 BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \ 7055 sizeof_field(struct bpf_xdp_sock, FIELD)); \ 7056 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\ 7057 si->dst_reg, si->src_reg, \ 7058 offsetof(struct xdp_sock, FIELD)); \ 7059 } while (0) 7060 7061 switch (si->off) { 7062 case offsetof(struct bpf_xdp_sock, queue_id): 7063 BPF_XDP_SOCK_GET(queue_id); 7064 break; 7065 } 7066 7067 return insn - 
insn_buf; 7068 } 7069 7070 static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { 7071 .func = bpf_skb_ecn_set_ce, 7072 .gpl_only = false, 7073 .ret_type = RET_INTEGER, 7074 .arg1_type = ARG_PTR_TO_CTX, 7075 }; 7076 7077 BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len, 7078 struct tcphdr *, th, u32, th_len) 7079 { 7080 #ifdef CONFIG_SYN_COOKIES 7081 u32 cookie; 7082 int ret; 7083 7084 if (unlikely(!sk || th_len < sizeof(*th))) 7085 return -EINVAL; 7086 7087 /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */ 7088 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) 7089 return -EINVAL; 7090 7091 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) 7092 return -EINVAL; 7093 7094 if (!th->ack || th->rst || th->syn) 7095 return -ENOENT; 7096 7097 if (unlikely(iph_len < sizeof(struct iphdr))) 7098 return -EINVAL; 7099 7100 if (tcp_synq_no_recent_overflow(sk)) 7101 return -ENOENT; 7102 7103 cookie = ntohl(th->ack_seq) - 1; 7104 7105 /* Both struct iphdr and struct ipv6hdr have the version field at the 7106 * same offset so we can cast to the shorter header (struct iphdr). 7107 */ 7108 switch (((struct iphdr *)iph)->version) { 7109 case 4: 7110 if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) 7111 return -EINVAL; 7112 7113 ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); 7114 break; 7115 7116 #if IS_BUILTIN(CONFIG_IPV6) 7117 case 6: 7118 if (unlikely(iph_len < sizeof(struct ipv6hdr))) 7119 return -EINVAL; 7120 7121 if (sk->sk_family != AF_INET6) 7122 return -EINVAL; 7123 7124 ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); 7125 break; 7126 #endif /* CONFIG_IPV6 */ 7127 7128 default: 7129 return -EPROTONOSUPPORT; 7130 } 7131 7132 if (ret > 0) 7133 return 0; 7134 7135 return -ENOENT; 7136 #else 7137 return -ENOTSUPP; 7138 #endif 7139 } 7140 7141 static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { 7142 .func = bpf_tcp_check_syncookie, 7143 .gpl_only = true, 7144 .pkt_access = true, 7145 .ret_type = RET_INTEGER, 7146 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 7147 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 7148 .arg3_type = ARG_CONST_SIZE, 7149 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 7150 .arg5_type = ARG_CONST_SIZE, 7151 }; 7152 7153 BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, 7154 struct tcphdr *, th, u32, th_len) 7155 { 7156 #ifdef CONFIG_SYN_COOKIES 7157 u32 cookie; 7158 u16 mss; 7159 7160 if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4)) 7161 return -EINVAL; 7162 7163 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) 7164 return -EINVAL; 7165 7166 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies)) 7167 return -ENOENT; 7168 7169 if (!th->syn || th->ack || th->fin || th->rst) 7170 return -EINVAL; 7171 7172 if (unlikely(iph_len < sizeof(struct iphdr))) 7173 return -EINVAL; 7174 7175 /* Both struct iphdr and struct ipv6hdr have the version field at the 7176 * same offset so we can cast to the shorter header (struct iphdr). 
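 *
 * As an aside, purely illustrative and not part of this file: the helper
 * returns the cookie in the low 32 bits and the MSS in the high 32 bits,
 * so a SYN-proxy style program would typically consume it as:
 *
 *	__s64 val = bpf_tcp_gen_syncookie(sk, iph, iph_len, th, th_len);
 *
 *	if (val >= 0) {
 *		__u32 cookie = (__u32)val;
 *		__u16 mss    = val >> 32;
 *		// build the SYN-ACK with seq = cookie and advertise mss
 *	}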
7177 */ 7178 switch (((struct iphdr *)iph)->version) { 7179 case 4: 7180 if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) 7181 return -EINVAL; 7182 7183 mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); 7184 break; 7185 7186 #if IS_BUILTIN(CONFIG_IPV6) 7187 case 6: 7188 if (unlikely(iph_len < sizeof(struct ipv6hdr))) 7189 return -EINVAL; 7190 7191 if (sk->sk_family != AF_INET6) 7192 return -EINVAL; 7193 7194 mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); 7195 break; 7196 #endif /* CONFIG_IPV6 */ 7197 7198 default: 7199 return -EPROTONOSUPPORT; 7200 } 7201 if (mss == 0) 7202 return -ENOENT; 7203 7204 return cookie | ((u64)mss << 32); 7205 #else 7206 return -EOPNOTSUPP; 7207 #endif /* CONFIG_SYN_COOKIES */ 7208 } 7209 7210 static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { 7211 .func = bpf_tcp_gen_syncookie, 7212 .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ 7213 .pkt_access = true, 7214 .ret_type = RET_INTEGER, 7215 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 7216 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 7217 .arg3_type = ARG_CONST_SIZE, 7218 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, 7219 .arg5_type = ARG_CONST_SIZE, 7220 }; 7221 7222 BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags) 7223 { 7224 if (!sk || flags != 0) 7225 return -EINVAL; 7226 if (!skb_at_tc_ingress(skb)) 7227 return -EOPNOTSUPP; 7228 if (unlikely(dev_net(skb->dev) != sock_net(sk))) 7229 return -ENETUNREACH; 7230 if (unlikely(sk_fullsock(sk) && sk->sk_reuseport)) 7231 return -ESOCKTNOSUPPORT; 7232 if (sk_is_refcounted(sk) && 7233 unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) 7234 return -ENOENT; 7235 7236 skb_orphan(skb); 7237 skb->sk = sk; 7238 skb->destructor = sock_pfree; 7239 7240 return 0; 7241 } 7242 7243 static const struct bpf_func_proto bpf_sk_assign_proto = { 7244 .func = bpf_sk_assign, 7245 .gpl_only = false, 7246 .ret_type = RET_INTEGER, 7247 .arg1_type = ARG_PTR_TO_CTX, 7248 .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 7249 .arg3_type = ARG_ANYTHING, 7250 }; 7251 7252 static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend, 7253 u8 search_kind, const u8 *magic, 7254 u8 magic_len, bool *eol) 7255 { 7256 u8 kind, kind_len; 7257 7258 *eol = false; 7259 7260 while (op < opend) { 7261 kind = op[0]; 7262 7263 if (kind == TCPOPT_EOL) { 7264 *eol = true; 7265 return ERR_PTR(-ENOMSG); 7266 } else if (kind == TCPOPT_NOP) { 7267 op++; 7268 continue; 7269 } 7270 7271 if (opend - op < 2 || opend - op < op[1] || op[1] < 2) 7272 /* Something is wrong in the received header. 7273 * Follow the TCP stack's tcp_parse_options() 7274 * and just bail here. 7275 */ 7276 return ERR_PTR(-EFAULT); 7277 7278 kind_len = op[1]; 7279 if (search_kind == kind) { 7280 if (!magic_len) 7281 return op; 7282 7283 if (magic_len > kind_len - 2) 7284 return ERR_PTR(-ENOMSG); 7285 7286 if (!memcmp(&op[2], magic, magic_len)) 7287 return op; 7288 } 7289 7290 op += kind_len; 7291 } 7292 7293 return ERR_PTR(-ENOMSG); 7294 } 7295 7296 BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, 7297 void *, search_res, u32, len, u64, flags) 7298 { 7299 bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN; 7300 const u8 *op, *opend, *magic, *search = search_res; 7301 u8 search_kind, search_len, copy_len, magic_len; 7302 int ret; 7303 7304 /* 2 byte is the minimal option len except TCPOPT_NOP and 7305 * TCPOPT_EOL which are useless for the bpf prog to learn 7306 * and this helper disallow loading them also. 
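 *
 * Illustration only (not part of this file): to search for an experimental
 * option with a 2-byte magic, a sock_ops program primes the result buffer
 * with kind and length and lets the helper overwrite it on success (the
 * magic below is an arbitrary example value):
 *
 *	__u8 opt[8] = { 254, 4, 0xab, 0xcd };	// kind = TCPOPT_EXP, len, magic
 *	int err;
 *
 *	err = bpf_load_hdr_opt(skops, opt, sizeof(opt), 0);
 *	if (err > 0)
 *		// err bytes of the matching option were copied into opt[]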
7307 */ 7308 if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN) 7309 return -EINVAL; 7310 7311 search_kind = search[0]; 7312 search_len = search[1]; 7313 7314 if (search_len > len || search_kind == TCPOPT_NOP || 7315 search_kind == TCPOPT_EOL) 7316 return -EINVAL; 7317 7318 if (search_kind == TCPOPT_EXP || search_kind == 253) { 7319 /* 16- or 32-bit magic; +2 for kind and kind length */ 7320 if (search_len != 4 && search_len != 6) 7321 return -EINVAL; 7322 magic = &search[2]; 7323 magic_len = search_len - 2; 7324 } else { 7325 if (search_len) 7326 return -EINVAL; 7327 magic = NULL; 7328 magic_len = 0; 7329 } 7330 7331 if (load_syn) { 7332 ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op); 7333 if (ret < 0) 7334 return ret; 7335 7336 opend = op + ret; 7337 op += sizeof(struct tcphdr); 7338 } else { 7339 if (!bpf_sock->skb || 7340 bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB) 7341 /* This bpf_sock->op cannot call this helper */ 7342 return -EPERM; 7343 7344 opend = bpf_sock->skb_data_end; 7345 op = bpf_sock->skb->data + sizeof(struct tcphdr); 7346 } 7347 7348 op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len, 7349 &eol); 7350 if (IS_ERR(op)) 7351 return PTR_ERR(op); 7352 7353 copy_len = op[1]; 7354 ret = copy_len; 7355 if (copy_len > len) { 7356 ret = -ENOSPC; 7357 copy_len = len; 7358 } 7359 7360 memcpy(search_res, op, copy_len); 7361 return ret; 7362 } 7363 7364 static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = { 7365 .func = bpf_sock_ops_load_hdr_opt, 7366 .gpl_only = false, 7367 .ret_type = RET_INTEGER, 7368 .arg1_type = ARG_PTR_TO_CTX, 7369 .arg2_type = ARG_PTR_TO_MEM, 7370 .arg3_type = ARG_CONST_SIZE, 7371 .arg4_type = ARG_ANYTHING, 7372 }; 7373 7374 BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, 7375 const void *, from, u32, len, u64, flags) 7376 { 7377 u8 new_kind, new_kind_len, magic_len = 0, *opend; 7378 const u8 *op, *new_op, *magic = NULL; 7379 struct sk_buff *skb; 7380 bool eol; 7381 7382 if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB) 7383 return -EPERM; 7384 7385 if (len < 2 || flags) 7386 return -EINVAL; 7387 7388 new_op = from; 7389 new_kind = new_op[0]; 7390 new_kind_len = new_op[1]; 7391 7392 if (new_kind_len > len || new_kind == TCPOPT_NOP || 7393 new_kind == TCPOPT_EOL) 7394 return -EINVAL; 7395 7396 if (new_kind_len > bpf_sock->remaining_opt_len) 7397 return -ENOSPC; 7398 7399 /* 253 is another experimental kind */ 7400 if (new_kind == TCPOPT_EXP || new_kind == 253) { 7401 if (new_kind_len < 4) 7402 return -EINVAL; 7403 /* Match on the 2-byte magic also. 7404 * Per RFC 6994 the magic can be 2 or 4 bytes. 7405 * Hence, matching on 2 bytes only is on the 7406 * conservative side, but it is the right 7407 * thing to do for the 'search-for-duplication' 7408 * purpose. 7409 */ 7410 magic = &new_op[2]; 7411 magic_len = 2; 7412 } 7413 7414 /* Check for duplication */ 7415 skb = bpf_sock->skb; 7416 op = skb->data + sizeof(struct tcphdr); 7417 opend = bpf_sock->skb_data_end; 7418 7419 op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len, 7420 &eol); 7421 if (!IS_ERR(op)) 7422 return -EEXIST; 7423 7424 if (PTR_ERR(op) != -ENOMSG) 7425 return PTR_ERR(op); 7426 7427 if (eol) 7428 /* The option list has been terminated (EOL). Treat it as if 7429 * no more header options can be written. 7430 */ 7431 return -ENOSPC; 7432 7433 /* No duplication found. Store the header option.
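 *
 * Illustration only (not part of this file): a sock_ops program reaches this
 * write path by first reserving room and then storing the option, e.g. for a
 * 4-byte experimental option held in opt[]:
 *
 *	// in BPF_SOCK_OPS_HDR_OPT_LEN_CB:   bpf_reserve_hdr_opt(skops, 4, 0);
 *	// in BPF_SOCK_OPS_WRITE_HDR_OPT_CB: bpf_store_hdr_opt(skops, opt, 4, 0);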
*/ 7434 memcpy(opend, from, new_kind_len); 7435 7436 bpf_sock->remaining_opt_len -= new_kind_len; 7437 bpf_sock->skb_data_end += new_kind_len; 7438 7439 return 0; 7440 } 7441 7442 static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { 7443 .func = bpf_sock_ops_store_hdr_opt, 7444 .gpl_only = false, 7445 .ret_type = RET_INTEGER, 7446 .arg1_type = ARG_PTR_TO_CTX, 7447 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 7448 .arg3_type = ARG_CONST_SIZE, 7449 .arg4_type = ARG_ANYTHING, 7450 }; 7451 7452 BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock, 7453 u32, len, u64, flags) 7454 { 7455 if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB) 7456 return -EPERM; 7457 7458 if (flags || len < 2) 7459 return -EINVAL; 7460 7461 if (len > bpf_sock->remaining_opt_len) 7462 return -ENOSPC; 7463 7464 bpf_sock->remaining_opt_len -= len; 7465 7466 return 0; 7467 } 7468 7469 static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { 7470 .func = bpf_sock_ops_reserve_hdr_opt, 7471 .gpl_only = false, 7472 .ret_type = RET_INTEGER, 7473 .arg1_type = ARG_PTR_TO_CTX, 7474 .arg2_type = ARG_ANYTHING, 7475 .arg3_type = ARG_ANYTHING, 7476 }; 7477 7478 BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, 7479 u64, tstamp, u32, tstamp_type) 7480 { 7481 /* skb_clear_delivery_time() is done for inet protocol */ 7482 if (skb->protocol != htons(ETH_P_IP) && 7483 skb->protocol != htons(ETH_P_IPV6)) 7484 return -EOPNOTSUPP; 7485 7486 switch (tstamp_type) { 7487 case BPF_SKB_TSTAMP_DELIVERY_MONO: 7488 if (!tstamp) 7489 return -EINVAL; 7490 skb->tstamp = tstamp; 7491 skb->mono_delivery_time = 1; 7492 break; 7493 case BPF_SKB_TSTAMP_UNSPEC: 7494 if (tstamp) 7495 return -EINVAL; 7496 skb->tstamp = 0; 7497 skb->mono_delivery_time = 0; 7498 break; 7499 default: 7500 return -EINVAL; 7501 } 7502 7503 return 0; 7504 } 7505 7506 static const struct bpf_func_proto bpf_skb_set_tstamp_proto = { 7507 .func = bpf_skb_set_tstamp, 7508 .gpl_only = false, 7509 .ret_type = RET_INTEGER, 7510 .arg1_type = ARG_PTR_TO_CTX, 7511 .arg2_type = ARG_ANYTHING, 7512 .arg3_type = ARG_ANYTHING, 7513 }; 7514 7515 #ifdef CONFIG_SYN_COOKIES 7516 BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph, 7517 struct tcphdr *, th, u32, th_len) 7518 { 7519 u32 cookie; 7520 u16 mss; 7521 7522 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) 7523 return -EINVAL; 7524 7525 mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT; 7526 cookie = __cookie_v4_init_sequence(iph, th, &mss); 7527 7528 return cookie | ((u64)mss << 32); 7529 } 7530 7531 static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { 7532 .func = bpf_tcp_raw_gen_syncookie_ipv4, 7533 .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */ 7534 .pkt_access = true, 7535 .ret_type = RET_INTEGER, 7536 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7537 .arg1_size = sizeof(struct iphdr), 7538 .arg2_type = ARG_PTR_TO_MEM, 7539 .arg3_type = ARG_CONST_SIZE, 7540 }; 7541 7542 BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, 7543 struct tcphdr *, th, u32, th_len) 7544 { 7545 #if IS_BUILTIN(CONFIG_IPV6) 7546 const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 7547 sizeof(struct ipv6hdr); 7548 u32 cookie; 7549 u16 mss; 7550 7551 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) 7552 return -EINVAL; 7553 7554 mss = tcp_parse_mss_option(th, 0) ?: mss_clamp; 7555 cookie = __cookie_v6_init_sequence(iph, th, &mss); 7556 7557 return cookie | ((u64)mss << 32); 7558 #else 7559 return -EPROTONOSUPPORT; 
7560 #endif 7561 } 7562 7563 static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { 7564 .func = bpf_tcp_raw_gen_syncookie_ipv6, 7565 .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */ 7566 .pkt_access = true, 7567 .ret_type = RET_INTEGER, 7568 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7569 .arg1_size = sizeof(struct ipv6hdr), 7570 .arg2_type = ARG_PTR_TO_MEM, 7571 .arg3_type = ARG_CONST_SIZE, 7572 }; 7573 7574 BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, 7575 struct tcphdr *, th) 7576 { 7577 u32 cookie = ntohl(th->ack_seq) - 1; 7578 7579 if (__cookie_v4_check(iph, th, cookie) > 0) 7580 return 0; 7581 7582 return -EACCES; 7583 } 7584 7585 static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { 7586 .func = bpf_tcp_raw_check_syncookie_ipv4, 7587 .gpl_only = true, /* __cookie_v4_check is GPL */ 7588 .pkt_access = true, 7589 .ret_type = RET_INTEGER, 7590 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7591 .arg1_size = sizeof(struct iphdr), 7592 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7593 .arg2_size = sizeof(struct tcphdr), 7594 }; 7595 7596 BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, 7597 struct tcphdr *, th) 7598 { 7599 #if IS_BUILTIN(CONFIG_IPV6) 7600 u32 cookie = ntohl(th->ack_seq) - 1; 7601 7602 if (__cookie_v6_check(iph, th, cookie) > 0) 7603 return 0; 7604 7605 return -EACCES; 7606 #else 7607 return -EPROTONOSUPPORT; 7608 #endif 7609 } 7610 7611 static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { 7612 .func = bpf_tcp_raw_check_syncookie_ipv6, 7613 .gpl_only = true, /* __cookie_v6_check is GPL */ 7614 .pkt_access = true, 7615 .ret_type = RET_INTEGER, 7616 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7617 .arg1_size = sizeof(struct ipv6hdr), 7618 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM, 7619 .arg2_size = sizeof(struct tcphdr), 7620 }; 7621 #endif /* CONFIG_SYN_COOKIES */ 7622 7623 #endif /* CONFIG_INET */ 7624 7625 bool bpf_helper_changes_pkt_data(void *func) 7626 { 7627 if (func == bpf_skb_vlan_push || 7628 func == bpf_skb_vlan_pop || 7629 func == bpf_skb_store_bytes || 7630 func == bpf_skb_change_proto || 7631 func == bpf_skb_change_head || 7632 func == sk_skb_change_head || 7633 func == bpf_skb_change_tail || 7634 func == sk_skb_change_tail || 7635 func == bpf_skb_adjust_room || 7636 func == sk_skb_adjust_room || 7637 func == bpf_skb_pull_data || 7638 func == sk_skb_pull_data || 7639 func == bpf_clone_redirect || 7640 func == bpf_l3_csum_replace || 7641 func == bpf_l4_csum_replace || 7642 func == bpf_xdp_adjust_head || 7643 func == bpf_xdp_adjust_meta || 7644 func == bpf_msg_pull_data || 7645 func == bpf_msg_push_data || 7646 func == bpf_msg_pop_data || 7647 func == bpf_xdp_adjust_tail || 7648 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 7649 func == bpf_lwt_seg6_store_bytes || 7650 func == bpf_lwt_seg6_adjust_srh || 7651 func == bpf_lwt_seg6_action || 7652 #endif 7653 #ifdef CONFIG_INET 7654 func == bpf_sock_ops_store_hdr_opt || 7655 #endif 7656 func == bpf_lwt_in_push_encap || 7657 func == bpf_lwt_xmit_push_encap) 7658 return true; 7659 7660 return false; 7661 } 7662 7663 const struct bpf_func_proto bpf_event_output_data_proto __weak; 7664 const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak; 7665 7666 static const struct bpf_func_proto * 7667 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 7668 { 7669 switch (func_id) { 7670 /* inet and inet6 sockets are created in a process 7671 * context so there is always a valid uid/gid 7672 */ 7673 
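/* Illustration only (not part of this file): a cgroup sock program can thus
 * rely on, e.g.:
 *
 *	__u64 uid_gid = bpf_get_current_uid_gid();
 *	__u32 uid = (__u32)uid_gid;	// the gid sits in the upper 32 bits
 */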
case BPF_FUNC_get_current_uid_gid: 7674 return &bpf_get_current_uid_gid_proto; 7675 case BPF_FUNC_get_local_storage: 7676 return &bpf_get_local_storage_proto; 7677 case BPF_FUNC_get_socket_cookie: 7678 return &bpf_get_socket_cookie_sock_proto; 7679 case BPF_FUNC_get_netns_cookie: 7680 return &bpf_get_netns_cookie_sock_proto; 7681 case BPF_FUNC_perf_event_output: 7682 return &bpf_event_output_data_proto; 7683 case BPF_FUNC_get_current_pid_tgid: 7684 return &bpf_get_current_pid_tgid_proto; 7685 case BPF_FUNC_get_current_comm: 7686 return &bpf_get_current_comm_proto; 7687 #ifdef CONFIG_CGROUPS 7688 case BPF_FUNC_get_current_cgroup_id: 7689 return &bpf_get_current_cgroup_id_proto; 7690 case BPF_FUNC_get_current_ancestor_cgroup_id: 7691 return &bpf_get_current_ancestor_cgroup_id_proto; 7692 #endif 7693 #ifdef CONFIG_CGROUP_NET_CLASSID 7694 case BPF_FUNC_get_cgroup_classid: 7695 return &bpf_get_cgroup_classid_curr_proto; 7696 #endif 7697 case BPF_FUNC_sk_storage_get: 7698 return &bpf_sk_storage_get_cg_sock_proto; 7699 case BPF_FUNC_ktime_get_coarse_ns: 7700 return &bpf_ktime_get_coarse_ns_proto; 7701 default: 7702 return bpf_base_func_proto(func_id); 7703 } 7704 } 7705 7706 static const struct bpf_func_proto * 7707 sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 7708 { 7709 switch (func_id) { 7710 /* inet and inet6 sockets are created in a process 7711 * context so there is always a valid uid/gid 7712 */ 7713 case BPF_FUNC_get_current_uid_gid: 7714 return &bpf_get_current_uid_gid_proto; 7715 case BPF_FUNC_bind: 7716 switch (prog->expected_attach_type) { 7717 case BPF_CGROUP_INET4_CONNECT: 7718 case BPF_CGROUP_INET6_CONNECT: 7719 return &bpf_bind_proto; 7720 default: 7721 return NULL; 7722 } 7723 case BPF_FUNC_get_socket_cookie: 7724 return &bpf_get_socket_cookie_sock_addr_proto; 7725 case BPF_FUNC_get_netns_cookie: 7726 return &bpf_get_netns_cookie_sock_addr_proto; 7727 case BPF_FUNC_get_local_storage: 7728 return &bpf_get_local_storage_proto; 7729 case BPF_FUNC_perf_event_output: 7730 return &bpf_event_output_data_proto; 7731 case BPF_FUNC_get_current_pid_tgid: 7732 return &bpf_get_current_pid_tgid_proto; 7733 case BPF_FUNC_get_current_comm: 7734 return &bpf_get_current_comm_proto; 7735 #ifdef CONFIG_CGROUPS 7736 case BPF_FUNC_get_current_cgroup_id: 7737 return &bpf_get_current_cgroup_id_proto; 7738 case BPF_FUNC_get_current_ancestor_cgroup_id: 7739 return &bpf_get_current_ancestor_cgroup_id_proto; 7740 #endif 7741 #ifdef CONFIG_CGROUP_NET_CLASSID 7742 case BPF_FUNC_get_cgroup_classid: 7743 return &bpf_get_cgroup_classid_curr_proto; 7744 #endif 7745 #ifdef CONFIG_INET 7746 case BPF_FUNC_sk_lookup_tcp: 7747 return &bpf_sock_addr_sk_lookup_tcp_proto; 7748 case BPF_FUNC_sk_lookup_udp: 7749 return &bpf_sock_addr_sk_lookup_udp_proto; 7750 case BPF_FUNC_sk_release: 7751 return &bpf_sk_release_proto; 7752 case BPF_FUNC_skc_lookup_tcp: 7753 return &bpf_sock_addr_skc_lookup_tcp_proto; 7754 #endif /* CONFIG_INET */ 7755 case BPF_FUNC_sk_storage_get: 7756 return &bpf_sk_storage_get_proto; 7757 case BPF_FUNC_sk_storage_delete: 7758 return &bpf_sk_storage_delete_proto; 7759 case BPF_FUNC_setsockopt: 7760 switch (prog->expected_attach_type) { 7761 case BPF_CGROUP_INET4_BIND: 7762 case BPF_CGROUP_INET6_BIND: 7763 case BPF_CGROUP_INET4_CONNECT: 7764 case BPF_CGROUP_INET6_CONNECT: 7765 case BPF_CGROUP_UDP4_RECVMSG: 7766 case BPF_CGROUP_UDP6_RECVMSG: 7767 case BPF_CGROUP_UDP4_SENDMSG: 7768 case BPF_CGROUP_UDP6_SENDMSG: 7769 case BPF_CGROUP_INET4_GETPEERNAME: 7770 case 
BPF_CGROUP_INET6_GETPEERNAME: 7771 case BPF_CGROUP_INET4_GETSOCKNAME: 7772 case BPF_CGROUP_INET6_GETSOCKNAME: 7773 return &bpf_sock_addr_setsockopt_proto; 7774 default: 7775 return NULL; 7776 } 7777 case BPF_FUNC_getsockopt: 7778 switch (prog->expected_attach_type) { 7779 case BPF_CGROUP_INET4_BIND: 7780 case BPF_CGROUP_INET6_BIND: 7781 case BPF_CGROUP_INET4_CONNECT: 7782 case BPF_CGROUP_INET6_CONNECT: 7783 case BPF_CGROUP_UDP4_RECVMSG: 7784 case BPF_CGROUP_UDP6_RECVMSG: 7785 case BPF_CGROUP_UDP4_SENDMSG: 7786 case BPF_CGROUP_UDP6_SENDMSG: 7787 case BPF_CGROUP_INET4_GETPEERNAME: 7788 case BPF_CGROUP_INET6_GETPEERNAME: 7789 case BPF_CGROUP_INET4_GETSOCKNAME: 7790 case BPF_CGROUP_INET6_GETSOCKNAME: 7791 return &bpf_sock_addr_getsockopt_proto; 7792 default: 7793 return NULL; 7794 } 7795 default: 7796 return bpf_sk_base_func_proto(func_id); 7797 } 7798 } 7799 7800 static const struct bpf_func_proto * 7801 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 7802 { 7803 switch (func_id) { 7804 case BPF_FUNC_skb_load_bytes: 7805 return &bpf_skb_load_bytes_proto; 7806 case BPF_FUNC_skb_load_bytes_relative: 7807 return &bpf_skb_load_bytes_relative_proto; 7808 case BPF_FUNC_get_socket_cookie: 7809 return &bpf_get_socket_cookie_proto; 7810 case BPF_FUNC_get_socket_uid: 7811 return &bpf_get_socket_uid_proto; 7812 case BPF_FUNC_perf_event_output: 7813 return &bpf_skb_event_output_proto; 7814 default: 7815 return bpf_sk_base_func_proto(func_id); 7816 } 7817 } 7818 7819 const struct bpf_func_proto bpf_sk_storage_get_proto __weak; 7820 const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; 7821 7822 static const struct bpf_func_proto * 7823 cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 7824 { 7825 switch (func_id) { 7826 case BPF_FUNC_get_local_storage: 7827 return &bpf_get_local_storage_proto; 7828 case BPF_FUNC_sk_fullsock: 7829 return &bpf_sk_fullsock_proto; 7830 case BPF_FUNC_sk_storage_get: 7831 return &bpf_sk_storage_get_proto; 7832 case BPF_FUNC_sk_storage_delete: 7833 return &bpf_sk_storage_delete_proto; 7834 case BPF_FUNC_perf_event_output: 7835 return &bpf_skb_event_output_proto; 7836 #ifdef CONFIG_SOCK_CGROUP_DATA 7837 case BPF_FUNC_skb_cgroup_id: 7838 return &bpf_skb_cgroup_id_proto; 7839 case BPF_FUNC_skb_ancestor_cgroup_id: 7840 return &bpf_skb_ancestor_cgroup_id_proto; 7841 case BPF_FUNC_sk_cgroup_id: 7842 return &bpf_sk_cgroup_id_proto; 7843 case BPF_FUNC_sk_ancestor_cgroup_id: 7844 return &bpf_sk_ancestor_cgroup_id_proto; 7845 #endif 7846 #ifdef CONFIG_INET 7847 case BPF_FUNC_sk_lookup_tcp: 7848 return &bpf_sk_lookup_tcp_proto; 7849 case BPF_FUNC_sk_lookup_udp: 7850 return &bpf_sk_lookup_udp_proto; 7851 case BPF_FUNC_sk_release: 7852 return &bpf_sk_release_proto; 7853 case BPF_FUNC_skc_lookup_tcp: 7854 return &bpf_skc_lookup_tcp_proto; 7855 case BPF_FUNC_tcp_sock: 7856 return &bpf_tcp_sock_proto; 7857 case BPF_FUNC_get_listener_sock: 7858 return &bpf_get_listener_sock_proto; 7859 case BPF_FUNC_skb_ecn_set_ce: 7860 return &bpf_skb_ecn_set_ce_proto; 7861 #endif 7862 default: 7863 return sk_filter_func_proto(func_id, prog); 7864 } 7865 } 7866 7867 static const struct bpf_func_proto * 7868 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 7869 { 7870 switch (func_id) { 7871 case BPF_FUNC_skb_store_bytes: 7872 return &bpf_skb_store_bytes_proto; 7873 case BPF_FUNC_skb_load_bytes: 7874 return &bpf_skb_load_bytes_proto; 7875 case BPF_FUNC_skb_load_bytes_relative: 7876 return &bpf_skb_load_bytes_relative_proto; 
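/* Illustration only (not part of this file): from a tc program,
 * bpf_skb_load_bytes_relative() reads relative to a header start, e.g. the
 * outer L3 header regardless of the L2 encapsulation in front of it:
 *
 *	struct iphdr iph;
 *
 *	if (!bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
 *					 BPF_HDR_START_NET))
 *		// iph now holds the IPv4 header
 */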
7877 case BPF_FUNC_skb_pull_data: 7878 return &bpf_skb_pull_data_proto; 7879 case BPF_FUNC_csum_diff: 7880 return &bpf_csum_diff_proto; 7881 case BPF_FUNC_csum_update: 7882 return &bpf_csum_update_proto; 7883 case BPF_FUNC_csum_level: 7884 return &bpf_csum_level_proto; 7885 case BPF_FUNC_l3_csum_replace: 7886 return &bpf_l3_csum_replace_proto; 7887 case BPF_FUNC_l4_csum_replace: 7888 return &bpf_l4_csum_replace_proto; 7889 case BPF_FUNC_clone_redirect: 7890 return &bpf_clone_redirect_proto; 7891 case BPF_FUNC_get_cgroup_classid: 7892 return &bpf_get_cgroup_classid_proto; 7893 case BPF_FUNC_skb_vlan_push: 7894 return &bpf_skb_vlan_push_proto; 7895 case BPF_FUNC_skb_vlan_pop: 7896 return &bpf_skb_vlan_pop_proto; 7897 case BPF_FUNC_skb_change_proto: 7898 return &bpf_skb_change_proto_proto; 7899 case BPF_FUNC_skb_change_type: 7900 return &bpf_skb_change_type_proto; 7901 case BPF_FUNC_skb_adjust_room: 7902 return &bpf_skb_adjust_room_proto; 7903 case BPF_FUNC_skb_change_tail: 7904 return &bpf_skb_change_tail_proto; 7905 case BPF_FUNC_skb_change_head: 7906 return &bpf_skb_change_head_proto; 7907 case BPF_FUNC_skb_get_tunnel_key: 7908 return &bpf_skb_get_tunnel_key_proto; 7909 case BPF_FUNC_skb_set_tunnel_key: 7910 return bpf_get_skb_set_tunnel_proto(func_id); 7911 case BPF_FUNC_skb_get_tunnel_opt: 7912 return &bpf_skb_get_tunnel_opt_proto; 7913 case BPF_FUNC_skb_set_tunnel_opt: 7914 return bpf_get_skb_set_tunnel_proto(func_id); 7915 case BPF_FUNC_redirect: 7916 return &bpf_redirect_proto; 7917 case BPF_FUNC_redirect_neigh: 7918 return &bpf_redirect_neigh_proto; 7919 case BPF_FUNC_redirect_peer: 7920 return &bpf_redirect_peer_proto; 7921 case BPF_FUNC_get_route_realm: 7922 return &bpf_get_route_realm_proto; 7923 case BPF_FUNC_get_hash_recalc: 7924 return &bpf_get_hash_recalc_proto; 7925 case BPF_FUNC_set_hash_invalid: 7926 return &bpf_set_hash_invalid_proto; 7927 case BPF_FUNC_set_hash: 7928 return &bpf_set_hash_proto; 7929 case BPF_FUNC_perf_event_output: 7930 return &bpf_skb_event_output_proto; 7931 case BPF_FUNC_get_smp_processor_id: 7932 return &bpf_get_smp_processor_id_proto; 7933 case BPF_FUNC_skb_under_cgroup: 7934 return &bpf_skb_under_cgroup_proto; 7935 case BPF_FUNC_get_socket_cookie: 7936 return &bpf_get_socket_cookie_proto; 7937 case BPF_FUNC_get_socket_uid: 7938 return &bpf_get_socket_uid_proto; 7939 case BPF_FUNC_fib_lookup: 7940 return &bpf_skb_fib_lookup_proto; 7941 case BPF_FUNC_check_mtu: 7942 return &bpf_skb_check_mtu_proto; 7943 case BPF_FUNC_sk_fullsock: 7944 return &bpf_sk_fullsock_proto; 7945 case BPF_FUNC_sk_storage_get: 7946 return &bpf_sk_storage_get_proto; 7947 case BPF_FUNC_sk_storage_delete: 7948 return &bpf_sk_storage_delete_proto; 7949 #ifdef CONFIG_XFRM 7950 case BPF_FUNC_skb_get_xfrm_state: 7951 return &bpf_skb_get_xfrm_state_proto; 7952 #endif 7953 #ifdef CONFIG_CGROUP_NET_CLASSID 7954 case BPF_FUNC_skb_cgroup_classid: 7955 return &bpf_skb_cgroup_classid_proto; 7956 #endif 7957 #ifdef CONFIG_SOCK_CGROUP_DATA 7958 case BPF_FUNC_skb_cgroup_id: 7959 return &bpf_skb_cgroup_id_proto; 7960 case BPF_FUNC_skb_ancestor_cgroup_id: 7961 return &bpf_skb_ancestor_cgroup_id_proto; 7962 #endif 7963 #ifdef CONFIG_INET 7964 case BPF_FUNC_sk_lookup_tcp: 7965 return &bpf_sk_lookup_tcp_proto; 7966 case BPF_FUNC_sk_lookup_udp: 7967 return &bpf_sk_lookup_udp_proto; 7968 case BPF_FUNC_sk_release: 7969 return &bpf_sk_release_proto; 7970 case BPF_FUNC_tcp_sock: 7971 return &bpf_tcp_sock_proto; 7972 case BPF_FUNC_get_listener_sock: 7973 return &bpf_get_listener_sock_proto; 7974 case 
BPF_FUNC_skc_lookup_tcp: 7975 return &bpf_skc_lookup_tcp_proto; 7976 case BPF_FUNC_tcp_check_syncookie: 7977 return &bpf_tcp_check_syncookie_proto; 7978 case BPF_FUNC_skb_ecn_set_ce: 7979 return &bpf_skb_ecn_set_ce_proto; 7980 case BPF_FUNC_tcp_gen_syncookie: 7981 return &bpf_tcp_gen_syncookie_proto; 7982 case BPF_FUNC_sk_assign: 7983 return &bpf_sk_assign_proto; 7984 case BPF_FUNC_skb_set_tstamp: 7985 return &bpf_skb_set_tstamp_proto; 7986 #ifdef CONFIG_SYN_COOKIES 7987 case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: 7988 return &bpf_tcp_raw_gen_syncookie_ipv4_proto; 7989 case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: 7990 return &bpf_tcp_raw_gen_syncookie_ipv6_proto; 7991 case BPF_FUNC_tcp_raw_check_syncookie_ipv4: 7992 return &bpf_tcp_raw_check_syncookie_ipv4_proto; 7993 case BPF_FUNC_tcp_raw_check_syncookie_ipv6: 7994 return &bpf_tcp_raw_check_syncookie_ipv6_proto; 7995 #endif 7996 #endif 7997 default: 7998 return bpf_sk_base_func_proto(func_id); 7999 } 8000 } 8001 8002 static const struct bpf_func_proto * 8003 xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8004 { 8005 switch (func_id) { 8006 case BPF_FUNC_perf_event_output: 8007 return &bpf_xdp_event_output_proto; 8008 case BPF_FUNC_get_smp_processor_id: 8009 return &bpf_get_smp_processor_id_proto; 8010 case BPF_FUNC_csum_diff: 8011 return &bpf_csum_diff_proto; 8012 case BPF_FUNC_xdp_adjust_head: 8013 return &bpf_xdp_adjust_head_proto; 8014 case BPF_FUNC_xdp_adjust_meta: 8015 return &bpf_xdp_adjust_meta_proto; 8016 case BPF_FUNC_redirect: 8017 return &bpf_xdp_redirect_proto; 8018 case BPF_FUNC_redirect_map: 8019 return &bpf_xdp_redirect_map_proto; 8020 case BPF_FUNC_xdp_adjust_tail: 8021 return &bpf_xdp_adjust_tail_proto; 8022 case BPF_FUNC_xdp_get_buff_len: 8023 return &bpf_xdp_get_buff_len_proto; 8024 case BPF_FUNC_xdp_load_bytes: 8025 return &bpf_xdp_load_bytes_proto; 8026 case BPF_FUNC_xdp_store_bytes: 8027 return &bpf_xdp_store_bytes_proto; 8028 case BPF_FUNC_fib_lookup: 8029 return &bpf_xdp_fib_lookup_proto; 8030 case BPF_FUNC_check_mtu: 8031 return &bpf_xdp_check_mtu_proto; 8032 #ifdef CONFIG_INET 8033 case BPF_FUNC_sk_lookup_udp: 8034 return &bpf_xdp_sk_lookup_udp_proto; 8035 case BPF_FUNC_sk_lookup_tcp: 8036 return &bpf_xdp_sk_lookup_tcp_proto; 8037 case BPF_FUNC_sk_release: 8038 return &bpf_sk_release_proto; 8039 case BPF_FUNC_skc_lookup_tcp: 8040 return &bpf_xdp_skc_lookup_tcp_proto; 8041 case BPF_FUNC_tcp_check_syncookie: 8042 return &bpf_tcp_check_syncookie_proto; 8043 case BPF_FUNC_tcp_gen_syncookie: 8044 return &bpf_tcp_gen_syncookie_proto; 8045 #ifdef CONFIG_SYN_COOKIES 8046 case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: 8047 return &bpf_tcp_raw_gen_syncookie_ipv4_proto; 8048 case BPF_FUNC_tcp_raw_gen_syncookie_ipv6: 8049 return &bpf_tcp_raw_gen_syncookie_ipv6_proto; 8050 case BPF_FUNC_tcp_raw_check_syncookie_ipv4: 8051 return &bpf_tcp_raw_check_syncookie_ipv4_proto; 8052 case BPF_FUNC_tcp_raw_check_syncookie_ipv6: 8053 return &bpf_tcp_raw_check_syncookie_ipv6_proto; 8054 #endif 8055 #endif 8056 default: 8057 return bpf_sk_base_func_proto(func_id); 8058 } 8059 } 8060 8061 const struct bpf_func_proto bpf_sock_map_update_proto __weak; 8062 const struct bpf_func_proto bpf_sock_hash_update_proto __weak; 8063 8064 static const struct bpf_func_proto * 8065 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8066 { 8067 switch (func_id) { 8068 case BPF_FUNC_setsockopt: 8069 return &bpf_sock_ops_setsockopt_proto; 8070 case BPF_FUNC_getsockopt: 8071 return &bpf_sock_ops_getsockopt_proto; 8072 case 
BPF_FUNC_sock_ops_cb_flags_set: 8073 return &bpf_sock_ops_cb_flags_set_proto; 8074 case BPF_FUNC_sock_map_update: 8075 return &bpf_sock_map_update_proto; 8076 case BPF_FUNC_sock_hash_update: 8077 return &bpf_sock_hash_update_proto; 8078 case BPF_FUNC_get_socket_cookie: 8079 return &bpf_get_socket_cookie_sock_ops_proto; 8080 case BPF_FUNC_get_local_storage: 8081 return &bpf_get_local_storage_proto; 8082 case BPF_FUNC_perf_event_output: 8083 return &bpf_event_output_data_proto; 8084 case BPF_FUNC_sk_storage_get: 8085 return &bpf_sk_storage_get_proto; 8086 case BPF_FUNC_sk_storage_delete: 8087 return &bpf_sk_storage_delete_proto; 8088 case BPF_FUNC_get_netns_cookie: 8089 return &bpf_get_netns_cookie_sock_ops_proto; 8090 #ifdef CONFIG_INET 8091 case BPF_FUNC_load_hdr_opt: 8092 return &bpf_sock_ops_load_hdr_opt_proto; 8093 case BPF_FUNC_store_hdr_opt: 8094 return &bpf_sock_ops_store_hdr_opt_proto; 8095 case BPF_FUNC_reserve_hdr_opt: 8096 return &bpf_sock_ops_reserve_hdr_opt_proto; 8097 case BPF_FUNC_tcp_sock: 8098 return &bpf_tcp_sock_proto; 8099 #endif /* CONFIG_INET */ 8100 default: 8101 return bpf_sk_base_func_proto(func_id); 8102 } 8103 } 8104 8105 const struct bpf_func_proto bpf_msg_redirect_map_proto __weak; 8106 const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak; 8107 8108 static const struct bpf_func_proto * 8109 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8110 { 8111 switch (func_id) { 8112 case BPF_FUNC_msg_redirect_map: 8113 return &bpf_msg_redirect_map_proto; 8114 case BPF_FUNC_msg_redirect_hash: 8115 return &bpf_msg_redirect_hash_proto; 8116 case BPF_FUNC_msg_apply_bytes: 8117 return &bpf_msg_apply_bytes_proto; 8118 case BPF_FUNC_msg_cork_bytes: 8119 return &bpf_msg_cork_bytes_proto; 8120 case BPF_FUNC_msg_pull_data: 8121 return &bpf_msg_pull_data_proto; 8122 case BPF_FUNC_msg_push_data: 8123 return &bpf_msg_push_data_proto; 8124 case BPF_FUNC_msg_pop_data: 8125 return &bpf_msg_pop_data_proto; 8126 case BPF_FUNC_perf_event_output: 8127 return &bpf_event_output_data_proto; 8128 case BPF_FUNC_get_current_uid_gid: 8129 return &bpf_get_current_uid_gid_proto; 8130 case BPF_FUNC_get_current_pid_tgid: 8131 return &bpf_get_current_pid_tgid_proto; 8132 case BPF_FUNC_sk_storage_get: 8133 return &bpf_sk_storage_get_proto; 8134 case BPF_FUNC_sk_storage_delete: 8135 return &bpf_sk_storage_delete_proto; 8136 case BPF_FUNC_get_netns_cookie: 8137 return &bpf_get_netns_cookie_sk_msg_proto; 8138 #ifdef CONFIG_CGROUPS 8139 case BPF_FUNC_get_current_cgroup_id: 8140 return &bpf_get_current_cgroup_id_proto; 8141 case BPF_FUNC_get_current_ancestor_cgroup_id: 8142 return &bpf_get_current_ancestor_cgroup_id_proto; 8143 #endif 8144 #ifdef CONFIG_CGROUP_NET_CLASSID 8145 case BPF_FUNC_get_cgroup_classid: 8146 return &bpf_get_cgroup_classid_curr_proto; 8147 #endif 8148 default: 8149 return bpf_sk_base_func_proto(func_id); 8150 } 8151 } 8152 8153 const struct bpf_func_proto bpf_sk_redirect_map_proto __weak; 8154 const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak; 8155 8156 static const struct bpf_func_proto * 8157 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8158 { 8159 switch (func_id) { 8160 case BPF_FUNC_skb_store_bytes: 8161 return &bpf_skb_store_bytes_proto; 8162 case BPF_FUNC_skb_load_bytes: 8163 return &bpf_skb_load_bytes_proto; 8164 case BPF_FUNC_skb_pull_data: 8165 return &sk_skb_pull_data_proto; 8166 case BPF_FUNC_skb_change_tail: 8167 return &sk_skb_change_tail_proto; 8168 case BPF_FUNC_skb_change_head: 8169 return 
&sk_skb_change_head_proto; 8170 case BPF_FUNC_skb_adjust_room: 8171 return &sk_skb_adjust_room_proto; 8172 case BPF_FUNC_get_socket_cookie: 8173 return &bpf_get_socket_cookie_proto; 8174 case BPF_FUNC_get_socket_uid: 8175 return &bpf_get_socket_uid_proto; 8176 case BPF_FUNC_sk_redirect_map: 8177 return &bpf_sk_redirect_map_proto; 8178 case BPF_FUNC_sk_redirect_hash: 8179 return &bpf_sk_redirect_hash_proto; 8180 case BPF_FUNC_perf_event_output: 8181 return &bpf_skb_event_output_proto; 8182 #ifdef CONFIG_INET 8183 case BPF_FUNC_sk_lookup_tcp: 8184 return &bpf_sk_lookup_tcp_proto; 8185 case BPF_FUNC_sk_lookup_udp: 8186 return &bpf_sk_lookup_udp_proto; 8187 case BPF_FUNC_sk_release: 8188 return &bpf_sk_release_proto; 8189 case BPF_FUNC_skc_lookup_tcp: 8190 return &bpf_skc_lookup_tcp_proto; 8191 #endif 8192 default: 8193 return bpf_sk_base_func_proto(func_id); 8194 } 8195 } 8196 8197 static const struct bpf_func_proto * 8198 flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8199 { 8200 switch (func_id) { 8201 case BPF_FUNC_skb_load_bytes: 8202 return &bpf_flow_dissector_load_bytes_proto; 8203 default: 8204 return bpf_sk_base_func_proto(func_id); 8205 } 8206 } 8207 8208 static const struct bpf_func_proto * 8209 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8210 { 8211 switch (func_id) { 8212 case BPF_FUNC_skb_load_bytes: 8213 return &bpf_skb_load_bytes_proto; 8214 case BPF_FUNC_skb_pull_data: 8215 return &bpf_skb_pull_data_proto; 8216 case BPF_FUNC_csum_diff: 8217 return &bpf_csum_diff_proto; 8218 case BPF_FUNC_get_cgroup_classid: 8219 return &bpf_get_cgroup_classid_proto; 8220 case BPF_FUNC_get_route_realm: 8221 return &bpf_get_route_realm_proto; 8222 case BPF_FUNC_get_hash_recalc: 8223 return &bpf_get_hash_recalc_proto; 8224 case BPF_FUNC_perf_event_output: 8225 return &bpf_skb_event_output_proto; 8226 case BPF_FUNC_get_smp_processor_id: 8227 return &bpf_get_smp_processor_id_proto; 8228 case BPF_FUNC_skb_under_cgroup: 8229 return &bpf_skb_under_cgroup_proto; 8230 default: 8231 return bpf_sk_base_func_proto(func_id); 8232 } 8233 } 8234 8235 static const struct bpf_func_proto * 8236 lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8237 { 8238 switch (func_id) { 8239 case BPF_FUNC_lwt_push_encap: 8240 return &bpf_lwt_in_push_encap_proto; 8241 default: 8242 return lwt_out_func_proto(func_id, prog); 8243 } 8244 } 8245 8246 static const struct bpf_func_proto * 8247 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8248 { 8249 switch (func_id) { 8250 case BPF_FUNC_skb_get_tunnel_key: 8251 return &bpf_skb_get_tunnel_key_proto; 8252 case BPF_FUNC_skb_set_tunnel_key: 8253 return bpf_get_skb_set_tunnel_proto(func_id); 8254 case BPF_FUNC_skb_get_tunnel_opt: 8255 return &bpf_skb_get_tunnel_opt_proto; 8256 case BPF_FUNC_skb_set_tunnel_opt: 8257 return bpf_get_skb_set_tunnel_proto(func_id); 8258 case BPF_FUNC_redirect: 8259 return &bpf_redirect_proto; 8260 case BPF_FUNC_clone_redirect: 8261 return &bpf_clone_redirect_proto; 8262 case BPF_FUNC_skb_change_tail: 8263 return &bpf_skb_change_tail_proto; 8264 case BPF_FUNC_skb_change_head: 8265 return &bpf_skb_change_head_proto; 8266 case BPF_FUNC_skb_store_bytes: 8267 return &bpf_skb_store_bytes_proto; 8268 case BPF_FUNC_csum_update: 8269 return &bpf_csum_update_proto; 8270 case BPF_FUNC_csum_level: 8271 return &bpf_csum_level_proto; 8272 case BPF_FUNC_l3_csum_replace: 8273 return &bpf_l3_csum_replace_proto; 8274 case BPF_FUNC_l4_csum_replace: 8275 return 
&bpf_l4_csum_replace_proto; 8276 case BPF_FUNC_set_hash_invalid: 8277 return &bpf_set_hash_invalid_proto; 8278 case BPF_FUNC_lwt_push_encap: 8279 return &bpf_lwt_xmit_push_encap_proto; 8280 default: 8281 return lwt_out_func_proto(func_id, prog); 8282 } 8283 } 8284 8285 static const struct bpf_func_proto * 8286 lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 8287 { 8288 switch (func_id) { 8289 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 8290 case BPF_FUNC_lwt_seg6_store_bytes: 8291 return &bpf_lwt_seg6_store_bytes_proto; 8292 case BPF_FUNC_lwt_seg6_action: 8293 return &bpf_lwt_seg6_action_proto; 8294 case BPF_FUNC_lwt_seg6_adjust_srh: 8295 return &bpf_lwt_seg6_adjust_srh_proto; 8296 #endif 8297 default: 8298 return lwt_out_func_proto(func_id, prog); 8299 } 8300 } 8301 8302 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, 8303 const struct bpf_prog *prog, 8304 struct bpf_insn_access_aux *info) 8305 { 8306 const int size_default = sizeof(__u32); 8307 8308 if (off < 0 || off >= sizeof(struct __sk_buff)) 8309 return false; 8310 8311 /* The verifier guarantees that size > 0. */ 8312 if (off % size != 0) 8313 return false; 8314 8315 switch (off) { 8316 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 8317 if (off + size > offsetofend(struct __sk_buff, cb[4])) 8318 return false; 8319 break; 8320 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): 8321 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): 8322 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): 8323 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): 8324 case bpf_ctx_range(struct __sk_buff, data): 8325 case bpf_ctx_range(struct __sk_buff, data_meta): 8326 case bpf_ctx_range(struct __sk_buff, data_end): 8327 if (size != size_default) 8328 return false; 8329 break; 8330 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 8331 return false; 8332 case bpf_ctx_range(struct __sk_buff, hwtstamp): 8333 if (type == BPF_WRITE || size != sizeof(__u64)) 8334 return false; 8335 break; 8336 case bpf_ctx_range(struct __sk_buff, tstamp): 8337 if (size != sizeof(__u64)) 8338 return false; 8339 break; 8340 case offsetof(struct __sk_buff, sk): 8341 if (type == BPF_WRITE || size != sizeof(__u64)) 8342 return false; 8343 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; 8344 break; 8345 case offsetof(struct __sk_buff, tstamp_type): 8346 return false; 8347 case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1: 8348 /* Explicitly prohibit access to padding in __sk_buff. */ 8349 return false; 8350 default: 8351 /* Only narrow read access allowed for now. 
*/ 8352 if (type == BPF_WRITE) { 8353 if (size != size_default) 8354 return false; 8355 } else { 8356 bpf_ctx_record_field_size(info, size_default); 8357 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 8358 return false; 8359 } 8360 } 8361 8362 return true; 8363 } 8364 8365 static bool sk_filter_is_valid_access(int off, int size, 8366 enum bpf_access_type type, 8367 const struct bpf_prog *prog, 8368 struct bpf_insn_access_aux *info) 8369 { 8370 switch (off) { 8371 case bpf_ctx_range(struct __sk_buff, tc_classid): 8372 case bpf_ctx_range(struct __sk_buff, data): 8373 case bpf_ctx_range(struct __sk_buff, data_meta): 8374 case bpf_ctx_range(struct __sk_buff, data_end): 8375 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 8376 case bpf_ctx_range(struct __sk_buff, tstamp): 8377 case bpf_ctx_range(struct __sk_buff, wire_len): 8378 case bpf_ctx_range(struct __sk_buff, hwtstamp): 8379 return false; 8380 } 8381 8382 if (type == BPF_WRITE) { 8383 switch (off) { 8384 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 8385 break; 8386 default: 8387 return false; 8388 } 8389 } 8390 8391 return bpf_skb_is_valid_access(off, size, type, prog, info); 8392 } 8393 8394 static bool cg_skb_is_valid_access(int off, int size, 8395 enum bpf_access_type type, 8396 const struct bpf_prog *prog, 8397 struct bpf_insn_access_aux *info) 8398 { 8399 switch (off) { 8400 case bpf_ctx_range(struct __sk_buff, tc_classid): 8401 case bpf_ctx_range(struct __sk_buff, data_meta): 8402 case bpf_ctx_range(struct __sk_buff, wire_len): 8403 return false; 8404 case bpf_ctx_range(struct __sk_buff, data): 8405 case bpf_ctx_range(struct __sk_buff, data_end): 8406 if (!bpf_capable()) 8407 return false; 8408 break; 8409 } 8410 8411 if (type == BPF_WRITE) { 8412 switch (off) { 8413 case bpf_ctx_range(struct __sk_buff, mark): 8414 case bpf_ctx_range(struct __sk_buff, priority): 8415 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 8416 break; 8417 case bpf_ctx_range(struct __sk_buff, tstamp): 8418 if (!bpf_capable()) 8419 return false; 8420 break; 8421 default: 8422 return false; 8423 } 8424 } 8425 8426 switch (off) { 8427 case bpf_ctx_range(struct __sk_buff, data): 8428 info->reg_type = PTR_TO_PACKET; 8429 break; 8430 case bpf_ctx_range(struct __sk_buff, data_end): 8431 info->reg_type = PTR_TO_PACKET_END; 8432 break; 8433 } 8434 8435 return bpf_skb_is_valid_access(off, size, type, prog, info); 8436 } 8437 8438 static bool lwt_is_valid_access(int off, int size, 8439 enum bpf_access_type type, 8440 const struct bpf_prog *prog, 8441 struct bpf_insn_access_aux *info) 8442 { 8443 switch (off) { 8444 case bpf_ctx_range(struct __sk_buff, tc_classid): 8445 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 8446 case bpf_ctx_range(struct __sk_buff, data_meta): 8447 case bpf_ctx_range(struct __sk_buff, tstamp): 8448 case bpf_ctx_range(struct __sk_buff, wire_len): 8449 case bpf_ctx_range(struct __sk_buff, hwtstamp): 8450 return false; 8451 } 8452 8453 if (type == BPF_WRITE) { 8454 switch (off) { 8455 case bpf_ctx_range(struct __sk_buff, mark): 8456 case bpf_ctx_range(struct __sk_buff, priority): 8457 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 8458 break; 8459 default: 8460 return false; 8461 } 8462 } 8463 8464 switch (off) { 8465 case bpf_ctx_range(struct __sk_buff, data): 8466 info->reg_type = PTR_TO_PACKET; 8467 break; 8468 case bpf_ctx_range(struct __sk_buff, data_end): 8469 info->reg_type = PTR_TO_PACKET_END; 8470 break; 8471 } 8472 8473 return bpf_skb_is_valid_access(off, size, type, 
prog, info); 8474 } 8475 8476 /* Attach type specific accesses */ 8477 static bool __sock_filter_check_attach_type(int off, 8478 enum bpf_access_type access_type, 8479 enum bpf_attach_type attach_type) 8480 { 8481 switch (off) { 8482 case offsetof(struct bpf_sock, bound_dev_if): 8483 case offsetof(struct bpf_sock, mark): 8484 case offsetof(struct bpf_sock, priority): 8485 switch (attach_type) { 8486 case BPF_CGROUP_INET_SOCK_CREATE: 8487 case BPF_CGROUP_INET_SOCK_RELEASE: 8488 goto full_access; 8489 default: 8490 return false; 8491 } 8492 case bpf_ctx_range(struct bpf_sock, src_ip4): 8493 switch (attach_type) { 8494 case BPF_CGROUP_INET4_POST_BIND: 8495 goto read_only; 8496 default: 8497 return false; 8498 } 8499 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 8500 switch (attach_type) { 8501 case BPF_CGROUP_INET6_POST_BIND: 8502 goto read_only; 8503 default: 8504 return false; 8505 } 8506 case bpf_ctx_range(struct bpf_sock, src_port): 8507 switch (attach_type) { 8508 case BPF_CGROUP_INET4_POST_BIND: 8509 case BPF_CGROUP_INET6_POST_BIND: 8510 goto read_only; 8511 default: 8512 return false; 8513 } 8514 } 8515 read_only: 8516 return access_type == BPF_READ; 8517 full_access: 8518 return true; 8519 } 8520 8521 bool bpf_sock_common_is_valid_access(int off, int size, 8522 enum bpf_access_type type, 8523 struct bpf_insn_access_aux *info) 8524 { 8525 switch (off) { 8526 case bpf_ctx_range_till(struct bpf_sock, type, priority): 8527 return false; 8528 default: 8529 return bpf_sock_is_valid_access(off, size, type, info); 8530 } 8531 } 8532 8533 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, 8534 struct bpf_insn_access_aux *info) 8535 { 8536 const int size_default = sizeof(__u32); 8537 int field_size; 8538 8539 if (off < 0 || off >= sizeof(struct bpf_sock)) 8540 return false; 8541 if (off % size != 0) 8542 return false; 8543 8544 switch (off) { 8545 case offsetof(struct bpf_sock, state): 8546 case offsetof(struct bpf_sock, family): 8547 case offsetof(struct bpf_sock, type): 8548 case offsetof(struct bpf_sock, protocol): 8549 case offsetof(struct bpf_sock, src_port): 8550 case offsetof(struct bpf_sock, rx_queue_mapping): 8551 case bpf_ctx_range(struct bpf_sock, src_ip4): 8552 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 8553 case bpf_ctx_range(struct bpf_sock, dst_ip4): 8554 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 8555 bpf_ctx_record_field_size(info, size_default); 8556 return bpf_ctx_narrow_access_ok(off, size, size_default); 8557 case bpf_ctx_range(struct bpf_sock, dst_port): 8558 field_size = size == size_default ? 8559 size_default : sizeof_field(struct bpf_sock, dst_port); 8560 bpf_ctx_record_field_size(info, field_size); 8561 return bpf_ctx_narrow_access_ok(off, size, field_size); 8562 case offsetofend(struct bpf_sock, dst_port) ... 
8563 offsetof(struct bpf_sock, dst_ip4) - 1: 8564 return false; 8565 } 8566 8567 return size == size_default; 8568 } 8569 8570 static bool sock_filter_is_valid_access(int off, int size, 8571 enum bpf_access_type type, 8572 const struct bpf_prog *prog, 8573 struct bpf_insn_access_aux *info) 8574 { 8575 if (!bpf_sock_is_valid_access(off, size, type, info)) 8576 return false; 8577 return __sock_filter_check_attach_type(off, type, 8578 prog->expected_attach_type); 8579 } 8580 8581 static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, 8582 const struct bpf_prog *prog) 8583 { 8584 /* Neither direct read nor direct write requires any preliminary 8585 * action. 8586 */ 8587 return 0; 8588 } 8589 8590 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, 8591 const struct bpf_prog *prog, int drop_verdict) 8592 { 8593 struct bpf_insn *insn = insn_buf; 8594 8595 if (!direct_write) 8596 return 0; 8597 8598 /* if (!skb->cloned) 8599 * goto start; 8600 * 8601 * (Fast-path, otherwise approximation that we might be 8602 * a clone, do the rest in helper.) 8603 */ 8604 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET); 8605 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); 8606 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); 8607 8608 /* ret = bpf_skb_pull_data(skb, 0); */ 8609 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 8610 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); 8611 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 8612 BPF_FUNC_skb_pull_data); 8613 /* if (!ret) 8614 * goto restore; 8615 * return TC_ACT_SHOT; 8616 */ 8617 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); 8618 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); 8619 *insn++ = BPF_EXIT_INSN(); 8620 8621 /* restore: */ 8622 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); 8623 /* start: */ 8624 *insn++ = prog->insnsi[0]; 8625 8626 return insn - insn_buf; 8627 } 8628 8629 static int bpf_gen_ld_abs(const struct bpf_insn *orig, 8630 struct bpf_insn *insn_buf) 8631 { 8632 bool indirect = BPF_MODE(orig->code) == BPF_IND; 8633 struct bpf_insn *insn = insn_buf; 8634 8635 if (!indirect) { 8636 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); 8637 } else { 8638 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); 8639 if (orig->imm) 8640 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); 8641 } 8642 /* We're guaranteed here that CTX is in R6. 
*/ 8643 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); 8644 8645 switch (BPF_SIZE(orig->code)) { 8646 case BPF_B: 8647 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); 8648 break; 8649 case BPF_H: 8650 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); 8651 break; 8652 case BPF_W: 8653 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); 8654 break; 8655 } 8656 8657 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); 8658 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); 8659 *insn++ = BPF_EXIT_INSN(); 8660 8661 return insn - insn_buf; 8662 } 8663 8664 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, 8665 const struct bpf_prog *prog) 8666 { 8667 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); 8668 } 8669 8670 static bool tc_cls_act_is_valid_access(int off, int size, 8671 enum bpf_access_type type, 8672 const struct bpf_prog *prog, 8673 struct bpf_insn_access_aux *info) 8674 { 8675 if (type == BPF_WRITE) { 8676 switch (off) { 8677 case bpf_ctx_range(struct __sk_buff, mark): 8678 case bpf_ctx_range(struct __sk_buff, tc_index): 8679 case bpf_ctx_range(struct __sk_buff, priority): 8680 case bpf_ctx_range(struct __sk_buff, tc_classid): 8681 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 8682 case bpf_ctx_range(struct __sk_buff, tstamp): 8683 case bpf_ctx_range(struct __sk_buff, queue_mapping): 8684 break; 8685 default: 8686 return false; 8687 } 8688 } 8689 8690 switch (off) { 8691 case bpf_ctx_range(struct __sk_buff, data): 8692 info->reg_type = PTR_TO_PACKET; 8693 break; 8694 case bpf_ctx_range(struct __sk_buff, data_meta): 8695 info->reg_type = PTR_TO_PACKET_META; 8696 break; 8697 case bpf_ctx_range(struct __sk_buff, data_end): 8698 info->reg_type = PTR_TO_PACKET_END; 8699 break; 8700 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 8701 return false; 8702 case offsetof(struct __sk_buff, tstamp_type): 8703 /* The convert_ctx_access() on reading and writing 8704 * __sk_buff->tstamp depends on whether the bpf prog 8705 * has used __sk_buff->tstamp_type or not. 8706 * Thus, we need to set prog->tstamp_type_access 8707 * earlier during is_valid_access() here. 
8708 */ 8709 ((struct bpf_prog *)prog)->tstamp_type_access = 1; 8710 return size == sizeof(__u8); 8711 } 8712 8713 return bpf_skb_is_valid_access(off, size, type, prog, info); 8714 } 8715 8716 static bool __is_valid_xdp_access(int off, int size) 8717 { 8718 if (off < 0 || off >= sizeof(struct xdp_md)) 8719 return false; 8720 if (off % size != 0) 8721 return false; 8722 if (size != sizeof(__u32)) 8723 return false; 8724 8725 return true; 8726 } 8727 8728 static bool xdp_is_valid_access(int off, int size, 8729 enum bpf_access_type type, 8730 const struct bpf_prog *prog, 8731 struct bpf_insn_access_aux *info) 8732 { 8733 if (prog->expected_attach_type != BPF_XDP_DEVMAP) { 8734 switch (off) { 8735 case offsetof(struct xdp_md, egress_ifindex): 8736 return false; 8737 } 8738 } 8739 8740 if (type == BPF_WRITE) { 8741 if (bpf_prog_is_dev_bound(prog->aux)) { 8742 switch (off) { 8743 case offsetof(struct xdp_md, rx_queue_index): 8744 return __is_valid_xdp_access(off, size); 8745 } 8746 } 8747 return false; 8748 } 8749 8750 switch (off) { 8751 case offsetof(struct xdp_md, data): 8752 info->reg_type = PTR_TO_PACKET; 8753 break; 8754 case offsetof(struct xdp_md, data_meta): 8755 info->reg_type = PTR_TO_PACKET_META; 8756 break; 8757 case offsetof(struct xdp_md, data_end): 8758 info->reg_type = PTR_TO_PACKET_END; 8759 break; 8760 } 8761 8762 return __is_valid_xdp_access(off, size); 8763 } 8764 8765 void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act) 8766 { 8767 const u32 act_max = XDP_REDIRECT; 8768 8769 pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n", 8770 act > act_max ? "Illegal" : "Driver unsupported", 8771 act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A"); 8772 } 8773 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 8774 8775 static bool sock_addr_is_valid_access(int off, int size, 8776 enum bpf_access_type type, 8777 const struct bpf_prog *prog, 8778 struct bpf_insn_access_aux *info) 8779 { 8780 const int size_default = sizeof(__u32); 8781 8782 if (off < 0 || off >= sizeof(struct bpf_sock_addr)) 8783 return false; 8784 if (off % size != 0) 8785 return false; 8786 8787 /* Disallow access to IPv6 fields from IPv4 context and vice 8788 * versa. 
8789 */ 8790 switch (off) { 8791 case bpf_ctx_range(struct bpf_sock_addr, user_ip4): 8792 switch (prog->expected_attach_type) { 8793 case BPF_CGROUP_INET4_BIND: 8794 case BPF_CGROUP_INET4_CONNECT: 8795 case BPF_CGROUP_INET4_GETPEERNAME: 8796 case BPF_CGROUP_INET4_GETSOCKNAME: 8797 case BPF_CGROUP_UDP4_SENDMSG: 8798 case BPF_CGROUP_UDP4_RECVMSG: 8799 break; 8800 default: 8801 return false; 8802 } 8803 break; 8804 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 8805 switch (prog->expected_attach_type) { 8806 case BPF_CGROUP_INET6_BIND: 8807 case BPF_CGROUP_INET6_CONNECT: 8808 case BPF_CGROUP_INET6_GETPEERNAME: 8809 case BPF_CGROUP_INET6_GETSOCKNAME: 8810 case BPF_CGROUP_UDP6_SENDMSG: 8811 case BPF_CGROUP_UDP6_RECVMSG: 8812 break; 8813 default: 8814 return false; 8815 } 8816 break; 8817 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): 8818 switch (prog->expected_attach_type) { 8819 case BPF_CGROUP_UDP4_SENDMSG: 8820 break; 8821 default: 8822 return false; 8823 } 8824 break; 8825 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 8826 msg_src_ip6[3]): 8827 switch (prog->expected_attach_type) { 8828 case BPF_CGROUP_UDP6_SENDMSG: 8829 break; 8830 default: 8831 return false; 8832 } 8833 break; 8834 } 8835 8836 switch (off) { 8837 case bpf_ctx_range(struct bpf_sock_addr, user_ip4): 8838 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 8839 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): 8840 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 8841 msg_src_ip6[3]): 8842 case bpf_ctx_range(struct bpf_sock_addr, user_port): 8843 if (type == BPF_READ) { 8844 bpf_ctx_record_field_size(info, size_default); 8845 8846 if (bpf_ctx_wide_access_ok(off, size, 8847 struct bpf_sock_addr, 8848 user_ip6)) 8849 return true; 8850 8851 if (bpf_ctx_wide_access_ok(off, size, 8852 struct bpf_sock_addr, 8853 msg_src_ip6)) 8854 return true; 8855 8856 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 8857 return false; 8858 } else { 8859 if (bpf_ctx_wide_access_ok(off, size, 8860 struct bpf_sock_addr, 8861 user_ip6)) 8862 return true; 8863 8864 if (bpf_ctx_wide_access_ok(off, size, 8865 struct bpf_sock_addr, 8866 msg_src_ip6)) 8867 return true; 8868 8869 if (size != size_default) 8870 return false; 8871 } 8872 break; 8873 case offsetof(struct bpf_sock_addr, sk): 8874 if (type != BPF_READ) 8875 return false; 8876 if (size != sizeof(__u64)) 8877 return false; 8878 info->reg_type = PTR_TO_SOCKET; 8879 break; 8880 default: 8881 if (type == BPF_READ) { 8882 if (size != size_default) 8883 return false; 8884 } else { 8885 return false; 8886 } 8887 } 8888 8889 return true; 8890 } 8891 8892 static bool sock_ops_is_valid_access(int off, int size, 8893 enum bpf_access_type type, 8894 const struct bpf_prog *prog, 8895 struct bpf_insn_access_aux *info) 8896 { 8897 const int size_default = sizeof(__u32); 8898 8899 if (off < 0 || off >= sizeof(struct bpf_sock_ops)) 8900 return false; 8901 8902 /* The verifier guarantees that size > 0. 
*/ 8903 if (off % size != 0) 8904 return false; 8905 8906 if (type == BPF_WRITE) { 8907 switch (off) { 8908 case offsetof(struct bpf_sock_ops, reply): 8909 case offsetof(struct bpf_sock_ops, sk_txhash): 8910 if (size != size_default) 8911 return false; 8912 break; 8913 default: 8914 return false; 8915 } 8916 } else { 8917 switch (off) { 8918 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, 8919 bytes_acked): 8920 if (size != sizeof(__u64)) 8921 return false; 8922 break; 8923 case offsetof(struct bpf_sock_ops, sk): 8924 if (size != sizeof(__u64)) 8925 return false; 8926 info->reg_type = PTR_TO_SOCKET_OR_NULL; 8927 break; 8928 case offsetof(struct bpf_sock_ops, skb_data): 8929 if (size != sizeof(__u64)) 8930 return false; 8931 info->reg_type = PTR_TO_PACKET; 8932 break; 8933 case offsetof(struct bpf_sock_ops, skb_data_end): 8934 if (size != sizeof(__u64)) 8935 return false; 8936 info->reg_type = PTR_TO_PACKET_END; 8937 break; 8938 case offsetof(struct bpf_sock_ops, skb_tcp_flags): 8939 bpf_ctx_record_field_size(info, size_default); 8940 return bpf_ctx_narrow_access_ok(off, size, 8941 size_default); 8942 default: 8943 if (size != size_default) 8944 return false; 8945 break; 8946 } 8947 } 8948 8949 return true; 8950 } 8951 8952 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, 8953 const struct bpf_prog *prog) 8954 { 8955 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); 8956 } 8957 8958 static bool sk_skb_is_valid_access(int off, int size, 8959 enum bpf_access_type type, 8960 const struct bpf_prog *prog, 8961 struct bpf_insn_access_aux *info) 8962 { 8963 switch (off) { 8964 case bpf_ctx_range(struct __sk_buff, tc_classid): 8965 case bpf_ctx_range(struct __sk_buff, data_meta): 8966 case bpf_ctx_range(struct __sk_buff, tstamp): 8967 case bpf_ctx_range(struct __sk_buff, wire_len): 8968 case bpf_ctx_range(struct __sk_buff, hwtstamp): 8969 return false; 8970 } 8971 8972 if (type == BPF_WRITE) { 8973 switch (off) { 8974 case bpf_ctx_range(struct __sk_buff, tc_index): 8975 case bpf_ctx_range(struct __sk_buff, priority): 8976 break; 8977 default: 8978 return false; 8979 } 8980 } 8981 8982 switch (off) { 8983 case bpf_ctx_range(struct __sk_buff, mark): 8984 return false; 8985 case bpf_ctx_range(struct __sk_buff, data): 8986 info->reg_type = PTR_TO_PACKET; 8987 break; 8988 case bpf_ctx_range(struct __sk_buff, data_end): 8989 info->reg_type = PTR_TO_PACKET_END; 8990 break; 8991 } 8992 8993 return bpf_skb_is_valid_access(off, size, type, prog, info); 8994 } 8995 8996 static bool sk_msg_is_valid_access(int off, int size, 8997 enum bpf_access_type type, 8998 const struct bpf_prog *prog, 8999 struct bpf_insn_access_aux *info) 9000 { 9001 if (type == BPF_WRITE) 9002 return false; 9003 9004 if (off % size != 0) 9005 return false; 9006 9007 switch (off) { 9008 case offsetof(struct sk_msg_md, data): 9009 info->reg_type = PTR_TO_PACKET; 9010 if (size != sizeof(__u64)) 9011 return false; 9012 break; 9013 case offsetof(struct sk_msg_md, data_end): 9014 info->reg_type = PTR_TO_PACKET_END; 9015 if (size != sizeof(__u64)) 9016 return false; 9017 break; 9018 case offsetof(struct sk_msg_md, sk): 9019 if (size != sizeof(__u64)) 9020 return false; 9021 info->reg_type = PTR_TO_SOCKET; 9022 break; 9023 case bpf_ctx_range(struct sk_msg_md, family): 9024 case bpf_ctx_range(struct sk_msg_md, remote_ip4): 9025 case bpf_ctx_range(struct sk_msg_md, local_ip4): 9026 case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]): 9027 case bpf_ctx_range_till(struct 
sk_msg_md, local_ip6[0], local_ip6[3]): 9028 case bpf_ctx_range(struct sk_msg_md, remote_port): 9029 case bpf_ctx_range(struct sk_msg_md, local_port): 9030 case bpf_ctx_range(struct sk_msg_md, size): 9031 if (size != sizeof(__u32)) 9032 return false; 9033 break; 9034 default: 9035 return false; 9036 } 9037 return true; 9038 } 9039 9040 static bool flow_dissector_is_valid_access(int off, int size, 9041 enum bpf_access_type type, 9042 const struct bpf_prog *prog, 9043 struct bpf_insn_access_aux *info) 9044 { 9045 const int size_default = sizeof(__u32); 9046 9047 if (off < 0 || off >= sizeof(struct __sk_buff)) 9048 return false; 9049 9050 if (type == BPF_WRITE) 9051 return false; 9052 9053 switch (off) { 9054 case bpf_ctx_range(struct __sk_buff, data): 9055 if (size != size_default) 9056 return false; 9057 info->reg_type = PTR_TO_PACKET; 9058 return true; 9059 case bpf_ctx_range(struct __sk_buff, data_end): 9060 if (size != size_default) 9061 return false; 9062 info->reg_type = PTR_TO_PACKET_END; 9063 return true; 9064 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 9065 if (size != sizeof(__u64)) 9066 return false; 9067 info->reg_type = PTR_TO_FLOW_KEYS; 9068 return true; 9069 default: 9070 return false; 9071 } 9072 } 9073 9074 static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, 9075 const struct bpf_insn *si, 9076 struct bpf_insn *insn_buf, 9077 struct bpf_prog *prog, 9078 u32 *target_size) 9079 9080 { 9081 struct bpf_insn *insn = insn_buf; 9082 9083 switch (si->off) { 9084 case offsetof(struct __sk_buff, data): 9085 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data), 9086 si->dst_reg, si->src_reg, 9087 offsetof(struct bpf_flow_dissector, data)); 9088 break; 9089 9090 case offsetof(struct __sk_buff, data_end): 9091 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end), 9092 si->dst_reg, si->src_reg, 9093 offsetof(struct bpf_flow_dissector, data_end)); 9094 break; 9095 9096 case offsetof(struct __sk_buff, flow_keys): 9097 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys), 9098 si->dst_reg, si->src_reg, 9099 offsetof(struct bpf_flow_dissector, flow_keys)); 9100 break; 9101 } 9102 9103 return insn - insn_buf; 9104 } 9105 9106 static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, 9107 struct bpf_insn *insn) 9108 { 9109 __u8 value_reg = si->dst_reg; 9110 __u8 skb_reg = si->src_reg; 9111 /* AX is needed because src_reg and dst_reg could be the same */ 9112 __u8 tmp_reg = BPF_REG_AX; 9113 9114 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, 9115 PKT_VLAN_PRESENT_OFFSET); 9116 *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, 9117 SKB_MONO_DELIVERY_TIME_MASK, 2); 9118 *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); 9119 *insn++ = BPF_JMP_A(1); 9120 *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); 9121 9122 return insn; 9123 } 9124 9125 static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, 9126 struct bpf_insn *insn) 9127 { 9128 /* si->dst_reg = skb_shinfo(SKB); */ 9129 #ifdef NET_SKBUFF_DATA_USES_OFFSET 9130 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), 9131 BPF_REG_AX, si->src_reg, 9132 offsetof(struct sk_buff, end)); 9133 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), 9134 si->dst_reg, si->src_reg, 9135 offsetof(struct sk_buff, head)); 9136 *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); 9137 #else 9138 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), 9139 si->dst_reg, si->src_reg, 
9140 offsetof(struct sk_buff, end)); 9141 #endif 9142 9143 return insn; 9144 } 9145 9146 static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, 9147 const struct bpf_insn *si, 9148 struct bpf_insn *insn) 9149 { 9150 __u8 value_reg = si->dst_reg; 9151 __u8 skb_reg = si->src_reg; 9152 9153 #ifdef CONFIG_NET_CLS_ACT 9154 /* If the tstamp_type is read, 9155 * the bpf prog is aware the tstamp could have delivery time. 9156 * Thus, read skb->tstamp as is if tstamp_type_access is true. 9157 */ 9158 if (!prog->tstamp_type_access) { 9159 /* AX is needed because src_reg and dst_reg could be the same */ 9160 __u8 tmp_reg = BPF_REG_AX; 9161 9162 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); 9163 *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, 9164 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); 9165 *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, 9166 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); 9167 /* skb->tc_at_ingress && skb->mono_delivery_time, 9168 * read 0 as the (rcv) timestamp. 9169 */ 9170 *insn++ = BPF_MOV64_IMM(value_reg, 0); 9171 *insn++ = BPF_JMP_A(1); 9172 } 9173 #endif 9174 9175 *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg, 9176 offsetof(struct sk_buff, tstamp)); 9177 return insn; 9178 } 9179 9180 static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, 9181 const struct bpf_insn *si, 9182 struct bpf_insn *insn) 9183 { 9184 __u8 value_reg = si->src_reg; 9185 __u8 skb_reg = si->dst_reg; 9186 9187 #ifdef CONFIG_NET_CLS_ACT 9188 /* If the tstamp_type is read, 9189 * the bpf prog is aware the tstamp could have delivery time. 9190 * Thus, write skb->tstamp as is if tstamp_type_access is true. 9191 * Otherwise, writing at ingress will have to clear the 9192 * mono_delivery_time bit also. 
9193 */ 9194 if (!prog->tstamp_type_access) { 9195 __u8 tmp_reg = BPF_REG_AX; 9196 9197 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); 9198 /* Writing __sk_buff->tstamp as ingress, goto <clear> */ 9199 *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); 9200 /* goto <store> */ 9201 *insn++ = BPF_JMP_A(2); 9202 /* <clear>: mono_delivery_time */ 9203 *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); 9204 *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET); 9205 } 9206 #endif 9207 9208 /* <store>: skb->tstamp = tstamp */ 9209 *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg, 9210 offsetof(struct sk_buff, tstamp)); 9211 return insn; 9212 } 9213 9214 static u32 bpf_convert_ctx_access(enum bpf_access_type type, 9215 const struct bpf_insn *si, 9216 struct bpf_insn *insn_buf, 9217 struct bpf_prog *prog, u32 *target_size) 9218 { 9219 struct bpf_insn *insn = insn_buf; 9220 int off; 9221 9222 switch (si->off) { 9223 case offsetof(struct __sk_buff, len): 9224 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9225 bpf_target_off(struct sk_buff, len, 4, 9226 target_size)); 9227 break; 9228 9229 case offsetof(struct __sk_buff, protocol): 9230 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9231 bpf_target_off(struct sk_buff, protocol, 2, 9232 target_size)); 9233 break; 9234 9235 case offsetof(struct __sk_buff, vlan_proto): 9236 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9237 bpf_target_off(struct sk_buff, vlan_proto, 2, 9238 target_size)); 9239 break; 9240 9241 case offsetof(struct __sk_buff, priority): 9242 if (type == BPF_WRITE) 9243 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 9244 bpf_target_off(struct sk_buff, priority, 4, 9245 target_size)); 9246 else 9247 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9248 bpf_target_off(struct sk_buff, priority, 4, 9249 target_size)); 9250 break; 9251 9252 case offsetof(struct __sk_buff, ingress_ifindex): 9253 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9254 bpf_target_off(struct sk_buff, skb_iif, 4, 9255 target_size)); 9256 break; 9257 9258 case offsetof(struct __sk_buff, ifindex): 9259 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 9260 si->dst_reg, si->src_reg, 9261 offsetof(struct sk_buff, dev)); 9262 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 9263 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9264 bpf_target_off(struct net_device, ifindex, 4, 9265 target_size)); 9266 break; 9267 9268 case offsetof(struct __sk_buff, hash): 9269 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9270 bpf_target_off(struct sk_buff, hash, 4, 9271 target_size)); 9272 break; 9273 9274 case offsetof(struct __sk_buff, mark): 9275 if (type == BPF_WRITE) 9276 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 9277 bpf_target_off(struct sk_buff, mark, 4, 9278 target_size)); 9279 else 9280 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9281 bpf_target_off(struct sk_buff, mark, 4, 9282 target_size)); 9283 break; 9284 9285 case offsetof(struct __sk_buff, pkt_type): 9286 *target_size = 1; 9287 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, 9288 PKT_TYPE_OFFSET); 9289 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); 9290 #ifdef __BIG_ENDIAN_BITFIELD 9291 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); 9292 #endif 9293 break; 9294 9295 case offsetof(struct __sk_buff, queue_mapping): 9296 if (type == BPF_WRITE) { 9297 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); 9298 
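				/* The JGE above skips the following store when the
				 * value being written is NO_QUEUE_MAPPING or larger,
				 * so out-of-range values (and the "no queue mapping"
				 * sentinel itself) are silently ignored rather than
				 * written to the 16-bit skb->queue_mapping field.
				 */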
*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, 9299 bpf_target_off(struct sk_buff, 9300 queue_mapping, 9301 2, target_size)); 9302 } else { 9303 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9304 bpf_target_off(struct sk_buff, 9305 queue_mapping, 9306 2, target_size)); 9307 } 9308 break; 9309 9310 case offsetof(struct __sk_buff, vlan_present): 9311 *target_size = 1; 9312 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, 9313 PKT_VLAN_PRESENT_OFFSET); 9314 if (PKT_VLAN_PRESENT_BIT) 9315 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); 9316 if (PKT_VLAN_PRESENT_BIT < 7) 9317 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); 9318 break; 9319 9320 case offsetof(struct __sk_buff, vlan_tci): 9321 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9322 bpf_target_off(struct sk_buff, vlan_tci, 2, 9323 target_size)); 9324 break; 9325 9326 case offsetof(struct __sk_buff, cb[0]) ... 9327 offsetofend(struct __sk_buff, cb[4]) - 1: 9328 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20); 9329 BUILD_BUG_ON((offsetof(struct sk_buff, cb) + 9330 offsetof(struct qdisc_skb_cb, data)) % 9331 sizeof(__u64)); 9332 9333 prog->cb_access = 1; 9334 off = si->off; 9335 off -= offsetof(struct __sk_buff, cb[0]); 9336 off += offsetof(struct sk_buff, cb); 9337 off += offsetof(struct qdisc_skb_cb, data); 9338 if (type == BPF_WRITE) 9339 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, 9340 si->src_reg, off); 9341 else 9342 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, 9343 si->src_reg, off); 9344 break; 9345 9346 case offsetof(struct __sk_buff, tc_classid): 9347 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2); 9348 9349 off = si->off; 9350 off -= offsetof(struct __sk_buff, tc_classid); 9351 off += offsetof(struct sk_buff, cb); 9352 off += offsetof(struct qdisc_skb_cb, tc_classid); 9353 *target_size = 2; 9354 if (type == BPF_WRITE) 9355 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, 9356 si->src_reg, off); 9357 else 9358 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, 9359 si->src_reg, off); 9360 break; 9361 9362 case offsetof(struct __sk_buff, data): 9363 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), 9364 si->dst_reg, si->src_reg, 9365 offsetof(struct sk_buff, data)); 9366 break; 9367 9368 case offsetof(struct __sk_buff, data_meta): 9369 off = si->off; 9370 off -= offsetof(struct __sk_buff, data_meta); 9371 off += offsetof(struct sk_buff, cb); 9372 off += offsetof(struct bpf_skb_data_end, data_meta); 9373 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 9374 si->src_reg, off); 9375 break; 9376 9377 case offsetof(struct __sk_buff, data_end): 9378 off = si->off; 9379 off -= offsetof(struct __sk_buff, data_end); 9380 off += offsetof(struct sk_buff, cb); 9381 off += offsetof(struct bpf_skb_data_end, data_end); 9382 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 9383 si->src_reg, off); 9384 break; 9385 9386 case offsetof(struct __sk_buff, tc_index): 9387 #ifdef CONFIG_NET_SCHED 9388 if (type == BPF_WRITE) 9389 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, 9390 bpf_target_off(struct sk_buff, tc_index, 2, 9391 target_size)); 9392 else 9393 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9394 bpf_target_off(struct sk_buff, tc_index, 2, 9395 target_size)); 9396 #else 9397 *target_size = 2; 9398 if (type == BPF_WRITE) 9399 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); 9400 else 9401 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 9402 #endif 9403 break; 9404 9405 case offsetof(struct __sk_buff, napi_id): 9406 #if 
defined(CONFIG_NET_RX_BUSY_POLL) 9407 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9408 bpf_target_off(struct sk_buff, napi_id, 4, 9409 target_size)); 9410 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); 9411 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 9412 #else 9413 *target_size = 4; 9414 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 9415 #endif 9416 break; 9417 case offsetof(struct __sk_buff, family): 9418 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); 9419 9420 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9421 si->dst_reg, si->src_reg, 9422 offsetof(struct sk_buff, sk)); 9423 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 9424 bpf_target_off(struct sock_common, 9425 skc_family, 9426 2, target_size)); 9427 break; 9428 case offsetof(struct __sk_buff, remote_ip4): 9429 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); 9430 9431 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9432 si->dst_reg, si->src_reg, 9433 offsetof(struct sk_buff, sk)); 9434 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9435 bpf_target_off(struct sock_common, 9436 skc_daddr, 9437 4, target_size)); 9438 break; 9439 case offsetof(struct __sk_buff, local_ip4): 9440 BUILD_BUG_ON(sizeof_field(struct sock_common, 9441 skc_rcv_saddr) != 4); 9442 9443 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9444 si->dst_reg, si->src_reg, 9445 offsetof(struct sk_buff, sk)); 9446 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9447 bpf_target_off(struct sock_common, 9448 skc_rcv_saddr, 9449 4, target_size)); 9450 break; 9451 case offsetof(struct __sk_buff, remote_ip6[0]) ... 9452 offsetof(struct __sk_buff, remote_ip6[3]): 9453 #if IS_ENABLED(CONFIG_IPV6) 9454 BUILD_BUG_ON(sizeof_field(struct sock_common, 9455 skc_v6_daddr.s6_addr32[0]) != 4); 9456 9457 off = si->off; 9458 off -= offsetof(struct __sk_buff, remote_ip6[0]); 9459 9460 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9461 si->dst_reg, si->src_reg, 9462 offsetof(struct sk_buff, sk)); 9463 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9464 offsetof(struct sock_common, 9465 skc_v6_daddr.s6_addr32[0]) + 9466 off); 9467 #else 9468 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9469 #endif 9470 break; 9471 case offsetof(struct __sk_buff, local_ip6[0]) ... 
9472 offsetof(struct __sk_buff, local_ip6[3]): 9473 #if IS_ENABLED(CONFIG_IPV6) 9474 BUILD_BUG_ON(sizeof_field(struct sock_common, 9475 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 9476 9477 off = si->off; 9478 off -= offsetof(struct __sk_buff, local_ip6[0]); 9479 9480 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9481 si->dst_reg, si->src_reg, 9482 offsetof(struct sk_buff, sk)); 9483 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9484 offsetof(struct sock_common, 9485 skc_v6_rcv_saddr.s6_addr32[0]) + 9486 off); 9487 #else 9488 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9489 #endif 9490 break; 9491 9492 case offsetof(struct __sk_buff, remote_port): 9493 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); 9494 9495 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9496 si->dst_reg, si->src_reg, 9497 offsetof(struct sk_buff, sk)); 9498 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 9499 bpf_target_off(struct sock_common, 9500 skc_dport, 9501 2, target_size)); 9502 #ifndef __BIG_ENDIAN_BITFIELD 9503 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 9504 #endif 9505 break; 9506 9507 case offsetof(struct __sk_buff, local_port): 9508 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); 9509 9510 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9511 si->dst_reg, si->src_reg, 9512 offsetof(struct sk_buff, sk)); 9513 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 9514 bpf_target_off(struct sock_common, 9515 skc_num, 2, target_size)); 9516 break; 9517 9518 case offsetof(struct __sk_buff, tstamp): 9519 BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8); 9520 9521 if (type == BPF_WRITE) 9522 insn = bpf_convert_tstamp_write(prog, si, insn); 9523 else 9524 insn = bpf_convert_tstamp_read(prog, si, insn); 9525 break; 9526 9527 case offsetof(struct __sk_buff, tstamp_type): 9528 insn = bpf_convert_tstamp_type_read(si, insn); 9529 break; 9530 9531 case offsetof(struct __sk_buff, gso_segs): 9532 insn = bpf_convert_shinfo_access(si, insn); 9533 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), 9534 si->dst_reg, si->dst_reg, 9535 bpf_target_off(struct skb_shared_info, 9536 gso_segs, 2, 9537 target_size)); 9538 break; 9539 case offsetof(struct __sk_buff, gso_size): 9540 insn = bpf_convert_shinfo_access(si, insn); 9541 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), 9542 si->dst_reg, si->dst_reg, 9543 bpf_target_off(struct skb_shared_info, 9544 gso_size, 2, 9545 target_size)); 9546 break; 9547 case offsetof(struct __sk_buff, wire_len): 9548 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4); 9549 9550 off = si->off; 9551 off -= offsetof(struct __sk_buff, wire_len); 9552 off += offsetof(struct sk_buff, cb); 9553 off += offsetof(struct qdisc_skb_cb, pkt_len); 9554 *target_size = 4; 9555 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); 9556 break; 9557 9558 case offsetof(struct __sk_buff, sk): 9559 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 9560 si->dst_reg, si->src_reg, 9561 offsetof(struct sk_buff, sk)); 9562 break; 9563 case offsetof(struct __sk_buff, hwtstamp): 9564 BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8); 9565 BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0); 9566 9567 insn = bpf_convert_shinfo_access(si, insn); 9568 *insn++ = BPF_LDX_MEM(BPF_DW, 9569 si->dst_reg, si->dst_reg, 9570 bpf_target_off(struct skb_shared_info, 9571 hwtstamps, 8, 9572 target_size)); 9573 break; 9574 } 9575 9576 return insn 
- insn_buf; 9577 } 9578 9579 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, 9580 const struct bpf_insn *si, 9581 struct bpf_insn *insn_buf, 9582 struct bpf_prog *prog, u32 *target_size) 9583 { 9584 struct bpf_insn *insn = insn_buf; 9585 int off; 9586 9587 switch (si->off) { 9588 case offsetof(struct bpf_sock, bound_dev_if): 9589 BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4); 9590 9591 if (type == BPF_WRITE) 9592 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 9593 offsetof(struct sock, sk_bound_dev_if)); 9594 else 9595 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9596 offsetof(struct sock, sk_bound_dev_if)); 9597 break; 9598 9599 case offsetof(struct bpf_sock, mark): 9600 BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4); 9601 9602 if (type == BPF_WRITE) 9603 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 9604 offsetof(struct sock, sk_mark)); 9605 else 9606 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9607 offsetof(struct sock, sk_mark)); 9608 break; 9609 9610 case offsetof(struct bpf_sock, priority): 9611 BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4); 9612 9613 if (type == BPF_WRITE) 9614 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 9615 offsetof(struct sock, sk_priority)); 9616 else 9617 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9618 offsetof(struct sock, sk_priority)); 9619 break; 9620 9621 case offsetof(struct bpf_sock, family): 9622 *insn++ = BPF_LDX_MEM( 9623 BPF_FIELD_SIZEOF(struct sock_common, skc_family), 9624 si->dst_reg, si->src_reg, 9625 bpf_target_off(struct sock_common, 9626 skc_family, 9627 sizeof_field(struct sock_common, 9628 skc_family), 9629 target_size)); 9630 break; 9631 9632 case offsetof(struct bpf_sock, type): 9633 *insn++ = BPF_LDX_MEM( 9634 BPF_FIELD_SIZEOF(struct sock, sk_type), 9635 si->dst_reg, si->src_reg, 9636 bpf_target_off(struct sock, sk_type, 9637 sizeof_field(struct sock, sk_type), 9638 target_size)); 9639 break; 9640 9641 case offsetof(struct bpf_sock, protocol): 9642 *insn++ = BPF_LDX_MEM( 9643 BPF_FIELD_SIZEOF(struct sock, sk_protocol), 9644 si->dst_reg, si->src_reg, 9645 bpf_target_off(struct sock, sk_protocol, 9646 sizeof_field(struct sock, sk_protocol), 9647 target_size)); 9648 break; 9649 9650 case offsetof(struct bpf_sock, src_ip4): 9651 *insn++ = BPF_LDX_MEM( 9652 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 9653 bpf_target_off(struct sock_common, skc_rcv_saddr, 9654 sizeof_field(struct sock_common, 9655 skc_rcv_saddr), 9656 target_size)); 9657 break; 9658 9659 case offsetof(struct bpf_sock, dst_ip4): 9660 *insn++ = BPF_LDX_MEM( 9661 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 9662 bpf_target_off(struct sock_common, skc_daddr, 9663 sizeof_field(struct sock_common, 9664 skc_daddr), 9665 target_size)); 9666 break; 9667 9668 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 9669 #if IS_ENABLED(CONFIG_IPV6) 9670 off = si->off; 9671 off -= offsetof(struct bpf_sock, src_ip6[0]); 9672 *insn++ = BPF_LDX_MEM( 9673 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 9674 bpf_target_off( 9675 struct sock_common, 9676 skc_v6_rcv_saddr.s6_addr32[0], 9677 sizeof_field(struct sock_common, 9678 skc_v6_rcv_saddr.s6_addr32[0]), 9679 target_size) + off); 9680 #else 9681 (void)off; 9682 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9683 #endif 9684 break; 9685 9686 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 9687 #if IS_ENABLED(CONFIG_IPV6) 9688 off = si->off; 9689 off -= offsetof(struct bpf_sock, dst_ip6[0]); 9690 *insn++ = 
BPF_LDX_MEM( 9691 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 9692 bpf_target_off(struct sock_common, 9693 skc_v6_daddr.s6_addr32[0], 9694 sizeof_field(struct sock_common, 9695 skc_v6_daddr.s6_addr32[0]), 9696 target_size) + off); 9697 #else 9698 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9699 *target_size = 4; 9700 #endif 9701 break; 9702 9703 case offsetof(struct bpf_sock, src_port): 9704 *insn++ = BPF_LDX_MEM( 9705 BPF_FIELD_SIZEOF(struct sock_common, skc_num), 9706 si->dst_reg, si->src_reg, 9707 bpf_target_off(struct sock_common, skc_num, 9708 sizeof_field(struct sock_common, 9709 skc_num), 9710 target_size)); 9711 break; 9712 9713 case offsetof(struct bpf_sock, dst_port): 9714 *insn++ = BPF_LDX_MEM( 9715 BPF_FIELD_SIZEOF(struct sock_common, skc_dport), 9716 si->dst_reg, si->src_reg, 9717 bpf_target_off(struct sock_common, skc_dport, 9718 sizeof_field(struct sock_common, 9719 skc_dport), 9720 target_size)); 9721 break; 9722 9723 case offsetof(struct bpf_sock, state): 9724 *insn++ = BPF_LDX_MEM( 9725 BPF_FIELD_SIZEOF(struct sock_common, skc_state), 9726 si->dst_reg, si->src_reg, 9727 bpf_target_off(struct sock_common, skc_state, 9728 sizeof_field(struct sock_common, 9729 skc_state), 9730 target_size)); 9731 break; 9732 case offsetof(struct bpf_sock, rx_queue_mapping): 9733 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING 9734 *insn++ = BPF_LDX_MEM( 9735 BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping), 9736 si->dst_reg, si->src_reg, 9737 bpf_target_off(struct sock, sk_rx_queue_mapping, 9738 sizeof_field(struct sock, 9739 sk_rx_queue_mapping), 9740 target_size)); 9741 *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING, 9742 1); 9743 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1); 9744 #else 9745 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1); 9746 *target_size = 2; 9747 #endif 9748 break; 9749 } 9750 9751 return insn - insn_buf; 9752 } 9753 9754 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, 9755 const struct bpf_insn *si, 9756 struct bpf_insn *insn_buf, 9757 struct bpf_prog *prog, u32 *target_size) 9758 { 9759 struct bpf_insn *insn = insn_buf; 9760 9761 switch (si->off) { 9762 case offsetof(struct __sk_buff, ifindex): 9763 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 9764 si->dst_reg, si->src_reg, 9765 offsetof(struct sk_buff, dev)); 9766 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9767 bpf_target_off(struct net_device, ifindex, 4, 9768 target_size)); 9769 break; 9770 default: 9771 return bpf_convert_ctx_access(type, si, insn_buf, prog, 9772 target_size); 9773 } 9774 9775 return insn - insn_buf; 9776 } 9777 9778 static u32 xdp_convert_ctx_access(enum bpf_access_type type, 9779 const struct bpf_insn *si, 9780 struct bpf_insn *insn_buf, 9781 struct bpf_prog *prog, u32 *target_size) 9782 { 9783 struct bpf_insn *insn = insn_buf; 9784 9785 switch (si->off) { 9786 case offsetof(struct xdp_md, data): 9787 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), 9788 si->dst_reg, si->src_reg, 9789 offsetof(struct xdp_buff, data)); 9790 break; 9791 case offsetof(struct xdp_md, data_meta): 9792 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), 9793 si->dst_reg, si->src_reg, 9794 offsetof(struct xdp_buff, data_meta)); 9795 break; 9796 case offsetof(struct xdp_md, data_end): 9797 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), 9798 si->dst_reg, si->src_reg, 9799 offsetof(struct xdp_buff, data_end)); 9800 break; 9801 case offsetof(struct xdp_md, ingress_ifindex): 9802 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, 
rxq), 9803 si->dst_reg, si->src_reg, 9804 offsetof(struct xdp_buff, rxq)); 9805 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev), 9806 si->dst_reg, si->dst_reg, 9807 offsetof(struct xdp_rxq_info, dev)); 9808 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9809 offsetof(struct net_device, ifindex)); 9810 break; 9811 case offsetof(struct xdp_md, rx_queue_index): 9812 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), 9813 si->dst_reg, si->src_reg, 9814 offsetof(struct xdp_buff, rxq)); 9815 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9816 offsetof(struct xdp_rxq_info, 9817 queue_index)); 9818 break; 9819 case offsetof(struct xdp_md, egress_ifindex): 9820 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq), 9821 si->dst_reg, si->src_reg, 9822 offsetof(struct xdp_buff, txq)); 9823 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev), 9824 si->dst_reg, si->dst_reg, 9825 offsetof(struct xdp_txq_info, dev)); 9826 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 9827 offsetof(struct net_device, ifindex)); 9828 break; 9829 } 9830 9831 return insn - insn_buf; 9832 } 9833 9834 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of 9835 * context Structure, F is Field in context structure that contains a pointer 9836 * to Nested Structure of type NS that has the field NF. 9837 * 9838 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make 9839 * sure that SIZE is not greater than actual size of S.F.NF. 9840 * 9841 * If offset OFF is provided, the load happens from that offset relative to 9842 * offset of NF. 9843 */ 9844 #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ 9845 do { \ 9846 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ 9847 si->src_reg, offsetof(S, F)); \ 9848 *insn++ = BPF_LDX_MEM( \ 9849 SIZE, si->dst_reg, si->dst_reg, \ 9850 bpf_target_off(NS, NF, sizeof_field(NS, NF), \ 9851 target_size) \ 9852 + OFF); \ 9853 } while (0) 9854 9855 #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ 9856 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ 9857 BPF_FIELD_SIZEOF(NS, NF), 0) 9858 9859 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to 9860 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. 9861 * 9862 * In addition it uses Temporary Field TF (member of struct S) as the 3rd 9863 * "register" since two registers available in convert_ctx_access are not 9864 * enough: we can't override neither SRC, since it contains value to store, nor 9865 * DST since it contains pointer to context that may be used by later 9866 * instructions. But we need a temporary place to save pointer to nested 9867 * structure whose field we want to store to. 
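 *
 * The emitted sequence is roughly equivalent to the following pseudo-C,
 * where "tmp" stands for the borrowed scratch register (illustrative
 * sketch only, not generated code):
 *
 *	ctx->TF = tmp;		save the borrowed register in the context
 *	tmp = ctx->F;		load the pointer to the nested struct NS
 *	tmp->NF = src;		SIZE-byte store, OFF bytes past NF if given
 *	tmp = ctx->TF;		restore the borrowed register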
9868 */ 9869 #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ 9870 do { \ 9871 int tmp_reg = BPF_REG_9; \ 9872 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ 9873 --tmp_reg; \ 9874 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ 9875 --tmp_reg; \ 9876 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ 9877 offsetof(S, TF)); \ 9878 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ 9879 si->dst_reg, offsetof(S, F)); \ 9880 *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ 9881 bpf_target_off(NS, NF, sizeof_field(NS, NF), \ 9882 target_size) \ 9883 + OFF); \ 9884 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ 9885 offsetof(S, TF)); \ 9886 } while (0) 9887 9888 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ 9889 TF) \ 9890 do { \ 9891 if (type == BPF_WRITE) { \ 9892 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ 9893 OFF, TF); \ 9894 } else { \ 9895 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ 9896 S, NS, F, NF, SIZE, OFF); \ 9897 } \ 9898 } while (0) 9899 9900 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ 9901 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ 9902 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) 9903 9904 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, 9905 const struct bpf_insn *si, 9906 struct bpf_insn *insn_buf, 9907 struct bpf_prog *prog, u32 *target_size) 9908 { 9909 int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port); 9910 struct bpf_insn *insn = insn_buf; 9911 9912 switch (si->off) { 9913 case offsetof(struct bpf_sock_addr, user_family): 9914 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 9915 struct sockaddr, uaddr, sa_family); 9916 break; 9917 9918 case offsetof(struct bpf_sock_addr, user_ip4): 9919 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 9920 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr, 9921 sin_addr, BPF_SIZE(si->code), 0, tmp_reg); 9922 break; 9923 9924 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 9925 off = si->off; 9926 off -= offsetof(struct bpf_sock_addr, user_ip6[0]); 9927 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 9928 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, 9929 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off, 9930 tmp_reg); 9931 break; 9932 9933 case offsetof(struct bpf_sock_addr, user_port): 9934 /* To get port we need to know sa_family first and then treat 9935 * sockaddr as either sockaddr_in or sockaddr_in6. 9936 * Though we can simplify since port field has same offset and 9937 * size in both structures. 9938 * Here we check this invariant and use just one of the 9939 * structures if it's true. 9940 */ 9941 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) != 9942 offsetof(struct sockaddr_in6, sin6_port)); 9943 BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) != 9944 sizeof_field(struct sockaddr_in6, sin6_port)); 9945 /* Account for sin6_port being smaller than user_port. 
*/ 9946 port_size = min(port_size, BPF_LDST_BYTES(si)); 9947 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 9948 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, 9949 sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg); 9950 break; 9951 9952 case offsetof(struct bpf_sock_addr, family): 9953 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 9954 struct sock, sk, sk_family); 9955 break; 9956 9957 case offsetof(struct bpf_sock_addr, type): 9958 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 9959 struct sock, sk, sk_type); 9960 break; 9961 9962 case offsetof(struct bpf_sock_addr, protocol): 9963 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 9964 struct sock, sk, sk_protocol); 9965 break; 9966 9967 case offsetof(struct bpf_sock_addr, msg_src_ip4): 9968 /* Treat t_ctx as struct in_addr for msg_src_ip4. */ 9969 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 9970 struct bpf_sock_addr_kern, struct in_addr, t_ctx, 9971 s_addr, BPF_SIZE(si->code), 0, tmp_reg); 9972 break; 9973 9974 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 9975 msg_src_ip6[3]): 9976 off = si->off; 9977 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]); 9978 /* Treat t_ctx as struct in6_addr for msg_src_ip6. */ 9979 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 9980 struct bpf_sock_addr_kern, struct in6_addr, t_ctx, 9981 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); 9982 break; 9983 case offsetof(struct bpf_sock_addr, sk): 9984 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk), 9985 si->dst_reg, si->src_reg, 9986 offsetof(struct bpf_sock_addr_kern, sk)); 9987 break; 9988 } 9989 9990 return insn - insn_buf; 9991 } 9992 9993 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, 9994 const struct bpf_insn *si, 9995 struct bpf_insn *insn_buf, 9996 struct bpf_prog *prog, 9997 u32 *target_size) 9998 { 9999 struct bpf_insn *insn = insn_buf; 10000 int off; 10001 10002 /* Helper macro for adding read access to tcp_sock or sock fields. 
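 *
 * A read of this kind is rewritten to roughly the following pseudo-C
 * (illustrative sketch for the common dst_reg != src_reg case):
 *
 *	dst = ctx->is_fullsock;
 *	if (dst != 0)
 *		dst = ((OBJ *)ctx->sk)->OBJ_FIELD;
 *
 * When dst_reg and src_reg are the same register, a scratch register is
 * additionally spilled to and restored from bpf_sock_ops_kern::temp.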
*/ 10003 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 10004 do { \ 10005 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \ 10006 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ 10007 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ 10008 if (si->dst_reg == reg || si->src_reg == reg) \ 10009 reg--; \ 10010 if (si->dst_reg == reg || si->src_reg == reg) \ 10011 reg--; \ 10012 if (si->dst_reg == si->src_reg) { \ 10013 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ 10014 offsetof(struct bpf_sock_ops_kern, \ 10015 temp)); \ 10016 fullsock_reg = reg; \ 10017 jmp += 2; \ 10018 } \ 10019 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10020 struct bpf_sock_ops_kern, \ 10021 is_fullsock), \ 10022 fullsock_reg, si->src_reg, \ 10023 offsetof(struct bpf_sock_ops_kern, \ 10024 is_fullsock)); \ 10025 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ 10026 if (si->dst_reg == si->src_reg) \ 10027 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ 10028 offsetof(struct bpf_sock_ops_kern, \ 10029 temp)); \ 10030 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10031 struct bpf_sock_ops_kern, sk),\ 10032 si->dst_reg, si->src_reg, \ 10033 offsetof(struct bpf_sock_ops_kern, sk));\ 10034 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ 10035 OBJ_FIELD), \ 10036 si->dst_reg, si->dst_reg, \ 10037 offsetof(OBJ, OBJ_FIELD)); \ 10038 if (si->dst_reg == si->src_reg) { \ 10039 *insn++ = BPF_JMP_A(1); \ 10040 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ 10041 offsetof(struct bpf_sock_ops_kern, \ 10042 temp)); \ 10043 } \ 10044 } while (0) 10045 10046 #define SOCK_OPS_GET_SK() \ 10047 do { \ 10048 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \ 10049 if (si->dst_reg == reg || si->src_reg == reg) \ 10050 reg--; \ 10051 if (si->dst_reg == reg || si->src_reg == reg) \ 10052 reg--; \ 10053 if (si->dst_reg == si->src_reg) { \ 10054 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ 10055 offsetof(struct bpf_sock_ops_kern, \ 10056 temp)); \ 10057 fullsock_reg = reg; \ 10058 jmp += 2; \ 10059 } \ 10060 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10061 struct bpf_sock_ops_kern, \ 10062 is_fullsock), \ 10063 fullsock_reg, si->src_reg, \ 10064 offsetof(struct bpf_sock_ops_kern, \ 10065 is_fullsock)); \ 10066 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ 10067 if (si->dst_reg == si->src_reg) \ 10068 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ 10069 offsetof(struct bpf_sock_ops_kern, \ 10070 temp)); \ 10071 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10072 struct bpf_sock_ops_kern, sk),\ 10073 si->dst_reg, si->src_reg, \ 10074 offsetof(struct bpf_sock_ops_kern, sk));\ 10075 if (si->dst_reg == si->src_reg) { \ 10076 *insn++ = BPF_JMP_A(1); \ 10077 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ 10078 offsetof(struct bpf_sock_ops_kern, \ 10079 temp)); \ 10080 } \ 10081 } while (0) 10082 10083 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ 10084 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) 10085 10086 /* Helper macro for adding write access to tcp_sock or sock fields. 10087 * The macro is called with two registers, dst_reg which contains a pointer 10088 * to ctx (context) and src_reg which contains the value that should be 10089 * stored. However, we need an additional register since we cannot overwrite 10090 * dst_reg because it may be used later in the program. 10091 * Instead we "borrow" one of the other register. We first save its value 10092 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore 10093 * it at the end of the macro. 
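 *
 * The resulting stores behave roughly like the following pseudo-C, with
 * "reg" standing for the borrowed register (illustrative sketch only):
 *
 *	ctx->temp = reg;			save the borrowed register
 *	reg = ctx->is_fullsock;
 *	if (reg != 0) {
 *		reg = ctx->sk;
 *		((OBJ *)reg)->OBJ_FIELD = src;
 *	}
 *	reg = ctx->temp;			restore the borrowed register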
10094 */ 10095 #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 10096 do { \ 10097 int reg = BPF_REG_9; \ 10098 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ 10099 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ 10100 if (si->dst_reg == reg || si->src_reg == reg) \ 10101 reg--; \ 10102 if (si->dst_reg == reg || si->src_reg == reg) \ 10103 reg--; \ 10104 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ 10105 offsetof(struct bpf_sock_ops_kern, \ 10106 temp)); \ 10107 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10108 struct bpf_sock_ops_kern, \ 10109 is_fullsock), \ 10110 reg, si->dst_reg, \ 10111 offsetof(struct bpf_sock_ops_kern, \ 10112 is_fullsock)); \ 10113 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ 10114 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 10115 struct bpf_sock_ops_kern, sk),\ 10116 reg, si->dst_reg, \ 10117 offsetof(struct bpf_sock_ops_kern, sk));\ 10118 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ 10119 reg, si->src_reg, \ 10120 offsetof(OBJ, OBJ_FIELD)); \ 10121 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ 10122 offsetof(struct bpf_sock_ops_kern, \ 10123 temp)); \ 10124 } while (0) 10125 10126 #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ 10127 do { \ 10128 if (TYPE == BPF_WRITE) \ 10129 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 10130 else \ 10131 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 10132 } while (0) 10133 10134 if (insn > insn_buf) 10135 return insn - insn_buf; 10136 10137 switch (si->off) { 10138 case offsetof(struct bpf_sock_ops, op): 10139 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, 10140 op), 10141 si->dst_reg, si->src_reg, 10142 offsetof(struct bpf_sock_ops_kern, op)); 10143 break; 10144 10145 case offsetof(struct bpf_sock_ops, replylong[0]) ... 
10146 offsetof(struct bpf_sock_ops, replylong[3]): 10147 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) != 10148 sizeof_field(struct bpf_sock_ops_kern, reply)); 10149 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) != 10150 sizeof_field(struct bpf_sock_ops_kern, replylong)); 10151 off = si->off; 10152 off -= offsetof(struct bpf_sock_ops, replylong[0]); 10153 off += offsetof(struct bpf_sock_ops_kern, replylong[0]); 10154 if (type == BPF_WRITE) 10155 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 10156 off); 10157 else 10158 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 10159 off); 10160 break; 10161 10162 case offsetof(struct bpf_sock_ops, family): 10163 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); 10164 10165 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10166 struct bpf_sock_ops_kern, sk), 10167 si->dst_reg, si->src_reg, 10168 offsetof(struct bpf_sock_ops_kern, sk)); 10169 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10170 offsetof(struct sock_common, skc_family)); 10171 break; 10172 10173 case offsetof(struct bpf_sock_ops, remote_ip4): 10174 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); 10175 10176 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10177 struct bpf_sock_ops_kern, sk), 10178 si->dst_reg, si->src_reg, 10179 offsetof(struct bpf_sock_ops_kern, sk)); 10180 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10181 offsetof(struct sock_common, skc_daddr)); 10182 break; 10183 10184 case offsetof(struct bpf_sock_ops, local_ip4): 10185 BUILD_BUG_ON(sizeof_field(struct sock_common, 10186 skc_rcv_saddr) != 4); 10187 10188 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10189 struct bpf_sock_ops_kern, sk), 10190 si->dst_reg, si->src_reg, 10191 offsetof(struct bpf_sock_ops_kern, sk)); 10192 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10193 offsetof(struct sock_common, 10194 skc_rcv_saddr)); 10195 break; 10196 10197 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... 10198 offsetof(struct bpf_sock_ops, remote_ip6[3]): 10199 #if IS_ENABLED(CONFIG_IPV6) 10200 BUILD_BUG_ON(sizeof_field(struct sock_common, 10201 skc_v6_daddr.s6_addr32[0]) != 4); 10202 10203 off = si->off; 10204 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]); 10205 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10206 struct bpf_sock_ops_kern, sk), 10207 si->dst_reg, si->src_reg, 10208 offsetof(struct bpf_sock_ops_kern, sk)); 10209 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10210 offsetof(struct sock_common, 10211 skc_v6_daddr.s6_addr32[0]) + 10212 off); 10213 #else 10214 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 10215 #endif 10216 break; 10217 10218 case offsetof(struct bpf_sock_ops, local_ip6[0]) ... 
10219 offsetof(struct bpf_sock_ops, local_ip6[3]): 10220 #if IS_ENABLED(CONFIG_IPV6) 10221 BUILD_BUG_ON(sizeof_field(struct sock_common, 10222 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 10223 10224 off = si->off; 10225 off -= offsetof(struct bpf_sock_ops, local_ip6[0]); 10226 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10227 struct bpf_sock_ops_kern, sk), 10228 si->dst_reg, si->src_reg, 10229 offsetof(struct bpf_sock_ops_kern, sk)); 10230 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10231 offsetof(struct sock_common, 10232 skc_v6_rcv_saddr.s6_addr32[0]) + 10233 off); 10234 #else 10235 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 10236 #endif 10237 break; 10238 10239 case offsetof(struct bpf_sock_ops, remote_port): 10240 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); 10241 10242 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10243 struct bpf_sock_ops_kern, sk), 10244 si->dst_reg, si->src_reg, 10245 offsetof(struct bpf_sock_ops_kern, sk)); 10246 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10247 offsetof(struct sock_common, skc_dport)); 10248 #ifndef __BIG_ENDIAN_BITFIELD 10249 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 10250 #endif 10251 break; 10252 10253 case offsetof(struct bpf_sock_ops, local_port): 10254 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); 10255 10256 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10257 struct bpf_sock_ops_kern, sk), 10258 si->dst_reg, si->src_reg, 10259 offsetof(struct bpf_sock_ops_kern, sk)); 10260 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10261 offsetof(struct sock_common, skc_num)); 10262 break; 10263 10264 case offsetof(struct bpf_sock_ops, is_fullsock): 10265 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10266 struct bpf_sock_ops_kern, 10267 is_fullsock), 10268 si->dst_reg, si->src_reg, 10269 offsetof(struct bpf_sock_ops_kern, 10270 is_fullsock)); 10271 break; 10272 10273 case offsetof(struct bpf_sock_ops, state): 10274 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1); 10275 10276 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10277 struct bpf_sock_ops_kern, sk), 10278 si->dst_reg, si->src_reg, 10279 offsetof(struct bpf_sock_ops_kern, sk)); 10280 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg, 10281 offsetof(struct sock_common, skc_state)); 10282 break; 10283 10284 case offsetof(struct bpf_sock_ops, rtt_min): 10285 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) != 10286 sizeof(struct minmax)); 10287 BUILD_BUG_ON(sizeof(struct minmax) < 10288 sizeof(struct minmax_sample)); 10289 10290 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10291 struct bpf_sock_ops_kern, sk), 10292 si->dst_reg, si->src_reg, 10293 offsetof(struct bpf_sock_ops_kern, sk)); 10294 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10295 offsetof(struct tcp_sock, rtt_min) + 10296 sizeof_field(struct minmax_sample, t)); 10297 break; 10298 10299 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): 10300 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, 10301 struct tcp_sock); 10302 break; 10303 10304 case offsetof(struct bpf_sock_ops, sk_txhash): 10305 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, 10306 struct sock, type); 10307 break; 10308 case offsetof(struct bpf_sock_ops, snd_cwnd): 10309 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd); 10310 break; 10311 case offsetof(struct bpf_sock_ops, srtt_us): 10312 SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us); 10313 break; 10314 case offsetof(struct bpf_sock_ops, snd_ssthresh): 10315 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh); 10316 break; 10317 case offsetof(struct bpf_sock_ops, 
rcv_nxt): 10318 SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt); 10319 break; 10320 case offsetof(struct bpf_sock_ops, snd_nxt): 10321 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt); 10322 break; 10323 case offsetof(struct bpf_sock_ops, snd_una): 10324 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una); 10325 break; 10326 case offsetof(struct bpf_sock_ops, mss_cache): 10327 SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache); 10328 break; 10329 case offsetof(struct bpf_sock_ops, ecn_flags): 10330 SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags); 10331 break; 10332 case offsetof(struct bpf_sock_ops, rate_delivered): 10333 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered); 10334 break; 10335 case offsetof(struct bpf_sock_ops, rate_interval_us): 10336 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us); 10337 break; 10338 case offsetof(struct bpf_sock_ops, packets_out): 10339 SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out); 10340 break; 10341 case offsetof(struct bpf_sock_ops, retrans_out): 10342 SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out); 10343 break; 10344 case offsetof(struct bpf_sock_ops, total_retrans): 10345 SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans); 10346 break; 10347 case offsetof(struct bpf_sock_ops, segs_in): 10348 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in); 10349 break; 10350 case offsetof(struct bpf_sock_ops, data_segs_in): 10351 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in); 10352 break; 10353 case offsetof(struct bpf_sock_ops, segs_out): 10354 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out); 10355 break; 10356 case offsetof(struct bpf_sock_ops, data_segs_out): 10357 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out); 10358 break; 10359 case offsetof(struct bpf_sock_ops, lost_out): 10360 SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out); 10361 break; 10362 case offsetof(struct bpf_sock_ops, sacked_out): 10363 SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out); 10364 break; 10365 case offsetof(struct bpf_sock_ops, bytes_received): 10366 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received); 10367 break; 10368 case offsetof(struct bpf_sock_ops, bytes_acked): 10369 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); 10370 break; 10371 case offsetof(struct bpf_sock_ops, sk): 10372 SOCK_OPS_GET_SK(); 10373 break; 10374 case offsetof(struct bpf_sock_ops, skb_data_end): 10375 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, 10376 skb_data_end), 10377 si->dst_reg, si->src_reg, 10378 offsetof(struct bpf_sock_ops_kern, 10379 skb_data_end)); 10380 break; 10381 case offsetof(struct bpf_sock_ops, skb_data): 10382 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, 10383 skb), 10384 si->dst_reg, si->src_reg, 10385 offsetof(struct bpf_sock_ops_kern, 10386 skb)); 10387 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 10388 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), 10389 si->dst_reg, si->dst_reg, 10390 offsetof(struct sk_buff, data)); 10391 break; 10392 case offsetof(struct bpf_sock_ops, skb_len): 10393 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, 10394 skb), 10395 si->dst_reg, si->src_reg, 10396 offsetof(struct bpf_sock_ops_kern, 10397 skb)); 10398 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 10399 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), 10400 si->dst_reg, si->dst_reg, 10401 offsetof(struct sk_buff, len)); 10402 break; 10403 case offsetof(struct bpf_sock_ops, skb_tcp_flags): 10404 off = offsetof(struct sk_buff, cb); 10405 off += offsetof(struct tcp_skb_cb, tcp_flags); 10406 *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags); 10407 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, 10408 skb), 10409 si->dst_reg, si->src_reg, 
10410 offsetof(struct bpf_sock_ops_kern, 10411 skb)); 10412 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 10413 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb, 10414 tcp_flags), 10415 si->dst_reg, si->dst_reg, off); 10416 break; 10417 } 10418 return insn - insn_buf; 10419 } 10420 10421 /* data_end = skb->data + skb_headlen() */ 10422 static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, 10423 struct bpf_insn *insn) 10424 { 10425 int reg; 10426 int temp_reg_off = offsetof(struct sk_buff, cb) + 10427 offsetof(struct sk_skb_cb, temp_reg); 10428 10429 if (si->src_reg == si->dst_reg) { 10430 /* We need an extra register, choose and save a register. */ 10431 reg = BPF_REG_9; 10432 if (si->src_reg == reg || si->dst_reg == reg) 10433 reg--; 10434 if (si->src_reg == reg || si->dst_reg == reg) 10435 reg--; 10436 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off); 10437 } else { 10438 reg = si->dst_reg; 10439 } 10440 10441 /* reg = skb->data */ 10442 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), 10443 reg, si->src_reg, 10444 offsetof(struct sk_buff, data)); 10445 /* AX = skb->len */ 10446 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), 10447 BPF_REG_AX, si->src_reg, 10448 offsetof(struct sk_buff, len)); 10449 /* reg = skb->data + skb->len */ 10450 *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX); 10451 /* AX = skb->data_len */ 10452 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len), 10453 BPF_REG_AX, si->src_reg, 10454 offsetof(struct sk_buff, data_len)); 10455 10456 /* reg = skb->data + skb->len - skb->data_len */ 10457 *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX); 10458 10459 if (si->src_reg == si->dst_reg) { 10460 /* Restore the saved register */ 10461 *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg); 10462 *insn++ = BPF_MOV64_REG(si->dst_reg, reg); 10463 *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off); 10464 } 10465 10466 return insn; 10467 } 10468 10469 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, 10470 const struct bpf_insn *si, 10471 struct bpf_insn *insn_buf, 10472 struct bpf_prog *prog, u32 *target_size) 10473 { 10474 struct bpf_insn *insn = insn_buf; 10475 int off; 10476 10477 switch (si->off) { 10478 case offsetof(struct __sk_buff, data_end): 10479 insn = bpf_convert_data_end_access(si, insn); 10480 break; 10481 case offsetof(struct __sk_buff, cb[0]) ... 
10482 offsetofend(struct __sk_buff, cb[4]) - 1: 10483 BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20); 10484 BUILD_BUG_ON((offsetof(struct sk_buff, cb) + 10485 offsetof(struct sk_skb_cb, data)) % 10486 sizeof(__u64)); 10487 10488 prog->cb_access = 1; 10489 off = si->off; 10490 off -= offsetof(struct __sk_buff, cb[0]); 10491 off += offsetof(struct sk_buff, cb); 10492 off += offsetof(struct sk_skb_cb, data); 10493 if (type == BPF_WRITE) 10494 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, 10495 si->src_reg, off); 10496 else 10497 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, 10498 si->src_reg, off); 10499 break; 10500 10501 10502 default: 10503 return bpf_convert_ctx_access(type, si, insn_buf, prog, 10504 target_size); 10505 } 10506 10507 return insn - insn_buf; 10508 } 10509 10510 static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, 10511 const struct bpf_insn *si, 10512 struct bpf_insn *insn_buf, 10513 struct bpf_prog *prog, u32 *target_size) 10514 { 10515 struct bpf_insn *insn = insn_buf; 10516 #if IS_ENABLED(CONFIG_IPV6) 10517 int off; 10518 #endif 10519 10520 /* convert ctx uses the fact sg element is first in struct */ 10521 BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0); 10522 10523 switch (si->off) { 10524 case offsetof(struct sk_msg_md, data): 10525 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data), 10526 si->dst_reg, si->src_reg, 10527 offsetof(struct sk_msg, data)); 10528 break; 10529 case offsetof(struct sk_msg_md, data_end): 10530 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end), 10531 si->dst_reg, si->src_reg, 10532 offsetof(struct sk_msg, data_end)); 10533 break; 10534 case offsetof(struct sk_msg_md, family): 10535 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2); 10536 10537 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10538 struct sk_msg, sk), 10539 si->dst_reg, si->src_reg, 10540 offsetof(struct sk_msg, sk)); 10541 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10542 offsetof(struct sock_common, skc_family)); 10543 break; 10544 10545 case offsetof(struct sk_msg_md, remote_ip4): 10546 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4); 10547 10548 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10549 struct sk_msg, sk), 10550 si->dst_reg, si->src_reg, 10551 offsetof(struct sk_msg, sk)); 10552 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10553 offsetof(struct sock_common, skc_daddr)); 10554 break; 10555 10556 case offsetof(struct sk_msg_md, local_ip4): 10557 BUILD_BUG_ON(sizeof_field(struct sock_common, 10558 skc_rcv_saddr) != 4); 10559 10560 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10561 struct sk_msg, sk), 10562 si->dst_reg, si->src_reg, 10563 offsetof(struct sk_msg, sk)); 10564 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10565 offsetof(struct sock_common, 10566 skc_rcv_saddr)); 10567 break; 10568 10569 case offsetof(struct sk_msg_md, remote_ip6[0]) ... 
10570 offsetof(struct sk_msg_md, remote_ip6[3]): 10571 #if IS_ENABLED(CONFIG_IPV6) 10572 BUILD_BUG_ON(sizeof_field(struct sock_common, 10573 skc_v6_daddr.s6_addr32[0]) != 4); 10574 10575 off = si->off; 10576 off -= offsetof(struct sk_msg_md, remote_ip6[0]); 10577 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10578 struct sk_msg, sk), 10579 si->dst_reg, si->src_reg, 10580 offsetof(struct sk_msg, sk)); 10581 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10582 offsetof(struct sock_common, 10583 skc_v6_daddr.s6_addr32[0]) + 10584 off); 10585 #else 10586 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 10587 #endif 10588 break; 10589 10590 case offsetof(struct sk_msg_md, local_ip6[0]) ... 10591 offsetof(struct sk_msg_md, local_ip6[3]): 10592 #if IS_ENABLED(CONFIG_IPV6) 10593 BUILD_BUG_ON(sizeof_field(struct sock_common, 10594 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 10595 10596 off = si->off; 10597 off -= offsetof(struct sk_msg_md, local_ip6[0]); 10598 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10599 struct sk_msg, sk), 10600 si->dst_reg, si->src_reg, 10601 offsetof(struct sk_msg, sk)); 10602 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 10603 offsetof(struct sock_common, 10604 skc_v6_rcv_saddr.s6_addr32[0]) + 10605 off); 10606 #else 10607 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 10608 #endif 10609 break; 10610 10611 case offsetof(struct sk_msg_md, remote_port): 10612 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2); 10613 10614 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10615 struct sk_msg, sk), 10616 si->dst_reg, si->src_reg, 10617 offsetof(struct sk_msg, sk)); 10618 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10619 offsetof(struct sock_common, skc_dport)); 10620 #ifndef __BIG_ENDIAN_BITFIELD 10621 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 10622 #endif 10623 break; 10624 10625 case offsetof(struct sk_msg_md, local_port): 10626 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2); 10627 10628 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 10629 struct sk_msg, sk), 10630 si->dst_reg, si->src_reg, 10631 offsetof(struct sk_msg, sk)); 10632 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 10633 offsetof(struct sock_common, skc_num)); 10634 break; 10635 10636 case offsetof(struct sk_msg_md, size): 10637 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size), 10638 si->dst_reg, si->src_reg, 10639 offsetof(struct sk_msg_sg, size)); 10640 break; 10641 10642 case offsetof(struct sk_msg_md, sk): 10643 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk), 10644 si->dst_reg, si->src_reg, 10645 offsetof(struct sk_msg, sk)); 10646 break; 10647 } 10648 10649 return insn - insn_buf; 10650 } 10651 10652 const struct bpf_verifier_ops sk_filter_verifier_ops = { 10653 .get_func_proto = sk_filter_func_proto, 10654 .is_valid_access = sk_filter_is_valid_access, 10655 .convert_ctx_access = bpf_convert_ctx_access, 10656 .gen_ld_abs = bpf_gen_ld_abs, 10657 }; 10658 10659 const struct bpf_prog_ops sk_filter_prog_ops = { 10660 .test_run = bpf_prog_test_run_skb, 10661 }; 10662 10663 const struct bpf_verifier_ops tc_cls_act_verifier_ops = { 10664 .get_func_proto = tc_cls_act_func_proto, 10665 .is_valid_access = tc_cls_act_is_valid_access, 10666 .convert_ctx_access = tc_cls_act_convert_ctx_access, 10667 .gen_prologue = tc_cls_act_prologue, 10668 .gen_ld_abs = bpf_gen_ld_abs, 10669 }; 10670 10671 const struct bpf_prog_ops tc_cls_act_prog_ops = { 10672 .test_run = bpf_prog_test_run_skb, 10673 }; 10674 10675 const struct bpf_verifier_ops xdp_verifier_ops = { 10676 
.get_func_proto = xdp_func_proto, 10677 .is_valid_access = xdp_is_valid_access, 10678 .convert_ctx_access = xdp_convert_ctx_access, 10679 .gen_prologue = bpf_noop_prologue, 10680 }; 10681 10682 const struct bpf_prog_ops xdp_prog_ops = { 10683 .test_run = bpf_prog_test_run_xdp, 10684 }; 10685 10686 const struct bpf_verifier_ops cg_skb_verifier_ops = { 10687 .get_func_proto = cg_skb_func_proto, 10688 .is_valid_access = cg_skb_is_valid_access, 10689 .convert_ctx_access = bpf_convert_ctx_access, 10690 }; 10691 10692 const struct bpf_prog_ops cg_skb_prog_ops = { 10693 .test_run = bpf_prog_test_run_skb, 10694 }; 10695 10696 const struct bpf_verifier_ops lwt_in_verifier_ops = { 10697 .get_func_proto = lwt_in_func_proto, 10698 .is_valid_access = lwt_is_valid_access, 10699 .convert_ctx_access = bpf_convert_ctx_access, 10700 }; 10701 10702 const struct bpf_prog_ops lwt_in_prog_ops = { 10703 .test_run = bpf_prog_test_run_skb, 10704 }; 10705 10706 const struct bpf_verifier_ops lwt_out_verifier_ops = { 10707 .get_func_proto = lwt_out_func_proto, 10708 .is_valid_access = lwt_is_valid_access, 10709 .convert_ctx_access = bpf_convert_ctx_access, 10710 }; 10711 10712 const struct bpf_prog_ops lwt_out_prog_ops = { 10713 .test_run = bpf_prog_test_run_skb, 10714 }; 10715 10716 const struct bpf_verifier_ops lwt_xmit_verifier_ops = { 10717 .get_func_proto = lwt_xmit_func_proto, 10718 .is_valid_access = lwt_is_valid_access, 10719 .convert_ctx_access = bpf_convert_ctx_access, 10720 .gen_prologue = tc_cls_act_prologue, 10721 }; 10722 10723 const struct bpf_prog_ops lwt_xmit_prog_ops = { 10724 .test_run = bpf_prog_test_run_skb, 10725 }; 10726 10727 const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { 10728 .get_func_proto = lwt_seg6local_func_proto, 10729 .is_valid_access = lwt_is_valid_access, 10730 .convert_ctx_access = bpf_convert_ctx_access, 10731 }; 10732 10733 const struct bpf_prog_ops lwt_seg6local_prog_ops = { 10734 .test_run = bpf_prog_test_run_skb, 10735 }; 10736 10737 const struct bpf_verifier_ops cg_sock_verifier_ops = { 10738 .get_func_proto = sock_filter_func_proto, 10739 .is_valid_access = sock_filter_is_valid_access, 10740 .convert_ctx_access = bpf_sock_convert_ctx_access, 10741 }; 10742 10743 const struct bpf_prog_ops cg_sock_prog_ops = { 10744 }; 10745 10746 const struct bpf_verifier_ops cg_sock_addr_verifier_ops = { 10747 .get_func_proto = sock_addr_func_proto, 10748 .is_valid_access = sock_addr_is_valid_access, 10749 .convert_ctx_access = sock_addr_convert_ctx_access, 10750 }; 10751 10752 const struct bpf_prog_ops cg_sock_addr_prog_ops = { 10753 }; 10754 10755 const struct bpf_verifier_ops sock_ops_verifier_ops = { 10756 .get_func_proto = sock_ops_func_proto, 10757 .is_valid_access = sock_ops_is_valid_access, 10758 .convert_ctx_access = sock_ops_convert_ctx_access, 10759 }; 10760 10761 const struct bpf_prog_ops sock_ops_prog_ops = { 10762 }; 10763 10764 const struct bpf_verifier_ops sk_skb_verifier_ops = { 10765 .get_func_proto = sk_skb_func_proto, 10766 .is_valid_access = sk_skb_is_valid_access, 10767 .convert_ctx_access = sk_skb_convert_ctx_access, 10768 .gen_prologue = sk_skb_prologue, 10769 }; 10770 10771 const struct bpf_prog_ops sk_skb_prog_ops = { 10772 }; 10773 10774 const struct bpf_verifier_ops sk_msg_verifier_ops = { 10775 .get_func_proto = sk_msg_func_proto, 10776 .is_valid_access = sk_msg_is_valid_access, 10777 .convert_ctx_access = sk_msg_convert_ctx_access, 10778 .gen_prologue = bpf_noop_prologue, 10779 }; 10780 10781 const struct bpf_prog_ops sk_msg_prog_ops = { 10782 }; 
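
/* Illustrative sketch, not part of this file's logic: the
 * *_convert_ctx_access() callbacks registered in the tables above are what
 * let a program such as
 *
 *	SEC("tc")
 *	int drop_jumbo(struct __sk_buff *skb)
 *	{
 *		return skb->len > 1500 ? TC_ACT_SHOT : TC_ACT_OK;
 *	}
 *
 * read skb->len directly: at load time the verifier invokes the callback
 * from tc_cls_act_verifier_ops, which rewrites the context access into a
 * load of sk_buff::len at its real kernel offset.  SEC() and the TC_ACT_*
 * return codes follow the usual libbpf/pkt_cls conventions and are shown
 * here only as an assumed example; drop_jumbo is a hypothetical program.
 */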
10783 10784 const struct bpf_verifier_ops flow_dissector_verifier_ops = { 10785 .get_func_proto = flow_dissector_func_proto, 10786 .is_valid_access = flow_dissector_is_valid_access, 10787 .convert_ctx_access = flow_dissector_convert_ctx_access, 10788 }; 10789 10790 const struct bpf_prog_ops flow_dissector_prog_ops = { 10791 .test_run = bpf_prog_test_run_flow_dissector, 10792 }; 10793 10794 int sk_detach_filter(struct sock *sk) 10795 { 10796 int ret = -ENOENT; 10797 struct sk_filter *filter; 10798 10799 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 10800 return -EPERM; 10801 10802 filter = rcu_dereference_protected(sk->sk_filter, 10803 lockdep_sock_is_held(sk)); 10804 if (filter) { 10805 RCU_INIT_POINTER(sk->sk_filter, NULL); 10806 sk_filter_uncharge(sk, filter); 10807 ret = 0; 10808 } 10809 10810 return ret; 10811 } 10812 EXPORT_SYMBOL_GPL(sk_detach_filter); 10813 10814 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, 10815 unsigned int len) 10816 { 10817 struct sock_fprog_kern *fprog; 10818 struct sk_filter *filter; 10819 int ret = 0; 10820 10821 lock_sock(sk); 10822 filter = rcu_dereference_protected(sk->sk_filter, 10823 lockdep_sock_is_held(sk)); 10824 if (!filter) 10825 goto out; 10826 10827 /* We're copying the filter that has been originally attached, 10828 * so no conversion/decode needed anymore. eBPF programs that 10829 * have no original program cannot be dumped through this. 10830 */ 10831 ret = -EACCES; 10832 fprog = filter->prog->orig_prog; 10833 if (!fprog) 10834 goto out; 10835 10836 ret = fprog->len; 10837 if (!len) 10838 /* User space only enquires number of filter blocks. */ 10839 goto out; 10840 10841 ret = -EINVAL; 10842 if (len < fprog->len) 10843 goto out; 10844 10845 ret = -EFAULT; 10846 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) 10847 goto out; 10848 10849 /* Instead of bytes, the API requests to return the number 10850 * of filter blocks. 
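 *
 * An illustrative user space sequence against the SO_GET_FILTER option
 * that ends up in this function (sketch only, error handling omitted):
 *
 *	socklen_t len = 0;
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len);	learn #blocks
 *	struct sock_filter *buf = calloc(len, sizeof(*buf));
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, buf, &len);	copy the blocks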
10851 */ 10852 ret = fprog->len; 10853 out: 10854 release_sock(sk); 10855 return ret; 10856 } 10857 10858 #ifdef CONFIG_INET 10859 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern, 10860 struct sock_reuseport *reuse, 10861 struct sock *sk, struct sk_buff *skb, 10862 struct sock *migrating_sk, 10863 u32 hash) 10864 { 10865 reuse_kern->skb = skb; 10866 reuse_kern->sk = sk; 10867 reuse_kern->selected_sk = NULL; 10868 reuse_kern->migrating_sk = migrating_sk; 10869 reuse_kern->data_end = skb->data + skb_headlen(skb); 10870 reuse_kern->hash = hash; 10871 reuse_kern->reuseport_id = reuse->reuseport_id; 10872 reuse_kern->bind_inany = reuse->bind_inany; 10873 } 10874 10875 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, 10876 struct bpf_prog *prog, struct sk_buff *skb, 10877 struct sock *migrating_sk, 10878 u32 hash) 10879 { 10880 struct sk_reuseport_kern reuse_kern; 10881 enum sk_action action; 10882 10883 bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash); 10884 action = bpf_prog_run(prog, &reuse_kern); 10885 10886 if (action == SK_PASS) 10887 return reuse_kern.selected_sk; 10888 else 10889 return ERR_PTR(-ECONNREFUSED); 10890 } 10891 10892 BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, 10893 struct bpf_map *, map, void *, key, u32, flags) 10894 { 10895 bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; 10896 struct sock_reuseport *reuse; 10897 struct sock *selected_sk; 10898 10899 selected_sk = map->ops->map_lookup_elem(map, key); 10900 if (!selected_sk) 10901 return -ENOENT; 10902 10903 reuse = rcu_dereference(selected_sk->sk_reuseport_cb); 10904 if (!reuse) { 10905 /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ 10906 if (sk_is_refcounted(selected_sk)) 10907 sock_put(selected_sk); 10908 10909 /* reuseport_array has only sk with non NULL sk_reuseport_cb. 10910 * The only (!reuse) case here is - the sk has already been 10911 * unhashed (e.g. by close()), so treat it as -ENOENT. 10912 * 10913 * Other maps (e.g. sock_map) do not provide this guarantee and 10914 * the sk may never be in the reuseport group to begin with. 10915 */ 10916 return is_sockarray ? -ENOENT : -EINVAL; 10917 } 10918 10919 if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { 10920 struct sock *sk = reuse_kern->sk; 10921 10922 if (sk->sk_protocol != selected_sk->sk_protocol) 10923 return -EPROTOTYPE; 10924 else if (sk->sk_family != selected_sk->sk_family) 10925 return -EAFNOSUPPORT; 10926 10927 /* Catch all. Likely bound to a different sockaddr. 
*/ 10928 return -EBADFD; 10929 } 10930 10931 reuse_kern->selected_sk = selected_sk; 10932 10933 return 0; 10934 } 10935 10936 static const struct bpf_func_proto sk_select_reuseport_proto = { 10937 .func = sk_select_reuseport, 10938 .gpl_only = false, 10939 .ret_type = RET_INTEGER, 10940 .arg1_type = ARG_PTR_TO_CTX, 10941 .arg2_type = ARG_CONST_MAP_PTR, 10942 .arg3_type = ARG_PTR_TO_MAP_KEY, 10943 .arg4_type = ARG_ANYTHING, 10944 }; 10945 10946 BPF_CALL_4(sk_reuseport_load_bytes, 10947 const struct sk_reuseport_kern *, reuse_kern, u32, offset, 10948 void *, to, u32, len) 10949 { 10950 return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len); 10951 } 10952 10953 static const struct bpf_func_proto sk_reuseport_load_bytes_proto = { 10954 .func = sk_reuseport_load_bytes, 10955 .gpl_only = false, 10956 .ret_type = RET_INTEGER, 10957 .arg1_type = ARG_PTR_TO_CTX, 10958 .arg2_type = ARG_ANYTHING, 10959 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 10960 .arg4_type = ARG_CONST_SIZE, 10961 }; 10962 10963 BPF_CALL_5(sk_reuseport_load_bytes_relative, 10964 const struct sk_reuseport_kern *, reuse_kern, u32, offset, 10965 void *, to, u32, len, u32, start_header) 10966 { 10967 return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to, 10968 len, start_header); 10969 } 10970 10971 static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = { 10972 .func = sk_reuseport_load_bytes_relative, 10973 .gpl_only = false, 10974 .ret_type = RET_INTEGER, 10975 .arg1_type = ARG_PTR_TO_CTX, 10976 .arg2_type = ARG_ANYTHING, 10977 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 10978 .arg4_type = ARG_CONST_SIZE, 10979 .arg5_type = ARG_ANYTHING, 10980 }; 10981 10982 static const struct bpf_func_proto * 10983 sk_reuseport_func_proto(enum bpf_func_id func_id, 10984 const struct bpf_prog *prog) 10985 { 10986 switch (func_id) { 10987 case BPF_FUNC_sk_select_reuseport: 10988 return &sk_select_reuseport_proto; 10989 case BPF_FUNC_skb_load_bytes: 10990 return &sk_reuseport_load_bytes_proto; 10991 case BPF_FUNC_skb_load_bytes_relative: 10992 return &sk_reuseport_load_bytes_relative_proto; 10993 case BPF_FUNC_get_socket_cookie: 10994 return &bpf_get_socket_ptr_cookie_proto; 10995 case BPF_FUNC_ktime_get_coarse_ns: 10996 return &bpf_ktime_get_coarse_ns_proto; 10997 default: 10998 return bpf_base_func_proto(func_id); 10999 } 11000 } 11001 11002 static bool 11003 sk_reuseport_is_valid_access(int off, int size, 11004 enum bpf_access_type type, 11005 const struct bpf_prog *prog, 11006 struct bpf_insn_access_aux *info) 11007 { 11008 const u32 size_default = sizeof(__u32); 11009 11010 if (off < 0 || off >= sizeof(struct sk_reuseport_md) || 11011 off % size || type != BPF_READ) 11012 return false; 11013 11014 switch (off) { 11015 case offsetof(struct sk_reuseport_md, data): 11016 info->reg_type = PTR_TO_PACKET; 11017 return size == sizeof(__u64); 11018 11019 case offsetof(struct sk_reuseport_md, data_end): 11020 info->reg_type = PTR_TO_PACKET_END; 11021 return size == sizeof(__u64); 11022 11023 case offsetof(struct sk_reuseport_md, hash): 11024 return size == size_default; 11025 11026 case offsetof(struct sk_reuseport_md, sk): 11027 info->reg_type = PTR_TO_SOCKET; 11028 return size == sizeof(__u64); 11029 11030 case offsetof(struct sk_reuseport_md, migrating_sk): 11031 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; 11032 return size == sizeof(__u64); 11033 11034 /* Fields that allow narrowing */ 11035 case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): 11036 if (size < sizeof_field(struct sk_buff, protocol)) 11037 return 
false; 11038 fallthrough; 11039 case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): 11040 case bpf_ctx_range(struct sk_reuseport_md, bind_inany): 11041 case bpf_ctx_range(struct sk_reuseport_md, len): 11042 bpf_ctx_record_field_size(info, size_default); 11043 return bpf_ctx_narrow_access_ok(off, size, size_default); 11044 11045 default: 11046 return false; 11047 } 11048 } 11049 11050 #define SK_REUSEPORT_LOAD_FIELD(F) ({ \ 11051 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \ 11052 si->dst_reg, si->src_reg, \ 11053 bpf_target_off(struct sk_reuseport_kern, F, \ 11054 sizeof_field(struct sk_reuseport_kern, F), \ 11055 target_size)); \ 11056 }) 11057 11058 #define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \ 11059 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ 11060 struct sk_buff, \ 11061 skb, \ 11062 SKB_FIELD) 11063 11064 #define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \ 11065 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \ 11066 struct sock, \ 11067 sk, \ 11068 SK_FIELD) 11069 11070 static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, 11071 const struct bpf_insn *si, 11072 struct bpf_insn *insn_buf, 11073 struct bpf_prog *prog, 11074 u32 *target_size) 11075 { 11076 struct bpf_insn *insn = insn_buf; 11077 11078 switch (si->off) { 11079 case offsetof(struct sk_reuseport_md, data): 11080 SK_REUSEPORT_LOAD_SKB_FIELD(data); 11081 break; 11082 11083 case offsetof(struct sk_reuseport_md, len): 11084 SK_REUSEPORT_LOAD_SKB_FIELD(len); 11085 break; 11086 11087 case offsetof(struct sk_reuseport_md, eth_protocol): 11088 SK_REUSEPORT_LOAD_SKB_FIELD(protocol); 11089 break; 11090 11091 case offsetof(struct sk_reuseport_md, ip_protocol): 11092 SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol); 11093 break; 11094 11095 case offsetof(struct sk_reuseport_md, data_end): 11096 SK_REUSEPORT_LOAD_FIELD(data_end); 11097 break; 11098 11099 case offsetof(struct sk_reuseport_md, hash): 11100 SK_REUSEPORT_LOAD_FIELD(hash); 11101 break; 11102 11103 case offsetof(struct sk_reuseport_md, bind_inany): 11104 SK_REUSEPORT_LOAD_FIELD(bind_inany); 11105 break; 11106 11107 case offsetof(struct sk_reuseport_md, sk): 11108 SK_REUSEPORT_LOAD_FIELD(sk); 11109 break; 11110 11111 case offsetof(struct sk_reuseport_md, migrating_sk): 11112 SK_REUSEPORT_LOAD_FIELD(migrating_sk); 11113 break; 11114 } 11115 11116 return insn - insn_buf; 11117 } 11118 11119 const struct bpf_verifier_ops sk_reuseport_verifier_ops = { 11120 .get_func_proto = sk_reuseport_func_proto, 11121 .is_valid_access = sk_reuseport_is_valid_access, 11122 .convert_ctx_access = sk_reuseport_convert_ctx_access, 11123 }; 11124 11125 const struct bpf_prog_ops sk_reuseport_prog_ops = { 11126 }; 11127 11128 DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); 11129 EXPORT_SYMBOL(bpf_sk_lookup_enabled); 11130 11131 BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, 11132 struct sock *, sk, u64, flags) 11133 { 11134 if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | 11135 BPF_SK_LOOKUP_F_NO_REUSEPORT))) 11136 return -EINVAL; 11137 if (unlikely(sk && sk_is_refcounted(sk))) 11138 return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ 11139 if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN)) 11140 return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */ 11141 if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE)) 11142 return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */ 11143 11144 /* Check if socket is suitable for packet L3/L4 protocol */ 11145 if (sk && sk->sk_protocol != 
ctx->protocol) 11146 return -EPROTOTYPE; 11147 if (sk && sk->sk_family != ctx->family && 11148 (sk->sk_family == AF_INET || ipv6_only_sock(sk))) 11149 return -EAFNOSUPPORT; 11150 11151 if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) 11152 return -EEXIST; 11153 11154 /* Select socket as lookup result */ 11155 ctx->selected_sk = sk; 11156 ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; 11157 return 0; 11158 } 11159 11160 static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { 11161 .func = bpf_sk_lookup_assign, 11162 .gpl_only = false, 11163 .ret_type = RET_INTEGER, 11164 .arg1_type = ARG_PTR_TO_CTX, 11165 .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, 11166 .arg3_type = ARG_ANYTHING, 11167 }; 11168 11169 static const struct bpf_func_proto * 11170 sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 11171 { 11172 switch (func_id) { 11173 case BPF_FUNC_perf_event_output: 11174 return &bpf_event_output_data_proto; 11175 case BPF_FUNC_sk_assign: 11176 return &bpf_sk_lookup_assign_proto; 11177 case BPF_FUNC_sk_release: 11178 return &bpf_sk_release_proto; 11179 default: 11180 return bpf_sk_base_func_proto(func_id); 11181 } 11182 } 11183 11184 static bool sk_lookup_is_valid_access(int off, int size, 11185 enum bpf_access_type type, 11186 const struct bpf_prog *prog, 11187 struct bpf_insn_access_aux *info) 11188 { 11189 if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) 11190 return false; 11191 if (off % size != 0) 11192 return false; 11193 if (type != BPF_READ) 11194 return false; 11195 11196 switch (off) { 11197 case offsetof(struct bpf_sk_lookup, sk): 11198 info->reg_type = PTR_TO_SOCKET_OR_NULL; 11199 return size == sizeof(__u64); 11200 11201 case bpf_ctx_range(struct bpf_sk_lookup, family): 11202 case bpf_ctx_range(struct bpf_sk_lookup, protocol): 11203 case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): 11204 case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): 11205 case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): 11206 case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): 11207 case bpf_ctx_range(struct bpf_sk_lookup, local_port): 11208 case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): 11209 bpf_ctx_record_field_size(info, sizeof(__u32)); 11210 return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); 11211 11212 case bpf_ctx_range(struct bpf_sk_lookup, remote_port): 11213 /* Allow 4-byte access to 2-byte field for backward compatibility */ 11214 if (size == sizeof(__u32)) 11215 return true; 11216 bpf_ctx_record_field_size(info, sizeof(__be16)); 11217 return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16)); 11218 11219 case offsetofend(struct bpf_sk_lookup, remote_port) ... 
11220 offsetof(struct bpf_sk_lookup, local_ip4) - 1: 11221 /* Allow access to zero padding for backward compatibility */ 11222 bpf_ctx_record_field_size(info, sizeof(__u16)); 11223 return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16)); 11224 11225 default: 11226 return false; 11227 } 11228 } 11229 11230 static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, 11231 const struct bpf_insn *si, 11232 struct bpf_insn *insn_buf, 11233 struct bpf_prog *prog, 11234 u32 *target_size) 11235 { 11236 struct bpf_insn *insn = insn_buf; 11237 11238 switch (si->off) { 11239 case offsetof(struct bpf_sk_lookup, sk): 11240 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 11241 offsetof(struct bpf_sk_lookup_kern, selected_sk)); 11242 break; 11243 11244 case offsetof(struct bpf_sk_lookup, family): 11245 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 11246 bpf_target_off(struct bpf_sk_lookup_kern, 11247 family, 2, target_size)); 11248 break; 11249 11250 case offsetof(struct bpf_sk_lookup, protocol): 11251 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 11252 bpf_target_off(struct bpf_sk_lookup_kern, 11253 protocol, 2, target_size)); 11254 break; 11255 11256 case offsetof(struct bpf_sk_lookup, remote_ip4): 11257 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 11258 bpf_target_off(struct bpf_sk_lookup_kern, 11259 v4.saddr, 4, target_size)); 11260 break; 11261 11262 case offsetof(struct bpf_sk_lookup, local_ip4): 11263 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 11264 bpf_target_off(struct bpf_sk_lookup_kern, 11265 v4.daddr, 4, target_size)); 11266 break; 11267 11268 case bpf_ctx_range_till(struct bpf_sk_lookup, 11269 remote_ip6[0], remote_ip6[3]): { 11270 #if IS_ENABLED(CONFIG_IPV6) 11271 int off = si->off; 11272 11273 off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); 11274 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 11275 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 11276 offsetof(struct bpf_sk_lookup_kern, v6.saddr)); 11277 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 11278 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 11279 #else 11280 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 11281 #endif 11282 break; 11283 } 11284 case bpf_ctx_range_till(struct bpf_sk_lookup, 11285 local_ip6[0], local_ip6[3]): { 11286 #if IS_ENABLED(CONFIG_IPV6) 11287 int off = si->off; 11288 11289 off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); 11290 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 11291 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 11292 offsetof(struct bpf_sk_lookup_kern, v6.daddr)); 11293 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 11294 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 11295 #else 11296 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 11297 #endif 11298 break; 11299 } 11300 case offsetof(struct bpf_sk_lookup, remote_port): 11301 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 11302 bpf_target_off(struct bpf_sk_lookup_kern, 11303 sport, 2, target_size)); 11304 break; 11305 11306 case offsetofend(struct bpf_sk_lookup, remote_port): 11307 *target_size = 2; 11308 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 11309 break; 11310 11311 case offsetof(struct bpf_sk_lookup, local_port): 11312 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 11313 bpf_target_off(struct bpf_sk_lookup_kern, 11314 dport, 2, target_size)); 11315 break; 11316 11317 case offsetof(struct bpf_sk_lookup, ingress_ifindex): 11318 *insn++ = 
BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 11319 bpf_target_off(struct bpf_sk_lookup_kern, 11320 ingress_ifindex, 4, target_size)); 11321 break; 11322 } 11323 11324 return insn - insn_buf; 11325 } 11326 11327 const struct bpf_prog_ops sk_lookup_prog_ops = { 11328 .test_run = bpf_prog_test_run_sk_lookup, 11329 }; 11330 11331 const struct bpf_verifier_ops sk_lookup_verifier_ops = { 11332 .get_func_proto = sk_lookup_func_proto, 11333 .is_valid_access = sk_lookup_is_valid_access, 11334 .convert_ctx_access = sk_lookup_convert_ctx_access, 11335 }; 11336 11337 #endif /* CONFIG_INET */ 11338 11339 DEFINE_BPF_DISPATCHER(xdp) 11340 11341 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) 11342 { 11343 bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); 11344 } 11345 11346 BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE) 11347 #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) 11348 BTF_SOCK_TYPE_xxx 11349 #undef BTF_SOCK_TYPE 11350 11351 BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) 11352 { 11353 /* tcp6_sock type is not generated in dwarf and hence btf, 11354 * trigger an explicit type generation here. 11355 */ 11356 BTF_TYPE_EMIT(struct tcp6_sock); 11357 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && 11358 sk->sk_family == AF_INET6) 11359 return (unsigned long)sk; 11360 11361 return (unsigned long)NULL; 11362 } 11363 11364 const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { 11365 .func = bpf_skc_to_tcp6_sock, 11366 .gpl_only = false, 11367 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11368 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11369 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6], 11370 }; 11371 11372 BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) 11373 { 11374 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 11375 return (unsigned long)sk; 11376 11377 return (unsigned long)NULL; 11378 } 11379 11380 const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { 11381 .func = bpf_skc_to_tcp_sock, 11382 .gpl_only = false, 11383 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11384 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11385 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP], 11386 }; 11387 11388 BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) 11389 { 11390 /* BTF types for tcp_timewait_sock and inet_timewait_sock are not 11391 * generated if CONFIG_INET=n. Trigger an explicit generation here. 
11392 */ 11393 BTF_TYPE_EMIT(struct inet_timewait_sock); 11394 BTF_TYPE_EMIT(struct tcp_timewait_sock); 11395 11396 #ifdef CONFIG_INET 11397 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) 11398 return (unsigned long)sk; 11399 #endif 11400 11401 #if IS_BUILTIN(CONFIG_IPV6) 11402 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) 11403 return (unsigned long)sk; 11404 #endif 11405 11406 return (unsigned long)NULL; 11407 } 11408 11409 const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { 11410 .func = bpf_skc_to_tcp_timewait_sock, 11411 .gpl_only = false, 11412 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11413 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11414 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW], 11415 }; 11416 11417 BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) 11418 { 11419 #ifdef CONFIG_INET 11420 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) 11421 return (unsigned long)sk; 11422 #endif 11423 11424 #if IS_BUILTIN(CONFIG_IPV6) 11425 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) 11426 return (unsigned long)sk; 11427 #endif 11428 11429 return (unsigned long)NULL; 11430 } 11431 11432 const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = { 11433 .func = bpf_skc_to_tcp_request_sock, 11434 .gpl_only = false, 11435 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11436 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11437 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ], 11438 }; 11439 11440 BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) 11441 { 11442 /* udp6_sock type is not generated in dwarf and hence btf, 11443 * trigger an explicit type generation here. 11444 */ 11445 BTF_TYPE_EMIT(struct udp6_sock); 11446 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && 11447 sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) 11448 return (unsigned long)sk; 11449 11450 return (unsigned long)NULL; 11451 } 11452 11453 const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { 11454 .func = bpf_skc_to_udp6_sock, 11455 .gpl_only = false, 11456 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11457 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11458 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], 11459 }; 11460 11461 BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk) 11462 { 11463 /* unix_sock type is not generated in dwarf and hence btf, 11464 * trigger an explicit type generation here. 
11465 */ 11466 BTF_TYPE_EMIT(struct unix_sock); 11467 if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX) 11468 return (unsigned long)sk; 11469 11470 return (unsigned long)NULL; 11471 } 11472 11473 const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { 11474 .func = bpf_skc_to_unix_sock, 11475 .gpl_only = false, 11476 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11477 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, 11478 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], 11479 }; 11480 11481 BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) 11482 { 11483 BTF_TYPE_EMIT(struct mptcp_sock); 11484 return (unsigned long)bpf_mptcp_sock_from_subflow(sk); 11485 } 11486 11487 const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { 11488 .func = bpf_skc_to_mptcp_sock, 11489 .gpl_only = false, 11490 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11491 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 11492 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP], 11493 }; 11494 11495 BPF_CALL_1(bpf_sock_from_file, struct file *, file) 11496 { 11497 return (unsigned long)sock_from_file(file); 11498 } 11499 11500 BTF_ID_LIST(bpf_sock_from_file_btf_ids) 11501 BTF_ID(struct, socket) 11502 BTF_ID(struct, file) 11503 11504 const struct bpf_func_proto bpf_sock_from_file_proto = { 11505 .func = bpf_sock_from_file, 11506 .gpl_only = false, 11507 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 11508 .ret_btf_id = &bpf_sock_from_file_btf_ids[0], 11509 .arg1_type = ARG_PTR_TO_BTF_ID, 11510 .arg1_btf_id = &bpf_sock_from_file_btf_ids[1], 11511 }; 11512 11513 static const struct bpf_func_proto * 11514 bpf_sk_base_func_proto(enum bpf_func_id func_id) 11515 { 11516 const struct bpf_func_proto *func; 11517 11518 switch (func_id) { 11519 case BPF_FUNC_skc_to_tcp6_sock: 11520 func = &bpf_skc_to_tcp6_sock_proto; 11521 break; 11522 case BPF_FUNC_skc_to_tcp_sock: 11523 func = &bpf_skc_to_tcp_sock_proto; 11524 break; 11525 case BPF_FUNC_skc_to_tcp_timewait_sock: 11526 func = &bpf_skc_to_tcp_timewait_sock_proto; 11527 break; 11528 case BPF_FUNC_skc_to_tcp_request_sock: 11529 func = &bpf_skc_to_tcp_request_sock_proto; 11530 break; 11531 case BPF_FUNC_skc_to_udp6_sock: 11532 func = &bpf_skc_to_udp6_sock_proto; 11533 break; 11534 case BPF_FUNC_skc_to_unix_sock: 11535 func = &bpf_skc_to_unix_sock_proto; 11536 break; 11537 case BPF_FUNC_skc_to_mptcp_sock: 11538 func = &bpf_skc_to_mptcp_sock_proto; 11539 break; 11540 case BPF_FUNC_ktime_get_coarse_ns: 11541 return &bpf_ktime_get_coarse_ns_proto; 11542 default: 11543 return bpf_base_func_proto(func_id); 11544 } 11545 11546 if (!perfmon_capable()) 11547 return NULL; 11548 11549 return func; 11550 } 11551
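
/* Illustrative sketch of the BPF program side, not part of this file: the
 * skc_to_*() casting helpers returned above are only handed out to
 * perfmon-capable (CAP_PERFMON or CAP_SYS_ADMIN) loaders and let a program
 * narrow a socket pointer and read protocol-specific state through BTF,
 * e.g.:
 *
 *	struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
 *
 *	if (tp)
 *		srtt_usecs = tp->srtt_us >> 3;
 *
 * bpf_skc_to_tcp_sock() and tcp_sock::srtt_us (smoothed RTT scaled by 8)
 * are real, but the surrounding program context and the srtt_usecs
 * variable are only an assumed usage example.
 */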