1 /* 2 * Linux Socket Filter - Kernel level socket filtering 3 * 4 * Based on the design of the Berkeley Packet Filter. The new 5 * internal format has been designed by PLUMgrid: 6 * 7 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com 8 * 9 * Authors: 10 * 11 * Jay Schulist <jschlst@samba.org> 12 * Alexei Starovoitov <ast@plumgrid.com> 13 * Daniel Borkmann <dborkman@redhat.com> 14 * 15 * This program is free software; you can redistribute it and/or 16 * modify it under the terms of the GNU General Public License 17 * as published by the Free Software Foundation; either version 18 * 2 of the License, or (at your option) any later version. 19 * 20 * Andi Kleen - Fix a few bad bugs and races. 21 * Kris Katterjohn - Added many additional checks in bpf_check_classic() 22 */ 23 24 #include <linux/module.h> 25 #include <linux/types.h> 26 #include <linux/mm.h> 27 #include <linux/fcntl.h> 28 #include <linux/socket.h> 29 #include <linux/sock_diag.h> 30 #include <linux/in.h> 31 #include <linux/inet.h> 32 #include <linux/netdevice.h> 33 #include <linux/if_packet.h> 34 #include <linux/if_arp.h> 35 #include <linux/gfp.h> 36 #include <net/inet_common.h> 37 #include <net/ip.h> 38 #include <net/protocol.h> 39 #include <net/netlink.h> 40 #include <linux/skbuff.h> 41 #include <linux/skmsg.h> 42 #include <net/sock.h> 43 #include <net/flow_dissector.h> 44 #include <linux/errno.h> 45 #include <linux/timer.h> 46 #include <linux/uaccess.h> 47 #include <asm/unaligned.h> 48 #include <asm/cmpxchg.h> 49 #include <linux/filter.h> 50 #include <linux/ratelimit.h> 51 #include <linux/seccomp.h> 52 #include <linux/if_vlan.h> 53 #include <linux/bpf.h> 54 #include <net/sch_generic.h> 55 #include <net/cls_cgroup.h> 56 #include <net/dst_metadata.h> 57 #include <net/dst.h> 58 #include <net/sock_reuseport.h> 59 #include <net/busy_poll.h> 60 #include <net/tcp.h> 61 #include <net/xfrm.h> 62 #include <net/udp.h> 63 #include <linux/bpf_trace.h> 64 #include <net/xdp_sock.h> 65 #include <linux/inetdevice.h> 66 #include <net/inet_hashtables.h> 67 #include <net/inet6_hashtables.h> 68 #include <net/ip_fib.h> 69 #include <net/flow.h> 70 #include <net/arp.h> 71 #include <net/ipv6.h> 72 #include <net/net_namespace.h> 73 #include <linux/seg6_local.h> 74 #include <net/seg6.h> 75 #include <net/seg6_local.h> 76 #include <net/lwtunnel.h> 77 78 /** 79 * sk_filter_trim_cap - run a packet through a socket filter 80 * @sk: sock associated with &sk_buff 81 * @skb: buffer to filter 82 * @cap: limit on how short the eBPF program may trim the packet 83 * 84 * Run the eBPF program and then cut skb->data to correct size returned by 85 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller 86 * than pkt_len we keep whole skb->data. This is the socket level 87 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should 88 * be accepted or -EPERM if the packet should be tossed. 
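 *
 * Worked example (illustrative values only): with cap == 1, a filter that
 * returns pkt_len == 64 for a 1500 byte packet causes the skb to be trimmed
 * via pskb_trim(skb, max(cap, pkt_len)) down to 64 bytes, while a return
 * value of 0 drops the packet with -EPERM instead of trimming it.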
89 * 90 */ 91 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) 92 { 93 int err; 94 struct sk_filter *filter; 95 96 /* 97 * If the skb was allocated from pfmemalloc reserves, only 98 * allow SOCK_MEMALLOC sockets to use it as this socket is 99 * helping free memory 100 */ 101 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) { 102 NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); 103 return -ENOMEM; 104 } 105 err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb); 106 if (err) 107 return err; 108 109 err = security_sock_rcv_skb(sk, skb); 110 if (err) 111 return err; 112 113 rcu_read_lock(); 114 filter = rcu_dereference(sk->sk_filter); 115 if (filter) { 116 struct sock *save_sk = skb->sk; 117 unsigned int pkt_len; 118 119 skb->sk = sk; 120 pkt_len = bpf_prog_run_save_cb(filter->prog, skb); 121 skb->sk = save_sk; 122 err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; 123 } 124 rcu_read_unlock(); 125 126 return err; 127 } 128 EXPORT_SYMBOL(sk_filter_trim_cap); 129 130 BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) 131 { 132 return skb_get_poff(skb); 133 } 134 135 BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) 136 { 137 struct nlattr *nla; 138 139 if (skb_is_nonlinear(skb)) 140 return 0; 141 142 if (skb->len < sizeof(struct nlattr)) 143 return 0; 144 145 if (a > skb->len - sizeof(struct nlattr)) 146 return 0; 147 148 nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x); 149 if (nla) 150 return (void *) nla - (void *) skb->data; 151 152 return 0; 153 } 154 155 BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) 156 { 157 struct nlattr *nla; 158 159 if (skb_is_nonlinear(skb)) 160 return 0; 161 162 if (skb->len < sizeof(struct nlattr)) 163 return 0; 164 165 if (a > skb->len - sizeof(struct nlattr)) 166 return 0; 167 168 nla = (struct nlattr *) &skb->data[a]; 169 if (nla->nla_len > skb->len - a) 170 return 0; 171 172 nla = nla_find_nested(nla, x); 173 if (nla) 174 return (void *) nla - (void *) skb->data; 175 176 return 0; 177 } 178 179 BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, 180 data, int, headlen, int, offset) 181 { 182 u8 tmp, *ptr; 183 const int len = sizeof(tmp); 184 185 if (offset >= 0) { 186 if (headlen - offset >= len) 187 return *(u8 *)(data + offset); 188 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) 189 return tmp; 190 } else { 191 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); 192 if (likely(ptr)) 193 return *(u8 *)ptr; 194 } 195 196 return -EFAULT; 197 } 198 199 BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, 200 int, offset) 201 { 202 return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len, 203 offset); 204 } 205 206 BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, 207 data, int, headlen, int, offset) 208 { 209 u16 tmp, *ptr; 210 const int len = sizeof(tmp); 211 212 if (offset >= 0) { 213 if (headlen - offset >= len) 214 return get_unaligned_be16(data + offset); 215 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) 216 return be16_to_cpu(tmp); 217 } else { 218 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); 219 if (likely(ptr)) 220 return get_unaligned_be16(ptr); 221 } 222 223 return -EFAULT; 224 } 225 226 BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, 227 int, offset) 228 { 229 return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len, 230 offset); 231 } 232 233 
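/* Usage sketch (illustration only, not part of this file's logic): callers
 * that only need accept/drop semantics wrap sk_filter_trim_cap() with a
 * trim cap of 1, as the sk_filter() helper in include/linux/filter.h does:
 *
 *	static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 */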
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, 234 data, int, headlen, int, offset) 235 { 236 u32 tmp, *ptr; 237 const int len = sizeof(tmp); 238 239 if (likely(offset >= 0)) { 240 if (headlen - offset >= len) 241 return get_unaligned_be32(data + offset); 242 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) 243 return be32_to_cpu(tmp); 244 } else { 245 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); 246 if (likely(ptr)) 247 return get_unaligned_be32(ptr); 248 } 249 250 return -EFAULT; 251 } 252 253 BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, 254 int, offset) 255 { 256 return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len, 257 offset); 258 } 259 260 BPF_CALL_0(bpf_get_raw_cpu_id) 261 { 262 return raw_smp_processor_id(); 263 } 264 265 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { 266 .func = bpf_get_raw_cpu_id, 267 .gpl_only = false, 268 .ret_type = RET_INTEGER, 269 }; 270 271 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, 272 struct bpf_insn *insn_buf) 273 { 274 struct bpf_insn *insn = insn_buf; 275 276 switch (skb_field) { 277 case SKF_AD_MARK: 278 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 279 280 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, 281 offsetof(struct sk_buff, mark)); 282 break; 283 284 case SKF_AD_PKTTYPE: 285 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); 286 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); 287 #ifdef __BIG_ENDIAN_BITFIELD 288 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); 289 #endif 290 break; 291 292 case SKF_AD_QUEUE: 293 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); 294 295 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, 296 offsetof(struct sk_buff, queue_mapping)); 297 break; 298 299 case SKF_AD_VLAN_TAG: 300 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); 301 302 /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ 303 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, 304 offsetof(struct sk_buff, vlan_tci)); 305 break; 306 case SKF_AD_VLAN_TAG_PRESENT: 307 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); 308 if (PKT_VLAN_PRESENT_BIT) 309 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); 310 if (PKT_VLAN_PRESENT_BIT < 7) 311 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); 312 break; 313 } 314 315 return insn - insn_buf; 316 } 317 318 static bool convert_bpf_extensions(struct sock_filter *fp, 319 struct bpf_insn **insnp) 320 { 321 struct bpf_insn *insn = *insnp; 322 u32 cnt; 323 324 switch (fp->k) { 325 case SKF_AD_OFF + SKF_AD_PROTOCOL: 326 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); 327 328 /* A = *(u16 *) (CTX + offsetof(protocol)) */ 329 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, 330 offsetof(struct sk_buff, protocol)); 331 /* A = ntohs(A) [emitting a nop or swap16] */ 332 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); 333 break; 334 335 case SKF_AD_OFF + SKF_AD_PKTTYPE: 336 cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn); 337 insn += cnt - 1; 338 break; 339 340 case SKF_AD_OFF + SKF_AD_IFINDEX: 341 case SKF_AD_OFF + SKF_AD_HATYPE: 342 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); 343 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); 344 345 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 346 BPF_REG_TMP, BPF_REG_CTX, 347 offsetof(struct sk_buff, dev)); 348 /* if (tmp != 0) goto pc + 1 */ 349 *insn++ = 
BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1); 350 *insn++ = BPF_EXIT_INSN(); 351 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) 352 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP, 353 offsetof(struct net_device, ifindex)); 354 else 355 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP, 356 offsetof(struct net_device, type)); 357 break; 358 359 case SKF_AD_OFF + SKF_AD_MARK: 360 cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn); 361 insn += cnt - 1; 362 break; 363 364 case SKF_AD_OFF + SKF_AD_RXHASH: 365 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); 366 367 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, 368 offsetof(struct sk_buff, hash)); 369 break; 370 371 case SKF_AD_OFF + SKF_AD_QUEUE: 372 cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn); 373 insn += cnt - 1; 374 break; 375 376 case SKF_AD_OFF + SKF_AD_VLAN_TAG: 377 cnt = convert_skb_access(SKF_AD_VLAN_TAG, 378 BPF_REG_A, BPF_REG_CTX, insn); 379 insn += cnt - 1; 380 break; 381 382 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: 383 cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, 384 BPF_REG_A, BPF_REG_CTX, insn); 385 insn += cnt - 1; 386 break; 387 388 case SKF_AD_OFF + SKF_AD_VLAN_TPID: 389 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); 390 391 /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */ 392 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, 393 offsetof(struct sk_buff, vlan_proto)); 394 /* A = ntohs(A) [emitting a nop or swap16] */ 395 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16); 396 break; 397 398 case SKF_AD_OFF + SKF_AD_PAY_OFFSET: 399 case SKF_AD_OFF + SKF_AD_NLATTR: 400 case SKF_AD_OFF + SKF_AD_NLATTR_NEST: 401 case SKF_AD_OFF + SKF_AD_CPU: 402 case SKF_AD_OFF + SKF_AD_RANDOM: 403 /* arg1 = CTX */ 404 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); 405 /* arg2 = A */ 406 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); 407 /* arg3 = X */ 408 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); 409 /* Emit call(arg1=CTX, arg2=A, arg3=X) */ 410 switch (fp->k) { 411 case SKF_AD_OFF + SKF_AD_PAY_OFFSET: 412 *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset); 413 break; 414 case SKF_AD_OFF + SKF_AD_NLATTR: 415 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr); 416 break; 417 case SKF_AD_OFF + SKF_AD_NLATTR_NEST: 418 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest); 419 break; 420 case SKF_AD_OFF + SKF_AD_CPU: 421 *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id); 422 break; 423 case SKF_AD_OFF + SKF_AD_RANDOM: 424 *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); 425 bpf_user_rnd_init_once(); 426 break; 427 } 428 break; 429 430 case SKF_AD_OFF + SKF_AD_ALU_XOR_X: 431 /* A ^= X */ 432 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X); 433 break; 434 435 default: 436 /* This is just a dummy call to avoid letting the compiler 437 * evict __bpf_call_base() as an optimization. Placed here 438 * where no-one bothers. 
439 */ 440 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0); 441 return false; 442 } 443 444 *insnp = insn; 445 return true; 446 } 447 448 static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) 449 { 450 const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS); 451 int size = bpf_size_to_bytes(BPF_SIZE(fp->code)); 452 bool endian = BPF_SIZE(fp->code) == BPF_H || 453 BPF_SIZE(fp->code) == BPF_W; 454 bool indirect = BPF_MODE(fp->code) == BPF_IND; 455 const int ip_align = NET_IP_ALIGN; 456 struct bpf_insn *insn = *insnp; 457 int offset = fp->k; 458 459 if (!indirect && 460 ((unaligned_ok && offset >= 0) || 461 (!unaligned_ok && offset >= 0 && 462 offset + ip_align >= 0 && 463 offset + ip_align % size == 0))) { 464 bool ldx_off_ok = offset <= S16_MAX; 465 466 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); 467 if (offset) 468 *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); 469 *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, 470 size, 2 + endian + (!ldx_off_ok * 2)); 471 if (ldx_off_ok) { 472 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, 473 BPF_REG_D, offset); 474 } else { 475 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D); 476 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset); 477 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, 478 BPF_REG_TMP, 0); 479 } 480 if (endian) 481 *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8); 482 *insn++ = BPF_JMP_A(8); 483 } 484 485 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); 486 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D); 487 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H); 488 if (!indirect) { 489 *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset); 490 } else { 491 *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X); 492 if (fp->k) 493 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset); 494 } 495 496 switch (BPF_SIZE(fp->code)) { 497 case BPF_B: 498 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8); 499 break; 500 case BPF_H: 501 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16); 502 break; 503 case BPF_W: 504 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32); 505 break; 506 default: 507 return false; 508 } 509 510 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2); 511 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); 512 *insn = BPF_EXIT_INSN(); 513 514 *insnp = insn; 515 return true; 516 } 517 518 /** 519 * bpf_convert_filter - convert filter program 520 * @prog: the user passed filter program 521 * @len: the length of the user passed filter program 522 * @new_prog: allocated 'struct bpf_prog' or NULL 523 * @new_len: pointer to store length of converted program 524 * @seen_ld_abs: bool whether we've seen ld_abs/ind 525 * 526 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' 527 * style extended BPF (eBPF). 
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) Second pass to remap, which itself runs in two sub-passes: the first
 *    finds the new jump offsets, the second does the remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len,
			      bool *seen_ld_abs)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
		if (*seen_ld_abs) {
			/* For packet access in classic BPF, cache skb->data
			 * in callee-saved BPF R8 and skb->len - skb->data_len
			 * (headlen) in BPF R9. Since classic BPF is read-only
			 * on CTX, we only need to cache it once.
			 */
			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
						  BPF_REG_D, BPF_REG_CTX,
						  offsetof(struct sk_buff, data));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
						  offsetof(struct sk_buff, len));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
						  offsetof(struct sk_buff, data_len));
			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
		}
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[32] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is.
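		 * For example, a classic BPF_ALU | BPF_ADD | BPF_K keeps the
		 * very same opcode encoding in eBPF and is simply re-emitted
		 * below via BPF_RAW_INSN() with A and X as operand registers.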
*/ 606 case BPF_ALU | BPF_ADD | BPF_X: 607 case BPF_ALU | BPF_ADD | BPF_K: 608 case BPF_ALU | BPF_SUB | BPF_X: 609 case BPF_ALU | BPF_SUB | BPF_K: 610 case BPF_ALU | BPF_AND | BPF_X: 611 case BPF_ALU | BPF_AND | BPF_K: 612 case BPF_ALU | BPF_OR | BPF_X: 613 case BPF_ALU | BPF_OR | BPF_K: 614 case BPF_ALU | BPF_LSH | BPF_X: 615 case BPF_ALU | BPF_LSH | BPF_K: 616 case BPF_ALU | BPF_RSH | BPF_X: 617 case BPF_ALU | BPF_RSH | BPF_K: 618 case BPF_ALU | BPF_XOR | BPF_X: 619 case BPF_ALU | BPF_XOR | BPF_K: 620 case BPF_ALU | BPF_MUL | BPF_X: 621 case BPF_ALU | BPF_MUL | BPF_K: 622 case BPF_ALU | BPF_DIV | BPF_X: 623 case BPF_ALU | BPF_DIV | BPF_K: 624 case BPF_ALU | BPF_MOD | BPF_X: 625 case BPF_ALU | BPF_MOD | BPF_K: 626 case BPF_ALU | BPF_NEG: 627 case BPF_LD | BPF_ABS | BPF_W: 628 case BPF_LD | BPF_ABS | BPF_H: 629 case BPF_LD | BPF_ABS | BPF_B: 630 case BPF_LD | BPF_IND | BPF_W: 631 case BPF_LD | BPF_IND | BPF_H: 632 case BPF_LD | BPF_IND | BPF_B: 633 /* Check for overloaded BPF extension and 634 * directly convert it if found, otherwise 635 * just move on with mapping. 636 */ 637 if (BPF_CLASS(fp->code) == BPF_LD && 638 BPF_MODE(fp->code) == BPF_ABS && 639 convert_bpf_extensions(fp, &insn)) 640 break; 641 if (BPF_CLASS(fp->code) == BPF_LD && 642 convert_bpf_ld_abs(fp, &insn)) { 643 *seen_ld_abs = true; 644 break; 645 } 646 647 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || 648 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { 649 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); 650 /* Error with exception code on div/mod by 0. 651 * For cBPF programs, this was always return 0. 652 */ 653 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2); 654 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); 655 *insn++ = BPF_EXIT_INSN(); 656 } 657 658 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); 659 break; 660 661 /* Jump transformation cannot use BPF block macros 662 * everywhere as offset calculation and target updates 663 * require a bit more work than the rest, i.e. jump 664 * opcodes map as-is, but offsets need adjustment. 665 */ 666 667 #define BPF_EMIT_JMP \ 668 do { \ 669 const s32 off_min = S16_MIN, off_max = S16_MAX; \ 670 s32 off; \ 671 \ 672 if (target >= len || target < 0) \ 673 goto err; \ 674 off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ 675 /* Adjust pc relative offset for 2nd or 3rd insn. */ \ 676 off -= insn - tmp_insns; \ 677 /* Reject anything not fitting into insn->off. */ \ 678 if (off < off_min || off > off_max) \ 679 goto err; \ 680 insn->off = off; \ 681 } while (0) 682 683 case BPF_JMP | BPF_JA: 684 target = i + fp->k + 1; 685 insn->code = fp->code; 686 BPF_EMIT_JMP; 687 break; 688 689 case BPF_JMP | BPF_JEQ | BPF_K: 690 case BPF_JMP | BPF_JEQ | BPF_X: 691 case BPF_JMP | BPF_JSET | BPF_K: 692 case BPF_JMP | BPF_JSET | BPF_X: 693 case BPF_JMP | BPF_JGT | BPF_K: 694 case BPF_JMP | BPF_JGT | BPF_X: 695 case BPF_JMP | BPF_JGE | BPF_K: 696 case BPF_JMP | BPF_JGE | BPF_X: 697 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) { 698 /* BPF immediates are signed, zero extend 699 * immediate into tmp register and use it 700 * in compare insn. 701 */ 702 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); 703 704 insn->dst_reg = BPF_REG_A; 705 insn->src_reg = BPF_REG_TMP; 706 bpf_src = BPF_X; 707 } else { 708 insn->dst_reg = BPF_REG_A; 709 insn->imm = fp->k; 710 bpf_src = BPF_SRC(fp->code); 711 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0; 712 } 713 714 /* Common case where 'jump_false' is next insn. 
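			 * Example: a classic "jeq #k, jt 2, jf 0" falls through
			 * on false and can therefore be emitted as one eBPF
			 * conditional jump instead of a Jxx/JA pair (see
			 * jmp_rest below).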
			 */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B: {
			struct sock_filter tmp = {
				.code	= BPF_LD | BPF_ABS | BPF_B,
				.k	= fp->k,
			};

			*seen_ld_abs = true;

			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			convert_bpf_ld_abs(&tmp, &insn);
			insn++;
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* tmp = X */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;
		}
		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields.
		 */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		if (*seen_ld_abs)
			*new_len += 4; /* Prologue bits. */
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that the filter loaded by the user never tries
 * to read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H |
BPF_ABS] = true, 970 [BPF_LD | BPF_B | BPF_ABS] = true, 971 [BPF_LD | BPF_W | BPF_LEN] = true, 972 [BPF_LD | BPF_W | BPF_IND] = true, 973 [BPF_LD | BPF_H | BPF_IND] = true, 974 [BPF_LD | BPF_B | BPF_IND] = true, 975 [BPF_LD | BPF_IMM] = true, 976 [BPF_LD | BPF_MEM] = true, 977 [BPF_LDX | BPF_W | BPF_LEN] = true, 978 [BPF_LDX | BPF_B | BPF_MSH] = true, 979 [BPF_LDX | BPF_IMM] = true, 980 [BPF_LDX | BPF_MEM] = true, 981 /* Store instructions */ 982 [BPF_ST] = true, 983 [BPF_STX] = true, 984 /* Misc instructions */ 985 [BPF_MISC | BPF_TAX] = true, 986 [BPF_MISC | BPF_TXA] = true, 987 /* Return instructions */ 988 [BPF_RET | BPF_K] = true, 989 [BPF_RET | BPF_A] = true, 990 /* Jump instructions */ 991 [BPF_JMP | BPF_JA] = true, 992 [BPF_JMP | BPF_JEQ | BPF_K] = true, 993 [BPF_JMP | BPF_JEQ | BPF_X] = true, 994 [BPF_JMP | BPF_JGE | BPF_K] = true, 995 [BPF_JMP | BPF_JGE | BPF_X] = true, 996 [BPF_JMP | BPF_JGT | BPF_K] = true, 997 [BPF_JMP | BPF_JGT | BPF_X] = true, 998 [BPF_JMP | BPF_JSET | BPF_K] = true, 999 [BPF_JMP | BPF_JSET | BPF_X] = true, 1000 }; 1001 1002 if (code_to_probe >= ARRAY_SIZE(codes)) 1003 return false; 1004 1005 return codes[code_to_probe]; 1006 } 1007 1008 static bool bpf_check_basics_ok(const struct sock_filter *filter, 1009 unsigned int flen) 1010 { 1011 if (filter == NULL) 1012 return false; 1013 if (flen == 0 || flen > BPF_MAXINSNS) 1014 return false; 1015 1016 return true; 1017 } 1018 1019 /** 1020 * bpf_check_classic - verify socket filter code 1021 * @filter: filter to verify 1022 * @flen: length of filter 1023 * 1024 * Check the user's filter code. If we let some ugly 1025 * filter code slip through kaboom! The filter must contain 1026 * no references or jumps that are out of range, no illegal 1027 * instructions, and must end with a RET instruction. 1028 * 1029 * All jumps are forward as they are not signed. 1030 * 1031 * Returns 0 if the rule set is legal or -EINVAL if not. 1032 */ 1033 static int bpf_check_classic(const struct sock_filter *filter, 1034 unsigned int flen) 1035 { 1036 bool anc_found; 1037 int pc; 1038 1039 /* Check the filter code now */ 1040 for (pc = 0; pc < flen; pc++) { 1041 const struct sock_filter *ftest = &filter[pc]; 1042 1043 /* May we actually operate on this code? */ 1044 if (!chk_code_allowed(ftest->code)) 1045 return -EINVAL; 1046 1047 /* Some instructions need special checks */ 1048 switch (ftest->code) { 1049 case BPF_ALU | BPF_DIV | BPF_K: 1050 case BPF_ALU | BPF_MOD | BPF_K: 1051 /* Check for division by zero */ 1052 if (ftest->k == 0) 1053 return -EINVAL; 1054 break; 1055 case BPF_ALU | BPF_LSH | BPF_K: 1056 case BPF_ALU | BPF_RSH | BPF_K: 1057 if (ftest->k >= 32) 1058 return -EINVAL; 1059 break; 1060 case BPF_LD | BPF_MEM: 1061 case BPF_LDX | BPF_MEM: 1062 case BPF_ST: 1063 case BPF_STX: 1064 /* Check for invalid memory addresses */ 1065 if (ftest->k >= BPF_MEMWORDS) 1066 return -EINVAL; 1067 break; 1068 case BPF_JMP | BPF_JA: 1069 /* Note, the large ftest->k might cause loops. 1070 * Compare this with conditional jumps below, 1071 * where offsets are limited. 
--ANK (981016) 1072 */ 1073 if (ftest->k >= (unsigned int)(flen - pc - 1)) 1074 return -EINVAL; 1075 break; 1076 case BPF_JMP | BPF_JEQ | BPF_K: 1077 case BPF_JMP | BPF_JEQ | BPF_X: 1078 case BPF_JMP | BPF_JGE | BPF_K: 1079 case BPF_JMP | BPF_JGE | BPF_X: 1080 case BPF_JMP | BPF_JGT | BPF_K: 1081 case BPF_JMP | BPF_JGT | BPF_X: 1082 case BPF_JMP | BPF_JSET | BPF_K: 1083 case BPF_JMP | BPF_JSET | BPF_X: 1084 /* Both conditionals must be safe */ 1085 if (pc + ftest->jt + 1 >= flen || 1086 pc + ftest->jf + 1 >= flen) 1087 return -EINVAL; 1088 break; 1089 case BPF_LD | BPF_W | BPF_ABS: 1090 case BPF_LD | BPF_H | BPF_ABS: 1091 case BPF_LD | BPF_B | BPF_ABS: 1092 anc_found = false; 1093 if (bpf_anc_helper(ftest) & BPF_ANC) 1094 anc_found = true; 1095 /* Ancillary operation unknown or unsupported */ 1096 if (anc_found == false && ftest->k >= SKF_AD_OFF) 1097 return -EINVAL; 1098 } 1099 } 1100 1101 /* Last instruction must be a RET code */ 1102 switch (filter[flen - 1].code) { 1103 case BPF_RET | BPF_K: 1104 case BPF_RET | BPF_A: 1105 return check_load_and_stores(filter, flen); 1106 } 1107 1108 return -EINVAL; 1109 } 1110 1111 static int bpf_prog_store_orig_filter(struct bpf_prog *fp, 1112 const struct sock_fprog *fprog) 1113 { 1114 unsigned int fsize = bpf_classic_proglen(fprog); 1115 struct sock_fprog_kern *fkprog; 1116 1117 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL); 1118 if (!fp->orig_prog) 1119 return -ENOMEM; 1120 1121 fkprog = fp->orig_prog; 1122 fkprog->len = fprog->len; 1123 1124 fkprog->filter = kmemdup(fp->insns, fsize, 1125 GFP_KERNEL | __GFP_NOWARN); 1126 if (!fkprog->filter) { 1127 kfree(fp->orig_prog); 1128 return -ENOMEM; 1129 } 1130 1131 return 0; 1132 } 1133 1134 static void bpf_release_orig_filter(struct bpf_prog *fp) 1135 { 1136 struct sock_fprog_kern *fprog = fp->orig_prog; 1137 1138 if (fprog) { 1139 kfree(fprog->filter); 1140 kfree(fprog); 1141 } 1142 } 1143 1144 static void __bpf_prog_release(struct bpf_prog *prog) 1145 { 1146 if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) { 1147 bpf_prog_put(prog); 1148 } else { 1149 bpf_release_orig_filter(prog); 1150 bpf_prog_free(prog); 1151 } 1152 } 1153 1154 static void __sk_filter_release(struct sk_filter *fp) 1155 { 1156 __bpf_prog_release(fp->prog); 1157 kfree(fp); 1158 } 1159 1160 /** 1161 * sk_filter_release_rcu - Release a socket filter by rcu_head 1162 * @rcu: rcu_head that contains the sk_filter to free 1163 */ 1164 static void sk_filter_release_rcu(struct rcu_head *rcu) 1165 { 1166 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 1167 1168 __sk_filter_release(fp); 1169 } 1170 1171 /** 1172 * sk_filter_release - release a socket filter 1173 * @fp: filter to remove 1174 * 1175 * Remove a filter from a socket and release its resources. 
1176 */ 1177 static void sk_filter_release(struct sk_filter *fp) 1178 { 1179 if (refcount_dec_and_test(&fp->refcnt)) 1180 call_rcu(&fp->rcu, sk_filter_release_rcu); 1181 } 1182 1183 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) 1184 { 1185 u32 filter_size = bpf_prog_size(fp->prog->len); 1186 1187 atomic_sub(filter_size, &sk->sk_omem_alloc); 1188 sk_filter_release(fp); 1189 } 1190 1191 /* try to charge the socket memory if there is space available 1192 * return true on success 1193 */ 1194 static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) 1195 { 1196 u32 filter_size = bpf_prog_size(fp->prog->len); 1197 1198 /* same check as in sock_kmalloc() */ 1199 if (filter_size <= sysctl_optmem_max && 1200 atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { 1201 atomic_add(filter_size, &sk->sk_omem_alloc); 1202 return true; 1203 } 1204 return false; 1205 } 1206 1207 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) 1208 { 1209 if (!refcount_inc_not_zero(&fp->refcnt)) 1210 return false; 1211 1212 if (!__sk_filter_charge(sk, fp)) { 1213 sk_filter_release(fp); 1214 return false; 1215 } 1216 return true; 1217 } 1218 1219 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) 1220 { 1221 struct sock_filter *old_prog; 1222 struct bpf_prog *old_fp; 1223 int err, new_len, old_len = fp->len; 1224 bool seen_ld_abs = false; 1225 1226 /* We are free to overwrite insns et al right here as it 1227 * won't be used at this point in time anymore internally 1228 * after the migration to the internal BPF instruction 1229 * representation. 1230 */ 1231 BUILD_BUG_ON(sizeof(struct sock_filter) != 1232 sizeof(struct bpf_insn)); 1233 1234 /* Conversion cannot happen on overlapping memory areas, 1235 * so we need to keep the user BPF around until the 2nd 1236 * pass. At this time, the user BPF is stored in fp->insns. 1237 */ 1238 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), 1239 GFP_KERNEL | __GFP_NOWARN); 1240 if (!old_prog) { 1241 err = -ENOMEM; 1242 goto out_err; 1243 } 1244 1245 /* 1st pass: calculate the new program length. */ 1246 err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, 1247 &seen_ld_abs); 1248 if (err) 1249 goto out_err_free; 1250 1251 /* Expand fp for appending the new filter representation. */ 1252 old_fp = fp; 1253 fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); 1254 if (!fp) { 1255 /* The old_fp is still around in case we couldn't 1256 * allocate new memory, so uncharge on that one. 1257 */ 1258 fp = old_fp; 1259 err = -ENOMEM; 1260 goto out_err_free; 1261 } 1262 1263 fp->len = new_len; 1264 1265 /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ 1266 err = bpf_convert_filter(old_prog, old_len, fp, &new_len, 1267 &seen_ld_abs); 1268 if (err) 1269 /* 2nd bpf_convert_filter() can fail only if it fails 1270 * to allocate memory, remapping must succeed. Note, 1271 * that at this time old_fp has already been released 1272 * by krealloc(). 
1273 */ 1274 goto out_err_free; 1275 1276 fp = bpf_prog_select_runtime(fp, &err); 1277 if (err) 1278 goto out_err_free; 1279 1280 kfree(old_prog); 1281 return fp; 1282 1283 out_err_free: 1284 kfree(old_prog); 1285 out_err: 1286 __bpf_prog_release(fp); 1287 return ERR_PTR(err); 1288 } 1289 1290 static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, 1291 bpf_aux_classic_check_t trans) 1292 { 1293 int err; 1294 1295 fp->bpf_func = NULL; 1296 fp->jited = 0; 1297 1298 err = bpf_check_classic(fp->insns, fp->len); 1299 if (err) { 1300 __bpf_prog_release(fp); 1301 return ERR_PTR(err); 1302 } 1303 1304 /* There might be additional checks and transformations 1305 * needed on classic filters, f.e. in case of seccomp. 1306 */ 1307 if (trans) { 1308 err = trans(fp->insns, fp->len); 1309 if (err) { 1310 __bpf_prog_release(fp); 1311 return ERR_PTR(err); 1312 } 1313 } 1314 1315 /* Probe if we can JIT compile the filter and if so, do 1316 * the compilation of the filter. 1317 */ 1318 bpf_jit_compile(fp); 1319 1320 /* JIT compiler couldn't process this filter, so do the 1321 * internal BPF translation for the optimized interpreter. 1322 */ 1323 if (!fp->jited) 1324 fp = bpf_migrate_filter(fp); 1325 1326 return fp; 1327 } 1328 1329 /** 1330 * bpf_prog_create - create an unattached filter 1331 * @pfp: the unattached filter that is created 1332 * @fprog: the filter program 1333 * 1334 * Create a filter independent of any socket. We first run some 1335 * sanity checks on it to make sure it does not explode on us later. 1336 * If an error occurs or there is insufficient memory for the filter 1337 * a negative errno code is returned. On success the return is zero. 1338 */ 1339 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) 1340 { 1341 unsigned int fsize = bpf_classic_proglen(fprog); 1342 struct bpf_prog *fp; 1343 1344 /* Make sure new filter is there and in the right amounts. */ 1345 if (!bpf_check_basics_ok(fprog->filter, fprog->len)) 1346 return -EINVAL; 1347 1348 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); 1349 if (!fp) 1350 return -ENOMEM; 1351 1352 memcpy(fp->insns, fprog->filter, fsize); 1353 1354 fp->len = fprog->len; 1355 /* Since unattached filters are not copied back to user 1356 * space through sk_get_filter(), we do not need to hold 1357 * a copy here, and can spare us the work. 1358 */ 1359 fp->orig_prog = NULL; 1360 1361 /* bpf_prepare_filter() already takes care of freeing 1362 * memory in case something goes wrong. 1363 */ 1364 fp = bpf_prepare_filter(fp, NULL); 1365 if (IS_ERR(fp)) 1366 return PTR_ERR(fp); 1367 1368 *pfp = fp; 1369 return 0; 1370 } 1371 EXPORT_SYMBOL_GPL(bpf_prog_create); 1372 1373 /** 1374 * bpf_prog_create_from_user - create an unattached filter from user buffer 1375 * @pfp: the unattached filter that is created 1376 * @fprog: the filter program 1377 * @trans: post-classic verifier transformation handler 1378 * @save_orig: save classic BPF program 1379 * 1380 * This function effectively does the same as bpf_prog_create(), only 1381 * that it builds up its insns buffer from user space provided buffer. 1382 * It also allows for passing a bpf_aux_classic_check_t handler. 1383 */ 1384 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, 1385 bpf_aux_classic_check_t trans, bool save_orig) 1386 { 1387 unsigned int fsize = bpf_classic_proglen(fprog); 1388 struct bpf_prog *fp; 1389 int err; 1390 1391 /* Make sure new filter is there and in the right amounts. 
*/ 1392 if (!bpf_check_basics_ok(fprog->filter, fprog->len)) 1393 return -EINVAL; 1394 1395 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); 1396 if (!fp) 1397 return -ENOMEM; 1398 1399 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 1400 __bpf_prog_free(fp); 1401 return -EFAULT; 1402 } 1403 1404 fp->len = fprog->len; 1405 fp->orig_prog = NULL; 1406 1407 if (save_orig) { 1408 err = bpf_prog_store_orig_filter(fp, fprog); 1409 if (err) { 1410 __bpf_prog_free(fp); 1411 return -ENOMEM; 1412 } 1413 } 1414 1415 /* bpf_prepare_filter() already takes care of freeing 1416 * memory in case something goes wrong. 1417 */ 1418 fp = bpf_prepare_filter(fp, trans); 1419 if (IS_ERR(fp)) 1420 return PTR_ERR(fp); 1421 1422 *pfp = fp; 1423 return 0; 1424 } 1425 EXPORT_SYMBOL_GPL(bpf_prog_create_from_user); 1426 1427 void bpf_prog_destroy(struct bpf_prog *fp) 1428 { 1429 __bpf_prog_release(fp); 1430 } 1431 EXPORT_SYMBOL_GPL(bpf_prog_destroy); 1432 1433 static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) 1434 { 1435 struct sk_filter *fp, *old_fp; 1436 1437 fp = kmalloc(sizeof(*fp), GFP_KERNEL); 1438 if (!fp) 1439 return -ENOMEM; 1440 1441 fp->prog = prog; 1442 1443 if (!__sk_filter_charge(sk, fp)) { 1444 kfree(fp); 1445 return -ENOMEM; 1446 } 1447 refcount_set(&fp->refcnt, 1); 1448 1449 old_fp = rcu_dereference_protected(sk->sk_filter, 1450 lockdep_sock_is_held(sk)); 1451 rcu_assign_pointer(sk->sk_filter, fp); 1452 1453 if (old_fp) 1454 sk_filter_uncharge(sk, old_fp); 1455 1456 return 0; 1457 } 1458 1459 static 1460 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) 1461 { 1462 unsigned int fsize = bpf_classic_proglen(fprog); 1463 struct bpf_prog *prog; 1464 int err; 1465 1466 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1467 return ERR_PTR(-EPERM); 1468 1469 /* Make sure new filter is there and in the right amounts. */ 1470 if (!bpf_check_basics_ok(fprog->filter, fprog->len)) 1471 return ERR_PTR(-EINVAL); 1472 1473 prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); 1474 if (!prog) 1475 return ERR_PTR(-ENOMEM); 1476 1477 if (copy_from_user(prog->insns, fprog->filter, fsize)) { 1478 __bpf_prog_free(prog); 1479 return ERR_PTR(-EFAULT); 1480 } 1481 1482 prog->len = fprog->len; 1483 1484 err = bpf_prog_store_orig_filter(prog, fprog); 1485 if (err) { 1486 __bpf_prog_free(prog); 1487 return ERR_PTR(-ENOMEM); 1488 } 1489 1490 /* bpf_prepare_filter() already takes care of freeing 1491 * memory in case something goes wrong. 1492 */ 1493 return bpf_prepare_filter(prog, NULL); 1494 } 1495 1496 /** 1497 * sk_attach_filter - attach a socket filter 1498 * @fprog: the filter program 1499 * @sk: the socket to use 1500 * 1501 * Attach the user's filter code. We first run some sanity checks on 1502 * it to make sure it does not explode on us later. If an error 1503 * occurs or there is insufficient memory for the filter a negative 1504 * errno code is returned. On success the return is zero. 
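 *
 * Example (user space side, for illustration only; fd is assumed to be an
 * open socket): a classic filter that accepts every packet is attached via
 * setsockopt(SO_ATTACH_FILTER), which ends up in this function:
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));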
1505 */ 1506 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1507 { 1508 struct bpf_prog *prog = __get_filter(fprog, sk); 1509 int err; 1510 1511 if (IS_ERR(prog)) 1512 return PTR_ERR(prog); 1513 1514 err = __sk_attach_prog(prog, sk); 1515 if (err < 0) { 1516 __bpf_prog_release(prog); 1517 return err; 1518 } 1519 1520 return 0; 1521 } 1522 EXPORT_SYMBOL_GPL(sk_attach_filter); 1523 1524 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) 1525 { 1526 struct bpf_prog *prog = __get_filter(fprog, sk); 1527 int err; 1528 1529 if (IS_ERR(prog)) 1530 return PTR_ERR(prog); 1531 1532 if (bpf_prog_size(prog->len) > sysctl_optmem_max) 1533 err = -ENOMEM; 1534 else 1535 err = reuseport_attach_prog(sk, prog); 1536 1537 if (err) 1538 __bpf_prog_release(prog); 1539 1540 return err; 1541 } 1542 1543 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) 1544 { 1545 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1546 return ERR_PTR(-EPERM); 1547 1548 return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); 1549 } 1550 1551 int sk_attach_bpf(u32 ufd, struct sock *sk) 1552 { 1553 struct bpf_prog *prog = __get_bpf(ufd, sk); 1554 int err; 1555 1556 if (IS_ERR(prog)) 1557 return PTR_ERR(prog); 1558 1559 err = __sk_attach_prog(prog, sk); 1560 if (err < 0) { 1561 bpf_prog_put(prog); 1562 return err; 1563 } 1564 1565 return 0; 1566 } 1567 1568 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) 1569 { 1570 struct bpf_prog *prog; 1571 int err; 1572 1573 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 1574 return -EPERM; 1575 1576 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); 1577 if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL) 1578 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT); 1579 if (IS_ERR(prog)) 1580 return PTR_ERR(prog); 1581 1582 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) { 1583 /* Like other non BPF_PROG_TYPE_SOCKET_FILTER 1584 * bpf prog (e.g. sockmap). It depends on the 1585 * limitation imposed by bpf_prog_load(). 1586 * Hence, sysctl_optmem_max is not checked. 
1587 */ 1588 if ((sk->sk_type != SOCK_STREAM && 1589 sk->sk_type != SOCK_DGRAM) || 1590 (sk->sk_protocol != IPPROTO_UDP && 1591 sk->sk_protocol != IPPROTO_TCP) || 1592 (sk->sk_family != AF_INET && 1593 sk->sk_family != AF_INET6)) { 1594 err = -ENOTSUPP; 1595 goto err_prog_put; 1596 } 1597 } else { 1598 /* BPF_PROG_TYPE_SOCKET_FILTER */ 1599 if (bpf_prog_size(prog->len) > sysctl_optmem_max) { 1600 err = -ENOMEM; 1601 goto err_prog_put; 1602 } 1603 } 1604 1605 err = reuseport_attach_prog(sk, prog); 1606 err_prog_put: 1607 if (err) 1608 bpf_prog_put(prog); 1609 1610 return err; 1611 } 1612 1613 void sk_reuseport_prog_free(struct bpf_prog *prog) 1614 { 1615 if (!prog) 1616 return; 1617 1618 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) 1619 bpf_prog_put(prog); 1620 else 1621 bpf_prog_destroy(prog); 1622 } 1623 1624 struct bpf_scratchpad { 1625 union { 1626 __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; 1627 u8 buff[MAX_BPF_STACK]; 1628 }; 1629 }; 1630 1631 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); 1632 1633 static inline int __bpf_try_make_writable(struct sk_buff *skb, 1634 unsigned int write_len) 1635 { 1636 return skb_ensure_writable(skb, write_len); 1637 } 1638 1639 static inline int bpf_try_make_writable(struct sk_buff *skb, 1640 unsigned int write_len) 1641 { 1642 int err = __bpf_try_make_writable(skb, write_len); 1643 1644 bpf_compute_data_pointers(skb); 1645 return err; 1646 } 1647 1648 static int bpf_try_make_head_writable(struct sk_buff *skb) 1649 { 1650 return bpf_try_make_writable(skb, skb_headlen(skb)); 1651 } 1652 1653 static inline void bpf_push_mac_rcsum(struct sk_buff *skb) 1654 { 1655 if (skb_at_tc_ingress(skb)) 1656 skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); 1657 } 1658 1659 static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) 1660 { 1661 if (skb_at_tc_ingress(skb)) 1662 skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); 1663 } 1664 1665 BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, 1666 const void *, from, u32, len, u64, flags) 1667 { 1668 void *ptr; 1669 1670 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) 1671 return -EINVAL; 1672 if (unlikely(offset > 0xffff)) 1673 return -EFAULT; 1674 if (unlikely(bpf_try_make_writable(skb, offset + len))) 1675 return -EFAULT; 1676 1677 ptr = skb->data + offset; 1678 if (flags & BPF_F_RECOMPUTE_CSUM) 1679 __skb_postpull_rcsum(skb, ptr, len, offset); 1680 1681 memcpy(ptr, from, len); 1682 1683 if (flags & BPF_F_RECOMPUTE_CSUM) 1684 __skb_postpush_rcsum(skb, ptr, len, offset); 1685 if (flags & BPF_F_INVALIDATE_HASH) 1686 skb_clear_hash(skb); 1687 1688 return 0; 1689 } 1690 1691 static const struct bpf_func_proto bpf_skb_store_bytes_proto = { 1692 .func = bpf_skb_store_bytes, 1693 .gpl_only = false, 1694 .ret_type = RET_INTEGER, 1695 .arg1_type = ARG_PTR_TO_CTX, 1696 .arg2_type = ARG_ANYTHING, 1697 .arg3_type = ARG_PTR_TO_MEM, 1698 .arg4_type = ARG_CONST_SIZE, 1699 .arg5_type = ARG_ANYTHING, 1700 }; 1701 1702 BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, 1703 void *, to, u32, len) 1704 { 1705 void *ptr; 1706 1707 if (unlikely(offset > 0xffff)) 1708 goto err_clear; 1709 1710 ptr = skb_header_pointer(skb, offset, len, to); 1711 if (unlikely(!ptr)) 1712 goto err_clear; 1713 if (ptr != to) 1714 memcpy(to, ptr, len); 1715 1716 return 0; 1717 err_clear: 1718 memset(to, 0, len); 1719 return -EFAULT; 1720 } 1721 1722 static const struct bpf_func_proto bpf_skb_load_bytes_proto = { 1723 .func = bpf_skb_load_bytes, 1724 .gpl_only = false, 
1725 .ret_type = RET_INTEGER, 1726 .arg1_type = ARG_PTR_TO_CTX, 1727 .arg2_type = ARG_ANYTHING, 1728 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 1729 .arg4_type = ARG_CONST_SIZE, 1730 }; 1731 1732 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, 1733 u32, offset, void *, to, u32, len, u32, start_header) 1734 { 1735 u8 *end = skb_tail_pointer(skb); 1736 u8 *net = skb_network_header(skb); 1737 u8 *mac = skb_mac_header(skb); 1738 u8 *ptr; 1739 1740 if (unlikely(offset > 0xffff || len > (end - mac))) 1741 goto err_clear; 1742 1743 switch (start_header) { 1744 case BPF_HDR_START_MAC: 1745 ptr = mac + offset; 1746 break; 1747 case BPF_HDR_START_NET: 1748 ptr = net + offset; 1749 break; 1750 default: 1751 goto err_clear; 1752 } 1753 1754 if (likely(ptr >= mac && ptr + len <= end)) { 1755 memcpy(to, ptr, len); 1756 return 0; 1757 } 1758 1759 err_clear: 1760 memset(to, 0, len); 1761 return -EFAULT; 1762 } 1763 1764 static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { 1765 .func = bpf_skb_load_bytes_relative, 1766 .gpl_only = false, 1767 .ret_type = RET_INTEGER, 1768 .arg1_type = ARG_PTR_TO_CTX, 1769 .arg2_type = ARG_ANYTHING, 1770 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 1771 .arg4_type = ARG_CONST_SIZE, 1772 .arg5_type = ARG_ANYTHING, 1773 }; 1774 1775 BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) 1776 { 1777 /* Idea is the following: should the needed direct read/write 1778 * test fail during runtime, we can pull in more data and redo 1779 * again, since implicitly, we invalidate previous checks here. 1780 * 1781 * Or, since we know how much we need to make read/writeable, 1782 * this can be done once at the program beginning for direct 1783 * access case. By this we overcome limitations of only current 1784 * headroom being accessible. 1785 */ 1786 return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); 1787 } 1788 1789 static const struct bpf_func_proto bpf_skb_pull_data_proto = { 1790 .func = bpf_skb_pull_data, 1791 .gpl_only = false, 1792 .ret_type = RET_INTEGER, 1793 .arg1_type = ARG_PTR_TO_CTX, 1794 .arg2_type = ARG_ANYTHING, 1795 }; 1796 1797 BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) 1798 { 1799 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; 1800 } 1801 1802 static const struct bpf_func_proto bpf_sk_fullsock_proto = { 1803 .func = bpf_sk_fullsock, 1804 .gpl_only = false, 1805 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 1806 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 1807 }; 1808 1809 static inline int sk_skb_try_make_writable(struct sk_buff *skb, 1810 unsigned int write_len) 1811 { 1812 int err = __bpf_try_make_writable(skb, write_len); 1813 1814 bpf_compute_data_end_sk_skb(skb); 1815 return err; 1816 } 1817 1818 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) 1819 { 1820 /* Idea is the following: should the needed direct read/write 1821 * test fail during runtime, we can pull in more data and redo 1822 * again, since implicitly, we invalidate previous checks here. 1823 * 1824 * Or, since we know how much we need to make read/writeable, 1825 * this can be done once at the program beginning for direct 1826 * access case. By this we overcome limitations of only current 1827 * headroom being accessible. 1828 */ 1829 return sk_skb_try_make_writable(skb, len ? 
: skb_headlen(skb)); 1830 } 1831 1832 static const struct bpf_func_proto sk_skb_pull_data_proto = { 1833 .func = sk_skb_pull_data, 1834 .gpl_only = false, 1835 .ret_type = RET_INTEGER, 1836 .arg1_type = ARG_PTR_TO_CTX, 1837 .arg2_type = ARG_ANYTHING, 1838 }; 1839 1840 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, 1841 u64, from, u64, to, u64, flags) 1842 { 1843 __sum16 *ptr; 1844 1845 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) 1846 return -EINVAL; 1847 if (unlikely(offset > 0xffff || offset & 1)) 1848 return -EFAULT; 1849 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) 1850 return -EFAULT; 1851 1852 ptr = (__sum16 *)(skb->data + offset); 1853 switch (flags & BPF_F_HDR_FIELD_MASK) { 1854 case 0: 1855 if (unlikely(from != 0)) 1856 return -EINVAL; 1857 1858 csum_replace_by_diff(ptr, to); 1859 break; 1860 case 2: 1861 csum_replace2(ptr, from, to); 1862 break; 1863 case 4: 1864 csum_replace4(ptr, from, to); 1865 break; 1866 default: 1867 return -EINVAL; 1868 } 1869 1870 return 0; 1871 } 1872 1873 static const struct bpf_func_proto bpf_l3_csum_replace_proto = { 1874 .func = bpf_l3_csum_replace, 1875 .gpl_only = false, 1876 .ret_type = RET_INTEGER, 1877 .arg1_type = ARG_PTR_TO_CTX, 1878 .arg2_type = ARG_ANYTHING, 1879 .arg3_type = ARG_ANYTHING, 1880 .arg4_type = ARG_ANYTHING, 1881 .arg5_type = ARG_ANYTHING, 1882 }; 1883 1884 BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, 1885 u64, from, u64, to, u64, flags) 1886 { 1887 bool is_pseudo = flags & BPF_F_PSEUDO_HDR; 1888 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; 1889 bool do_mforce = flags & BPF_F_MARK_ENFORCE; 1890 __sum16 *ptr; 1891 1892 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | 1893 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) 1894 return -EINVAL; 1895 if (unlikely(offset > 0xffff || offset & 1)) 1896 return -EFAULT; 1897 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) 1898 return -EFAULT; 1899 1900 ptr = (__sum16 *)(skb->data + offset); 1901 if (is_mmzero && !do_mforce && !*ptr) 1902 return 0; 1903 1904 switch (flags & BPF_F_HDR_FIELD_MASK) { 1905 case 0: 1906 if (unlikely(from != 0)) 1907 return -EINVAL; 1908 1909 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); 1910 break; 1911 case 2: 1912 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); 1913 break; 1914 case 4: 1915 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo); 1916 break; 1917 default: 1918 return -EINVAL; 1919 } 1920 1921 if (is_mmzero && !*ptr) 1922 *ptr = CSUM_MANGLED_0; 1923 return 0; 1924 } 1925 1926 static const struct bpf_func_proto bpf_l4_csum_replace_proto = { 1927 .func = bpf_l4_csum_replace, 1928 .gpl_only = false, 1929 .ret_type = RET_INTEGER, 1930 .arg1_type = ARG_PTR_TO_CTX, 1931 .arg2_type = ARG_ANYTHING, 1932 .arg3_type = ARG_ANYTHING, 1933 .arg4_type = ARG_ANYTHING, 1934 .arg5_type = ARG_ANYTHING, 1935 }; 1936 1937 BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, 1938 __be32 *, to, u32, to_size, __wsum, seed) 1939 { 1940 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); 1941 u32 diff_size = from_size + to_size; 1942 int i, j = 0; 1943 1944 /* This is quite flexible, some examples: 1945 * 1946 * from_size == 0, to_size > 0, seed := csum --> pushing data 1947 * from_size > 0, to_size == 0, seed := csum --> pulling data 1948 * from_size > 0, to_size > 0, seed := 0 --> diffing data 1949 * 1950 * Even for diffing, from_size and to_size don't need to be equal. 
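	 *
	 * A common pattern in programs (illustration only; old_ip, new_ip and
	 * csum_off are program-local values) is to diff an old and a new
	 * 4 byte field and feed the result into the csum replace helpers,
	 * e.g. when rewriting an IPv4 address:
	 *
	 *	bpf_l4_csum_replace(skb, csum_off, 0,
	 *			    bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0),
	 *			    BPF_F_PSEUDO_HDR);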
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		return (skb->csum = csum_add(skb->csum, csum));

	return -ENOTSUPP;
}

static const struct bpf_func_proto bpf_csum_update_proto = {
	.func		= bpf_csum_update,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
	return dev_forward_skb(dev, skb);
}

static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
				      struct sk_buff *skb)
{
	int ret = ____dev_forward_skb(dev, skb);

	if (likely(!ret)) {
		skb->dev = dev;
		ret = netif_rx(skb);
	}

	return ret;
}

static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
	int ret;

	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
		kfree_skb(skb);
		return -ENETDOWN;
	}

	skb->dev = dev;

	__this_cpu_inc(xmit_recursion);
	ret = dev_queue_xmit(skb);
	__this_cpu_dec(xmit_recursion);

	return ret;
}

static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	unsigned int mlen = skb_network_offset(skb);

	if (mlen) {
		__skb_pull(skb, mlen);

		/* At ingress, the mac header has already been pulled once.
		 * At egress, skb_postpull_rcsum() has to be done in case
		 * the skb originates from ingress (i.e. a forwarded skb)
		 * to ensure that rcsum starts at the net header.
		 */
		if (!skb_at_tc_ingress(skb))
			skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
	}
	skb_pop_mac_header(skb);
	skb_reset_mac_len(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* Verify that a link layer header is carried */
	if (unlikely(skb->mac_header >= skb->network_header)) {
		kfree_skb(skb);
		return -ERANGE;
	}

	bpf_push_mac_rcsum(skb);
	return flags & BPF_F_INGRESS ?
2066 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); 2067 } 2068 2069 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, 2070 u32 flags) 2071 { 2072 if (dev_is_mac_header_xmit(dev)) 2073 return __bpf_redirect_common(skb, dev, flags); 2074 else 2075 return __bpf_redirect_no_mac(skb, dev, flags); 2076 } 2077 2078 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) 2079 { 2080 struct net_device *dev; 2081 struct sk_buff *clone; 2082 int ret; 2083 2084 if (unlikely(flags & ~(BPF_F_INGRESS))) 2085 return -EINVAL; 2086 2087 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); 2088 if (unlikely(!dev)) 2089 return -EINVAL; 2090 2091 clone = skb_clone(skb, GFP_ATOMIC); 2092 if (unlikely(!clone)) 2093 return -ENOMEM; 2094 2095 /* For direct write, we need to keep the invariant that the skbs 2096 * we're dealing with need to be uncloned. Should uncloning fail 2097 * here, we need to free the just generated clone to unclone once 2098 * again. 2099 */ 2100 ret = bpf_try_make_head_writable(skb); 2101 if (unlikely(ret)) { 2102 kfree_skb(clone); 2103 return -ENOMEM; 2104 } 2105 2106 return __bpf_redirect(clone, dev, flags); 2107 } 2108 2109 static const struct bpf_func_proto bpf_clone_redirect_proto = { 2110 .func = bpf_clone_redirect, 2111 .gpl_only = false, 2112 .ret_type = RET_INTEGER, 2113 .arg1_type = ARG_PTR_TO_CTX, 2114 .arg2_type = ARG_ANYTHING, 2115 .arg3_type = ARG_ANYTHING, 2116 }; 2117 2118 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); 2119 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); 2120 2121 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) 2122 { 2123 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2124 2125 if (unlikely(flags & ~(BPF_F_INGRESS))) 2126 return TC_ACT_SHOT; 2127 2128 ri->ifindex = ifindex; 2129 ri->flags = flags; 2130 2131 return TC_ACT_REDIRECT; 2132 } 2133 2134 int skb_do_redirect(struct sk_buff *skb) 2135 { 2136 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 2137 struct net_device *dev; 2138 2139 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); 2140 ri->ifindex = 0; 2141 if (unlikely(!dev)) { 2142 kfree_skb(skb); 2143 return -EINVAL; 2144 } 2145 2146 return __bpf_redirect(skb, dev, ri->flags); 2147 } 2148 2149 static const struct bpf_func_proto bpf_redirect_proto = { 2150 .func = bpf_redirect, 2151 .gpl_only = false, 2152 .ret_type = RET_INTEGER, 2153 .arg1_type = ARG_ANYTHING, 2154 .arg2_type = ARG_ANYTHING, 2155 }; 2156 2157 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) 2158 { 2159 msg->apply_bytes = bytes; 2160 return 0; 2161 } 2162 2163 static const struct bpf_func_proto bpf_msg_apply_bytes_proto = { 2164 .func = bpf_msg_apply_bytes, 2165 .gpl_only = false, 2166 .ret_type = RET_INTEGER, 2167 .arg1_type = ARG_PTR_TO_CTX, 2168 .arg2_type = ARG_ANYTHING, 2169 }; 2170 2171 BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) 2172 { 2173 msg->cork_bytes = bytes; 2174 return 0; 2175 } 2176 2177 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { 2178 .func = bpf_msg_cork_bytes, 2179 .gpl_only = false, 2180 .ret_type = RET_INTEGER, 2181 .arg1_type = ARG_PTR_TO_CTX, 2182 .arg2_type = ARG_ANYTHING, 2183 }; 2184 2185 BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, 2186 u32, end, u64, flags) 2187 { 2188 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start; 2189 u32 first_sge, last_sge, i, shift, bytes_sg_total; 2190 struct scatterlist *sge; 2191 u8 *raw, *to, *from; 2192 struct page *page; 
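	/* Expose [start, end) of the sk_msg through msg->data/msg->data_end so
	 * the program can access the bytes directly. If the range spans more
	 * than one scatterlist element or a shared page, it is linearized into
	 * a private page below. A hedged usage sketch from the program side,
	 * where struct hdr is a hypothetical application header:
	 *
	 *	if (bpf_msg_pull_data(msg, 0, sizeof(struct hdr), 0))
	 *		return SK_DROP;
	 */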
2193 2194 if (unlikely(flags || end <= start)) 2195 return -EINVAL; 2196 2197 /* First find the starting scatterlist element */ 2198 i = msg->sg.start; 2199 do { 2200 len = sk_msg_elem(msg, i)->length; 2201 if (start < offset + len) 2202 break; 2203 offset += len; 2204 sk_msg_iter_var_next(i); 2205 } while (i != msg->sg.end); 2206 2207 if (unlikely(start >= offset + len)) 2208 return -EINVAL; 2209 2210 first_sge = i; 2211 /* The start may point into the sg element so we need to also 2212 * account for the headroom. 2213 */ 2214 bytes_sg_total = start - offset + bytes; 2215 if (!msg->sg.copy[i] && bytes_sg_total <= len) 2216 goto out; 2217 2218 /* At this point we need to linearize multiple scatterlist 2219 * elements or a single shared page. Either way we need to 2220 * copy into a linear buffer exclusively owned by BPF. Then 2221 * place the buffer in the scatterlist and fixup the original 2222 * entries by removing the entries now in the linear buffer 2223 * and shifting the remaining entries. For now we do not try 2224 * to copy partial entries to avoid complexity of running out 2225 * of sg_entry slots. The downside is reading a single byte 2226 * will copy the entire sg entry. 2227 */ 2228 do { 2229 copy += sk_msg_elem(msg, i)->length; 2230 sk_msg_iter_var_next(i); 2231 if (bytes_sg_total <= copy) 2232 break; 2233 } while (i != msg->sg.end); 2234 last_sge = i; 2235 2236 if (unlikely(bytes_sg_total > copy)) 2237 return -EINVAL; 2238 2239 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2240 get_order(copy)); 2241 if (unlikely(!page)) 2242 return -ENOMEM; 2243 2244 raw = page_address(page); 2245 i = first_sge; 2246 do { 2247 sge = sk_msg_elem(msg, i); 2248 from = sg_virt(sge); 2249 len = sge->length; 2250 to = raw + poffset; 2251 2252 memcpy(to, from, len); 2253 poffset += len; 2254 sge->length = 0; 2255 put_page(sg_page(sge)); 2256 2257 sk_msg_iter_var_next(i); 2258 } while (i != last_sge); 2259 2260 sg_set_page(&msg->sg.data[first_sge], page, copy, 0); 2261 2262 /* To repair sg ring we need to shift entries. If we only 2263 * had a single entry though we can just replace it and 2264 * be done. Otherwise walk the ring and shift the entries. 2265 */ 2266 WARN_ON_ONCE(last_sge == first_sge); 2267 shift = last_sge > first_sge ? 2268 last_sge - first_sge - 1 : 2269 MAX_SKB_FRAGS - first_sge + last_sge - 1; 2270 if (!shift) 2271 goto out; 2272 2273 i = first_sge; 2274 sk_msg_iter_var_next(i); 2275 do { 2276 u32 move_from; 2277 2278 if (i + shift >= MAX_MSG_FRAGS) 2279 move_from = i + shift - MAX_MSG_FRAGS; 2280 else 2281 move_from = i + shift; 2282 if (move_from == msg->sg.end) 2283 break; 2284 2285 msg->sg.data[i] = msg->sg.data[move_from]; 2286 msg->sg.data[move_from].length = 0; 2287 msg->sg.data[move_from].page_link = 0; 2288 msg->sg.data[move_from].offset = 0; 2289 sk_msg_iter_var_next(i); 2290 } while (1); 2291 2292 msg->sg.end = msg->sg.end - shift > msg->sg.end ? 
2293 msg->sg.end - shift + MAX_MSG_FRAGS : 2294 msg->sg.end - shift; 2295 out: 2296 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; 2297 msg->data_end = msg->data + bytes; 2298 return 0; 2299 } 2300 2301 static const struct bpf_func_proto bpf_msg_pull_data_proto = { 2302 .func = bpf_msg_pull_data, 2303 .gpl_only = false, 2304 .ret_type = RET_INTEGER, 2305 .arg1_type = ARG_PTR_TO_CTX, 2306 .arg2_type = ARG_ANYTHING, 2307 .arg3_type = ARG_ANYTHING, 2308 .arg4_type = ARG_ANYTHING, 2309 }; 2310 2311 BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, 2312 u32, len, u64, flags) 2313 { 2314 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; 2315 u32 new, i = 0, l, space, copy = 0, offset = 0; 2316 u8 *raw, *to, *from; 2317 struct page *page; 2318 2319 if (unlikely(flags)) 2320 return -EINVAL; 2321 2322 /* First find the starting scatterlist element */ 2323 i = msg->sg.start; 2324 do { 2325 l = sk_msg_elem(msg, i)->length; 2326 2327 if (start < offset + l) 2328 break; 2329 offset += l; 2330 sk_msg_iter_var_next(i); 2331 } while (i != msg->sg.end); 2332 2333 if (start >= offset + l) 2334 return -EINVAL; 2335 2336 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2337 2338 /* If no space available will fallback to copy, we need at 2339 * least one scatterlist elem available to push data into 2340 * when start aligns to the beginning of an element or two 2341 * when it falls inside an element. We handle the start equals 2342 * offset case because its the common case for inserting a 2343 * header. 2344 */ 2345 if (!space || (space == 1 && start != offset)) 2346 copy = msg->sg.data[i].length; 2347 2348 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2349 get_order(copy + len)); 2350 if (unlikely(!page)) 2351 return -ENOMEM; 2352 2353 if (copy) { 2354 int front, back; 2355 2356 raw = page_address(page); 2357 2358 psge = sk_msg_elem(msg, i); 2359 front = start - offset; 2360 back = psge->length - front; 2361 from = sg_virt(psge); 2362 2363 if (front) 2364 memcpy(raw, from, front); 2365 2366 if (back) { 2367 from += front; 2368 to = raw + front + len; 2369 2370 memcpy(to, from, back); 2371 } 2372 2373 put_page(sg_page(psge)); 2374 } else if (start - offset) { 2375 psge = sk_msg_elem(msg, i); 2376 rsge = sk_msg_elem_cpy(msg, i); 2377 2378 psge->length = start - offset; 2379 rsge.length -= psge->length; 2380 rsge.offset += start; 2381 2382 sk_msg_iter_var_next(i); 2383 sg_unmark_end(psge); 2384 sk_msg_iter_next(msg, end); 2385 } 2386 2387 /* Slot(s) to place newly allocated data */ 2388 new = i; 2389 2390 /* Shift one or two slots as needed */ 2391 if (!copy) { 2392 sge = sk_msg_elem_cpy(msg, i); 2393 2394 sk_msg_iter_var_next(i); 2395 sg_unmark_end(&sge); 2396 sk_msg_iter_next(msg, end); 2397 2398 nsge = sk_msg_elem_cpy(msg, i); 2399 if (rsge.length) { 2400 sk_msg_iter_var_next(i); 2401 nnsge = sk_msg_elem_cpy(msg, i); 2402 } 2403 2404 while (i != msg->sg.end) { 2405 msg->sg.data[i] = sge; 2406 sge = nsge; 2407 sk_msg_iter_var_next(i); 2408 if (rsge.length) { 2409 nsge = nnsge; 2410 nnsge = sk_msg_elem_cpy(msg, i); 2411 } else { 2412 nsge = sk_msg_elem_cpy(msg, i); 2413 } 2414 } 2415 } 2416 2417 /* Place newly allocated data buffer */ 2418 sk_mem_charge(msg->sk, len); 2419 msg->sg.size += len; 2420 msg->sg.copy[new] = false; 2421 sg_set_page(&msg->sg.data[new], page, len + copy, 0); 2422 if (rsge.length) { 2423 get_page(sg_page(&rsge)); 2424 sk_msg_iter_var_next(new); 2425 msg->sg.data[new] = rsge; 2426 } 2427 2428 sk_msg_compute_data_pointers(msg); 2429 return 0; 
2430 } 2431 2432 static const struct bpf_func_proto bpf_msg_push_data_proto = { 2433 .func = bpf_msg_push_data, 2434 .gpl_only = false, 2435 .ret_type = RET_INTEGER, 2436 .arg1_type = ARG_PTR_TO_CTX, 2437 .arg2_type = ARG_ANYTHING, 2438 .arg3_type = ARG_ANYTHING, 2439 .arg4_type = ARG_ANYTHING, 2440 }; 2441 2442 static void sk_msg_shift_left(struct sk_msg *msg, int i) 2443 { 2444 int prev; 2445 2446 do { 2447 prev = i; 2448 sk_msg_iter_var_next(i); 2449 msg->sg.data[prev] = msg->sg.data[i]; 2450 } while (i != msg->sg.end); 2451 2452 sk_msg_iter_prev(msg, end); 2453 } 2454 2455 static void sk_msg_shift_right(struct sk_msg *msg, int i) 2456 { 2457 struct scatterlist tmp, sge; 2458 2459 sk_msg_iter_next(msg, end); 2460 sge = sk_msg_elem_cpy(msg, i); 2461 sk_msg_iter_var_next(i); 2462 tmp = sk_msg_elem_cpy(msg, i); 2463 2464 while (i != msg->sg.end) { 2465 msg->sg.data[i] = sge; 2466 sk_msg_iter_var_next(i); 2467 sge = tmp; 2468 tmp = sk_msg_elem_cpy(msg, i); 2469 } 2470 } 2471 2472 BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, 2473 u32, len, u64, flags) 2474 { 2475 u32 i = 0, l, space, offset = 0; 2476 u64 last = start + len; 2477 int pop; 2478 2479 if (unlikely(flags)) 2480 return -EINVAL; 2481 2482 /* First find the starting scatterlist element */ 2483 i = msg->sg.start; 2484 do { 2485 l = sk_msg_elem(msg, i)->length; 2486 2487 if (start < offset + l) 2488 break; 2489 offset += l; 2490 sk_msg_iter_var_next(i); 2491 } while (i != msg->sg.end); 2492 2493 /* Bounds checks: start and pop must be inside message */ 2494 if (start >= offset + l || last >= msg->sg.size) 2495 return -EINVAL; 2496 2497 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2498 2499 pop = len; 2500 /* --------------| offset 2501 * -| start |-------- len -------| 2502 * 2503 * |----- a ----|-------- pop -------|----- b ----| 2504 * |______________________________________________| length 2505 * 2506 * 2507 * a: region at front of scatter element to save 2508 * b: region at back of scatter element to save when length > A + pop 2509 * pop: region to pop from element, same as input 'pop' here will be 2510 * decremented below per iteration. 2511 * 2512 * Two top-level cases to handle when start != offset, first B is non 2513 * zero and second B is zero corresponding to when a pop includes more 2514 * than one element. 2515 * 2516 * Then if B is non-zero AND there is no space allocate space and 2517 * compact A, B regions into page. If there is space shift ring to 2518 * the rigth free'ing the next element in ring to place B, leaving 2519 * A untouched except to reduce length. 
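	 *
	 * Put differently: with a spare slot the ring is shifted right and b
	 * becomes its own element while a stays in place with a shorter
	 * length; without a spare slot, a and b are copied into a freshly
	 * allocated page that replaces the original element.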
2520 */ 2521 if (start != offset) { 2522 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); 2523 int a = start; 2524 int b = sge->length - pop - a; 2525 2526 sk_msg_iter_var_next(i); 2527 2528 if (pop < sge->length - a) { 2529 if (space) { 2530 sge->length = a; 2531 sk_msg_shift_right(msg, i); 2532 nsge = sk_msg_elem(msg, i); 2533 get_page(sg_page(sge)); 2534 sg_set_page(nsge, 2535 sg_page(sge), 2536 b, sge->offset + pop + a); 2537 } else { 2538 struct page *page, *orig; 2539 u8 *to, *from; 2540 2541 page = alloc_pages(__GFP_NOWARN | 2542 __GFP_COMP | GFP_ATOMIC, 2543 get_order(a + b)); 2544 if (unlikely(!page)) 2545 return -ENOMEM; 2546 2547 sge->length = a; 2548 orig = sg_page(sge); 2549 from = sg_virt(sge); 2550 to = page_address(page); 2551 memcpy(to, from, a); 2552 memcpy(to + a, from + a + pop, b); 2553 sg_set_page(sge, page, a + b, 0); 2554 put_page(orig); 2555 } 2556 pop = 0; 2557 } else if (pop >= sge->length - a) { 2558 sge->length = a; 2559 pop -= (sge->length - a); 2560 } 2561 } 2562 2563 /* From above the current layout _must_ be as follows, 2564 * 2565 * -| offset 2566 * -| start 2567 * 2568 * |---- pop ---|---------------- b ------------| 2569 * |____________________________________________| length 2570 * 2571 * Offset and start of the current msg elem are equal because in the 2572 * previous case we handled offset != start and either consumed the 2573 * entire element and advanced to the next element OR pop == 0. 2574 * 2575 * Two cases to handle here are first pop is less than the length 2576 * leaving some remainder b above. Simply adjust the element's layout 2577 * in this case. Or pop >= length of the element so that b = 0. In this 2578 * case advance to next element decrementing pop. 2579 */ 2580 while (pop) { 2581 struct scatterlist *sge = sk_msg_elem(msg, i); 2582 2583 if (pop < sge->length) { 2584 sge->length -= pop; 2585 sge->offset += pop; 2586 pop = 0; 2587 } else { 2588 pop -= sge->length; 2589 sk_msg_shift_left(msg, i); 2590 } 2591 sk_msg_iter_var_next(i); 2592 } 2593 2594 sk_mem_uncharge(msg->sk, len - pop); 2595 msg->sg.size -= (len - pop); 2596 sk_msg_compute_data_pointers(msg); 2597 return 0; 2598 } 2599 2600 static const struct bpf_func_proto bpf_msg_pop_data_proto = { 2601 .func = bpf_msg_pop_data, 2602 .gpl_only = false, 2603 .ret_type = RET_INTEGER, 2604 .arg1_type = ARG_PTR_TO_CTX, 2605 .arg2_type = ARG_ANYTHING, 2606 .arg3_type = ARG_ANYTHING, 2607 .arg4_type = ARG_ANYTHING, 2608 }; 2609 2610 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 2611 { 2612 return task_get_classid(skb); 2613 } 2614 2615 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { 2616 .func = bpf_get_cgroup_classid, 2617 .gpl_only = false, 2618 .ret_type = RET_INTEGER, 2619 .arg1_type = ARG_PTR_TO_CTX, 2620 }; 2621 2622 BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) 2623 { 2624 return dst_tclassid(skb); 2625 } 2626 2627 static const struct bpf_func_proto bpf_get_route_realm_proto = { 2628 .func = bpf_get_route_realm, 2629 .gpl_only = false, 2630 .ret_type = RET_INTEGER, 2631 .arg1_type = ARG_PTR_TO_CTX, 2632 }; 2633 2634 BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) 2635 { 2636 /* If skb_clear_hash() was called due to mangling, we can 2637 * trigger SW recalculation here. Later access to hash 2638 * can then use the inline skb->hash via context directly 2639 * instead of calling this helper again. 
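	 * A typical pattern is to call this once right after mangling the
	 * headers; later reads of skb->hash through the program context then
	 * see the recomputed value without another helper call.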
2640 */ 2641 return skb_get_hash(skb); 2642 } 2643 2644 static const struct bpf_func_proto bpf_get_hash_recalc_proto = { 2645 .func = bpf_get_hash_recalc, 2646 .gpl_only = false, 2647 .ret_type = RET_INTEGER, 2648 .arg1_type = ARG_PTR_TO_CTX, 2649 }; 2650 2651 BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) 2652 { 2653 /* After all direct packet write, this can be used once for 2654 * triggering a lazy recalc on next skb_get_hash() invocation. 2655 */ 2656 skb_clear_hash(skb); 2657 return 0; 2658 } 2659 2660 static const struct bpf_func_proto bpf_set_hash_invalid_proto = { 2661 .func = bpf_set_hash_invalid, 2662 .gpl_only = false, 2663 .ret_type = RET_INTEGER, 2664 .arg1_type = ARG_PTR_TO_CTX, 2665 }; 2666 2667 BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) 2668 { 2669 /* Set user specified hash as L4(+), so that it gets returned 2670 * on skb_get_hash() call unless BPF prog later on triggers a 2671 * skb_clear_hash(). 2672 */ 2673 __skb_set_sw_hash(skb, hash, true); 2674 return 0; 2675 } 2676 2677 static const struct bpf_func_proto bpf_set_hash_proto = { 2678 .func = bpf_set_hash, 2679 .gpl_only = false, 2680 .ret_type = RET_INTEGER, 2681 .arg1_type = ARG_PTR_TO_CTX, 2682 .arg2_type = ARG_ANYTHING, 2683 }; 2684 2685 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, 2686 u16, vlan_tci) 2687 { 2688 int ret; 2689 2690 if (unlikely(vlan_proto != htons(ETH_P_8021Q) && 2691 vlan_proto != htons(ETH_P_8021AD))) 2692 vlan_proto = htons(ETH_P_8021Q); 2693 2694 bpf_push_mac_rcsum(skb); 2695 ret = skb_vlan_push(skb, vlan_proto, vlan_tci); 2696 bpf_pull_mac_rcsum(skb); 2697 2698 bpf_compute_data_pointers(skb); 2699 return ret; 2700 } 2701 2702 static const struct bpf_func_proto bpf_skb_vlan_push_proto = { 2703 .func = bpf_skb_vlan_push, 2704 .gpl_only = false, 2705 .ret_type = RET_INTEGER, 2706 .arg1_type = ARG_PTR_TO_CTX, 2707 .arg2_type = ARG_ANYTHING, 2708 .arg3_type = ARG_ANYTHING, 2709 }; 2710 2711 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) 2712 { 2713 int ret; 2714 2715 bpf_push_mac_rcsum(skb); 2716 ret = skb_vlan_pop(skb); 2717 bpf_pull_mac_rcsum(skb); 2718 2719 bpf_compute_data_pointers(skb); 2720 return ret; 2721 } 2722 2723 static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { 2724 .func = bpf_skb_vlan_pop, 2725 .gpl_only = false, 2726 .ret_type = RET_INTEGER, 2727 .arg1_type = ARG_PTR_TO_CTX, 2728 }; 2729 2730 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) 2731 { 2732 /* Caller already did skb_cow() with len as headroom, 2733 * so no need to do it here. 2734 */ 2735 skb_push(skb, len); 2736 memmove(skb->data, skb->data + len, off); 2737 memset(skb->data + off, 0, len); 2738 2739 /* No skb_postpush_rcsum(skb, skb->data + off, len) 2740 * needed here as it does not change the skb->csum 2741 * result for checksum complete when summing over 2742 * zeroed blocks. 2743 */ 2744 return 0; 2745 } 2746 2747 static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) 2748 { 2749 /* skb_ensure_writable() is not needed here, as we're 2750 * already working on an uncloned skb. 
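	 * pskb_may_pull() below only guarantees the bytes being removed are in
	 * the linear area; skb_postpull_rcsum() then keeps skb->csum valid for
	 * CHECKSUM_COMPLETE before the memmove slides the leading 'off' bytes
	 * over the removed region.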
2751 */ 2752 if (unlikely(!pskb_may_pull(skb, off + len))) 2753 return -ENOMEM; 2754 2755 skb_postpull_rcsum(skb, skb->data + off, len); 2756 memmove(skb->data + len, skb->data, off); 2757 __skb_pull(skb, len); 2758 2759 return 0; 2760 } 2761 2762 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) 2763 { 2764 bool trans_same = skb->transport_header == skb->network_header; 2765 int ret; 2766 2767 /* There's no need for __skb_push()/__skb_pull() pair to 2768 * get to the start of the mac header as we're guaranteed 2769 * to always start from here under eBPF. 2770 */ 2771 ret = bpf_skb_generic_push(skb, off, len); 2772 if (likely(!ret)) { 2773 skb->mac_header -= len; 2774 skb->network_header -= len; 2775 if (trans_same) 2776 skb->transport_header = skb->network_header; 2777 } 2778 2779 return ret; 2780 } 2781 2782 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) 2783 { 2784 bool trans_same = skb->transport_header == skb->network_header; 2785 int ret; 2786 2787 /* Same here, __skb_push()/__skb_pull() pair not needed. */ 2788 ret = bpf_skb_generic_pop(skb, off, len); 2789 if (likely(!ret)) { 2790 skb->mac_header += len; 2791 skb->network_header += len; 2792 if (trans_same) 2793 skb->transport_header = skb->network_header; 2794 } 2795 2796 return ret; 2797 } 2798 2799 static int bpf_skb_proto_4_to_6(struct sk_buff *skb) 2800 { 2801 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 2802 u32 off = skb_mac_header_len(skb); 2803 int ret; 2804 2805 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) 2806 return -ENOTSUPP; 2807 2808 ret = skb_cow(skb, len_diff); 2809 if (unlikely(ret < 0)) 2810 return ret; 2811 2812 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 2813 if (unlikely(ret < 0)) 2814 return ret; 2815 2816 if (skb_is_gso(skb)) { 2817 struct skb_shared_info *shinfo = skb_shinfo(skb); 2818 2819 /* SKB_GSO_TCPV4 needs to be changed into 2820 * SKB_GSO_TCPV6. 2821 */ 2822 if (shinfo->gso_type & SKB_GSO_TCPV4) { 2823 shinfo->gso_type &= ~SKB_GSO_TCPV4; 2824 shinfo->gso_type |= SKB_GSO_TCPV6; 2825 } 2826 2827 /* Due to IPv6 header, MSS needs to be downgraded. */ 2828 skb_decrease_gso_size(shinfo, len_diff); 2829 /* Header must be checked, and gso_segs recomputed. */ 2830 shinfo->gso_type |= SKB_GSO_DODGY; 2831 shinfo->gso_segs = 0; 2832 } 2833 2834 skb->protocol = htons(ETH_P_IPV6); 2835 skb_clear_hash(skb); 2836 2837 return 0; 2838 } 2839 2840 static int bpf_skb_proto_6_to_4(struct sk_buff *skb) 2841 { 2842 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 2843 u32 off = skb_mac_header_len(skb); 2844 int ret; 2845 2846 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) 2847 return -ENOTSUPP; 2848 2849 ret = skb_unclone(skb, GFP_ATOMIC); 2850 if (unlikely(ret < 0)) 2851 return ret; 2852 2853 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 2854 if (unlikely(ret < 0)) 2855 return ret; 2856 2857 if (skb_is_gso(skb)) { 2858 struct skb_shared_info *shinfo = skb_shinfo(skb); 2859 2860 /* SKB_GSO_TCPV6 needs to be changed into 2861 * SKB_GSO_TCPV4. 2862 */ 2863 if (shinfo->gso_type & SKB_GSO_TCPV6) { 2864 shinfo->gso_type &= ~SKB_GSO_TCPV6; 2865 shinfo->gso_type |= SKB_GSO_TCPV4; 2866 } 2867 2868 /* Due to IPv4 header, MSS can be upgraded. */ 2869 skb_increase_gso_size(shinfo, len_diff); 2870 /* Header must be checked, and gso_segs recomputed. 
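		 * Clearing gso_segs below forces the GSO layer to recount
		 * segments against the larger MSS set by
		 * skb_increase_gso_size() above.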
*/ 2871 shinfo->gso_type |= SKB_GSO_DODGY; 2872 shinfo->gso_segs = 0; 2873 } 2874 2875 skb->protocol = htons(ETH_P_IP); 2876 skb_clear_hash(skb); 2877 2878 return 0; 2879 } 2880 2881 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) 2882 { 2883 __be16 from_proto = skb->protocol; 2884 2885 if (from_proto == htons(ETH_P_IP) && 2886 to_proto == htons(ETH_P_IPV6)) 2887 return bpf_skb_proto_4_to_6(skb); 2888 2889 if (from_proto == htons(ETH_P_IPV6) && 2890 to_proto == htons(ETH_P_IP)) 2891 return bpf_skb_proto_6_to_4(skb); 2892 2893 return -ENOTSUPP; 2894 } 2895 2896 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, 2897 u64, flags) 2898 { 2899 int ret; 2900 2901 if (unlikely(flags)) 2902 return -EINVAL; 2903 2904 /* General idea is that this helper does the basic groundwork 2905 * needed for changing the protocol, and eBPF program fills the 2906 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() 2907 * and other helpers, rather than passing a raw buffer here. 2908 * 2909 * The rationale is to keep this minimal and without a need to 2910 * deal with raw packet data. F.e. even if we would pass buffers 2911 * here, the program still needs to call the bpf_lX_csum_replace() 2912 * helpers anyway. Plus, this way we keep also separation of 2913 * concerns, since f.e. bpf_skb_store_bytes() should only take 2914 * care of stores. 2915 * 2916 * Currently, additional options and extension header space are 2917 * not supported, but flags register is reserved so we can adapt 2918 * that. For offloads, we mark packet as dodgy, so that headers 2919 * need to be verified first. 2920 */ 2921 ret = bpf_skb_proto_xlat(skb, proto); 2922 bpf_compute_data_pointers(skb); 2923 return ret; 2924 } 2925 2926 static const struct bpf_func_proto bpf_skb_change_proto_proto = { 2927 .func = bpf_skb_change_proto, 2928 .gpl_only = false, 2929 .ret_type = RET_INTEGER, 2930 .arg1_type = ARG_PTR_TO_CTX, 2931 .arg2_type = ARG_ANYTHING, 2932 .arg3_type = ARG_ANYTHING, 2933 }; 2934 2935 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) 2936 { 2937 /* We only allow a restricted subset to be changed for now. */ 2938 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || 2939 !skb_pkt_type_ok(pkt_type))) 2940 return -EINVAL; 2941 2942 skb->pkt_type = pkt_type; 2943 return 0; 2944 } 2945 2946 static const struct bpf_func_proto bpf_skb_change_type_proto = { 2947 .func = bpf_skb_change_type, 2948 .gpl_only = false, 2949 .ret_type = RET_INTEGER, 2950 .arg1_type = ARG_PTR_TO_CTX, 2951 .arg2_type = ARG_ANYTHING, 2952 }; 2953 2954 static u32 bpf_skb_net_base_len(const struct sk_buff *skb) 2955 { 2956 switch (skb->protocol) { 2957 case htons(ETH_P_IP): 2958 return sizeof(struct iphdr); 2959 case htons(ETH_P_IPV6): 2960 return sizeof(struct ipv6hdr); 2961 default: 2962 return ~0U; 2963 } 2964 } 2965 2966 static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) 2967 { 2968 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); 2969 int ret; 2970 2971 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) 2972 return -ENOTSUPP; 2973 2974 ret = skb_cow(skb, len_diff); 2975 if (unlikely(ret < 0)) 2976 return ret; 2977 2978 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 2979 if (unlikely(ret < 0)) 2980 return ret; 2981 2982 if (skb_is_gso(skb)) { 2983 struct skb_shared_info *shinfo = skb_shinfo(skb); 2984 2985 /* Due to header grow, MSS needs to be downgraded. */ 2986 skb_decrease_gso_size(shinfo, len_diff); 2987 /* Header must be checked, and gso_segs recomputed. 
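		 * SKB_GSO_DODGY additionally makes the GSO layer re-validate
		 * the now longer headers before segmenting with the reduced
		 * MSS.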
*/ 2988 shinfo->gso_type |= SKB_GSO_DODGY; 2989 shinfo->gso_segs = 0; 2990 } 2991 2992 return 0; 2993 } 2994 2995 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) 2996 { 2997 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); 2998 int ret; 2999 3000 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) 3001 return -ENOTSUPP; 3002 3003 ret = skb_unclone(skb, GFP_ATOMIC); 3004 if (unlikely(ret < 0)) 3005 return ret; 3006 3007 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 3008 if (unlikely(ret < 0)) 3009 return ret; 3010 3011 if (skb_is_gso(skb)) { 3012 struct skb_shared_info *shinfo = skb_shinfo(skb); 3013 3014 /* Due to header shrink, MSS can be upgraded. */ 3015 skb_increase_gso_size(shinfo, len_diff); 3016 /* Header must be checked, and gso_segs recomputed. */ 3017 shinfo->gso_type |= SKB_GSO_DODGY; 3018 shinfo->gso_segs = 0; 3019 } 3020 3021 return 0; 3022 } 3023 3024 static u32 __bpf_skb_max_len(const struct sk_buff *skb) 3025 { 3026 return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len : 3027 SKB_MAX_ALLOC; 3028 } 3029 3030 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) 3031 { 3032 bool trans_same = skb->transport_header == skb->network_header; 3033 u32 len_cur, len_diff_abs = abs(len_diff); 3034 u32 len_min = bpf_skb_net_base_len(skb); 3035 u32 len_max = __bpf_skb_max_len(skb); 3036 __be16 proto = skb->protocol; 3037 bool shrink = len_diff < 0; 3038 int ret; 3039 3040 if (unlikely(len_diff_abs > 0xfffU)) 3041 return -EFAULT; 3042 if (unlikely(proto != htons(ETH_P_IP) && 3043 proto != htons(ETH_P_IPV6))) 3044 return -ENOTSUPP; 3045 3046 len_cur = skb->len - skb_network_offset(skb); 3047 if (skb_transport_header_was_set(skb) && !trans_same) 3048 len_cur = skb_network_header_len(skb); 3049 if ((shrink && (len_diff_abs >= len_cur || 3050 len_cur - len_diff_abs < len_min)) || 3051 (!shrink && (skb->len + len_diff_abs > len_max && 3052 !skb_is_gso(skb)))) 3053 return -ENOTSUPP; 3054 3055 ret = shrink ? 
bpf_skb_net_shrink(skb, len_diff_abs) : 3056 bpf_skb_net_grow(skb, len_diff_abs); 3057 3058 bpf_compute_data_pointers(skb); 3059 return ret; 3060 } 3061 3062 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, 3063 u32, mode, u64, flags) 3064 { 3065 if (unlikely(flags)) 3066 return -EINVAL; 3067 if (likely(mode == BPF_ADJ_ROOM_NET)) 3068 return bpf_skb_adjust_net(skb, len_diff); 3069 3070 return -ENOTSUPP; 3071 } 3072 3073 static const struct bpf_func_proto bpf_skb_adjust_room_proto = { 3074 .func = bpf_skb_adjust_room, 3075 .gpl_only = false, 3076 .ret_type = RET_INTEGER, 3077 .arg1_type = ARG_PTR_TO_CTX, 3078 .arg2_type = ARG_ANYTHING, 3079 .arg3_type = ARG_ANYTHING, 3080 .arg4_type = ARG_ANYTHING, 3081 }; 3082 3083 static u32 __bpf_skb_min_len(const struct sk_buff *skb) 3084 { 3085 u32 min_len = skb_network_offset(skb); 3086 3087 if (skb_transport_header_was_set(skb)) 3088 min_len = skb_transport_offset(skb); 3089 if (skb->ip_summed == CHECKSUM_PARTIAL) 3090 min_len = skb_checksum_start_offset(skb) + 3091 skb->csum_offset + sizeof(__sum16); 3092 return min_len; 3093 } 3094 3095 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) 3096 { 3097 unsigned int old_len = skb->len; 3098 int ret; 3099 3100 ret = __skb_grow_rcsum(skb, new_len); 3101 if (!ret) 3102 memset(skb->data + old_len, 0, new_len - old_len); 3103 return ret; 3104 } 3105 3106 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) 3107 { 3108 return __skb_trim_rcsum(skb, new_len); 3109 } 3110 3111 static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, 3112 u64 flags) 3113 { 3114 u32 max_len = __bpf_skb_max_len(skb); 3115 u32 min_len = __bpf_skb_min_len(skb); 3116 int ret; 3117 3118 if (unlikely(flags || new_len > max_len || new_len < min_len)) 3119 return -EINVAL; 3120 if (skb->encapsulation) 3121 return -ENOTSUPP; 3122 3123 /* The basic idea of this helper is that it's performing the 3124 * needed work to either grow or trim an skb, and eBPF program 3125 * rewrites the rest via helpers like bpf_skb_store_bytes(), 3126 * bpf_lX_csum_replace() and others rather than passing a raw 3127 * buffer here. This one is a slow path helper and intended 3128 * for replies with control messages. 3129 * 3130 * Like in bpf_skb_change_proto(), we want to keep this rather 3131 * minimal and without protocol specifics so that we are able 3132 * to separate concerns as in bpf_skb_store_bytes() should only 3133 * be the one responsible for writing buffers. 3134 * 3135 * It's really expected to be a slow path operation here for 3136 * control message replies, so we're implicitly linearizing, 3137 * uncloning and drop offloads from the skb by this. 
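	 *
	 * A hedged program-side example: trimming an skb so it carries only a
	 * fixed-size reply, e.g. bpf_skb_change_tail(skb, reply_len, 0) with a
	 * hypothetical reply_len, then filling it via bpf_skb_store_bytes().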
3138 */ 3139 ret = __bpf_try_make_writable(skb, skb->len); 3140 if (!ret) { 3141 if (new_len > skb->len) 3142 ret = bpf_skb_grow_rcsum(skb, new_len); 3143 else if (new_len < skb->len) 3144 ret = bpf_skb_trim_rcsum(skb, new_len); 3145 if (!ret && skb_is_gso(skb)) 3146 skb_gso_reset(skb); 3147 } 3148 return ret; 3149 } 3150 3151 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, 3152 u64, flags) 3153 { 3154 int ret = __bpf_skb_change_tail(skb, new_len, flags); 3155 3156 bpf_compute_data_pointers(skb); 3157 return ret; 3158 } 3159 3160 static const struct bpf_func_proto bpf_skb_change_tail_proto = { 3161 .func = bpf_skb_change_tail, 3162 .gpl_only = false, 3163 .ret_type = RET_INTEGER, 3164 .arg1_type = ARG_PTR_TO_CTX, 3165 .arg2_type = ARG_ANYTHING, 3166 .arg3_type = ARG_ANYTHING, 3167 }; 3168 3169 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, 3170 u64, flags) 3171 { 3172 int ret = __bpf_skb_change_tail(skb, new_len, flags); 3173 3174 bpf_compute_data_end_sk_skb(skb); 3175 return ret; 3176 } 3177 3178 static const struct bpf_func_proto sk_skb_change_tail_proto = { 3179 .func = sk_skb_change_tail, 3180 .gpl_only = false, 3181 .ret_type = RET_INTEGER, 3182 .arg1_type = ARG_PTR_TO_CTX, 3183 .arg2_type = ARG_ANYTHING, 3184 .arg3_type = ARG_ANYTHING, 3185 }; 3186 3187 static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, 3188 u64 flags) 3189 { 3190 u32 max_len = __bpf_skb_max_len(skb); 3191 u32 new_len = skb->len + head_room; 3192 int ret; 3193 3194 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || 3195 new_len < skb->len)) 3196 return -EINVAL; 3197 3198 ret = skb_cow(skb, head_room); 3199 if (likely(!ret)) { 3200 /* Idea for this helper is that we currently only 3201 * allow to expand on mac header. This means that 3202 * skb->protocol network header, etc, stay as is. 3203 * Compared to bpf_skb_change_tail(), we're more 3204 * flexible due to not needing to linearize or 3205 * reset GSO. Intention for this helper is to be 3206 * used by an L3 skb that needs to push mac header 3207 * for redirection into L2 device. 3208 */ 3209 __skb_push(skb, head_room); 3210 memset(skb->data, 0, head_room); 3211 skb_reset_mac_header(skb); 3212 } 3213 3214 return ret; 3215 } 3216 3217 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, 3218 u64, flags) 3219 { 3220 int ret = __bpf_skb_change_head(skb, head_room, flags); 3221 3222 bpf_compute_data_pointers(skb); 3223 return ret; 3224 } 3225 3226 static const struct bpf_func_proto bpf_skb_change_head_proto = { 3227 .func = bpf_skb_change_head, 3228 .gpl_only = false, 3229 .ret_type = RET_INTEGER, 3230 .arg1_type = ARG_PTR_TO_CTX, 3231 .arg2_type = ARG_ANYTHING, 3232 .arg3_type = ARG_ANYTHING, 3233 }; 3234 3235 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, 3236 u64, flags) 3237 { 3238 int ret = __bpf_skb_change_head(skb, head_room, flags); 3239 3240 bpf_compute_data_end_sk_skb(skb); 3241 return ret; 3242 } 3243 3244 static const struct bpf_func_proto sk_skb_change_head_proto = { 3245 .func = sk_skb_change_head, 3246 .gpl_only = false, 3247 .ret_type = RET_INTEGER, 3248 .arg1_type = ARG_PTR_TO_CTX, 3249 .arg2_type = ARG_ANYTHING, 3250 .arg3_type = ARG_ANYTHING, 3251 }; 3252 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) 3253 { 3254 return xdp_data_meta_unsupported(xdp) ? 
0 : 3255 xdp->data - xdp->data_meta; 3256 } 3257 3258 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) 3259 { 3260 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); 3261 unsigned long metalen = xdp_get_metalen(xdp); 3262 void *data_start = xdp_frame_end + metalen; 3263 void *data = xdp->data + offset; 3264 3265 if (unlikely(data < data_start || 3266 data > xdp->data_end - ETH_HLEN)) 3267 return -EINVAL; 3268 3269 if (metalen) 3270 memmove(xdp->data_meta + offset, 3271 xdp->data_meta, metalen); 3272 xdp->data_meta += offset; 3273 xdp->data = data; 3274 3275 return 0; 3276 } 3277 3278 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { 3279 .func = bpf_xdp_adjust_head, 3280 .gpl_only = false, 3281 .ret_type = RET_INTEGER, 3282 .arg1_type = ARG_PTR_TO_CTX, 3283 .arg2_type = ARG_ANYTHING, 3284 }; 3285 3286 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) 3287 { 3288 void *data_end = xdp->data_end + offset; 3289 3290 /* only shrinking is allowed for now. */ 3291 if (unlikely(offset >= 0)) 3292 return -EINVAL; 3293 3294 if (unlikely(data_end < xdp->data + ETH_HLEN)) 3295 return -EINVAL; 3296 3297 xdp->data_end = data_end; 3298 3299 return 0; 3300 } 3301 3302 static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { 3303 .func = bpf_xdp_adjust_tail, 3304 .gpl_only = false, 3305 .ret_type = RET_INTEGER, 3306 .arg1_type = ARG_PTR_TO_CTX, 3307 .arg2_type = ARG_ANYTHING, 3308 }; 3309 3310 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) 3311 { 3312 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame); 3313 void *meta = xdp->data_meta + offset; 3314 unsigned long metalen = xdp->data - meta; 3315 3316 if (xdp_data_meta_unsupported(xdp)) 3317 return -ENOTSUPP; 3318 if (unlikely(meta < xdp_frame_end || 3319 meta > xdp->data)) 3320 return -EINVAL; 3321 if (unlikely((metalen & (sizeof(__u32) - 1)) || 3322 (metalen > 32))) 3323 return -EACCES; 3324 3325 xdp->data_meta = meta; 3326 3327 return 0; 3328 } 3329 3330 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { 3331 .func = bpf_xdp_adjust_meta, 3332 .gpl_only = false, 3333 .ret_type = RET_INTEGER, 3334 .arg1_type = ARG_PTR_TO_CTX, 3335 .arg2_type = ARG_ANYTHING, 3336 }; 3337 3338 static int __bpf_tx_xdp(struct net_device *dev, 3339 struct bpf_map *map, 3340 struct xdp_buff *xdp, 3341 u32 index) 3342 { 3343 struct xdp_frame *xdpf; 3344 int err, sent; 3345 3346 if (!dev->netdev_ops->ndo_xdp_xmit) { 3347 return -EOPNOTSUPP; 3348 } 3349 3350 err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); 3351 if (unlikely(err)) 3352 return err; 3353 3354 xdpf = convert_to_xdp_frame(xdp); 3355 if (unlikely(!xdpf)) 3356 return -EOVERFLOW; 3357 3358 sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH); 3359 if (sent <= 0) 3360 return sent; 3361 return 0; 3362 } 3363 3364 static noinline int 3365 xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp, 3366 struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri) 3367 { 3368 struct net_device *fwd; 3369 u32 index = ri->ifindex; 3370 int err; 3371 3372 fwd = dev_get_by_index_rcu(dev_net(dev), index); 3373 ri->ifindex = 0; 3374 if (unlikely(!fwd)) { 3375 err = -EINVAL; 3376 goto err; 3377 } 3378 3379 err = __bpf_tx_xdp(fwd, NULL, xdp, 0); 3380 if (unlikely(err)) 3381 goto err; 3382 3383 _trace_xdp_redirect(dev, xdp_prog, index); 3384 return 0; 3385 err: 3386 _trace_xdp_redirect_err(dev, xdp_prog, index, err); 3387 return err; 3388 } 3389 3390 static int 
__bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, 3391 struct bpf_map *map, 3392 struct xdp_buff *xdp, 3393 u32 index) 3394 { 3395 int err; 3396 3397 switch (map->map_type) { 3398 case BPF_MAP_TYPE_DEVMAP: { 3399 struct bpf_dtab_netdev *dst = fwd; 3400 3401 err = dev_map_enqueue(dst, xdp, dev_rx); 3402 if (unlikely(err)) 3403 return err; 3404 __dev_map_insert_ctx(map, index); 3405 break; 3406 } 3407 case BPF_MAP_TYPE_CPUMAP: { 3408 struct bpf_cpu_map_entry *rcpu = fwd; 3409 3410 err = cpu_map_enqueue(rcpu, xdp, dev_rx); 3411 if (unlikely(err)) 3412 return err; 3413 __cpu_map_insert_ctx(map, index); 3414 break; 3415 } 3416 case BPF_MAP_TYPE_XSKMAP: { 3417 struct xdp_sock *xs = fwd; 3418 3419 err = __xsk_map_redirect(map, xdp, xs); 3420 return err; 3421 } 3422 default: 3423 break; 3424 } 3425 return 0; 3426 } 3427 3428 void xdp_do_flush_map(void) 3429 { 3430 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3431 struct bpf_map *map = ri->map_to_flush; 3432 3433 ri->map_to_flush = NULL; 3434 if (map) { 3435 switch (map->map_type) { 3436 case BPF_MAP_TYPE_DEVMAP: 3437 __dev_map_flush(map); 3438 break; 3439 case BPF_MAP_TYPE_CPUMAP: 3440 __cpu_map_flush(map); 3441 break; 3442 case BPF_MAP_TYPE_XSKMAP: 3443 __xsk_map_flush(map); 3444 break; 3445 default: 3446 break; 3447 } 3448 } 3449 } 3450 EXPORT_SYMBOL_GPL(xdp_do_flush_map); 3451 3452 static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) 3453 { 3454 switch (map->map_type) { 3455 case BPF_MAP_TYPE_DEVMAP: 3456 return __dev_map_lookup_elem(map, index); 3457 case BPF_MAP_TYPE_CPUMAP: 3458 return __cpu_map_lookup_elem(map, index); 3459 case BPF_MAP_TYPE_XSKMAP: 3460 return __xsk_map_lookup_elem(map, index); 3461 default: 3462 return NULL; 3463 } 3464 } 3465 3466 void bpf_clear_redirect_map(struct bpf_map *map) 3467 { 3468 struct bpf_redirect_info *ri; 3469 int cpu; 3470 3471 for_each_possible_cpu(cpu) { 3472 ri = per_cpu_ptr(&bpf_redirect_info, cpu); 3473 /* Avoid polluting remote cacheline due to writes if 3474 * not needed. Once we pass this test, we need the 3475 * cmpxchg() to make sure it hasn't been changed in 3476 * the meantime by remote CPU. 
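		 * cmpxchg() rather than a plain store, so that a concurrent
		 * bpf_xdp_redirect_map() that has just installed a different
		 * map in ri->map is not wiped out here.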
3477 */ 3478 if (unlikely(READ_ONCE(ri->map) == map)) 3479 cmpxchg(&ri->map, map, NULL); 3480 } 3481 } 3482 3483 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, 3484 struct bpf_prog *xdp_prog, struct bpf_map *map, 3485 struct bpf_redirect_info *ri) 3486 { 3487 u32 index = ri->ifindex; 3488 void *fwd = NULL; 3489 int err; 3490 3491 ri->ifindex = 0; 3492 WRITE_ONCE(ri->map, NULL); 3493 3494 fwd = __xdp_map_lookup_elem(map, index); 3495 if (unlikely(!fwd)) { 3496 err = -EINVAL; 3497 goto err; 3498 } 3499 if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) 3500 xdp_do_flush_map(); 3501 3502 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); 3503 if (unlikely(err)) 3504 goto err; 3505 3506 ri->map_to_flush = map; 3507 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); 3508 return 0; 3509 err: 3510 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); 3511 return err; 3512 } 3513 3514 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 3515 struct bpf_prog *xdp_prog) 3516 { 3517 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3518 struct bpf_map *map = READ_ONCE(ri->map); 3519 3520 if (likely(map)) 3521 return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri); 3522 3523 return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri); 3524 } 3525 EXPORT_SYMBOL_GPL(xdp_do_redirect); 3526 3527 static int xdp_do_generic_redirect_map(struct net_device *dev, 3528 struct sk_buff *skb, 3529 struct xdp_buff *xdp, 3530 struct bpf_prog *xdp_prog, 3531 struct bpf_map *map) 3532 { 3533 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3534 u32 index = ri->ifindex; 3535 void *fwd = NULL; 3536 int err = 0; 3537 3538 ri->ifindex = 0; 3539 WRITE_ONCE(ri->map, NULL); 3540 3541 fwd = __xdp_map_lookup_elem(map, index); 3542 if (unlikely(!fwd)) { 3543 err = -EINVAL; 3544 goto err; 3545 } 3546 3547 if (map->map_type == BPF_MAP_TYPE_DEVMAP) { 3548 struct bpf_dtab_netdev *dst = fwd; 3549 3550 err = dev_map_generic_redirect(dst, skb, xdp_prog); 3551 if (unlikely(err)) 3552 goto err; 3553 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { 3554 struct xdp_sock *xs = fwd; 3555 3556 err = xsk_generic_rcv(xs, xdp); 3557 if (err) 3558 goto err; 3559 consume_skb(skb); 3560 } else { 3561 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ 3562 err = -EBADRQC; 3563 goto err; 3564 } 3565 3566 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); 3567 return 0; 3568 err: 3569 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); 3570 return err; 3571 } 3572 3573 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 3574 struct xdp_buff *xdp, struct bpf_prog *xdp_prog) 3575 { 3576 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3577 struct bpf_map *map = READ_ONCE(ri->map); 3578 u32 index = ri->ifindex; 3579 struct net_device *fwd; 3580 int err = 0; 3581 3582 if (map) 3583 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, 3584 map); 3585 ri->ifindex = 0; 3586 fwd = dev_get_by_index_rcu(dev_net(dev), index); 3587 if (unlikely(!fwd)) { 3588 err = -EINVAL; 3589 goto err; 3590 } 3591 3592 err = xdp_ok_fwd_dev(fwd, skb->len); 3593 if (unlikely(err)) 3594 goto err; 3595 3596 skb->dev = fwd; 3597 _trace_xdp_redirect(dev, xdp_prog, index); 3598 generic_xdp_tx(skb, xdp_prog); 3599 return 0; 3600 err: 3601 _trace_xdp_redirect_err(dev, xdp_prog, index, err); 3602 return err; 3603 } 3604 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); 3605 3606 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) 3607 { 3608 
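	/* Only record the target in this CPU's bpf_redirect_info; the actual
	 * forwarding is done later by xdp_do_redirect() once the program has
	 * returned XDP_REDIRECT.
	 */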
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3609 3610 if (unlikely(flags)) 3611 return XDP_ABORTED; 3612 3613 ri->ifindex = ifindex; 3614 ri->flags = flags; 3615 WRITE_ONCE(ri->map, NULL); 3616 3617 return XDP_REDIRECT; 3618 } 3619 3620 static const struct bpf_func_proto bpf_xdp_redirect_proto = { 3621 .func = bpf_xdp_redirect, 3622 .gpl_only = false, 3623 .ret_type = RET_INTEGER, 3624 .arg1_type = ARG_ANYTHING, 3625 .arg2_type = ARG_ANYTHING, 3626 }; 3627 3628 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, 3629 u64, flags) 3630 { 3631 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3632 3633 if (unlikely(flags)) 3634 return XDP_ABORTED; 3635 3636 ri->ifindex = ifindex; 3637 ri->flags = flags; 3638 WRITE_ONCE(ri->map, map); 3639 3640 return XDP_REDIRECT; 3641 } 3642 3643 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { 3644 .func = bpf_xdp_redirect_map, 3645 .gpl_only = false, 3646 .ret_type = RET_INTEGER, 3647 .arg1_type = ARG_CONST_MAP_PTR, 3648 .arg2_type = ARG_ANYTHING, 3649 .arg3_type = ARG_ANYTHING, 3650 }; 3651 3652 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, 3653 unsigned long off, unsigned long len) 3654 { 3655 void *ptr = skb_header_pointer(skb, off, len, dst_buff); 3656 3657 if (unlikely(!ptr)) 3658 return len; 3659 if (ptr != dst_buff) 3660 memcpy(dst_buff, ptr, len); 3661 3662 return 0; 3663 } 3664 3665 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, 3666 u64, flags, void *, meta, u64, meta_size) 3667 { 3668 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 3669 3670 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 3671 return -EINVAL; 3672 if (unlikely(skb_size > skb->len)) 3673 return -EFAULT; 3674 3675 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, 3676 bpf_skb_copy); 3677 } 3678 3679 static const struct bpf_func_proto bpf_skb_event_output_proto = { 3680 .func = bpf_skb_event_output, 3681 .gpl_only = true, 3682 .ret_type = RET_INTEGER, 3683 .arg1_type = ARG_PTR_TO_CTX, 3684 .arg2_type = ARG_CONST_MAP_PTR, 3685 .arg3_type = ARG_ANYTHING, 3686 .arg4_type = ARG_PTR_TO_MEM, 3687 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 3688 }; 3689 3690 static unsigned short bpf_tunnel_key_af(u64 flags) 3691 { 3692 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; 3693 } 3694 3695 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, 3696 u32, size, u64, flags) 3697 { 3698 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 3699 u8 compat[sizeof(struct bpf_tunnel_key)]; 3700 void *to_orig = to; 3701 int err; 3702 3703 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { 3704 err = -EINVAL; 3705 goto err_clear; 3706 } 3707 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { 3708 err = -EPROTO; 3709 goto err_clear; 3710 } 3711 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 3712 err = -EINVAL; 3713 switch (size) { 3714 case offsetof(struct bpf_tunnel_key, tunnel_label): 3715 case offsetof(struct bpf_tunnel_key, tunnel_ext): 3716 goto set_compat; 3717 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 3718 /* Fixup deprecated structure layouts here, so we have 3719 * a common path later on. 
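		 * (sizes that older programs used before tunnel_label and the
		 * full IPv6 address array existed); such callers are served
		 * via the on-stack 'compat' buffer and get a truncated copy
		 * back.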
3720 */ 3721 if (ip_tunnel_info_af(info) != AF_INET) 3722 goto err_clear; 3723 set_compat: 3724 to = (struct bpf_tunnel_key *)compat; 3725 break; 3726 default: 3727 goto err_clear; 3728 } 3729 } 3730 3731 to->tunnel_id = be64_to_cpu(info->key.tun_id); 3732 to->tunnel_tos = info->key.tos; 3733 to->tunnel_ttl = info->key.ttl; 3734 to->tunnel_ext = 0; 3735 3736 if (flags & BPF_F_TUNINFO_IPV6) { 3737 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 3738 sizeof(to->remote_ipv6)); 3739 to->tunnel_label = be32_to_cpu(info->key.label); 3740 } else { 3741 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 3742 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); 3743 to->tunnel_label = 0; 3744 } 3745 3746 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 3747 memcpy(to_orig, to, size); 3748 3749 return 0; 3750 err_clear: 3751 memset(to_orig, 0, size); 3752 return err; 3753 } 3754 3755 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 3756 .func = bpf_skb_get_tunnel_key, 3757 .gpl_only = false, 3758 .ret_type = RET_INTEGER, 3759 .arg1_type = ARG_PTR_TO_CTX, 3760 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 3761 .arg3_type = ARG_CONST_SIZE, 3762 .arg4_type = ARG_ANYTHING, 3763 }; 3764 3765 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) 3766 { 3767 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 3768 int err; 3769 3770 if (unlikely(!info || 3771 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { 3772 err = -ENOENT; 3773 goto err_clear; 3774 } 3775 if (unlikely(size < info->options_len)) { 3776 err = -ENOMEM; 3777 goto err_clear; 3778 } 3779 3780 ip_tunnel_info_opts_get(to, info); 3781 if (size > info->options_len) 3782 memset(to + info->options_len, 0, size - info->options_len); 3783 3784 return info->options_len; 3785 err_clear: 3786 memset(to, 0, size); 3787 return err; 3788 } 3789 3790 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { 3791 .func = bpf_skb_get_tunnel_opt, 3792 .gpl_only = false, 3793 .ret_type = RET_INTEGER, 3794 .arg1_type = ARG_PTR_TO_CTX, 3795 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 3796 .arg3_type = ARG_CONST_SIZE, 3797 }; 3798 3799 static struct metadata_dst __percpu *md_dst; 3800 3801 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, 3802 const struct bpf_tunnel_key *, from, u32, size, u64, flags) 3803 { 3804 struct metadata_dst *md = this_cpu_ptr(md_dst); 3805 u8 compat[sizeof(struct bpf_tunnel_key)]; 3806 struct ip_tunnel_info *info; 3807 3808 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | 3809 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER))) 3810 return -EINVAL; 3811 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 3812 switch (size) { 3813 case offsetof(struct bpf_tunnel_key, tunnel_label): 3814 case offsetof(struct bpf_tunnel_key, tunnel_ext): 3815 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 3816 /* Fixup deprecated structure layouts here, so we have 3817 * a common path later on. 
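		 * As on the get() side above, short legacy layouts are copied
		 * into 'compat' and zero padded so the code below can treat
		 * every caller uniformly.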
3818 */ 3819 memcpy(compat, from, size); 3820 memset(compat + size, 0, sizeof(compat) - size); 3821 from = (const struct bpf_tunnel_key *) compat; 3822 break; 3823 default: 3824 return -EINVAL; 3825 } 3826 } 3827 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || 3828 from->tunnel_ext)) 3829 return -EINVAL; 3830 3831 skb_dst_drop(skb); 3832 dst_hold((struct dst_entry *) md); 3833 skb_dst_set(skb, (struct dst_entry *) md); 3834 3835 info = &md->u.tun_info; 3836 memset(info, 0, sizeof(*info)); 3837 info->mode = IP_TUNNEL_INFO_TX; 3838 3839 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; 3840 if (flags & BPF_F_DONT_FRAGMENT) 3841 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; 3842 if (flags & BPF_F_ZERO_CSUM_TX) 3843 info->key.tun_flags &= ~TUNNEL_CSUM; 3844 if (flags & BPF_F_SEQ_NUMBER) 3845 info->key.tun_flags |= TUNNEL_SEQ; 3846 3847 info->key.tun_id = cpu_to_be64(from->tunnel_id); 3848 info->key.tos = from->tunnel_tos; 3849 info->key.ttl = from->tunnel_ttl; 3850 3851 if (flags & BPF_F_TUNINFO_IPV6) { 3852 info->mode |= IP_TUNNEL_INFO_IPV6; 3853 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 3854 sizeof(from->remote_ipv6)); 3855 info->key.label = cpu_to_be32(from->tunnel_label) & 3856 IPV6_FLOWLABEL_MASK; 3857 } else { 3858 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 3859 } 3860 3861 return 0; 3862 } 3863 3864 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 3865 .func = bpf_skb_set_tunnel_key, 3866 .gpl_only = false, 3867 .ret_type = RET_INTEGER, 3868 .arg1_type = ARG_PTR_TO_CTX, 3869 .arg2_type = ARG_PTR_TO_MEM, 3870 .arg3_type = ARG_CONST_SIZE, 3871 .arg4_type = ARG_ANYTHING, 3872 }; 3873 3874 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, 3875 const u8 *, from, u32, size) 3876 { 3877 struct ip_tunnel_info *info = skb_tunnel_info(skb); 3878 const struct metadata_dst *md = this_cpu_ptr(md_dst); 3879 3880 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) 3881 return -EINVAL; 3882 if (unlikely(size > IP_TUNNEL_OPTS_MAX)) 3883 return -ENOMEM; 3884 3885 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT); 3886 3887 return 0; 3888 } 3889 3890 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { 3891 .func = bpf_skb_set_tunnel_opt, 3892 .gpl_only = false, 3893 .ret_type = RET_INTEGER, 3894 .arg1_type = ARG_PTR_TO_CTX, 3895 .arg2_type = ARG_PTR_TO_MEM, 3896 .arg3_type = ARG_CONST_SIZE, 3897 }; 3898 3899 static const struct bpf_func_proto * 3900 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) 3901 { 3902 if (!md_dst) { 3903 struct metadata_dst __percpu *tmp; 3904 3905 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, 3906 METADATA_IP_TUNNEL, 3907 GFP_KERNEL); 3908 if (!tmp) 3909 return NULL; 3910 if (cmpxchg(&md_dst, NULL, tmp)) 3911 metadata_dst_free_percpu(tmp); 3912 } 3913 3914 switch (which) { 3915 case BPF_FUNC_skb_set_tunnel_key: 3916 return &bpf_skb_set_tunnel_key_proto; 3917 case BPF_FUNC_skb_set_tunnel_opt: 3918 return &bpf_skb_set_tunnel_opt_proto; 3919 default: 3920 return NULL; 3921 } 3922 } 3923 3924 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, 3925 u32, idx) 3926 { 3927 struct bpf_array *array = container_of(map, struct bpf_array, map); 3928 struct cgroup *cgrp; 3929 struct sock *sk; 3930 3931 sk = skb_to_full_sk(skb); 3932 if (!sk || !sk_fullsock(sk)) 3933 return -ENOENT; 3934 if (unlikely(idx >= array->map.max_entries)) 3935 return -E2BIG; 3936 3937 cgrp = READ_ONCE(array->ptrs[idx]); 3938 if (unlikely(!cgrp)) 3939 
return -EAGAIN; 3940 3941 return sk_under_cgroup_hierarchy(sk, cgrp); 3942 } 3943 3944 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { 3945 .func = bpf_skb_under_cgroup, 3946 .gpl_only = false, 3947 .ret_type = RET_INTEGER, 3948 .arg1_type = ARG_PTR_TO_CTX, 3949 .arg2_type = ARG_CONST_MAP_PTR, 3950 .arg3_type = ARG_ANYTHING, 3951 }; 3952 3953 #ifdef CONFIG_SOCK_CGROUP_DATA 3954 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb) 3955 { 3956 struct sock *sk = skb_to_full_sk(skb); 3957 struct cgroup *cgrp; 3958 3959 if (!sk || !sk_fullsock(sk)) 3960 return 0; 3961 3962 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 3963 return cgrp->kn->id.id; 3964 } 3965 3966 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = { 3967 .func = bpf_skb_cgroup_id, 3968 .gpl_only = false, 3969 .ret_type = RET_INTEGER, 3970 .arg1_type = ARG_PTR_TO_CTX, 3971 }; 3972 3973 BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int, 3974 ancestor_level) 3975 { 3976 struct sock *sk = skb_to_full_sk(skb); 3977 struct cgroup *ancestor; 3978 struct cgroup *cgrp; 3979 3980 if (!sk || !sk_fullsock(sk)) 3981 return 0; 3982 3983 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 3984 ancestor = cgroup_ancestor(cgrp, ancestor_level); 3985 if (!ancestor) 3986 return 0; 3987 3988 return ancestor->kn->id.id; 3989 } 3990 3991 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = { 3992 .func = bpf_skb_ancestor_cgroup_id, 3993 .gpl_only = false, 3994 .ret_type = RET_INTEGER, 3995 .arg1_type = ARG_PTR_TO_CTX, 3996 .arg2_type = ARG_ANYTHING, 3997 }; 3998 #endif 3999 4000 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, 4001 unsigned long off, unsigned long len) 4002 { 4003 memcpy(dst_buff, src_buff + off, len); 4004 return 0; 4005 } 4006 4007 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, 4008 u64, flags, void *, meta, u64, meta_size) 4009 { 4010 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 4011 4012 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 4013 return -EINVAL; 4014 if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) 4015 return -EFAULT; 4016 4017 return bpf_event_output(map, flags, meta, meta_size, xdp->data, 4018 xdp_size, bpf_xdp_copy); 4019 } 4020 4021 static const struct bpf_func_proto bpf_xdp_event_output_proto = { 4022 .func = bpf_xdp_event_output, 4023 .gpl_only = true, 4024 .ret_type = RET_INTEGER, 4025 .arg1_type = ARG_PTR_TO_CTX, 4026 .arg2_type = ARG_CONST_MAP_PTR, 4027 .arg3_type = ARG_ANYTHING, 4028 .arg4_type = ARG_PTR_TO_MEM, 4029 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4030 }; 4031 4032 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) 4033 { 4034 return skb->sk ? 
sock_gen_cookie(skb->sk) : 0; 4035 } 4036 4037 static const struct bpf_func_proto bpf_get_socket_cookie_proto = { 4038 .func = bpf_get_socket_cookie, 4039 .gpl_only = false, 4040 .ret_type = RET_INTEGER, 4041 .arg1_type = ARG_PTR_TO_CTX, 4042 }; 4043 4044 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) 4045 { 4046 return sock_gen_cookie(ctx->sk); 4047 } 4048 4049 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { 4050 .func = bpf_get_socket_cookie_sock_addr, 4051 .gpl_only = false, 4052 .ret_type = RET_INTEGER, 4053 .arg1_type = ARG_PTR_TO_CTX, 4054 }; 4055 4056 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) 4057 { 4058 return sock_gen_cookie(ctx->sk); 4059 } 4060 4061 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { 4062 .func = bpf_get_socket_cookie_sock_ops, 4063 .gpl_only = false, 4064 .ret_type = RET_INTEGER, 4065 .arg1_type = ARG_PTR_TO_CTX, 4066 }; 4067 4068 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) 4069 { 4070 struct sock *sk = sk_to_full_sk(skb->sk); 4071 kuid_t kuid; 4072 4073 if (!sk || !sk_fullsock(sk)) 4074 return overflowuid; 4075 kuid = sock_net_uid(sock_net(sk), sk); 4076 return from_kuid_munged(sock_net(sk)->user_ns, kuid); 4077 } 4078 4079 static const struct bpf_func_proto bpf_get_socket_uid_proto = { 4080 .func = bpf_get_socket_uid, 4081 .gpl_only = false, 4082 .ret_type = RET_INTEGER, 4083 .arg1_type = ARG_PTR_TO_CTX, 4084 }; 4085 4086 BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock, 4087 struct bpf_map *, map, u64, flags, void *, data, u64, size) 4088 { 4089 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 4090 return -EINVAL; 4091 4092 return bpf_event_output(map, flags, data, size, NULL, 0, NULL); 4093 } 4094 4095 static const struct bpf_func_proto bpf_sockopt_event_output_proto = { 4096 .func = bpf_sockopt_event_output, 4097 .gpl_only = true, 4098 .ret_type = RET_INTEGER, 4099 .arg1_type = ARG_PTR_TO_CTX, 4100 .arg2_type = ARG_CONST_MAP_PTR, 4101 .arg3_type = ARG_ANYTHING, 4102 .arg4_type = ARG_PTR_TO_MEM, 4103 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 4104 }; 4105 4106 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, 4107 int, level, int, optname, char *, optval, int, optlen) 4108 { 4109 struct sock *sk = bpf_sock->sk; 4110 int ret = 0; 4111 int val; 4112 4113 if (!sk_fullsock(sk)) 4114 return -EINVAL; 4115 4116 if (level == SOL_SOCKET) { 4117 if (optlen != sizeof(int)) 4118 return -EINVAL; 4119 val = *((int *)optval); 4120 4121 /* Only some socketops are supported */ 4122 switch (optname) { 4123 case SO_RCVBUF: 4124 val = min_t(u32, val, sysctl_rmem_max); 4125 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 4126 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); 4127 break; 4128 case SO_SNDBUF: 4129 val = min_t(u32, val, sysctl_wmem_max); 4130 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 4131 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); 4132 break; 4133 case SO_MAX_PACING_RATE: /* 32bit version */ 4134 if (val != ~0U) 4135 cmpxchg(&sk->sk_pacing_status, 4136 SK_PACING_NONE, 4137 SK_PACING_NEEDED); 4138 sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; 4139 sk->sk_pacing_rate = min(sk->sk_pacing_rate, 4140 sk->sk_max_pacing_rate); 4141 break; 4142 case SO_PRIORITY: 4143 sk->sk_priority = val; 4144 break; 4145 case SO_RCVLOWAT: 4146 if (val < 0) 4147 val = INT_MAX; 4148 sk->sk_rcvlowat = val ? 
: 1; 4149 break; 4150 case SO_MARK: 4151 if (sk->sk_mark != val) { 4152 sk->sk_mark = val; 4153 sk_dst_reset(sk); 4154 } 4155 break; 4156 default: 4157 ret = -EINVAL; 4158 } 4159 #ifdef CONFIG_INET 4160 } else if (level == SOL_IP) { 4161 if (optlen != sizeof(int) || sk->sk_family != AF_INET) 4162 return -EINVAL; 4163 4164 val = *((int *)optval); 4165 /* Only some options are supported */ 4166 switch (optname) { 4167 case IP_TOS: 4168 if (val < -1 || val > 0xff) { 4169 ret = -EINVAL; 4170 } else { 4171 struct inet_sock *inet = inet_sk(sk); 4172 4173 if (val == -1) 4174 val = 0; 4175 inet->tos = val; 4176 } 4177 break; 4178 default: 4179 ret = -EINVAL; 4180 } 4181 #if IS_ENABLED(CONFIG_IPV6) 4182 } else if (level == SOL_IPV6) { 4183 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 4184 return -EINVAL; 4185 4186 val = *((int *)optval); 4187 /* Only some options are supported */ 4188 switch (optname) { 4189 case IPV6_TCLASS: 4190 if (val < -1 || val > 0xff) { 4191 ret = -EINVAL; 4192 } else { 4193 struct ipv6_pinfo *np = inet6_sk(sk); 4194 4195 if (val == -1) 4196 val = 0; 4197 np->tclass = val; 4198 } 4199 break; 4200 default: 4201 ret = -EINVAL; 4202 } 4203 #endif 4204 } else if (level == SOL_TCP && 4205 sk->sk_prot->setsockopt == tcp_setsockopt) { 4206 if (optname == TCP_CONGESTION) { 4207 char name[TCP_CA_NAME_MAX]; 4208 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; 4209 4210 strncpy(name, optval, min_t(long, optlen, 4211 TCP_CA_NAME_MAX-1)); 4212 name[TCP_CA_NAME_MAX-1] = 0; 4213 ret = tcp_set_congestion_control(sk, name, false, 4214 reinit); 4215 } else { 4216 struct tcp_sock *tp = tcp_sk(sk); 4217 4218 if (optlen != sizeof(int)) 4219 return -EINVAL; 4220 4221 val = *((int *)optval); 4222 /* Only some options are supported */ 4223 switch (optname) { 4224 case TCP_BPF_IW: 4225 if (val <= 0 || tp->data_segs_out > tp->syn_data) 4226 ret = -EINVAL; 4227 else 4228 tp->snd_cwnd = val; 4229 break; 4230 case TCP_BPF_SNDCWND_CLAMP: 4231 if (val <= 0) { 4232 ret = -EINVAL; 4233 } else { 4234 tp->snd_cwnd_clamp = val; 4235 tp->snd_ssthresh = val; 4236 } 4237 break; 4238 case TCP_SAVE_SYN: 4239 if (val < 0 || val > 1) 4240 ret = -EINVAL; 4241 else 4242 tp->save_syn = val; 4243 break; 4244 default: 4245 ret = -EINVAL; 4246 } 4247 } 4248 #endif 4249 } else { 4250 ret = -EINVAL; 4251 } 4252 return ret; 4253 } 4254 4255 static const struct bpf_func_proto bpf_setsockopt_proto = { 4256 .func = bpf_setsockopt, 4257 .gpl_only = false, 4258 .ret_type = RET_INTEGER, 4259 .arg1_type = ARG_PTR_TO_CTX, 4260 .arg2_type = ARG_ANYTHING, 4261 .arg3_type = ARG_ANYTHING, 4262 .arg4_type = ARG_PTR_TO_MEM, 4263 .arg5_type = ARG_CONST_SIZE, 4264 }; 4265 4266 BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, 4267 int, level, int, optname, char *, optval, int, optlen) 4268 { 4269 struct sock *sk = bpf_sock->sk; 4270 4271 if (!sk_fullsock(sk)) 4272 goto err_clear; 4273 #ifdef CONFIG_INET 4274 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { 4275 struct inet_connection_sock *icsk; 4276 struct tcp_sock *tp; 4277 4278 switch (optname) { 4279 case TCP_CONGESTION: 4280 icsk = inet_csk(sk); 4281 4282 if (!icsk->icsk_ca_ops || optlen <= 1) 4283 goto err_clear; 4284 strncpy(optval, icsk->icsk_ca_ops->name, optlen); 4285 optval[optlen - 1] = 0; 4286 break; 4287 case TCP_SAVED_SYN: 4288 tp = tcp_sk(sk); 4289 4290 if (optlen <= 0 || !tp->saved_syn || 4291 optlen > tp->saved_syn[0]) 4292 goto err_clear; 4293 memcpy(optval, tp->saved_syn + 1, optlen); 4294 break; 4295 default: 4296 goto 
err_clear; 4297 } 4298 } else if (level == SOL_IP) { 4299 struct inet_sock *inet = inet_sk(sk); 4300 4301 if (optlen != sizeof(int) || sk->sk_family != AF_INET) 4302 goto err_clear; 4303 4304 /* Only some options are supported */ 4305 switch (optname) { 4306 case IP_TOS: 4307 *((int *)optval) = (int)inet->tos; 4308 break; 4309 default: 4310 goto err_clear; 4311 } 4312 #if IS_ENABLED(CONFIG_IPV6) 4313 } else if (level == SOL_IPV6) { 4314 struct ipv6_pinfo *np = inet6_sk(sk); 4315 4316 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 4317 goto err_clear; 4318 4319 /* Only some options are supported */ 4320 switch (optname) { 4321 case IPV6_TCLASS: 4322 *((int *)optval) = (int)np->tclass; 4323 break; 4324 default: 4325 goto err_clear; 4326 } 4327 #endif 4328 } else { 4329 goto err_clear; 4330 } 4331 return 0; 4332 #endif 4333 err_clear: 4334 memset(optval, 0, optlen); 4335 return -EINVAL; 4336 } 4337 4338 static const struct bpf_func_proto bpf_getsockopt_proto = { 4339 .func = bpf_getsockopt, 4340 .gpl_only = false, 4341 .ret_type = RET_INTEGER, 4342 .arg1_type = ARG_PTR_TO_CTX, 4343 .arg2_type = ARG_ANYTHING, 4344 .arg3_type = ARG_ANYTHING, 4345 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 4346 .arg5_type = ARG_CONST_SIZE, 4347 }; 4348 4349 BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, 4350 int, argval) 4351 { 4352 struct sock *sk = bpf_sock->sk; 4353 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; 4354 4355 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) 4356 return -EINVAL; 4357 4358 if (val) 4359 tcp_sk(sk)->bpf_sock_ops_cb_flags = val; 4360 4361 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); 4362 } 4363 4364 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { 4365 .func = bpf_sock_ops_cb_flags_set, 4366 .gpl_only = false, 4367 .ret_type = RET_INTEGER, 4368 .arg1_type = ARG_PTR_TO_CTX, 4369 .arg2_type = ARG_ANYTHING, 4370 }; 4371 4372 const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; 4373 EXPORT_SYMBOL_GPL(ipv6_bpf_stub); 4374 4375 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, 4376 int, addr_len) 4377 { 4378 #ifdef CONFIG_INET 4379 struct sock *sk = ctx->sk; 4380 int err; 4381 4382 /* Binding to port can be expensive so it's prohibited in the helper. 4383 * Only binding to IP is supported. 
4384 */ 4385 err = -EINVAL; 4386 if (addr->sa_family == AF_INET) { 4387 if (addr_len < sizeof(struct sockaddr_in)) 4388 return err; 4389 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 4390 return err; 4391 return __inet_bind(sk, addr, addr_len, true, false); 4392 #if IS_ENABLED(CONFIG_IPV6) 4393 } else if (addr->sa_family == AF_INET6) { 4394 if (addr_len < SIN6_LEN_RFC2133) 4395 return err; 4396 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 4397 return err; 4398 /* ipv6_bpf_stub cannot be NULL, since it's called from 4399 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded 4400 */ 4401 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false); 4402 #endif /* CONFIG_IPV6 */ 4403 } 4404 #endif /* CONFIG_INET */ 4405 4406 return -EAFNOSUPPORT; 4407 } 4408 4409 static const struct bpf_func_proto bpf_bind_proto = { 4410 .func = bpf_bind, 4411 .gpl_only = false, 4412 .ret_type = RET_INTEGER, 4413 .arg1_type = ARG_PTR_TO_CTX, 4414 .arg2_type = ARG_PTR_TO_MEM, 4415 .arg3_type = ARG_CONST_SIZE, 4416 }; 4417 4418 #ifdef CONFIG_XFRM 4419 BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, 4420 struct bpf_xfrm_state *, to, u32, size, u64, flags) 4421 { 4422 const struct sec_path *sp = skb_sec_path(skb); 4423 const struct xfrm_state *x; 4424 4425 if (!sp || unlikely(index >= sp->len || flags)) 4426 goto err_clear; 4427 4428 x = sp->xvec[index]; 4429 4430 if (unlikely(size != sizeof(struct bpf_xfrm_state))) 4431 goto err_clear; 4432 4433 to->reqid = x->props.reqid; 4434 to->spi = x->id.spi; 4435 to->family = x->props.family; 4436 to->ext = 0; 4437 4438 if (to->family == AF_INET6) { 4439 memcpy(to->remote_ipv6, x->props.saddr.a6, 4440 sizeof(to->remote_ipv6)); 4441 } else { 4442 to->remote_ipv4 = x->props.saddr.a4; 4443 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); 4444 } 4445 4446 return 0; 4447 err_clear: 4448 memset(to, 0, size); 4449 return -EINVAL; 4450 } 4451 4452 static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { 4453 .func = bpf_skb_get_xfrm_state, 4454 .gpl_only = false, 4455 .ret_type = RET_INTEGER, 4456 .arg1_type = ARG_PTR_TO_CTX, 4457 .arg2_type = ARG_ANYTHING, 4458 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 4459 .arg4_type = ARG_CONST_SIZE, 4460 .arg5_type = ARG_ANYTHING, 4461 }; 4462 #endif 4463 4464 #if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6) 4465 static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, 4466 const struct neighbour *neigh, 4467 const struct net_device *dev) 4468 { 4469 memcpy(params->dmac, neigh->ha, ETH_ALEN); 4470 memcpy(params->smac, dev->dev_addr, ETH_ALEN); 4471 params->h_vlan_TCI = 0; 4472 params->h_vlan_proto = 0; 4473 params->ifindex = dev->ifindex; 4474 4475 return 0; 4476 } 4477 #endif 4478 4479 #if IS_ENABLED(CONFIG_INET) 4480 static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 4481 u32 flags, bool check_mtu) 4482 { 4483 struct in_device *in_dev; 4484 struct neighbour *neigh; 4485 struct net_device *dev; 4486 struct fib_result res; 4487 struct fib_nh *nh; 4488 struct flowi4 fl4; 4489 int err; 4490 u32 mtu; 4491 4492 dev = dev_get_by_index_rcu(net, params->ifindex); 4493 if (unlikely(!dev)) 4494 return -ENODEV; 4495 4496 /* verify forwarding is enabled on this interface */ 4497 in_dev = __in_dev_get_rcu(dev); 4498 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) 4499 return BPF_FIB_LKUP_RET_FWD_DISABLED; 4500 4501 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4502 fl4.flowi4_iif = 1; 4503 fl4.flowi4_oif = params->ifindex; 4504 } else { 4505 fl4.flowi4_iif = 
params->ifindex; 4506 fl4.flowi4_oif = 0; 4507 } 4508 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; 4509 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 4510 fl4.flowi4_flags = 0; 4511 4512 fl4.flowi4_proto = params->l4_protocol; 4513 fl4.daddr = params->ipv4_dst; 4514 fl4.saddr = params->ipv4_src; 4515 fl4.fl4_sport = params->sport; 4516 fl4.fl4_dport = params->dport; 4517 4518 if (flags & BPF_FIB_LOOKUP_DIRECT) { 4519 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 4520 struct fib_table *tb; 4521 4522 tb = fib_get_table(net, tbid); 4523 if (unlikely(!tb)) 4524 return BPF_FIB_LKUP_RET_NOT_FWDED; 4525 4526 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); 4527 } else { 4528 fl4.flowi4_mark = 0; 4529 fl4.flowi4_secid = 0; 4530 fl4.flowi4_tun_key.tun_id = 0; 4531 fl4.flowi4_uid = sock_net_uid(net, NULL); 4532 4533 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); 4534 } 4535 4536 if (err) { 4537 /* map fib lookup errors to RTN_ type */ 4538 if (err == -EINVAL) 4539 return BPF_FIB_LKUP_RET_BLACKHOLE; 4540 if (err == -EHOSTUNREACH) 4541 return BPF_FIB_LKUP_RET_UNREACHABLE; 4542 if (err == -EACCES) 4543 return BPF_FIB_LKUP_RET_PROHIBIT; 4544 4545 return BPF_FIB_LKUP_RET_NOT_FWDED; 4546 } 4547 4548 if (res.type != RTN_UNICAST) 4549 return BPF_FIB_LKUP_RET_NOT_FWDED; 4550 4551 if (res.fi->fib_nhs > 1) 4552 fib_select_path(net, &res, &fl4, NULL); 4553 4554 if (check_mtu) { 4555 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); 4556 if (params->tot_len > mtu) 4557 return BPF_FIB_LKUP_RET_FRAG_NEEDED; 4558 } 4559 4560 nh = &res.fi->fib_nh[res.nh_sel]; 4561 4562 /* do not handle lwt encaps right now */ 4563 if (nh->nh_lwtstate) 4564 return BPF_FIB_LKUP_RET_UNSUPP_LWT; 4565 4566 dev = nh->nh_dev; 4567 if (nh->nh_gw) 4568 params->ipv4_dst = nh->nh_gw; 4569 4570 params->rt_metric = res.fi->fib_priority; 4571 4572 /* xdp and cls_bpf programs are run in RCU-bh so 4573 * rcu_read_lock_bh is not needed here 4574 */ 4575 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); 4576 if (!neigh) 4577 return BPF_FIB_LKUP_RET_NO_NEIGH; 4578 4579 return bpf_fib_set_fwd_params(params, neigh, dev); 4580 } 4581 #endif 4582 4583 #if IS_ENABLED(CONFIG_IPV6) 4584 static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, 4585 u32 flags, bool check_mtu) 4586 { 4587 struct in6_addr *src = (struct in6_addr *) params->ipv6_src; 4588 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst; 4589 struct neighbour *neigh; 4590 struct net_device *dev; 4591 struct inet6_dev *idev; 4592 struct fib6_info *f6i; 4593 struct flowi6 fl6; 4594 int strict = 0; 4595 int oif; 4596 u32 mtu; 4597 4598 /* link local addresses are never forwarded */ 4599 if (rt6_need_strict(dst) || rt6_need_strict(src)) 4600 return BPF_FIB_LKUP_RET_NOT_FWDED; 4601 4602 dev = dev_get_by_index_rcu(net, params->ifindex); 4603 if (unlikely(!dev)) 4604 return -ENODEV; 4605 4606 idev = __in6_dev_get_safely(dev); 4607 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) 4608 return BPF_FIB_LKUP_RET_FWD_DISABLED; 4609 4610 if (flags & BPF_FIB_LOOKUP_OUTPUT) { 4611 fl6.flowi6_iif = 1; 4612 oif = fl6.flowi6_oif = params->ifindex; 4613 } else { 4614 oif = fl6.flowi6_iif = params->ifindex; 4615 fl6.flowi6_oif = 0; 4616 strict = RT6_LOOKUP_F_HAS_SADDR; 4617 } 4618 fl6.flowlabel = params->flowinfo; 4619 fl6.flowi6_scope = 0; 4620 fl6.flowi6_flags = 0; 4621 fl6.mp_hash = 0; 4622 4623 fl6.flowi6_proto = params->l4_protocol; 4624 fl6.daddr = *dst; 4625 fl6.saddr = *src; 4626 fl6.fl6_sport = params->sport; 4627 
fl6.fl6_dport = params->dport; 4628 4629 if (flags & BPF_FIB_LOOKUP_DIRECT) { 4630 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 4631 struct fib6_table *tb; 4632 4633 tb = ipv6_stub->fib6_get_table(net, tbid); 4634 if (unlikely(!tb)) 4635 return BPF_FIB_LKUP_RET_NOT_FWDED; 4636 4637 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); 4638 } else { 4639 fl6.flowi6_mark = 0; 4640 fl6.flowi6_secid = 0; 4641 fl6.flowi6_tun_key.tun_id = 0; 4642 fl6.flowi6_uid = sock_net_uid(net, NULL); 4643 4644 f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict); 4645 } 4646 4647 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) 4648 return BPF_FIB_LKUP_RET_NOT_FWDED; 4649 4650 if (unlikely(f6i->fib6_flags & RTF_REJECT)) { 4651 switch (f6i->fib6_type) { 4652 case RTN_BLACKHOLE: 4653 return BPF_FIB_LKUP_RET_BLACKHOLE; 4654 case RTN_UNREACHABLE: 4655 return BPF_FIB_LKUP_RET_UNREACHABLE; 4656 case RTN_PROHIBIT: 4657 return BPF_FIB_LKUP_RET_PROHIBIT; 4658 default: 4659 return BPF_FIB_LKUP_RET_NOT_FWDED; 4660 } 4661 } 4662 4663 if (f6i->fib6_type != RTN_UNICAST) 4664 return BPF_FIB_LKUP_RET_NOT_FWDED; 4665 4666 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) 4667 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, 4668 fl6.flowi6_oif, NULL, 4669 strict); 4670 4671 if (check_mtu) { 4672 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); 4673 if (params->tot_len > mtu) 4674 return BPF_FIB_LKUP_RET_FRAG_NEEDED; 4675 } 4676 4677 if (f6i->fib6_nh.nh_lwtstate) 4678 return BPF_FIB_LKUP_RET_UNSUPP_LWT; 4679 4680 if (f6i->fib6_flags & RTF_GATEWAY) 4681 *dst = f6i->fib6_nh.nh_gw; 4682 4683 dev = f6i->fib6_nh.nh_dev; 4684 params->rt_metric = f6i->fib6_metric; 4685 4686 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is 4687 * not needed here. 
Can not use __ipv6_neigh_lookup_noref here 4688 * because we need to get nd_tbl via the stub 4689 */ 4690 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, 4691 ndisc_hashfn, dst, dev); 4692 if (!neigh) 4693 return BPF_FIB_LKUP_RET_NO_NEIGH; 4694 4695 return bpf_fib_set_fwd_params(params, neigh, dev); 4696 } 4697 #endif 4698 4699 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, 4700 struct bpf_fib_lookup *, params, int, plen, u32, flags) 4701 { 4702 if (plen < sizeof(*params)) 4703 return -EINVAL; 4704 4705 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) 4706 return -EINVAL; 4707 4708 switch (params->family) { 4709 #if IS_ENABLED(CONFIG_INET) 4710 case AF_INET: 4711 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params, 4712 flags, true); 4713 #endif 4714 #if IS_ENABLED(CONFIG_IPV6) 4715 case AF_INET6: 4716 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params, 4717 flags, true); 4718 #endif 4719 } 4720 return -EAFNOSUPPORT; 4721 } 4722 4723 static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = { 4724 .func = bpf_xdp_fib_lookup, 4725 .gpl_only = true, 4726 .ret_type = RET_INTEGER, 4727 .arg1_type = ARG_PTR_TO_CTX, 4728 .arg2_type = ARG_PTR_TO_MEM, 4729 .arg3_type = ARG_CONST_SIZE, 4730 .arg4_type = ARG_ANYTHING, 4731 }; 4732 4733 BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, 4734 struct bpf_fib_lookup *, params, int, plen, u32, flags) 4735 { 4736 struct net *net = dev_net(skb->dev); 4737 int rc = -EAFNOSUPPORT; 4738 4739 if (plen < sizeof(*params)) 4740 return -EINVAL; 4741 4742 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) 4743 return -EINVAL; 4744 4745 switch (params->family) { 4746 #if IS_ENABLED(CONFIG_INET) 4747 case AF_INET: 4748 rc = bpf_ipv4_fib_lookup(net, params, flags, false); 4749 break; 4750 #endif 4751 #if IS_ENABLED(CONFIG_IPV6) 4752 case AF_INET6: 4753 rc = bpf_ipv6_fib_lookup(net, params, flags, false); 4754 break; 4755 #endif 4756 } 4757 4758 if (!rc) { 4759 struct net_device *dev; 4760 4761 dev = dev_get_by_index_rcu(net, params->ifindex); 4762 if (!is_skb_forwardable(dev, skb)) 4763 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; 4764 } 4765 4766 return rc; 4767 } 4768 4769 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { 4770 .func = bpf_skb_fib_lookup, 4771 .gpl_only = true, 4772 .ret_type = RET_INTEGER, 4773 .arg1_type = ARG_PTR_TO_CTX, 4774 .arg2_type = ARG_PTR_TO_MEM, 4775 .arg3_type = ARG_CONST_SIZE, 4776 .arg4_type = ARG_ANYTHING, 4777 }; 4778 4779 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4780 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) 4781 { 4782 int err; 4783 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr; 4784 4785 if (!seg6_validate_srh(srh, len)) 4786 return -EINVAL; 4787 4788 switch (type) { 4789 case BPF_LWT_ENCAP_SEG6_INLINE: 4790 if (skb->protocol != htons(ETH_P_IPV6)) 4791 return -EBADMSG; 4792 4793 err = seg6_do_srh_inline(skb, srh); 4794 break; 4795 case BPF_LWT_ENCAP_SEG6: 4796 skb_reset_inner_headers(skb); 4797 skb->encapsulation = 1; 4798 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6); 4799 break; 4800 default: 4801 return -EINVAL; 4802 } 4803 4804 bpf_compute_data_pointers(skb); 4805 if (err) 4806 return err; 4807 4808 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 4809 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 4810 4811 return seg6_lookup_nexthop(skb, NULL, 0); 4812 } 4813 #endif /* CONFIG_IPV6_SEG6_BPF */ 4814 4815 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) 4816 static int bpf_push_ip_encap(struct 
sk_buff *skb, void *hdr, u32 len, 4817 bool ingress) 4818 { 4819 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); 4820 } 4821 #endif 4822 4823 BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, 4824 u32, len) 4825 { 4826 switch (type) { 4827 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4828 case BPF_LWT_ENCAP_SEG6: 4829 case BPF_LWT_ENCAP_SEG6_INLINE: 4830 return bpf_push_seg6_encap(skb, type, hdr, len); 4831 #endif 4832 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) 4833 case BPF_LWT_ENCAP_IP: 4834 return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); 4835 #endif 4836 default: 4837 return -EINVAL; 4838 } 4839 } 4840 4841 BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, 4842 void *, hdr, u32, len) 4843 { 4844 switch (type) { 4845 #if IS_ENABLED(CONFIG_LWTUNNEL_BPF) 4846 case BPF_LWT_ENCAP_IP: 4847 return bpf_push_ip_encap(skb, hdr, len, false /* egress */); 4848 #endif 4849 default: 4850 return -EINVAL; 4851 } 4852 } 4853 4854 static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { 4855 .func = bpf_lwt_in_push_encap, 4856 .gpl_only = false, 4857 .ret_type = RET_INTEGER, 4858 .arg1_type = ARG_PTR_TO_CTX, 4859 .arg2_type = ARG_ANYTHING, 4860 .arg3_type = ARG_PTR_TO_MEM, 4861 .arg4_type = ARG_CONST_SIZE 4862 }; 4863 4864 static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { 4865 .func = bpf_lwt_xmit_push_encap, 4866 .gpl_only = false, 4867 .ret_type = RET_INTEGER, 4868 .arg1_type = ARG_PTR_TO_CTX, 4869 .arg2_type = ARG_ANYTHING, 4870 .arg3_type = ARG_PTR_TO_MEM, 4871 .arg4_type = ARG_CONST_SIZE 4872 }; 4873 4874 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4875 BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, 4876 const void *, from, u32, len) 4877 { 4878 struct seg6_bpf_srh_state *srh_state = 4879 this_cpu_ptr(&seg6_bpf_srh_states); 4880 struct ipv6_sr_hdr *srh = srh_state->srh; 4881 void *srh_tlvs, *srh_end, *ptr; 4882 int srhoff = 0; 4883 4884 if (srh == NULL) 4885 return -EINVAL; 4886 4887 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); 4888 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); 4889 4890 ptr = skb->data + offset; 4891 if (ptr >= srh_tlvs && ptr + len <= srh_end) 4892 srh_state->valid = false; 4893 else if (ptr < (void *)&srh->flags || 4894 ptr + len > (void *)&srh->segments) 4895 return -EFAULT; 4896 4897 if (unlikely(bpf_try_make_writable(skb, offset + len))) 4898 return -EFAULT; 4899 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) 4900 return -EINVAL; 4901 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 4902 4903 memcpy(skb->data + offset, from, len); 4904 return 0; 4905 } 4906 4907 static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { 4908 .func = bpf_lwt_seg6_store_bytes, 4909 .gpl_only = false, 4910 .ret_type = RET_INTEGER, 4911 .arg1_type = ARG_PTR_TO_CTX, 4912 .arg2_type = ARG_ANYTHING, 4913 .arg3_type = ARG_PTR_TO_MEM, 4914 .arg4_type = ARG_CONST_SIZE 4915 }; 4916 4917 static void bpf_update_srh_state(struct sk_buff *skb) 4918 { 4919 struct seg6_bpf_srh_state *srh_state = 4920 this_cpu_ptr(&seg6_bpf_srh_states); 4921 int srhoff = 0; 4922 4923 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { 4924 srh_state->srh = NULL; 4925 } else { 4926 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 4927 srh_state->hdrlen = srh_state->srh->hdrlen << 3; 4928 srh_state->valid = true; 4929 } 4930 } 4931 4932 BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, 4933 u32, action, void *, 
param, u32, param_len) 4934 { 4935 struct seg6_bpf_srh_state *srh_state = 4936 this_cpu_ptr(&seg6_bpf_srh_states); 4937 int hdroff = 0; 4938 int err; 4939 4940 switch (action) { 4941 case SEG6_LOCAL_ACTION_END_X: 4942 if (!seg6_bpf_has_valid_srh(skb)) 4943 return -EBADMSG; 4944 if (param_len != sizeof(struct in6_addr)) 4945 return -EINVAL; 4946 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); 4947 case SEG6_LOCAL_ACTION_END_T: 4948 if (!seg6_bpf_has_valid_srh(skb)) 4949 return -EBADMSG; 4950 if (param_len != sizeof(int)) 4951 return -EINVAL; 4952 return seg6_lookup_nexthop(skb, NULL, *(int *)param); 4953 case SEG6_LOCAL_ACTION_END_DT6: 4954 if (!seg6_bpf_has_valid_srh(skb)) 4955 return -EBADMSG; 4956 if (param_len != sizeof(int)) 4957 return -EINVAL; 4958 4959 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) 4960 return -EBADMSG; 4961 if (!pskb_pull(skb, hdroff)) 4962 return -EBADMSG; 4963 4964 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); 4965 skb_reset_network_header(skb); 4966 skb_reset_transport_header(skb); 4967 skb->encapsulation = 0; 4968 4969 bpf_compute_data_pointers(skb); 4970 bpf_update_srh_state(skb); 4971 return seg6_lookup_nexthop(skb, NULL, *(int *)param); 4972 case SEG6_LOCAL_ACTION_END_B6: 4973 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 4974 return -EBADMSG; 4975 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, 4976 param, param_len); 4977 if (!err) 4978 bpf_update_srh_state(skb); 4979 4980 return err; 4981 case SEG6_LOCAL_ACTION_END_B6_ENCAP: 4982 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 4983 return -EBADMSG; 4984 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, 4985 param, param_len); 4986 if (!err) 4987 bpf_update_srh_state(skb); 4988 4989 return err; 4990 default: 4991 return -EINVAL; 4992 } 4993 } 4994 4995 static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { 4996 .func = bpf_lwt_seg6_action, 4997 .gpl_only = false, 4998 .ret_type = RET_INTEGER, 4999 .arg1_type = ARG_PTR_TO_CTX, 5000 .arg2_type = ARG_ANYTHING, 5001 .arg3_type = ARG_PTR_TO_MEM, 5002 .arg4_type = ARG_CONST_SIZE 5003 }; 5004 5005 BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, 5006 s32, len) 5007 { 5008 struct seg6_bpf_srh_state *srh_state = 5009 this_cpu_ptr(&seg6_bpf_srh_states); 5010 struct ipv6_sr_hdr *srh = srh_state->srh; 5011 void *srh_end, *srh_tlvs, *ptr; 5012 struct ipv6hdr *hdr; 5013 int srhoff = 0; 5014 int ret; 5015 5016 if (unlikely(srh == NULL)) 5017 return -EINVAL; 5018 5019 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + 5020 ((srh->first_segment + 1) << 4)); 5021 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) + 5022 srh_state->hdrlen); 5023 ptr = skb->data + offset; 5024 5025 if (unlikely(ptr < srh_tlvs || ptr > srh_end)) 5026 return -EFAULT; 5027 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end)) 5028 return -EFAULT; 5029 5030 if (len > 0) { 5031 ret = skb_cow_head(skb, len); 5032 if (unlikely(ret < 0)) 5033 return ret; 5034 5035 ret = bpf_skb_net_hdr_push(skb, offset, len); 5036 } else { 5037 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len); 5038 } 5039 5040 bpf_compute_data_pointers(skb); 5041 if (unlikely(ret < 0)) 5042 return ret; 5043 5044 hdr = (struct ipv6hdr *)skb->data; 5045 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 5046 5047 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) 5048 return -EINVAL; 5049 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 5050 srh_state->hdrlen += len; 5051 
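/* The SRH length was changed in place above; clear the cached validity flag so the modified header is checked again before later seg6 actions rely on it. */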
srh_state->valid = false; 5052 return 0; 5053 } 5054 5055 static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { 5056 .func = bpf_lwt_seg6_adjust_srh, 5057 .gpl_only = false, 5058 .ret_type = RET_INTEGER, 5059 .arg1_type = ARG_PTR_TO_CTX, 5060 .arg2_type = ARG_ANYTHING, 5061 .arg3_type = ARG_ANYTHING, 5062 }; 5063 #endif /* CONFIG_IPV6_SEG6_BPF */ 5064 5065 #define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ 5066 do { \ 5067 switch (si->off) { \ 5068 case offsetof(md_type, snd_cwnd): \ 5069 CONVERT(snd_cwnd); break; \ 5070 case offsetof(md_type, srtt_us): \ 5071 CONVERT(srtt_us); break; \ 5072 case offsetof(md_type, snd_ssthresh): \ 5073 CONVERT(snd_ssthresh); break; \ 5074 case offsetof(md_type, rcv_nxt): \ 5075 CONVERT(rcv_nxt); break; \ 5076 case offsetof(md_type, snd_nxt): \ 5077 CONVERT(snd_nxt); break; \ 5078 case offsetof(md_type, snd_una): \ 5079 CONVERT(snd_una); break; \ 5080 case offsetof(md_type, mss_cache): \ 5081 CONVERT(mss_cache); break; \ 5082 case offsetof(md_type, ecn_flags): \ 5083 CONVERT(ecn_flags); break; \ 5084 case offsetof(md_type, rate_delivered): \ 5085 CONVERT(rate_delivered); break; \ 5086 case offsetof(md_type, rate_interval_us): \ 5087 CONVERT(rate_interval_us); break; \ 5088 case offsetof(md_type, packets_out): \ 5089 CONVERT(packets_out); break; \ 5090 case offsetof(md_type, retrans_out): \ 5091 CONVERT(retrans_out); break; \ 5092 case offsetof(md_type, total_retrans): \ 5093 CONVERT(total_retrans); break; \ 5094 case offsetof(md_type, segs_in): \ 5095 CONVERT(segs_in); break; \ 5096 case offsetof(md_type, data_segs_in): \ 5097 CONVERT(data_segs_in); break; \ 5098 case offsetof(md_type, segs_out): \ 5099 CONVERT(segs_out); break; \ 5100 case offsetof(md_type, data_segs_out): \ 5101 CONVERT(data_segs_out); break; \ 5102 case offsetof(md_type, lost_out): \ 5103 CONVERT(lost_out); break; \ 5104 case offsetof(md_type, sacked_out): \ 5105 CONVERT(sacked_out); break; \ 5106 case offsetof(md_type, bytes_received): \ 5107 CONVERT(bytes_received); break; \ 5108 case offsetof(md_type, bytes_acked): \ 5109 CONVERT(bytes_acked); break; \ 5110 } \ 5111 } while (0) 5112 5113 #ifdef CONFIG_INET 5114 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, 5115 int dif, int sdif, u8 family, u8 proto) 5116 { 5117 bool refcounted = false; 5118 struct sock *sk = NULL; 5119 5120 if (family == AF_INET) { 5121 __be32 src4 = tuple->ipv4.saddr; 5122 __be32 dst4 = tuple->ipv4.daddr; 5123 5124 if (proto == IPPROTO_TCP) 5125 sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0, 5126 src4, tuple->ipv4.sport, 5127 dst4, tuple->ipv4.dport, 5128 dif, sdif, &refcounted); 5129 else 5130 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, 5131 dst4, tuple->ipv4.dport, 5132 dif, sdif, &udp_table, NULL); 5133 #if IS_ENABLED(CONFIG_IPV6) 5134 } else { 5135 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; 5136 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; 5137 5138 if (proto == IPPROTO_TCP) 5139 sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0, 5140 src6, tuple->ipv6.sport, 5141 dst6, ntohs(tuple->ipv6.dport), 5142 dif, sdif, &refcounted); 5143 else if (likely(ipv6_bpf_stub)) 5144 sk = ipv6_bpf_stub->udp6_lib_lookup(net, 5145 src6, tuple->ipv6.sport, 5146 dst6, tuple->ipv6.dport, 5147 dif, sdif, 5148 &udp_table, NULL); 5149 #endif 5150 } 5151 5152 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) { 5153 WARN_ONCE(1, "Found non-RCU, unreferenced socket!"); 5154 sk = NULL; 5155 } 5156 return sk; 5157 } 5158 5159 /* 
bpf_sk_lookup performs the core lookup for different types of sockets, 5160 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. 5161 * Returns the socket as an 'unsigned long' to simplify the casting in the 5162 * callers to satisfy BPF_CALL declarations. 5163 */ 5164 static unsigned long 5165 __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 5166 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, 5167 u64 flags) 5168 { 5169 struct sock *sk = NULL; 5170 u8 family = AF_UNSPEC; 5171 struct net *net; 5172 int sdif; 5173 5174 family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; 5175 if (unlikely(family == AF_UNSPEC || flags || 5176 !((s32)netns_id < 0 || netns_id <= S32_MAX))) 5177 goto out; 5178 5179 if (family == AF_INET) 5180 sdif = inet_sdif(skb); 5181 else 5182 sdif = inet6_sdif(skb); 5183 5184 if ((s32)netns_id < 0) { 5185 net = caller_net; 5186 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); 5187 } else { 5188 net = get_net_ns_by_id(caller_net, netns_id); 5189 if (unlikely(!net)) 5190 goto out; 5191 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto); 5192 put_net(net); 5193 } 5194 5195 if (sk) 5196 sk = sk_to_full_sk(sk); 5197 out: 5198 return (unsigned long) sk; 5199 } 5200 5201 static unsigned long 5202 bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 5203 u8 proto, u64 netns_id, u64 flags) 5204 { 5205 struct net *caller_net; 5206 int ifindex; 5207 5208 if (skb->dev) { 5209 caller_net = dev_net(skb->dev); 5210 ifindex = skb->dev->ifindex; 5211 } else { 5212 caller_net = sock_net(skb->sk); 5213 ifindex = 0; 5214 } 5215 5216 return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, 5217 proto, netns_id, flags); 5218 } 5219 5220 BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, 5221 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 5222 { 5223 return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); 5224 } 5225 5226 static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { 5227 .func = bpf_sk_lookup_tcp, 5228 .gpl_only = false, 5229 .pkt_access = true, 5230 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5231 .arg1_type = ARG_PTR_TO_CTX, 5232 .arg2_type = ARG_PTR_TO_MEM, 5233 .arg3_type = ARG_CONST_SIZE, 5234 .arg4_type = ARG_ANYTHING, 5235 .arg5_type = ARG_ANYTHING, 5236 }; 5237 5238 BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, 5239 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 5240 { 5241 return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); 5242 } 5243 5244 static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { 5245 .func = bpf_sk_lookup_udp, 5246 .gpl_only = false, 5247 .pkt_access = true, 5248 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5249 .arg1_type = ARG_PTR_TO_CTX, 5250 .arg2_type = ARG_PTR_TO_MEM, 5251 .arg3_type = ARG_CONST_SIZE, 5252 .arg4_type = ARG_ANYTHING, 5253 .arg5_type = ARG_ANYTHING, 5254 }; 5255 5256 BPF_CALL_1(bpf_sk_release, struct sock *, sk) 5257 { 5258 if (!sock_flag(sk, SOCK_RCU_FREE)) 5259 sock_gen_put(sk); 5260 return 0; 5261 } 5262 5263 static const struct bpf_func_proto bpf_sk_release_proto = { 5264 .func = bpf_sk_release, 5265 .gpl_only = false, 5266 .ret_type = RET_INTEGER, 5267 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 5268 }; 5269 5270 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, 5271 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 5272 { 5273 struct net *caller_net = dev_net(ctx->rxq->dev); 5274 int ifindex = ctx->rxq->dev->ifindex; 
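/* No skb exists in the XDP path, so a NULL skb is passed and the calling netns and ifindex are taken from the Rx queue's netdev. */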
5275 5276 return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, 5277 IPPROTO_UDP, netns_id, flags); 5278 } 5279 5280 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { 5281 .func = bpf_xdp_sk_lookup_udp, 5282 .gpl_only = false, 5283 .pkt_access = true, 5284 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5285 .arg1_type = ARG_PTR_TO_CTX, 5286 .arg2_type = ARG_PTR_TO_MEM, 5287 .arg3_type = ARG_CONST_SIZE, 5288 .arg4_type = ARG_ANYTHING, 5289 .arg5_type = ARG_ANYTHING, 5290 }; 5291 5292 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, 5293 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 5294 { 5295 struct net *caller_net = dev_net(ctx->rxq->dev); 5296 int ifindex = ctx->rxq->dev->ifindex; 5297 5298 return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, 5299 IPPROTO_TCP, netns_id, flags); 5300 } 5301 5302 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { 5303 .func = bpf_xdp_sk_lookup_tcp, 5304 .gpl_only = false, 5305 .pkt_access = true, 5306 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5307 .arg1_type = ARG_PTR_TO_CTX, 5308 .arg2_type = ARG_PTR_TO_MEM, 5309 .arg3_type = ARG_CONST_SIZE, 5310 .arg4_type = ARG_ANYTHING, 5311 .arg5_type = ARG_ANYTHING, 5312 }; 5313 5314 BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, 5315 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 5316 { 5317 return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, 5318 IPPROTO_TCP, netns_id, flags); 5319 } 5320 5321 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { 5322 .func = bpf_sock_addr_sk_lookup_tcp, 5323 .gpl_only = false, 5324 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5325 .arg1_type = ARG_PTR_TO_CTX, 5326 .arg2_type = ARG_PTR_TO_MEM, 5327 .arg3_type = ARG_CONST_SIZE, 5328 .arg4_type = ARG_ANYTHING, 5329 .arg5_type = ARG_ANYTHING, 5330 }; 5331 5332 BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, 5333 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 5334 { 5335 return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, 5336 IPPROTO_UDP, netns_id, flags); 5337 } 5338 5339 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { 5340 .func = bpf_sock_addr_sk_lookup_udp, 5341 .gpl_only = false, 5342 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5343 .arg1_type = ARG_PTR_TO_CTX, 5344 .arg2_type = ARG_PTR_TO_MEM, 5345 .arg3_type = ARG_CONST_SIZE, 5346 .arg4_type = ARG_ANYTHING, 5347 .arg5_type = ARG_ANYTHING, 5348 }; 5349 5350 bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, 5351 struct bpf_insn_access_aux *info) 5352 { 5353 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) 5354 return false; 5355 5356 if (off % size != 0) 5357 return false; 5358 5359 switch (off) { 5360 case offsetof(struct bpf_tcp_sock, bytes_received): 5361 case offsetof(struct bpf_tcp_sock, bytes_acked): 5362 return size == sizeof(__u64); 5363 default: 5364 return size == sizeof(__u32); 5365 } 5366 } 5367 5368 u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, 5369 const struct bpf_insn *si, 5370 struct bpf_insn *insn_buf, 5371 struct bpf_prog *prog, u32 *target_size) 5372 { 5373 struct bpf_insn *insn = insn_buf; 5374 5375 #define BPF_TCP_SOCK_GET_COMMON(FIELD) \ 5376 do { \ 5377 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ 5378 FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ 5379 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ 5380 si->dst_reg, si->src_reg, \ 5381 
offsetof(struct tcp_sock, FIELD)); \ 5382 } while (0) 5383 5384 CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, 5385 BPF_TCP_SOCK_GET_COMMON); 5386 5387 if (insn > insn_buf) 5388 return insn - insn_buf; 5389 5390 switch (si->off) { 5391 case offsetof(struct bpf_tcp_sock, rtt_min): 5392 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != 5393 sizeof(struct minmax)); 5394 BUILD_BUG_ON(sizeof(struct minmax) < 5395 sizeof(struct minmax_sample)); 5396 5397 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 5398 offsetof(struct tcp_sock, rtt_min) + 5399 offsetof(struct minmax_sample, v)); 5400 break; 5401 } 5402 5403 return insn - insn_buf; 5404 } 5405 5406 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) 5407 { 5408 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) 5409 return (unsigned long)sk; 5410 5411 return (unsigned long)NULL; 5412 } 5413 5414 static const struct bpf_func_proto bpf_tcp_sock_proto = { 5415 .func = bpf_tcp_sock, 5416 .gpl_only = false, 5417 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, 5418 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 5419 }; 5420 5421 BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) 5422 { 5423 sk = sk_to_full_sk(sk); 5424 5425 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) 5426 return (unsigned long)sk; 5427 5428 return (unsigned long)NULL; 5429 } 5430 5431 static const struct bpf_func_proto bpf_get_listener_sock_proto = { 5432 .func = bpf_get_listener_sock, 5433 .gpl_only = false, 5434 .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 5435 .arg1_type = ARG_PTR_TO_SOCK_COMMON, 5436 }; 5437 5438 BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) 5439 { 5440 unsigned int iphdr_len; 5441 5442 if (skb->protocol == cpu_to_be16(ETH_P_IP)) 5443 iphdr_len = sizeof(struct iphdr); 5444 else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) 5445 iphdr_len = sizeof(struct ipv6hdr); 5446 else 5447 return 0; 5448 5449 if (skb_headlen(skb) < iphdr_len) 5450 return 0; 5451 5452 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len)) 5453 return 0; 5454 5455 return INET_ECN_set_ce(skb); 5456 } 5457 5458 static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { 5459 .func = bpf_skb_ecn_set_ce, 5460 .gpl_only = false, 5461 .ret_type = RET_INTEGER, 5462 .arg1_type = ARG_PTR_TO_CTX, 5463 }; 5464 #endif /* CONFIG_INET */ 5465 5466 bool bpf_helper_changes_pkt_data(void *func) 5467 { 5468 if (func == bpf_skb_vlan_push || 5469 func == bpf_skb_vlan_pop || 5470 func == bpf_skb_store_bytes || 5471 func == bpf_skb_change_proto || 5472 func == bpf_skb_change_head || 5473 func == sk_skb_change_head || 5474 func == bpf_skb_change_tail || 5475 func == sk_skb_change_tail || 5476 func == bpf_skb_adjust_room || 5477 func == bpf_skb_pull_data || 5478 func == sk_skb_pull_data || 5479 func == bpf_clone_redirect || 5480 func == bpf_l3_csum_replace || 5481 func == bpf_l4_csum_replace || 5482 func == bpf_xdp_adjust_head || 5483 func == bpf_xdp_adjust_meta || 5484 func == bpf_msg_pull_data || 5485 func == bpf_msg_push_data || 5486 func == bpf_msg_pop_data || 5487 func == bpf_xdp_adjust_tail || 5488 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 5489 func == bpf_lwt_seg6_store_bytes || 5490 func == bpf_lwt_seg6_adjust_srh || 5491 func == bpf_lwt_seg6_action || 5492 #endif 5493 func == bpf_lwt_in_push_encap || 5494 func == bpf_lwt_xmit_push_encap) 5495 return true; 5496 5497 return false; 5498 } 5499 5500 static const struct bpf_func_proto * 5501 bpf_base_func_proto(enum bpf_func_id func_id) 5502 { 5503 switch (func_id) { 5504 case BPF_FUNC_map_lookup_elem: 5505 return 
&bpf_map_lookup_elem_proto; 5506 case BPF_FUNC_map_update_elem: 5507 return &bpf_map_update_elem_proto; 5508 case BPF_FUNC_map_delete_elem: 5509 return &bpf_map_delete_elem_proto; 5510 case BPF_FUNC_map_push_elem: 5511 return &bpf_map_push_elem_proto; 5512 case BPF_FUNC_map_pop_elem: 5513 return &bpf_map_pop_elem_proto; 5514 case BPF_FUNC_map_peek_elem: 5515 return &bpf_map_peek_elem_proto; 5516 case BPF_FUNC_get_prandom_u32: 5517 return &bpf_get_prandom_u32_proto; 5518 case BPF_FUNC_get_smp_processor_id: 5519 return &bpf_get_raw_smp_processor_id_proto; 5520 case BPF_FUNC_get_numa_node_id: 5521 return &bpf_get_numa_node_id_proto; 5522 case BPF_FUNC_tail_call: 5523 return &bpf_tail_call_proto; 5524 case BPF_FUNC_ktime_get_ns: 5525 return &bpf_ktime_get_ns_proto; 5526 default: 5527 break; 5528 } 5529 5530 if (!capable(CAP_SYS_ADMIN)) 5531 return NULL; 5532 5533 switch (func_id) { 5534 case BPF_FUNC_spin_lock: 5535 return &bpf_spin_lock_proto; 5536 case BPF_FUNC_spin_unlock: 5537 return &bpf_spin_unlock_proto; 5538 case BPF_FUNC_trace_printk: 5539 return bpf_get_trace_printk_proto(); 5540 default: 5541 return NULL; 5542 } 5543 } 5544 5545 static const struct bpf_func_proto * 5546 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5547 { 5548 switch (func_id) { 5549 /* inet and inet6 sockets are created in a process 5550 * context so there is always a valid uid/gid 5551 */ 5552 case BPF_FUNC_get_current_uid_gid: 5553 return &bpf_get_current_uid_gid_proto; 5554 case BPF_FUNC_get_local_storage: 5555 return &bpf_get_local_storage_proto; 5556 default: 5557 return bpf_base_func_proto(func_id); 5558 } 5559 } 5560 5561 static const struct bpf_func_proto * 5562 sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5563 { 5564 switch (func_id) { 5565 /* inet and inet6 sockets are created in a process 5566 * context so there is always a valid uid/gid 5567 */ 5568 case BPF_FUNC_get_current_uid_gid: 5569 return &bpf_get_current_uid_gid_proto; 5570 case BPF_FUNC_bind: 5571 switch (prog->expected_attach_type) { 5572 case BPF_CGROUP_INET4_CONNECT: 5573 case BPF_CGROUP_INET6_CONNECT: 5574 return &bpf_bind_proto; 5575 default: 5576 return NULL; 5577 } 5578 case BPF_FUNC_get_socket_cookie: 5579 return &bpf_get_socket_cookie_sock_addr_proto; 5580 case BPF_FUNC_get_local_storage: 5581 return &bpf_get_local_storage_proto; 5582 #ifdef CONFIG_INET 5583 case BPF_FUNC_sk_lookup_tcp: 5584 return &bpf_sock_addr_sk_lookup_tcp_proto; 5585 case BPF_FUNC_sk_lookup_udp: 5586 return &bpf_sock_addr_sk_lookup_udp_proto; 5587 case BPF_FUNC_sk_release: 5588 return &bpf_sk_release_proto; 5589 #endif /* CONFIG_INET */ 5590 default: 5591 return bpf_base_func_proto(func_id); 5592 } 5593 } 5594 5595 static const struct bpf_func_proto * 5596 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5597 { 5598 switch (func_id) { 5599 case BPF_FUNC_skb_load_bytes: 5600 return &bpf_skb_load_bytes_proto; 5601 case BPF_FUNC_skb_load_bytes_relative: 5602 return &bpf_skb_load_bytes_relative_proto; 5603 case BPF_FUNC_get_socket_cookie: 5604 return &bpf_get_socket_cookie_proto; 5605 case BPF_FUNC_get_socket_uid: 5606 return &bpf_get_socket_uid_proto; 5607 default: 5608 return bpf_base_func_proto(func_id); 5609 } 5610 } 5611 5612 static const struct bpf_func_proto * 5613 cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5614 { 5615 switch (func_id) { 5616 case BPF_FUNC_get_local_storage: 5617 return &bpf_get_local_storage_proto; 5618 case 
BPF_FUNC_sk_fullsock: 5619 return &bpf_sk_fullsock_proto; 5620 #ifdef CONFIG_INET 5621 case BPF_FUNC_tcp_sock: 5622 return &bpf_tcp_sock_proto; 5623 case BPF_FUNC_get_listener_sock: 5624 return &bpf_get_listener_sock_proto; 5625 case BPF_FUNC_skb_ecn_set_ce: 5626 return &bpf_skb_ecn_set_ce_proto; 5627 #endif 5628 default: 5629 return sk_filter_func_proto(func_id, prog); 5630 } 5631 } 5632 5633 static const struct bpf_func_proto * 5634 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5635 { 5636 switch (func_id) { 5637 case BPF_FUNC_skb_store_bytes: 5638 return &bpf_skb_store_bytes_proto; 5639 case BPF_FUNC_skb_load_bytes: 5640 return &bpf_skb_load_bytes_proto; 5641 case BPF_FUNC_skb_load_bytes_relative: 5642 return &bpf_skb_load_bytes_relative_proto; 5643 case BPF_FUNC_skb_pull_data: 5644 return &bpf_skb_pull_data_proto; 5645 case BPF_FUNC_csum_diff: 5646 return &bpf_csum_diff_proto; 5647 case BPF_FUNC_csum_update: 5648 return &bpf_csum_update_proto; 5649 case BPF_FUNC_l3_csum_replace: 5650 return &bpf_l3_csum_replace_proto; 5651 case BPF_FUNC_l4_csum_replace: 5652 return &bpf_l4_csum_replace_proto; 5653 case BPF_FUNC_clone_redirect: 5654 return &bpf_clone_redirect_proto; 5655 case BPF_FUNC_get_cgroup_classid: 5656 return &bpf_get_cgroup_classid_proto; 5657 case BPF_FUNC_skb_vlan_push: 5658 return &bpf_skb_vlan_push_proto; 5659 case BPF_FUNC_skb_vlan_pop: 5660 return &bpf_skb_vlan_pop_proto; 5661 case BPF_FUNC_skb_change_proto: 5662 return &bpf_skb_change_proto_proto; 5663 case BPF_FUNC_skb_change_type: 5664 return &bpf_skb_change_type_proto; 5665 case BPF_FUNC_skb_adjust_room: 5666 return &bpf_skb_adjust_room_proto; 5667 case BPF_FUNC_skb_change_tail: 5668 return &bpf_skb_change_tail_proto; 5669 case BPF_FUNC_skb_get_tunnel_key: 5670 return &bpf_skb_get_tunnel_key_proto; 5671 case BPF_FUNC_skb_set_tunnel_key: 5672 return bpf_get_skb_set_tunnel_proto(func_id); 5673 case BPF_FUNC_skb_get_tunnel_opt: 5674 return &bpf_skb_get_tunnel_opt_proto; 5675 case BPF_FUNC_skb_set_tunnel_opt: 5676 return bpf_get_skb_set_tunnel_proto(func_id); 5677 case BPF_FUNC_redirect: 5678 return &bpf_redirect_proto; 5679 case BPF_FUNC_get_route_realm: 5680 return &bpf_get_route_realm_proto; 5681 case BPF_FUNC_get_hash_recalc: 5682 return &bpf_get_hash_recalc_proto; 5683 case BPF_FUNC_set_hash_invalid: 5684 return &bpf_set_hash_invalid_proto; 5685 case BPF_FUNC_set_hash: 5686 return &bpf_set_hash_proto; 5687 case BPF_FUNC_perf_event_output: 5688 return &bpf_skb_event_output_proto; 5689 case BPF_FUNC_get_smp_processor_id: 5690 return &bpf_get_smp_processor_id_proto; 5691 case BPF_FUNC_skb_under_cgroup: 5692 return &bpf_skb_under_cgroup_proto; 5693 case BPF_FUNC_get_socket_cookie: 5694 return &bpf_get_socket_cookie_proto; 5695 case BPF_FUNC_get_socket_uid: 5696 return &bpf_get_socket_uid_proto; 5697 case BPF_FUNC_fib_lookup: 5698 return &bpf_skb_fib_lookup_proto; 5699 case BPF_FUNC_sk_fullsock: 5700 return &bpf_sk_fullsock_proto; 5701 #ifdef CONFIG_XFRM 5702 case BPF_FUNC_skb_get_xfrm_state: 5703 return &bpf_skb_get_xfrm_state_proto; 5704 #endif 5705 #ifdef CONFIG_SOCK_CGROUP_DATA 5706 case BPF_FUNC_skb_cgroup_id: 5707 return &bpf_skb_cgroup_id_proto; 5708 case BPF_FUNC_skb_ancestor_cgroup_id: 5709 return &bpf_skb_ancestor_cgroup_id_proto; 5710 #endif 5711 #ifdef CONFIG_INET 5712 case BPF_FUNC_sk_lookup_tcp: 5713 return &bpf_sk_lookup_tcp_proto; 5714 case BPF_FUNC_sk_lookup_udp: 5715 return &bpf_sk_lookup_udp_proto; 5716 case BPF_FUNC_sk_release: 5717 return &bpf_sk_release_proto; 5718 case 
BPF_FUNC_tcp_sock: 5719 return &bpf_tcp_sock_proto; 5720 case BPF_FUNC_get_listener_sock: 5721 return &bpf_get_listener_sock_proto; 5722 #endif 5723 default: 5724 return bpf_base_func_proto(func_id); 5725 } 5726 } 5727 5728 static const struct bpf_func_proto * 5729 xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5730 { 5731 switch (func_id) { 5732 case BPF_FUNC_perf_event_output: 5733 return &bpf_xdp_event_output_proto; 5734 case BPF_FUNC_get_smp_processor_id: 5735 return &bpf_get_smp_processor_id_proto; 5736 case BPF_FUNC_csum_diff: 5737 return &bpf_csum_diff_proto; 5738 case BPF_FUNC_xdp_adjust_head: 5739 return &bpf_xdp_adjust_head_proto; 5740 case BPF_FUNC_xdp_adjust_meta: 5741 return &bpf_xdp_adjust_meta_proto; 5742 case BPF_FUNC_redirect: 5743 return &bpf_xdp_redirect_proto; 5744 case BPF_FUNC_redirect_map: 5745 return &bpf_xdp_redirect_map_proto; 5746 case BPF_FUNC_xdp_adjust_tail: 5747 return &bpf_xdp_adjust_tail_proto; 5748 case BPF_FUNC_fib_lookup: 5749 return &bpf_xdp_fib_lookup_proto; 5750 #ifdef CONFIG_INET 5751 case BPF_FUNC_sk_lookup_udp: 5752 return &bpf_xdp_sk_lookup_udp_proto; 5753 case BPF_FUNC_sk_lookup_tcp: 5754 return &bpf_xdp_sk_lookup_tcp_proto; 5755 case BPF_FUNC_sk_release: 5756 return &bpf_sk_release_proto; 5757 #endif 5758 default: 5759 return bpf_base_func_proto(func_id); 5760 } 5761 } 5762 5763 const struct bpf_func_proto bpf_sock_map_update_proto __weak; 5764 const struct bpf_func_proto bpf_sock_hash_update_proto __weak; 5765 5766 static const struct bpf_func_proto * 5767 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5768 { 5769 switch (func_id) { 5770 case BPF_FUNC_setsockopt: 5771 return &bpf_setsockopt_proto; 5772 case BPF_FUNC_getsockopt: 5773 return &bpf_getsockopt_proto; 5774 case BPF_FUNC_sock_ops_cb_flags_set: 5775 return &bpf_sock_ops_cb_flags_set_proto; 5776 case BPF_FUNC_sock_map_update: 5777 return &bpf_sock_map_update_proto; 5778 case BPF_FUNC_sock_hash_update: 5779 return &bpf_sock_hash_update_proto; 5780 case BPF_FUNC_get_socket_cookie: 5781 return &bpf_get_socket_cookie_sock_ops_proto; 5782 case BPF_FUNC_get_local_storage: 5783 return &bpf_get_local_storage_proto; 5784 case BPF_FUNC_perf_event_output: 5785 return &bpf_sockopt_event_output_proto; 5786 default: 5787 return bpf_base_func_proto(func_id); 5788 } 5789 } 5790 5791 const struct bpf_func_proto bpf_msg_redirect_map_proto __weak; 5792 const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak; 5793 5794 static const struct bpf_func_proto * 5795 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5796 { 5797 switch (func_id) { 5798 case BPF_FUNC_msg_redirect_map: 5799 return &bpf_msg_redirect_map_proto; 5800 case BPF_FUNC_msg_redirect_hash: 5801 return &bpf_msg_redirect_hash_proto; 5802 case BPF_FUNC_msg_apply_bytes: 5803 return &bpf_msg_apply_bytes_proto; 5804 case BPF_FUNC_msg_cork_bytes: 5805 return &bpf_msg_cork_bytes_proto; 5806 case BPF_FUNC_msg_pull_data: 5807 return &bpf_msg_pull_data_proto; 5808 case BPF_FUNC_msg_push_data: 5809 return &bpf_msg_push_data_proto; 5810 case BPF_FUNC_msg_pop_data: 5811 return &bpf_msg_pop_data_proto; 5812 default: 5813 return bpf_base_func_proto(func_id); 5814 } 5815 } 5816 5817 const struct bpf_func_proto bpf_sk_redirect_map_proto __weak; 5818 const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak; 5819 5820 static const struct bpf_func_proto * 5821 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5822 { 5823 switch (func_id) { 5824 case 
BPF_FUNC_skb_store_bytes: 5825 return &bpf_skb_store_bytes_proto; 5826 case BPF_FUNC_skb_load_bytes: 5827 return &bpf_skb_load_bytes_proto; 5828 case BPF_FUNC_skb_pull_data: 5829 return &sk_skb_pull_data_proto; 5830 case BPF_FUNC_skb_change_tail: 5831 return &sk_skb_change_tail_proto; 5832 case BPF_FUNC_skb_change_head: 5833 return &sk_skb_change_head_proto; 5834 case BPF_FUNC_get_socket_cookie: 5835 return &bpf_get_socket_cookie_proto; 5836 case BPF_FUNC_get_socket_uid: 5837 return &bpf_get_socket_uid_proto; 5838 case BPF_FUNC_sk_redirect_map: 5839 return &bpf_sk_redirect_map_proto; 5840 case BPF_FUNC_sk_redirect_hash: 5841 return &bpf_sk_redirect_hash_proto; 5842 #ifdef CONFIG_INET 5843 case BPF_FUNC_sk_lookup_tcp: 5844 return &bpf_sk_lookup_tcp_proto; 5845 case BPF_FUNC_sk_lookup_udp: 5846 return &bpf_sk_lookup_udp_proto; 5847 case BPF_FUNC_sk_release: 5848 return &bpf_sk_release_proto; 5849 #endif 5850 default: 5851 return bpf_base_func_proto(func_id); 5852 } 5853 } 5854 5855 static const struct bpf_func_proto * 5856 flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5857 { 5858 switch (func_id) { 5859 case BPF_FUNC_skb_load_bytes: 5860 return &bpf_skb_load_bytes_proto; 5861 default: 5862 return bpf_base_func_proto(func_id); 5863 } 5864 } 5865 5866 static const struct bpf_func_proto * 5867 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5868 { 5869 switch (func_id) { 5870 case BPF_FUNC_skb_load_bytes: 5871 return &bpf_skb_load_bytes_proto; 5872 case BPF_FUNC_skb_pull_data: 5873 return &bpf_skb_pull_data_proto; 5874 case BPF_FUNC_csum_diff: 5875 return &bpf_csum_diff_proto; 5876 case BPF_FUNC_get_cgroup_classid: 5877 return &bpf_get_cgroup_classid_proto; 5878 case BPF_FUNC_get_route_realm: 5879 return &bpf_get_route_realm_proto; 5880 case BPF_FUNC_get_hash_recalc: 5881 return &bpf_get_hash_recalc_proto; 5882 case BPF_FUNC_perf_event_output: 5883 return &bpf_skb_event_output_proto; 5884 case BPF_FUNC_get_smp_processor_id: 5885 return &bpf_get_smp_processor_id_proto; 5886 case BPF_FUNC_skb_under_cgroup: 5887 return &bpf_skb_under_cgroup_proto; 5888 default: 5889 return bpf_base_func_proto(func_id); 5890 } 5891 } 5892 5893 static const struct bpf_func_proto * 5894 lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5895 { 5896 switch (func_id) { 5897 case BPF_FUNC_lwt_push_encap: 5898 return &bpf_lwt_in_push_encap_proto; 5899 default: 5900 return lwt_out_func_proto(func_id, prog); 5901 } 5902 } 5903 5904 static const struct bpf_func_proto * 5905 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5906 { 5907 switch (func_id) { 5908 case BPF_FUNC_skb_get_tunnel_key: 5909 return &bpf_skb_get_tunnel_key_proto; 5910 case BPF_FUNC_skb_set_tunnel_key: 5911 return bpf_get_skb_set_tunnel_proto(func_id); 5912 case BPF_FUNC_skb_get_tunnel_opt: 5913 return &bpf_skb_get_tunnel_opt_proto; 5914 case BPF_FUNC_skb_set_tunnel_opt: 5915 return bpf_get_skb_set_tunnel_proto(func_id); 5916 case BPF_FUNC_redirect: 5917 return &bpf_redirect_proto; 5918 case BPF_FUNC_clone_redirect: 5919 return &bpf_clone_redirect_proto; 5920 case BPF_FUNC_skb_change_tail: 5921 return &bpf_skb_change_tail_proto; 5922 case BPF_FUNC_skb_change_head: 5923 return &bpf_skb_change_head_proto; 5924 case BPF_FUNC_skb_store_bytes: 5925 return &bpf_skb_store_bytes_proto; 5926 case BPF_FUNC_csum_update: 5927 return &bpf_csum_update_proto; 5928 case BPF_FUNC_l3_csum_replace: 5929 return &bpf_l3_csum_replace_proto; 5930 case 
BPF_FUNC_l4_csum_replace: 5931 return &bpf_l4_csum_replace_proto; 5932 case BPF_FUNC_set_hash_invalid: 5933 return &bpf_set_hash_invalid_proto; 5934 case BPF_FUNC_lwt_push_encap: 5935 return &bpf_lwt_xmit_push_encap_proto; 5936 default: 5937 return lwt_out_func_proto(func_id, prog); 5938 } 5939 } 5940 5941 static const struct bpf_func_proto * 5942 lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 5943 { 5944 switch (func_id) { 5945 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 5946 case BPF_FUNC_lwt_seg6_store_bytes: 5947 return &bpf_lwt_seg6_store_bytes_proto; 5948 case BPF_FUNC_lwt_seg6_action: 5949 return &bpf_lwt_seg6_action_proto; 5950 case BPF_FUNC_lwt_seg6_adjust_srh: 5951 return &bpf_lwt_seg6_adjust_srh_proto; 5952 #endif 5953 default: 5954 return lwt_out_func_proto(func_id, prog); 5955 } 5956 } 5957 5958 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, 5959 const struct bpf_prog *prog, 5960 struct bpf_insn_access_aux *info) 5961 { 5962 const int size_default = sizeof(__u32); 5963 5964 if (off < 0 || off >= sizeof(struct __sk_buff)) 5965 return false; 5966 5967 /* The verifier guarantees that size > 0. */ 5968 if (off % size != 0) 5969 return false; 5970 5971 switch (off) { 5972 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 5973 if (off + size > offsetofend(struct __sk_buff, cb[4])) 5974 return false; 5975 break; 5976 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): 5977 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): 5978 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): 5979 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): 5980 case bpf_ctx_range(struct __sk_buff, data): 5981 case bpf_ctx_range(struct __sk_buff, data_meta): 5982 case bpf_ctx_range(struct __sk_buff, data_end): 5983 if (size != size_default) 5984 return false; 5985 break; 5986 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 5987 if (size != sizeof(__u64)) 5988 return false; 5989 break; 5990 case bpf_ctx_range(struct __sk_buff, tstamp): 5991 if (size != sizeof(__u64)) 5992 return false; 5993 break; 5994 case offsetof(struct __sk_buff, sk): 5995 if (type == BPF_WRITE || size != sizeof(__u64)) 5996 return false; 5997 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; 5998 break; 5999 default: 6000 /* Only narrow read access allowed for now. 
*/ 6001 if (type == BPF_WRITE) { 6002 if (size != size_default) 6003 return false; 6004 } else { 6005 bpf_ctx_record_field_size(info, size_default); 6006 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 6007 return false; 6008 } 6009 } 6010 6011 return true; 6012 } 6013 6014 static bool sk_filter_is_valid_access(int off, int size, 6015 enum bpf_access_type type, 6016 const struct bpf_prog *prog, 6017 struct bpf_insn_access_aux *info) 6018 { 6019 switch (off) { 6020 case bpf_ctx_range(struct __sk_buff, tc_classid): 6021 case bpf_ctx_range(struct __sk_buff, data): 6022 case bpf_ctx_range(struct __sk_buff, data_meta): 6023 case bpf_ctx_range(struct __sk_buff, data_end): 6024 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6025 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6026 case bpf_ctx_range(struct __sk_buff, tstamp): 6027 case bpf_ctx_range(struct __sk_buff, wire_len): 6028 return false; 6029 } 6030 6031 if (type == BPF_WRITE) { 6032 switch (off) { 6033 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 6034 break; 6035 default: 6036 return false; 6037 } 6038 } 6039 6040 return bpf_skb_is_valid_access(off, size, type, prog, info); 6041 } 6042 6043 static bool cg_skb_is_valid_access(int off, int size, 6044 enum bpf_access_type type, 6045 const struct bpf_prog *prog, 6046 struct bpf_insn_access_aux *info) 6047 { 6048 switch (off) { 6049 case bpf_ctx_range(struct __sk_buff, tc_classid): 6050 case bpf_ctx_range(struct __sk_buff, data_meta): 6051 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6052 case bpf_ctx_range(struct __sk_buff, wire_len): 6053 return false; 6054 case bpf_ctx_range(struct __sk_buff, data): 6055 case bpf_ctx_range(struct __sk_buff, data_end): 6056 if (!capable(CAP_SYS_ADMIN)) 6057 return false; 6058 break; 6059 } 6060 6061 if (type == BPF_WRITE) { 6062 switch (off) { 6063 case bpf_ctx_range(struct __sk_buff, mark): 6064 case bpf_ctx_range(struct __sk_buff, priority): 6065 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 6066 break; 6067 case bpf_ctx_range(struct __sk_buff, tstamp): 6068 if (!capable(CAP_SYS_ADMIN)) 6069 return false; 6070 break; 6071 default: 6072 return false; 6073 } 6074 } 6075 6076 switch (off) { 6077 case bpf_ctx_range(struct __sk_buff, data): 6078 info->reg_type = PTR_TO_PACKET; 6079 break; 6080 case bpf_ctx_range(struct __sk_buff, data_end): 6081 info->reg_type = PTR_TO_PACKET_END; 6082 break; 6083 } 6084 6085 return bpf_skb_is_valid_access(off, size, type, prog, info); 6086 } 6087 6088 static bool lwt_is_valid_access(int off, int size, 6089 enum bpf_access_type type, 6090 const struct bpf_prog *prog, 6091 struct bpf_insn_access_aux *info) 6092 { 6093 switch (off) { 6094 case bpf_ctx_range(struct __sk_buff, tc_classid): 6095 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6096 case bpf_ctx_range(struct __sk_buff, data_meta): 6097 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6098 case bpf_ctx_range(struct __sk_buff, tstamp): 6099 case bpf_ctx_range(struct __sk_buff, wire_len): 6100 return false; 6101 } 6102 6103 if (type == BPF_WRITE) { 6104 switch (off) { 6105 case bpf_ctx_range(struct __sk_buff, mark): 6106 case bpf_ctx_range(struct __sk_buff, priority): 6107 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 6108 break; 6109 default: 6110 return false; 6111 } 6112 } 6113 6114 switch (off) { 6115 case bpf_ctx_range(struct __sk_buff, data): 6116 info->reg_type = PTR_TO_PACKET; 6117 break; 6118 case bpf_ctx_range(struct __sk_buff, data_end): 6119 info->reg_type = 
PTR_TO_PACKET_END; 6120 break; 6121 } 6122 6123 return bpf_skb_is_valid_access(off, size, type, prog, info); 6124 } 6125 6126 /* Attach type specific accesses */ 6127 static bool __sock_filter_check_attach_type(int off, 6128 enum bpf_access_type access_type, 6129 enum bpf_attach_type attach_type) 6130 { 6131 switch (off) { 6132 case offsetof(struct bpf_sock, bound_dev_if): 6133 case offsetof(struct bpf_sock, mark): 6134 case offsetof(struct bpf_sock, priority): 6135 switch (attach_type) { 6136 case BPF_CGROUP_INET_SOCK_CREATE: 6137 goto full_access; 6138 default: 6139 return false; 6140 } 6141 case bpf_ctx_range(struct bpf_sock, src_ip4): 6142 switch (attach_type) { 6143 case BPF_CGROUP_INET4_POST_BIND: 6144 goto read_only; 6145 default: 6146 return false; 6147 } 6148 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 6149 switch (attach_type) { 6150 case BPF_CGROUP_INET6_POST_BIND: 6151 goto read_only; 6152 default: 6153 return false; 6154 } 6155 case bpf_ctx_range(struct bpf_sock, src_port): 6156 switch (attach_type) { 6157 case BPF_CGROUP_INET4_POST_BIND: 6158 case BPF_CGROUP_INET6_POST_BIND: 6159 goto read_only; 6160 default: 6161 return false; 6162 } 6163 } 6164 read_only: 6165 return access_type == BPF_READ; 6166 full_access: 6167 return true; 6168 } 6169 6170 bool bpf_sock_common_is_valid_access(int off, int size, 6171 enum bpf_access_type type, 6172 struct bpf_insn_access_aux *info) 6173 { 6174 switch (off) { 6175 case bpf_ctx_range_till(struct bpf_sock, type, priority): 6176 return false; 6177 default: 6178 return bpf_sock_is_valid_access(off, size, type, info); 6179 } 6180 } 6181 6182 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, 6183 struct bpf_insn_access_aux *info) 6184 { 6185 const int size_default = sizeof(__u32); 6186 6187 if (off < 0 || off >= sizeof(struct bpf_sock)) 6188 return false; 6189 if (off % size != 0) 6190 return false; 6191 6192 switch (off) { 6193 case offsetof(struct bpf_sock, state): 6194 case offsetof(struct bpf_sock, family): 6195 case offsetof(struct bpf_sock, type): 6196 case offsetof(struct bpf_sock, protocol): 6197 case offsetof(struct bpf_sock, dst_port): 6198 case offsetof(struct bpf_sock, src_port): 6199 case bpf_ctx_range(struct bpf_sock, src_ip4): 6200 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 6201 case bpf_ctx_range(struct bpf_sock, dst_ip4): 6202 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 6203 bpf_ctx_record_field_size(info, size_default); 6204 return bpf_ctx_narrow_access_ok(off, size, size_default); 6205 } 6206 6207 return size == size_default; 6208 } 6209 6210 static bool sock_filter_is_valid_access(int off, int size, 6211 enum bpf_access_type type, 6212 const struct bpf_prog *prog, 6213 struct bpf_insn_access_aux *info) 6214 { 6215 if (!bpf_sock_is_valid_access(off, size, type, info)) 6216 return false; 6217 return __sock_filter_check_attach_type(off, type, 6218 prog->expected_attach_type); 6219 } 6220 6221 static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, 6222 const struct bpf_prog *prog) 6223 { 6224 /* Neither direct read nor direct write requires any preliminary 6225 * action. 
6226 */ 6227 return 0; 6228 } 6229 6230 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, 6231 const struct bpf_prog *prog, int drop_verdict) 6232 { 6233 struct bpf_insn *insn = insn_buf; 6234 6235 if (!direct_write) 6236 return 0; 6237 6238 /* if (!skb->cloned) 6239 * goto start; 6240 * 6241 * (Fast-path, otherwise approximation that we might be 6242 * a clone, do the rest in helper.) 6243 */ 6244 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); 6245 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); 6246 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); 6247 6248 /* ret = bpf_skb_pull_data(skb, 0); */ 6249 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 6250 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); 6251 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 6252 BPF_FUNC_skb_pull_data); 6253 /* if (!ret) 6254 * goto restore; 6255 * return TC_ACT_SHOT; 6256 */ 6257 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); 6258 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); 6259 *insn++ = BPF_EXIT_INSN(); 6260 6261 /* restore: */ 6262 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); 6263 /* start: */ 6264 *insn++ = prog->insnsi[0]; 6265 6266 return insn - insn_buf; 6267 } 6268 6269 static int bpf_gen_ld_abs(const struct bpf_insn *orig, 6270 struct bpf_insn *insn_buf) 6271 { 6272 bool indirect = BPF_MODE(orig->code) == BPF_IND; 6273 struct bpf_insn *insn = insn_buf; 6274 6275 /* We're guaranteed here that CTX is in R6. */ 6276 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); 6277 if (!indirect) { 6278 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); 6279 } else { 6280 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); 6281 if (orig->imm) 6282 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); 6283 } 6284 6285 switch (BPF_SIZE(orig->code)) { 6286 case BPF_B: 6287 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); 6288 break; 6289 case BPF_H: 6290 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); 6291 break; 6292 case BPF_W: 6293 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); 6294 break; 6295 } 6296 6297 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); 6298 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); 6299 *insn++ = BPF_EXIT_INSN(); 6300 6301 return insn - insn_buf; 6302 } 6303 6304 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, 6305 const struct bpf_prog *prog) 6306 { 6307 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); 6308 } 6309 6310 static bool tc_cls_act_is_valid_access(int off, int size, 6311 enum bpf_access_type type, 6312 const struct bpf_prog *prog, 6313 struct bpf_insn_access_aux *info) 6314 { 6315 if (type == BPF_WRITE) { 6316 switch (off) { 6317 case bpf_ctx_range(struct __sk_buff, mark): 6318 case bpf_ctx_range(struct __sk_buff, tc_index): 6319 case bpf_ctx_range(struct __sk_buff, priority): 6320 case bpf_ctx_range(struct __sk_buff, tc_classid): 6321 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 6322 case bpf_ctx_range(struct __sk_buff, tstamp): 6323 case bpf_ctx_range(struct __sk_buff, queue_mapping): 6324 break; 6325 default: 6326 return false; 6327 } 6328 } 6329 6330 switch (off) { 6331 case bpf_ctx_range(struct __sk_buff, data): 6332 info->reg_type = PTR_TO_PACKET; 6333 break; 6334 case bpf_ctx_range(struct __sk_buff, data_meta): 6335 info->reg_type = PTR_TO_PACKET_META; 6336 break; 6337 case bpf_ctx_range(struct __sk_buff, data_end): 6338 info->reg_type = PTR_TO_PACKET_END; 6339 break; 6340 case 
bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6341 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6342 return false; 6343 } 6344 6345 return bpf_skb_is_valid_access(off, size, type, prog, info); 6346 } 6347 6348 static bool __is_valid_xdp_access(int off, int size) 6349 { 6350 if (off < 0 || off >= sizeof(struct xdp_md)) 6351 return false; 6352 if (off % size != 0) 6353 return false; 6354 if (size != sizeof(__u32)) 6355 return false; 6356 6357 return true; 6358 } 6359 6360 static bool xdp_is_valid_access(int off, int size, 6361 enum bpf_access_type type, 6362 const struct bpf_prog *prog, 6363 struct bpf_insn_access_aux *info) 6364 { 6365 if (type == BPF_WRITE) { 6366 if (bpf_prog_is_dev_bound(prog->aux)) { 6367 switch (off) { 6368 case offsetof(struct xdp_md, rx_queue_index): 6369 return __is_valid_xdp_access(off, size); 6370 } 6371 } 6372 return false; 6373 } 6374 6375 switch (off) { 6376 case offsetof(struct xdp_md, data): 6377 info->reg_type = PTR_TO_PACKET; 6378 break; 6379 case offsetof(struct xdp_md, data_meta): 6380 info->reg_type = PTR_TO_PACKET_META; 6381 break; 6382 case offsetof(struct xdp_md, data_end): 6383 info->reg_type = PTR_TO_PACKET_END; 6384 break; 6385 } 6386 6387 return __is_valid_xdp_access(off, size); 6388 } 6389 6390 void bpf_warn_invalid_xdp_action(u32 act) 6391 { 6392 const u32 act_max = XDP_REDIRECT; 6393 6394 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", 6395 act > act_max ? "Illegal" : "Driver unsupported", 6396 act); 6397 } 6398 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 6399 6400 static bool sock_addr_is_valid_access(int off, int size, 6401 enum bpf_access_type type, 6402 const struct bpf_prog *prog, 6403 struct bpf_insn_access_aux *info) 6404 { 6405 const int size_default = sizeof(__u32); 6406 6407 if (off < 0 || off >= sizeof(struct bpf_sock_addr)) 6408 return false; 6409 if (off % size != 0) 6410 return false; 6411 6412 /* Disallow access to IPv6 fields from IPv4 context and vice 6413 * versa. 6414 */ 6415 switch (off) { 6416 case bpf_ctx_range(struct bpf_sock_addr, user_ip4): 6417 switch (prog->expected_attach_type) { 6418 case BPF_CGROUP_INET4_BIND: 6419 case BPF_CGROUP_INET4_CONNECT: 6420 case BPF_CGROUP_UDP4_SENDMSG: 6421 break; 6422 default: 6423 return false; 6424 } 6425 break; 6426 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 6427 switch (prog->expected_attach_type) { 6428 case BPF_CGROUP_INET6_BIND: 6429 case BPF_CGROUP_INET6_CONNECT: 6430 case BPF_CGROUP_UDP6_SENDMSG: 6431 break; 6432 default: 6433 return false; 6434 } 6435 break; 6436 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): 6437 switch (prog->expected_attach_type) { 6438 case BPF_CGROUP_UDP4_SENDMSG: 6439 break; 6440 default: 6441 return false; 6442 } 6443 break; 6444 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 6445 msg_src_ip6[3]): 6446 switch (prog->expected_attach_type) { 6447 case BPF_CGROUP_UDP6_SENDMSG: 6448 break; 6449 default: 6450 return false; 6451 } 6452 break; 6453 } 6454 6455 switch (off) { 6456 case bpf_ctx_range(struct bpf_sock_addr, user_ip4): 6457 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 6458 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4): 6459 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 6460 msg_src_ip6[3]): 6461 /* Only narrow read access allowed for now.
*/ 6462 if (type == BPF_READ) { 6463 bpf_ctx_record_field_size(info, size_default); 6464 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 6465 return false; 6466 } else { 6467 if (size != size_default) 6468 return false; 6469 } 6470 break; 6471 case bpf_ctx_range(struct bpf_sock_addr, user_port): 6472 if (size != size_default) 6473 return false; 6474 break; 6475 default: 6476 if (type == BPF_READ) { 6477 if (size != size_default) 6478 return false; 6479 } else { 6480 return false; 6481 } 6482 } 6483 6484 return true; 6485 } 6486 6487 static bool sock_ops_is_valid_access(int off, int size, 6488 enum bpf_access_type type, 6489 const struct bpf_prog *prog, 6490 struct bpf_insn_access_aux *info) 6491 { 6492 const int size_default = sizeof(__u32); 6493 6494 if (off < 0 || off >= sizeof(struct bpf_sock_ops)) 6495 return false; 6496 6497 /* The verifier guarantees that size > 0. */ 6498 if (off % size != 0) 6499 return false; 6500 6501 if (type == BPF_WRITE) { 6502 switch (off) { 6503 case offsetof(struct bpf_sock_ops, reply): 6504 case offsetof(struct bpf_sock_ops, sk_txhash): 6505 if (size != size_default) 6506 return false; 6507 break; 6508 default: 6509 return false; 6510 } 6511 } else { 6512 switch (off) { 6513 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, 6514 bytes_acked): 6515 if (size != sizeof(__u64)) 6516 return false; 6517 break; 6518 default: 6519 if (size != size_default) 6520 return false; 6521 break; 6522 } 6523 } 6524 6525 return true; 6526 } 6527 6528 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, 6529 const struct bpf_prog *prog) 6530 { 6531 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); 6532 } 6533 6534 static bool sk_skb_is_valid_access(int off, int size, 6535 enum bpf_access_type type, 6536 const struct bpf_prog *prog, 6537 struct bpf_insn_access_aux *info) 6538 { 6539 switch (off) { 6540 case bpf_ctx_range(struct __sk_buff, tc_classid): 6541 case bpf_ctx_range(struct __sk_buff, data_meta): 6542 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6543 case bpf_ctx_range(struct __sk_buff, tstamp): 6544 case bpf_ctx_range(struct __sk_buff, wire_len): 6545 return false; 6546 } 6547 6548 if (type == BPF_WRITE) { 6549 switch (off) { 6550 case bpf_ctx_range(struct __sk_buff, tc_index): 6551 case bpf_ctx_range(struct __sk_buff, priority): 6552 break; 6553 default: 6554 return false; 6555 } 6556 } 6557 6558 switch (off) { 6559 case bpf_ctx_range(struct __sk_buff, mark): 6560 return false; 6561 case bpf_ctx_range(struct __sk_buff, data): 6562 info->reg_type = PTR_TO_PACKET; 6563 break; 6564 case bpf_ctx_range(struct __sk_buff, data_end): 6565 info->reg_type = PTR_TO_PACKET_END; 6566 break; 6567 } 6568 6569 return bpf_skb_is_valid_access(off, size, type, prog, info); 6570 } 6571 6572 static bool sk_msg_is_valid_access(int off, int size, 6573 enum bpf_access_type type, 6574 const struct bpf_prog *prog, 6575 struct bpf_insn_access_aux *info) 6576 { 6577 if (type == BPF_WRITE) 6578 return false; 6579 6580 if (off % size != 0) 6581 return false; 6582 6583 switch (off) { 6584 case offsetof(struct sk_msg_md, data): 6585 info->reg_type = PTR_TO_PACKET; 6586 if (size != sizeof(__u64)) 6587 return false; 6588 break; 6589 case offsetof(struct sk_msg_md, data_end): 6590 info->reg_type = PTR_TO_PACKET_END; 6591 if (size != sizeof(__u64)) 6592 return false; 6593 break; 6594 case bpf_ctx_range(struct sk_msg_md, family): 6595 case bpf_ctx_range(struct sk_msg_md, remote_ip4): 6596 case bpf_ctx_range(struct sk_msg_md, local_ip4): 
6597 case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]): 6598 case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]): 6599 case bpf_ctx_range(struct sk_msg_md, remote_port): 6600 case bpf_ctx_range(struct sk_msg_md, local_port): 6601 case bpf_ctx_range(struct sk_msg_md, size): 6602 if (size != sizeof(__u32)) 6603 return false; 6604 break; 6605 default: 6606 return false; 6607 } 6608 return true; 6609 } 6610 6611 static bool flow_dissector_is_valid_access(int off, int size, 6612 enum bpf_access_type type, 6613 const struct bpf_prog *prog, 6614 struct bpf_insn_access_aux *info) 6615 { 6616 if (type == BPF_WRITE) { 6617 switch (off) { 6618 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 6619 break; 6620 default: 6621 return false; 6622 } 6623 } 6624 6625 switch (off) { 6626 case bpf_ctx_range(struct __sk_buff, data): 6627 info->reg_type = PTR_TO_PACKET; 6628 break; 6629 case bpf_ctx_range(struct __sk_buff, data_end): 6630 info->reg_type = PTR_TO_PACKET_END; 6631 break; 6632 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): 6633 info->reg_type = PTR_TO_FLOW_KEYS; 6634 break; 6635 case bpf_ctx_range(struct __sk_buff, tc_classid): 6636 case bpf_ctx_range(struct __sk_buff, data_meta): 6637 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 6638 case bpf_ctx_range(struct __sk_buff, tstamp): 6639 case bpf_ctx_range(struct __sk_buff, wire_len): 6640 return false; 6641 } 6642 6643 return bpf_skb_is_valid_access(off, size, type, prog, info); 6644 } 6645 6646 static u32 bpf_convert_ctx_access(enum bpf_access_type type, 6647 const struct bpf_insn *si, 6648 struct bpf_insn *insn_buf, 6649 struct bpf_prog *prog, u32 *target_size) 6650 { 6651 struct bpf_insn *insn = insn_buf; 6652 int off; 6653 6654 switch (si->off) { 6655 case offsetof(struct __sk_buff, len): 6656 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6657 bpf_target_off(struct sk_buff, len, 4, 6658 target_size)); 6659 break; 6660 6661 case offsetof(struct __sk_buff, protocol): 6662 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6663 bpf_target_off(struct sk_buff, protocol, 2, 6664 target_size)); 6665 break; 6666 6667 case offsetof(struct __sk_buff, vlan_proto): 6668 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6669 bpf_target_off(struct sk_buff, vlan_proto, 2, 6670 target_size)); 6671 break; 6672 6673 case offsetof(struct __sk_buff, priority): 6674 if (type == BPF_WRITE) 6675 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 6676 bpf_target_off(struct sk_buff, priority, 4, 6677 target_size)); 6678 else 6679 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6680 bpf_target_off(struct sk_buff, priority, 4, 6681 target_size)); 6682 break; 6683 6684 case offsetof(struct __sk_buff, ingress_ifindex): 6685 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6686 bpf_target_off(struct sk_buff, skb_iif, 4, 6687 target_size)); 6688 break; 6689 6690 case offsetof(struct __sk_buff, ifindex): 6691 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 6692 si->dst_reg, si->src_reg, 6693 offsetof(struct sk_buff, dev)); 6694 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 6695 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 6696 bpf_target_off(struct net_device, ifindex, 4, 6697 target_size)); 6698 break; 6699 6700 case offsetof(struct __sk_buff, hash): 6701 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6702 bpf_target_off(struct sk_buff, hash, 4, 6703 target_size)); 6704 break; 6705 6706 case offsetof(struct __sk_buff, mark): 6707 if 
(type == BPF_WRITE) 6708 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 6709 bpf_target_off(struct sk_buff, mark, 4, 6710 target_size)); 6711 else 6712 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6713 bpf_target_off(struct sk_buff, mark, 4, 6714 target_size)); 6715 break; 6716 6717 case offsetof(struct __sk_buff, pkt_type): 6718 *target_size = 1; 6719 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, 6720 PKT_TYPE_OFFSET()); 6721 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); 6722 #ifdef __BIG_ENDIAN_BITFIELD 6723 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); 6724 #endif 6725 break; 6726 6727 case offsetof(struct __sk_buff, queue_mapping): 6728 if (type == BPF_WRITE) { 6729 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); 6730 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, 6731 bpf_target_off(struct sk_buff, 6732 queue_mapping, 6733 2, target_size)); 6734 } else { 6735 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6736 bpf_target_off(struct sk_buff, 6737 queue_mapping, 6738 2, target_size)); 6739 } 6740 break; 6741 6742 case offsetof(struct __sk_buff, vlan_present): 6743 *target_size = 1; 6744 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, 6745 PKT_VLAN_PRESENT_OFFSET()); 6746 if (PKT_VLAN_PRESENT_BIT) 6747 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); 6748 if (PKT_VLAN_PRESENT_BIT < 7) 6749 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); 6750 break; 6751 6752 case offsetof(struct __sk_buff, vlan_tci): 6753 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6754 bpf_target_off(struct sk_buff, vlan_tci, 2, 6755 target_size)); 6756 break; 6757 6758 case offsetof(struct __sk_buff, cb[0]) ... 6759 offsetofend(struct __sk_buff, cb[4]) - 1: 6760 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); 6761 BUILD_BUG_ON((offsetof(struct sk_buff, cb) + 6762 offsetof(struct qdisc_skb_cb, data)) % 6763 sizeof(__u64)); 6764 6765 prog->cb_access = 1; 6766 off = si->off; 6767 off -= offsetof(struct __sk_buff, cb[0]); 6768 off += offsetof(struct sk_buff, cb); 6769 off += offsetof(struct qdisc_skb_cb, data); 6770 if (type == BPF_WRITE) 6771 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, 6772 si->src_reg, off); 6773 else 6774 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, 6775 si->src_reg, off); 6776 break; 6777 6778 case offsetof(struct __sk_buff, tc_classid): 6779 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2); 6780 6781 off = si->off; 6782 off -= offsetof(struct __sk_buff, tc_classid); 6783 off += offsetof(struct sk_buff, cb); 6784 off += offsetof(struct qdisc_skb_cb, tc_classid); 6785 *target_size = 2; 6786 if (type == BPF_WRITE) 6787 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, 6788 si->src_reg, off); 6789 else 6790 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, 6791 si->src_reg, off); 6792 break; 6793 6794 case offsetof(struct __sk_buff, data): 6795 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), 6796 si->dst_reg, si->src_reg, 6797 offsetof(struct sk_buff, data)); 6798 break; 6799 6800 case offsetof(struct __sk_buff, data_meta): 6801 off = si->off; 6802 off -= offsetof(struct __sk_buff, data_meta); 6803 off += offsetof(struct sk_buff, cb); 6804 off += offsetof(struct bpf_skb_data_end, data_meta); 6805 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 6806 si->src_reg, off); 6807 break; 6808 6809 case offsetof(struct __sk_buff, data_end): 6810 off = si->off; 6811 off -= offsetof(struct __sk_buff, data_end); 6812 off += offsetof(struct 
sk_buff, cb); 6813 off += offsetof(struct bpf_skb_data_end, data_end); 6814 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 6815 si->src_reg, off); 6816 break; 6817 6818 case offsetof(struct __sk_buff, tc_index): 6819 #ifdef CONFIG_NET_SCHED 6820 if (type == BPF_WRITE) 6821 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, 6822 bpf_target_off(struct sk_buff, tc_index, 2, 6823 target_size)); 6824 else 6825 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 6826 bpf_target_off(struct sk_buff, tc_index, 2, 6827 target_size)); 6828 #else 6829 *target_size = 2; 6830 if (type == BPF_WRITE) 6831 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); 6832 else 6833 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 6834 #endif 6835 break; 6836 6837 case offsetof(struct __sk_buff, napi_id): 6838 #if defined(CONFIG_NET_RX_BUSY_POLL) 6839 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 6840 bpf_target_off(struct sk_buff, napi_id, 4, 6841 target_size)); 6842 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); 6843 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 6844 #else 6845 *target_size = 4; 6846 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); 6847 #endif 6848 break; 6849 case offsetof(struct __sk_buff, family): 6850 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); 6851 6852 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6853 si->dst_reg, si->src_reg, 6854 offsetof(struct sk_buff, sk)); 6855 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 6856 bpf_target_off(struct sock_common, 6857 skc_family, 6858 2, target_size)); 6859 break; 6860 case offsetof(struct __sk_buff, remote_ip4): 6861 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); 6862 6863 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6864 si->dst_reg, si->src_reg, 6865 offsetof(struct sk_buff, sk)); 6866 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 6867 bpf_target_off(struct sock_common, 6868 skc_daddr, 6869 4, target_size)); 6870 break; 6871 case offsetof(struct __sk_buff, local_ip4): 6872 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 6873 skc_rcv_saddr) != 4); 6874 6875 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6876 si->dst_reg, si->src_reg, 6877 offsetof(struct sk_buff, sk)); 6878 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 6879 bpf_target_off(struct sock_common, 6880 skc_rcv_saddr, 6881 4, target_size)); 6882 break; 6883 case offsetof(struct __sk_buff, remote_ip6[0]) ... 6884 offsetof(struct __sk_buff, remote_ip6[3]): 6885 #if IS_ENABLED(CONFIG_IPV6) 6886 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 6887 skc_v6_daddr.s6_addr32[0]) != 4); 6888 6889 off = si->off; 6890 off -= offsetof(struct __sk_buff, remote_ip6[0]); 6891 6892 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6893 si->dst_reg, si->src_reg, 6894 offsetof(struct sk_buff, sk)); 6895 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 6896 offsetof(struct sock_common, 6897 skc_v6_daddr.s6_addr32[0]) + 6898 off); 6899 #else 6900 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 6901 #endif 6902 break; 6903 case offsetof(struct __sk_buff, local_ip6[0]) ... 
6904 offsetof(struct __sk_buff, local_ip6[3]): 6905 #if IS_ENABLED(CONFIG_IPV6) 6906 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 6907 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 6908 6909 off = si->off; 6910 off -= offsetof(struct __sk_buff, local_ip6[0]); 6911 6912 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6913 si->dst_reg, si->src_reg, 6914 offsetof(struct sk_buff, sk)); 6915 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 6916 offsetof(struct sock_common, 6917 skc_v6_rcv_saddr.s6_addr32[0]) + 6918 off); 6919 #else 6920 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 6921 #endif 6922 break; 6923 6924 case offsetof(struct __sk_buff, remote_port): 6925 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); 6926 6927 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6928 si->dst_reg, si->src_reg, 6929 offsetof(struct sk_buff, sk)); 6930 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 6931 bpf_target_off(struct sock_common, 6932 skc_dport, 6933 2, target_size)); 6934 #ifndef __BIG_ENDIAN_BITFIELD 6935 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 6936 #endif 6937 break; 6938 6939 case offsetof(struct __sk_buff, local_port): 6940 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); 6941 6942 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 6943 si->dst_reg, si->src_reg, 6944 offsetof(struct sk_buff, sk)); 6945 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 6946 bpf_target_off(struct sock_common, 6947 skc_num, 2, target_size)); 6948 break; 6949 6950 case offsetof(struct __sk_buff, flow_keys): 6951 off = si->off; 6952 off -= offsetof(struct __sk_buff, flow_keys); 6953 off += offsetof(struct sk_buff, cb); 6954 off += offsetof(struct qdisc_skb_cb, flow_keys); 6955 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 6956 si->src_reg, off); 6957 break; 6958 6959 case offsetof(struct __sk_buff, tstamp): 6960 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); 6961 6962 if (type == BPF_WRITE) 6963 *insn++ = BPF_STX_MEM(BPF_DW, 6964 si->dst_reg, si->src_reg, 6965 bpf_target_off(struct sk_buff, 6966 tstamp, 8, 6967 target_size)); 6968 else 6969 *insn++ = BPF_LDX_MEM(BPF_DW, 6970 si->dst_reg, si->src_reg, 6971 bpf_target_off(struct sk_buff, 6972 tstamp, 8, 6973 target_size)); 6974 break; 6975 6976 case offsetof(struct __sk_buff, gso_segs): 6977 /* si->dst_reg = skb_shinfo(SKB); */ 6978 #ifdef NET_SKBUFF_DATA_USES_OFFSET 6979 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), 6980 si->dst_reg, si->src_reg, 6981 offsetof(struct sk_buff, head)); 6982 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), 6983 BPF_REG_AX, si->src_reg, 6984 offsetof(struct sk_buff, end)); 6985 *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); 6986 #else 6987 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), 6988 si->dst_reg, si->src_reg, 6989 offsetof(struct sk_buff, end)); 6990 #endif 6991 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), 6992 si->dst_reg, si->dst_reg, 6993 bpf_target_off(struct skb_shared_info, 6994 gso_segs, 2, 6995 target_size)); 6996 break; 6997 case offsetof(struct __sk_buff, wire_len): 6998 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); 6999 7000 off = si->off; 7001 off -= offsetof(struct __sk_buff, wire_len); 7002 off += offsetof(struct sk_buff, cb); 7003 off += offsetof(struct qdisc_skb_cb, pkt_len); 7004 *target_size = 4; 7005 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); 7006 break; 7007 7008 case offsetof(struct __sk_buff, 
sk): 7009 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), 7010 si->dst_reg, si->src_reg, 7011 offsetof(struct sk_buff, sk)); 7012 break; 7013 } 7014 7015 return insn - insn_buf; 7016 } 7017 7018 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, 7019 const struct bpf_insn *si, 7020 struct bpf_insn *insn_buf, 7021 struct bpf_prog *prog, u32 *target_size) 7022 { 7023 struct bpf_insn *insn = insn_buf; 7024 int off; 7025 7026 switch (si->off) { 7027 case offsetof(struct bpf_sock, bound_dev_if): 7028 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4); 7029 7030 if (type == BPF_WRITE) 7031 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 7032 offsetof(struct sock, sk_bound_dev_if)); 7033 else 7034 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7035 offsetof(struct sock, sk_bound_dev_if)); 7036 break; 7037 7038 case offsetof(struct bpf_sock, mark): 7039 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); 7040 7041 if (type == BPF_WRITE) 7042 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 7043 offsetof(struct sock, sk_mark)); 7044 else 7045 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7046 offsetof(struct sock, sk_mark)); 7047 break; 7048 7049 case offsetof(struct bpf_sock, priority): 7050 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); 7051 7052 if (type == BPF_WRITE) 7053 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 7054 offsetof(struct sock, sk_priority)); 7055 else 7056 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7057 offsetof(struct sock, sk_priority)); 7058 break; 7059 7060 case offsetof(struct bpf_sock, family): 7061 *insn++ = BPF_LDX_MEM( 7062 BPF_FIELD_SIZEOF(struct sock_common, skc_family), 7063 si->dst_reg, si->src_reg, 7064 bpf_target_off(struct sock_common, 7065 skc_family, 7066 FIELD_SIZEOF(struct sock_common, 7067 skc_family), 7068 target_size)); 7069 break; 7070 7071 case offsetof(struct bpf_sock, type): 7072 BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); 7073 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7074 offsetof(struct sock, __sk_flags_offset)); 7075 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); 7076 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); 7077 *target_size = 2; 7078 break; 7079 7080 case offsetof(struct bpf_sock, protocol): 7081 BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); 7082 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7083 offsetof(struct sock, __sk_flags_offset)); 7084 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); 7085 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); 7086 *target_size = 1; 7087 break; 7088 7089 case offsetof(struct bpf_sock, src_ip4): 7090 *insn++ = BPF_LDX_MEM( 7091 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 7092 bpf_target_off(struct sock_common, skc_rcv_saddr, 7093 FIELD_SIZEOF(struct sock_common, 7094 skc_rcv_saddr), 7095 target_size)); 7096 break; 7097 7098 case offsetof(struct bpf_sock, dst_ip4): 7099 *insn++ = BPF_LDX_MEM( 7100 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 7101 bpf_target_off(struct sock_common, skc_daddr, 7102 FIELD_SIZEOF(struct sock_common, 7103 skc_daddr), 7104 target_size)); 7105 break; 7106 7107 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): 7108 #if IS_ENABLED(CONFIG_IPV6) 7109 off = si->off; 7110 off -= offsetof(struct bpf_sock, src_ip6[0]); 7111 *insn++ = BPF_LDX_MEM( 7112 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 7113 bpf_target_off( 7114 struct sock_common, 7115 
skc_v6_rcv_saddr.s6_addr32[0], 7116 FIELD_SIZEOF(struct sock_common, 7117 skc_v6_rcv_saddr.s6_addr32[0]), 7118 target_size) + off); 7119 #else 7120 (void)off; 7121 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7122 #endif 7123 break; 7124 7125 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): 7126 #if IS_ENABLED(CONFIG_IPV6) 7127 off = si->off; 7128 off -= offsetof(struct bpf_sock, dst_ip6[0]); 7129 *insn++ = BPF_LDX_MEM( 7130 BPF_SIZE(si->code), si->dst_reg, si->src_reg, 7131 bpf_target_off(struct sock_common, 7132 skc_v6_daddr.s6_addr32[0], 7133 FIELD_SIZEOF(struct sock_common, 7134 skc_v6_daddr.s6_addr32[0]), 7135 target_size) + off); 7136 #else 7137 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7138 *target_size = 4; 7139 #endif 7140 break; 7141 7142 case offsetof(struct bpf_sock, src_port): 7143 *insn++ = BPF_LDX_MEM( 7144 BPF_FIELD_SIZEOF(struct sock_common, skc_num), 7145 si->dst_reg, si->src_reg, 7146 bpf_target_off(struct sock_common, skc_num, 7147 FIELD_SIZEOF(struct sock_common, 7148 skc_num), 7149 target_size)); 7150 break; 7151 7152 case offsetof(struct bpf_sock, dst_port): 7153 *insn++ = BPF_LDX_MEM( 7154 BPF_FIELD_SIZEOF(struct sock_common, skc_dport), 7155 si->dst_reg, si->src_reg, 7156 bpf_target_off(struct sock_common, skc_dport, 7157 FIELD_SIZEOF(struct sock_common, 7158 skc_dport), 7159 target_size)); 7160 break; 7161 7162 case offsetof(struct bpf_sock, state): 7163 *insn++ = BPF_LDX_MEM( 7164 BPF_FIELD_SIZEOF(struct sock_common, skc_state), 7165 si->dst_reg, si->src_reg, 7166 bpf_target_off(struct sock_common, skc_state, 7167 FIELD_SIZEOF(struct sock_common, 7168 skc_state), 7169 target_size)); 7170 break; 7171 } 7172 7173 return insn - insn_buf; 7174 } 7175 7176 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, 7177 const struct bpf_insn *si, 7178 struct bpf_insn *insn_buf, 7179 struct bpf_prog *prog, u32 *target_size) 7180 { 7181 struct bpf_insn *insn = insn_buf; 7182 7183 switch (si->off) { 7184 case offsetof(struct __sk_buff, ifindex): 7185 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 7186 si->dst_reg, si->src_reg, 7187 offsetof(struct sk_buff, dev)); 7188 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7189 bpf_target_off(struct net_device, ifindex, 4, 7190 target_size)); 7191 break; 7192 default: 7193 return bpf_convert_ctx_access(type, si, insn_buf, prog, 7194 target_size); 7195 } 7196 7197 return insn - insn_buf; 7198 } 7199 7200 static u32 xdp_convert_ctx_access(enum bpf_access_type type, 7201 const struct bpf_insn *si, 7202 struct bpf_insn *insn_buf, 7203 struct bpf_prog *prog, u32 *target_size) 7204 { 7205 struct bpf_insn *insn = insn_buf; 7206 7207 switch (si->off) { 7208 case offsetof(struct xdp_md, data): 7209 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), 7210 si->dst_reg, si->src_reg, 7211 offsetof(struct xdp_buff, data)); 7212 break; 7213 case offsetof(struct xdp_md, data_meta): 7214 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), 7215 si->dst_reg, si->src_reg, 7216 offsetof(struct xdp_buff, data_meta)); 7217 break; 7218 case offsetof(struct xdp_md, data_end): 7219 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), 7220 si->dst_reg, si->src_reg, 7221 offsetof(struct xdp_buff, data_end)); 7222 break; 7223 case offsetof(struct xdp_md, ingress_ifindex): 7224 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), 7225 si->dst_reg, si->src_reg, 7226 offsetof(struct xdp_buff, rxq)); 7227 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev), 
7228 si->dst_reg, si->dst_reg, 7229 offsetof(struct xdp_rxq_info, dev)); 7230 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7231 offsetof(struct net_device, ifindex)); 7232 break; 7233 case offsetof(struct xdp_md, rx_queue_index): 7234 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq), 7235 si->dst_reg, si->src_reg, 7236 offsetof(struct xdp_buff, rxq)); 7237 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7238 offsetof(struct xdp_rxq_info, 7239 queue_index)); 7240 break; 7241 } 7242 7243 return insn - insn_buf; 7244 } 7245 7246 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is the type of 7247 * the context Structure, F is the Field in the context structure that contains a pointer 7248 * to the Nested Structure of type NS that has the field NF. 7249 * 7250 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to make 7251 * sure that SIZE is not greater than the actual size of S.F.NF. 7252 * 7253 * If offset OFF is provided, the load happens from that offset relative to 7254 * the offset of NF. 7255 */ 7256 #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ 7257 do { \ 7258 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ 7259 si->src_reg, offsetof(S, F)); \ 7260 *insn++ = BPF_LDX_MEM( \ 7261 SIZE, si->dst_reg, si->dst_reg, \ 7262 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ 7263 target_size) \ 7264 + OFF); \ 7265 } while (0) 7266 7267 #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ 7268 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ 7269 BPF_FIELD_SIZEOF(NS, NF), 0) 7270 7271 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to 7272 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation. 7273 * 7274 * It doesn't support the SIZE argument, though, since narrow stores are not 7275 * supported for now. 7276 * 7277 * In addition it uses Temporary Field TF (member of struct S) as the 3rd 7278 * "register" since the two registers available in convert_ctx_access are not 7279 * enough: we can't override either SRC, since it contains the value to store, or 7280 * DST, since it contains the pointer to the context that may be used by later 7281 * instructions. But we need a temporary place to save the pointer to the nested 7282 * structure whose field we want to store to.
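 *
 * Roughly, the macro below emits this sequence (pseudo-C; tmp_reg is the
 * borrowed register, chosen so that it aliases neither SRC nor DST):
 *
 *   spill:    *(u64 *)(dst_reg + offsetof(S, TF)) = tmp_reg
 *   load ptr: tmp_reg = dst_reg->F                        (pointer to NS)
 *   store:    *(tmp_reg + offsetof(NS, NF) + OFF) = src_reg
 *   restore:  tmp_reg = *(u64 *)(dst_reg + offsetof(S, TF))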
7283 */ 7284 #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \ 7285 do { \ 7286 int tmp_reg = BPF_REG_9; \ 7287 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ 7288 --tmp_reg; \ 7289 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ 7290 --tmp_reg; \ 7291 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ 7292 offsetof(S, TF)); \ 7293 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ 7294 si->dst_reg, offsetof(S, F)); \ 7295 *insn++ = BPF_STX_MEM( \ 7296 BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \ 7297 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ 7298 target_size) \ 7299 + OFF); \ 7300 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ 7301 offsetof(S, TF)); \ 7302 } while (0) 7303 7304 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ 7305 TF) \ 7306 do { \ 7307 if (type == BPF_WRITE) { \ 7308 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \ 7309 TF); \ 7310 } else { \ 7311 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ 7312 S, NS, F, NF, SIZE, OFF); \ 7313 } \ 7314 } while (0) 7315 7316 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ 7317 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ 7318 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) 7319 7320 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, 7321 const struct bpf_insn *si, 7322 struct bpf_insn *insn_buf, 7323 struct bpf_prog *prog, u32 *target_size) 7324 { 7325 struct bpf_insn *insn = insn_buf; 7326 int off; 7327 7328 switch (si->off) { 7329 case offsetof(struct bpf_sock_addr, user_family): 7330 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 7331 struct sockaddr, uaddr, sa_family); 7332 break; 7333 7334 case offsetof(struct bpf_sock_addr, user_ip4): 7335 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 7336 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr, 7337 sin_addr, BPF_SIZE(si->code), 0, tmp_reg); 7338 break; 7339 7340 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): 7341 off = si->off; 7342 off -= offsetof(struct bpf_sock_addr, user_ip6[0]); 7343 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 7344 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, 7345 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off, 7346 tmp_reg); 7347 break; 7348 7349 case offsetof(struct bpf_sock_addr, user_port): 7350 /* To get port we need to know sa_family first and then treat 7351 * sockaddr as either sockaddr_in or sockaddr_in6. 7352 * Though we can simplify since port field has same offset and 7353 * size in both structures. 7354 * Here we check this invariant and use just one of the 7355 * structures if it's true. 
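 * (Both sin_port and sin6_port are __be16 fields at the same offset in
 * their respective sockaddr structures; the BUILD_BUG_ON()s below enforce
 * that invariant at compile time.)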
7356 */ 7357 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) != 7358 offsetof(struct sockaddr_in6, sin6_port)); 7359 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) != 7360 FIELD_SIZEOF(struct sockaddr_in6, sin6_port)); 7361 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern, 7362 struct sockaddr_in6, uaddr, 7363 sin6_port, tmp_reg); 7364 break; 7365 7366 case offsetof(struct bpf_sock_addr, family): 7367 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, 7368 struct sock, sk, sk_family); 7369 break; 7370 7371 case offsetof(struct bpf_sock_addr, type): 7372 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( 7373 struct bpf_sock_addr_kern, struct sock, sk, 7374 __sk_flags_offset, BPF_W, 0); 7375 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); 7376 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); 7377 break; 7378 7379 case offsetof(struct bpf_sock_addr, protocol): 7380 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( 7381 struct bpf_sock_addr_kern, struct sock, sk, 7382 __sk_flags_offset, BPF_W, 0); 7383 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); 7384 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 7385 SK_FL_PROTO_SHIFT); 7386 break; 7387 7388 case offsetof(struct bpf_sock_addr, msg_src_ip4): 7389 /* Treat t_ctx as struct in_addr for msg_src_ip4. */ 7390 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 7391 struct bpf_sock_addr_kern, struct in_addr, t_ctx, 7392 s_addr, BPF_SIZE(si->code), 0, tmp_reg); 7393 break; 7394 7395 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0], 7396 msg_src_ip6[3]): 7397 off = si->off; 7398 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]); 7399 /* Treat t_ctx as struct in6_addr for msg_src_ip6. */ 7400 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( 7401 struct bpf_sock_addr_kern, struct in6_addr, t_ctx, 7402 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg); 7403 break; 7404 } 7405 7406 return insn - insn_buf; 7407 } 7408 7409 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, 7410 const struct bpf_insn *si, 7411 struct bpf_insn *insn_buf, 7412 struct bpf_prog *prog, 7413 u32 *target_size) 7414 { 7415 struct bpf_insn *insn = insn_buf; 7416 int off; 7417 7418 /* Helper macro for adding read access to tcp_sock or sock fields. */ 7419 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7420 do { \ 7421 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7422 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7423 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7424 struct bpf_sock_ops_kern, \ 7425 is_fullsock), \ 7426 si->dst_reg, si->src_reg, \ 7427 offsetof(struct bpf_sock_ops_kern, \ 7428 is_fullsock)); \ 7429 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ 7430 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7431 struct bpf_sock_ops_kern, sk),\ 7432 si->dst_reg, si->src_reg, \ 7433 offsetof(struct bpf_sock_ops_kern, sk));\ 7434 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ 7435 OBJ_FIELD), \ 7436 si->dst_reg, si->dst_reg, \ 7437 offsetof(OBJ, OBJ_FIELD)); \ 7438 } while (0) 7439 7440 #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ 7441 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) 7442 7443 /* Helper macro for adding write access to tcp_sock or sock fields. 7444 * The macro is called with two registers, dst_reg which contains a pointer 7445 * to ctx (context) and src_reg which contains the value that should be 7446 * stored. However, we need an additional register since we cannot overwrite 7447 * dst_reg because it may be used later in the program. 
7448 * Instead we "borrow" one of the other registers. We first save its value 7449 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore 7450 * it at the end of the macro. 7451 */ 7452 #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ 7453 do { \ 7454 int reg = BPF_REG_9; \ 7455 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ 7456 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ 7457 if (si->dst_reg == reg || si->src_reg == reg) \ 7458 reg--; \ 7459 if (si->dst_reg == reg || si->src_reg == reg) \ 7460 reg--; \ 7461 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ 7462 offsetof(struct bpf_sock_ops_kern, \ 7463 temp)); \ 7464 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7465 struct bpf_sock_ops_kern, \ 7466 is_fullsock), \ 7467 reg, si->dst_reg, \ 7468 offsetof(struct bpf_sock_ops_kern, \ 7469 is_fullsock)); \ 7470 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ 7471 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ 7472 struct bpf_sock_ops_kern, sk),\ 7473 reg, si->dst_reg, \ 7474 offsetof(struct bpf_sock_ops_kern, sk));\ 7475 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ 7476 reg, si->src_reg, \ 7477 offsetof(OBJ, OBJ_FIELD)); \ 7478 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ 7479 offsetof(struct bpf_sock_ops_kern, \ 7480 temp)); \ 7481 } while (0) 7482 7483 #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ 7484 do { \ 7485 if (TYPE == BPF_WRITE) \ 7486 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7487 else \ 7488 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ 7489 } while (0) 7490 7491 CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops, 7492 SOCK_OPS_GET_TCP_SOCK_FIELD); 7493 7494 if (insn > insn_buf) 7495 return insn - insn_buf; 7496 7497 switch (si->off) { 7498 case offsetof(struct bpf_sock_ops, op) ...
7499 offsetof(struct bpf_sock_ops, replylong[3]): 7500 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) != 7501 FIELD_SIZEOF(struct bpf_sock_ops_kern, op)); 7502 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) != 7503 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply)); 7504 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) != 7505 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong)); 7506 off = si->off; 7507 off -= offsetof(struct bpf_sock_ops, op); 7508 off += offsetof(struct bpf_sock_ops_kern, op); 7509 if (type == BPF_WRITE) 7510 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 7511 off); 7512 else 7513 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 7514 off); 7515 break; 7516 7517 case offsetof(struct bpf_sock_ops, family): 7518 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); 7519 7520 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7521 struct bpf_sock_ops_kern, sk), 7522 si->dst_reg, si->src_reg, 7523 offsetof(struct bpf_sock_ops_kern, sk)); 7524 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7525 offsetof(struct sock_common, skc_family)); 7526 break; 7527 7528 case offsetof(struct bpf_sock_ops, remote_ip4): 7529 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); 7530 7531 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7532 struct bpf_sock_ops_kern, sk), 7533 si->dst_reg, si->src_reg, 7534 offsetof(struct bpf_sock_ops_kern, sk)); 7535 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7536 offsetof(struct sock_common, skc_daddr)); 7537 break; 7538 7539 case offsetof(struct bpf_sock_ops, local_ip4): 7540 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7541 skc_rcv_saddr) != 4); 7542 7543 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7544 struct bpf_sock_ops_kern, sk), 7545 si->dst_reg, si->src_reg, 7546 offsetof(struct bpf_sock_ops_kern, sk)); 7547 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7548 offsetof(struct sock_common, 7549 skc_rcv_saddr)); 7550 break; 7551 7552 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ... 7553 offsetof(struct bpf_sock_ops, remote_ip6[3]): 7554 #if IS_ENABLED(CONFIG_IPV6) 7555 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7556 skc_v6_daddr.s6_addr32[0]) != 4); 7557 7558 off = si->off; 7559 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]); 7560 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7561 struct bpf_sock_ops_kern, sk), 7562 si->dst_reg, si->src_reg, 7563 offsetof(struct bpf_sock_ops_kern, sk)); 7564 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7565 offsetof(struct sock_common, 7566 skc_v6_daddr.s6_addr32[0]) + 7567 off); 7568 #else 7569 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7570 #endif 7571 break; 7572 7573 case offsetof(struct bpf_sock_ops, local_ip6[0]) ... 
7574 offsetof(struct bpf_sock_ops, local_ip6[3]): 7575 #if IS_ENABLED(CONFIG_IPV6) 7576 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7577 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 7578 7579 off = si->off; 7580 off -= offsetof(struct bpf_sock_ops, local_ip6[0]); 7581 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7582 struct bpf_sock_ops_kern, sk), 7583 si->dst_reg, si->src_reg, 7584 offsetof(struct bpf_sock_ops_kern, sk)); 7585 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7586 offsetof(struct sock_common, 7587 skc_v6_rcv_saddr.s6_addr32[0]) + 7588 off); 7589 #else 7590 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7591 #endif 7592 break; 7593 7594 case offsetof(struct bpf_sock_ops, remote_port): 7595 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); 7596 7597 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7598 struct bpf_sock_ops_kern, sk), 7599 si->dst_reg, si->src_reg, 7600 offsetof(struct bpf_sock_ops_kern, sk)); 7601 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7602 offsetof(struct sock_common, skc_dport)); 7603 #ifndef __BIG_ENDIAN_BITFIELD 7604 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 7605 #endif 7606 break; 7607 7608 case offsetof(struct bpf_sock_ops, local_port): 7609 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); 7610 7611 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7612 struct bpf_sock_ops_kern, sk), 7613 si->dst_reg, si->src_reg, 7614 offsetof(struct bpf_sock_ops_kern, sk)); 7615 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7616 offsetof(struct sock_common, skc_num)); 7617 break; 7618 7619 case offsetof(struct bpf_sock_ops, is_fullsock): 7620 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7621 struct bpf_sock_ops_kern, 7622 is_fullsock), 7623 si->dst_reg, si->src_reg, 7624 offsetof(struct bpf_sock_ops_kern, 7625 is_fullsock)); 7626 break; 7627 7628 case offsetof(struct bpf_sock_ops, state): 7629 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1); 7630 7631 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7632 struct bpf_sock_ops_kern, sk), 7633 si->dst_reg, si->src_reg, 7634 offsetof(struct bpf_sock_ops_kern, sk)); 7635 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg, 7636 offsetof(struct sock_common, skc_state)); 7637 break; 7638 7639 case offsetof(struct bpf_sock_ops, rtt_min): 7640 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != 7641 sizeof(struct minmax)); 7642 BUILD_BUG_ON(sizeof(struct minmax) < 7643 sizeof(struct minmax_sample)); 7644 7645 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7646 struct bpf_sock_ops_kern, sk), 7647 si->dst_reg, si->src_reg, 7648 offsetof(struct bpf_sock_ops_kern, sk)); 7649 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7650 offsetof(struct tcp_sock, rtt_min) + 7651 FIELD_SIZEOF(struct minmax_sample, t)); 7652 break; 7653 7654 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): 7655 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, 7656 struct tcp_sock); 7657 break; 7658 7659 case offsetof(struct bpf_sock_ops, sk_txhash): 7660 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, 7661 struct sock, type); 7662 break; 7663 } 7664 return insn - insn_buf; 7665 } 7666 7667 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, 7668 const struct bpf_insn *si, 7669 struct bpf_insn *insn_buf, 7670 struct bpf_prog *prog, u32 *target_size) 7671 { 7672 struct bpf_insn *insn = insn_buf; 7673 int off; 7674 7675 switch (si->off) { 7676 case offsetof(struct __sk_buff, data_end): 7677 off = si->off; 7678 off -= offsetof(struct __sk_buff, data_end); 7679 off += offsetof(struct 
sk_buff, cb); 7680 off += offsetof(struct tcp_skb_cb, bpf.data_end); 7681 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 7682 si->src_reg, off); 7683 break; 7684 default: 7685 return bpf_convert_ctx_access(type, si, insn_buf, prog, 7686 target_size); 7687 } 7688 7689 return insn - insn_buf; 7690 } 7691 7692 static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, 7693 const struct bpf_insn *si, 7694 struct bpf_insn *insn_buf, 7695 struct bpf_prog *prog, u32 *target_size) 7696 { 7697 struct bpf_insn *insn = insn_buf; 7698 #if IS_ENABLED(CONFIG_IPV6) 7699 int off; 7700 #endif 7701 7702 /* convert ctx uses the fact sg element is first in struct */ 7703 BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0); 7704 7705 switch (si->off) { 7706 case offsetof(struct sk_msg_md, data): 7707 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data), 7708 si->dst_reg, si->src_reg, 7709 offsetof(struct sk_msg, data)); 7710 break; 7711 case offsetof(struct sk_msg_md, data_end): 7712 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end), 7713 si->dst_reg, si->src_reg, 7714 offsetof(struct sk_msg, data_end)); 7715 break; 7716 case offsetof(struct sk_msg_md, family): 7717 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); 7718 7719 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7720 struct sk_msg, sk), 7721 si->dst_reg, si->src_reg, 7722 offsetof(struct sk_msg, sk)); 7723 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7724 offsetof(struct sock_common, skc_family)); 7725 break; 7726 7727 case offsetof(struct sk_msg_md, remote_ip4): 7728 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); 7729 7730 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7731 struct sk_msg, sk), 7732 si->dst_reg, si->src_reg, 7733 offsetof(struct sk_msg, sk)); 7734 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7735 offsetof(struct sock_common, skc_daddr)); 7736 break; 7737 7738 case offsetof(struct sk_msg_md, local_ip4): 7739 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7740 skc_rcv_saddr) != 4); 7741 7742 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7743 struct sk_msg, sk), 7744 si->dst_reg, si->src_reg, 7745 offsetof(struct sk_msg, sk)); 7746 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7747 offsetof(struct sock_common, 7748 skc_rcv_saddr)); 7749 break; 7750 7751 case offsetof(struct sk_msg_md, remote_ip6[0]) ... 7752 offsetof(struct sk_msg_md, remote_ip6[3]): 7753 #if IS_ENABLED(CONFIG_IPV6) 7754 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7755 skc_v6_daddr.s6_addr32[0]) != 4); 7756 7757 off = si->off; 7758 off -= offsetof(struct sk_msg_md, remote_ip6[0]); 7759 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7760 struct sk_msg, sk), 7761 si->dst_reg, si->src_reg, 7762 offsetof(struct sk_msg, sk)); 7763 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7764 offsetof(struct sock_common, 7765 skc_v6_daddr.s6_addr32[0]) + 7766 off); 7767 #else 7768 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7769 #endif 7770 break; 7771 7772 case offsetof(struct sk_msg_md, local_ip6[0]) ... 
7773 offsetof(struct sk_msg_md, local_ip6[3]): 7774 #if IS_ENABLED(CONFIG_IPV6) 7775 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, 7776 skc_v6_rcv_saddr.s6_addr32[0]) != 4); 7777 7778 off = si->off; 7779 off -= offsetof(struct sk_msg_md, local_ip6[0]); 7780 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7781 struct sk_msg, sk), 7782 si->dst_reg, si->src_reg, 7783 offsetof(struct sk_msg, sk)); 7784 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 7785 offsetof(struct sock_common, 7786 skc_v6_rcv_saddr.s6_addr32[0]) + 7787 off); 7788 #else 7789 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 7790 #endif 7791 break; 7792 7793 case offsetof(struct sk_msg_md, remote_port): 7794 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); 7795 7796 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7797 struct sk_msg, sk), 7798 si->dst_reg, si->src_reg, 7799 offsetof(struct sk_msg, sk)); 7800 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7801 offsetof(struct sock_common, skc_dport)); 7802 #ifndef __BIG_ENDIAN_BITFIELD 7803 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); 7804 #endif 7805 break; 7806 7807 case offsetof(struct sk_msg_md, local_port): 7808 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); 7809 7810 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( 7811 struct sk_msg, sk), 7812 si->dst_reg, si->src_reg, 7813 offsetof(struct sk_msg, sk)); 7814 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, 7815 offsetof(struct sock_common, skc_num)); 7816 break; 7817 7818 case offsetof(struct sk_msg_md, size): 7819 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size), 7820 si->dst_reg, si->src_reg, 7821 offsetof(struct sk_msg_sg, size)); 7822 break; 7823 } 7824 7825 return insn - insn_buf; 7826 } 7827 7828 const struct bpf_verifier_ops sk_filter_verifier_ops = { 7829 .get_func_proto = sk_filter_func_proto, 7830 .is_valid_access = sk_filter_is_valid_access, 7831 .convert_ctx_access = bpf_convert_ctx_access, 7832 .gen_ld_abs = bpf_gen_ld_abs, 7833 }; 7834 7835 const struct bpf_prog_ops sk_filter_prog_ops = { 7836 .test_run = bpf_prog_test_run_skb, 7837 }; 7838 7839 const struct bpf_verifier_ops tc_cls_act_verifier_ops = { 7840 .get_func_proto = tc_cls_act_func_proto, 7841 .is_valid_access = tc_cls_act_is_valid_access, 7842 .convert_ctx_access = tc_cls_act_convert_ctx_access, 7843 .gen_prologue = tc_cls_act_prologue, 7844 .gen_ld_abs = bpf_gen_ld_abs, 7845 }; 7846 7847 const struct bpf_prog_ops tc_cls_act_prog_ops = { 7848 .test_run = bpf_prog_test_run_skb, 7849 }; 7850 7851 const struct bpf_verifier_ops xdp_verifier_ops = { 7852 .get_func_proto = xdp_func_proto, 7853 .is_valid_access = xdp_is_valid_access, 7854 .convert_ctx_access = xdp_convert_ctx_access, 7855 .gen_prologue = bpf_noop_prologue, 7856 }; 7857 7858 const struct bpf_prog_ops xdp_prog_ops = { 7859 .test_run = bpf_prog_test_run_xdp, 7860 }; 7861 7862 const struct bpf_verifier_ops cg_skb_verifier_ops = { 7863 .get_func_proto = cg_skb_func_proto, 7864 .is_valid_access = cg_skb_is_valid_access, 7865 .convert_ctx_access = bpf_convert_ctx_access, 7866 }; 7867 7868 const struct bpf_prog_ops cg_skb_prog_ops = { 7869 .test_run = bpf_prog_test_run_skb, 7870 }; 7871 7872 const struct bpf_verifier_ops lwt_in_verifier_ops = { 7873 .get_func_proto = lwt_in_func_proto, 7874 .is_valid_access = lwt_is_valid_access, 7875 .convert_ctx_access = bpf_convert_ctx_access, 7876 }; 7877 7878 const struct bpf_prog_ops lwt_in_prog_ops = { 7879 .test_run = bpf_prog_test_run_skb, 7880 }; 7881 7882 const struct bpf_verifier_ops lwt_out_verifier_ops = { 
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

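/*
 * Illustrative only (not part of the original file): sk_detach_filter()
 * backs the SO_DETACH_FILTER socket option.  A minimal user-space
 * sequence, assuming a classic BPF filter was attached earlier:
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 0x0000ffff },	// accept up to 64KiB
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *	int dummy = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 *	// ... receive traffic ...
 *	setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy));
 *
 * SO_DETACH_FILTER ignores the option value but still expects an int-sized
 * optlen; it fails with -EPERM when SO_LOCK_FILTER has locked the filter,
 * which is the SOCK_FILTER_LOCKED check above.
 */
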
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that was originally attached, so no
	 * conversion/decode is needed anymore. eBPF programs that have
	 * no original program cannot be dumped through this.
	 */
	ret = -EACCES;
	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	ret = fprog->len;
	if (!len)
		/* User space is only querying the number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* The API expects the number of filter blocks to be returned
	 * here, not the number of bytes copied.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}

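/*
 * Illustrative only (not part of the original file): sk_get_filter() is
 * reached through getsockopt(SO_GET_FILTER) and, as the comments above
 * note, optlen is counted in filter blocks rather than bytes.  The usual
 * two-step dump from user space looks roughly like:
 *
 *	socklen_t nblocks = 0;
 *
 *	// First call: optlen == 0, only the number of blocks is reported.
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &nblocks);
 *
 *	struct sock_filter *code = calloc(nblocks, sizeof(*code));
 *
 *	// Second call: pass the block count back in, receive the program.
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, code, &nblocks);
 *
 * Filters attached as eBPF (SO_ATTACH_BPF) have no orig_prog and make this
 * path return -EACCES instead.
 */
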
#ifdef CONFIG_INET
struct sk_reuseport_kern {
	struct sk_buff *skb;
	struct sock *sk;
	struct sock *selected_sk;
	void *data_end;
	u32 hash;
	u32 reuseport_id;
	bool bind_inany;
};

static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
				    struct sock_reuseport *reuse,
				    struct sock *sk, struct sk_buff *skb,
				    u32 hash)
{
	reuse_kern->skb = skb;
	reuse_kern->sk = sk;
	reuse_kern->selected_sk = NULL;
	reuse_kern->data_end = skb->data + skb_headlen(skb);
	reuse_kern->hash = hash;
	reuse_kern->reuseport_id = reuse->reuseport_id;
	reuse_kern->bind_inany = reuse->bind_inany;
}

struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
				  struct bpf_prog *prog, struct sk_buff *skb,
				  u32 hash)
{
	struct sk_reuseport_kern reuse_kern;
	enum sk_action action;

	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
	action = BPF_PROG_RUN(prog, &reuse_kern);

	if (action == SK_PASS)
		return reuse_kern.selected_sk;
	else
		return ERR_PTR(-ECONNREFUSED);
}

BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
	   struct bpf_map *, map, void *, key, u32, flags)
{
	struct sock_reuseport *reuse;
	struct sock *selected_sk;

	selected_sk = map->ops->map_lookup_elem(map, key);
	if (!selected_sk)
		return -ENOENT;

	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
	if (!reuse)
		/* selected_sk is unhashed (e.g. by close()) after the
		 * above map_lookup_elem().  Treat selected_sk as if it
		 * has already been removed from the map.
		 */
		return -ENOENT;

	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
		struct sock *sk;

		if (unlikely(!reuse_kern->reuseport_id))
			/* There is a small race between adding the
			 * sk to the map and setting the
			 * reuse_kern->reuseport_id.
			 * Treat it as if the sk has not been added to
			 * the bpf map yet.
			 */
			return -ENOENT;

		sk = reuse_kern->sk;
		if (sk->sk_protocol != selected_sk->sk_protocol)
			return -EPROTOTYPE;
		else if (sk->sk_family != selected_sk->sk_family)
			return -EAFNOSUPPORT;

		/* Catch all. Likely bound to a different sockaddr. */
		return -EBADFD;
	}

	reuse_kern->selected_sk = selected_sk;

	return 0;
}

static const struct bpf_func_proto sk_select_reuseport_proto = {
	.func		= sk_select_reuseport,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_PTR_TO_MAP_KEY,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(sk_reuseport_load_bytes,
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
	   void *, to, u32, len)
{
	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
}

static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
	.func		= sk_reuseport_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(sk_reuseport_load_bytes_relative,
	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
	   void *, to, u32, len, u32, start_header)
{
	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
					       len, start_header);
}

static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
	.func		= sk_reuseport_load_bytes_relative,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
sk_reuseport_func_proto(enum bpf_func_id func_id,
			const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sk_select_reuseport:
		return &sk_select_reuseport_proto;
	case BPF_FUNC_skb_load_bytes:
		return &sk_reuseport_load_bytes_proto;
	case BPF_FUNC_skb_load_bytes_relative:
		return &sk_reuseport_load_bytes_relative_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

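/*
 * Illustrative only (not part of the original file): from a
 * BPF_PROG_TYPE_SK_REUSEPORT program the two wrappers above are reached
 * through the generic skb_load_bytes helpers, e.g. pulling the IPv4 header
 * out of the packet (sketch, assuming an IPv4 packet and struct iphdr from
 * the uapi headers):
 *
 *	struct iphdr iph;
 *
 *	if (bpf_skb_load_bytes_relative(reuse_md, 0, &iph, sizeof(iph),
 *					BPF_HDR_START_NET))
 *		return SK_DROP;
 *	// iph.saddr / iph.protocol can now feed the selection decision.
 */
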
static bool
sk_reuseport_is_valid_access(int off, int size,
			     enum bpf_access_type type,
			     const struct bpf_prog *prog,
			     struct bpf_insn_access_aux *info)
{
	const u32 size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
	    off % size || type != BPF_READ)
		return false;

	switch (off) {
	case offsetof(struct sk_reuseport_md, data):
		info->reg_type = PTR_TO_PACKET;
		return size == sizeof(__u64);

	case offsetof(struct sk_reuseport_md, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		return size == sizeof(__u64);

	case offsetof(struct sk_reuseport_md, hash):
		return size == size_default;

	/* Fields that allow narrowing */
	case offsetof(struct sk_reuseport_md, eth_protocol):
		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
			return false;
		/* fall through */
	case offsetof(struct sk_reuseport_md, ip_protocol):
	case offsetof(struct sk_reuseport_md, bind_inany):
	case offsetof(struct sk_reuseport_md, len):
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);

	default:
		return false;
	}
}

#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
			      si->dst_reg, si->src_reg,			\
			      bpf_target_off(struct sk_reuseport_kern, F, \
					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
					     target_size));		\
	})

#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sk_buff,			\
				    skb,				\
				    SKB_FIELD)

#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
					     struct sock,		\
					     sk,			\
					     SK_FIELD, BPF_SIZE, EXTRA_OFF)

static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
					   const struct bpf_insn *si,
					   struct bpf_insn *insn_buf,
					   struct bpf_prog *prog,
					   u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct sk_reuseport_md, data):
		SK_REUSEPORT_LOAD_SKB_FIELD(data);
		break;

	case offsetof(struct sk_reuseport_md, len):
		SK_REUSEPORT_LOAD_SKB_FIELD(len);
		break;

	case offsetof(struct sk_reuseport_md, eth_protocol):
		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
		break;

	case offsetof(struct sk_reuseport_md, ip_protocol):
		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
						    BPF_W, 0);
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
					SK_FL_PROTO_SHIFT);
		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
		 * aware.  No further narrowing or masking is needed.
		 */
		*target_size = 1;
		break;

	case offsetof(struct sk_reuseport_md, data_end):
		SK_REUSEPORT_LOAD_FIELD(data_end);
		break;

	case offsetof(struct sk_reuseport_md, hash):
		SK_REUSEPORT_LOAD_FIELD(hash);
		break;

	case offsetof(struct sk_reuseport_md, bind_inany):
		SK_REUSEPORT_LOAD_FIELD(bind_inany);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
	.get_func_proto		= sk_reuseport_func_proto,
	.is_valid_access	= sk_reuseport_is_valid_access,
	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
};

const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
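
/*
 * Illustrative only (not part of the original file): a minimal
 * BPF_PROG_TYPE_SK_REUSEPORT setup, assuming libbpf and a
 * BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map called socks that user space has
 * populated with the listening sockets of the SO_REUSEPORT group.
 *
 * BPF side:
 *
 *	SEC("sk_reuseport")
 *	int select_sock(struct sk_reuseport_md *reuse_md)
 *	{
 *		__u32 key = reuse_md->hash % 2;
 *
 *		// Steer the connection to socks[key]; drop on lookup failure.
 *		if (bpf_sk_select_reuseport(reuse_md, &socks, &key, 0))
 *			return SK_DROP;
 *		return SK_PASS;
 *	}
 *
 * User-space side, once for the whole group (prog_fd comes from loading
 * the object above):
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
 *		   &prog_fd, sizeof(prog_fd));
 */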