/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <linux/bpf_trace.h>

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to the correct size returned
 * by the program. If pkt_len is 0 we toss the packet. If skb->len is smaller
 * than pkt_len we keep the whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ?
		      pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_0(__get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		if (skb_field == SKF_AD_VLAN_TAG) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			/* dst_reg >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
			/* dst_reg &= 1 */
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		}
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt =
		      convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *	bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
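			 *
			 * F.e. a classic 'ldh [SKF_AD_OFF + SKF_AD_PROTOCOL]'
			 * is rewritten by convert_bpf_extensions() above into
			 * a direct load of skb->protocol plus an endianness
			 * fixup, instead of an actual packet load.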
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
				/* Error with exception code on div/mod by 0.
				 * For cBPF programs, this was always return 0.
				 */
				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
				*insn++ = BPF_EXIT_INSN();
			}

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns.
		 * RET_A case doesn't need an extra mov as BPF_REG_0 is
		 * already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by the user never tries
 * to read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
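 *
 * F.e. a filter that does 'ld M[3]' without a prior 'st'/'stx' to that
 * scratch cell on the path leading to it is rejected below with -EINVAL,
 * tracked via one memvalid bit per cell.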
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;
	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited.
			 * --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);

/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts.
	 */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct bpf_prog *old_prog;
	int err;

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		return -ENOMEM;

	if (sk_unhashed(sk) && sk->sk_reuseport) {
		err = reuseport_alloc(sk);
		if (err)
			return err;
	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
		/* The socket wasn't bound with SO_REUSEPORT */
		return -EINVAL;
	}

	old_prog = reuseport_attach_prog(sk, prog);
	if (old_prog)
		bpf_prog_destroy(old_prog);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned.
 * On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct
bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.func		= bpf_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		csum_replace_by_diff(ptr, to);
		break;
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
	.func		= bpf_l3_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb,
					   offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	if (is_mmzero && !do_mforce && !*ptr)
		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	if (is_mmzero && !*ptr)
		*ptr = CSUM_MANGLED_0;
	return 0;
}

static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.func		= bpf_l4_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
	   __be32 *, to, u32, to_size, __wsum, seed)
{
	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	u32 diff_size = from_size + to_size;
	int i, j = 0;

	/* This is quite flexible, some examples:
	 *
	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
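	 *
	 * Rough usage sketch from a program's point of view: compute a
	 * diff over the bytes about to be rewritten via bpf_csum_diff()
	 * and feed that diff back here, so that skb->csum stays in sync
	 * with the packet data on CHECKSUM_COMPLETE skbs.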
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		return (skb->csum = csum_add(skb->csum, csum));

	return -ENOTSUPP;
}

static const struct bpf_func_proto bpf_csum_update_proto = {
	.func		= bpf_csum_update,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
	return dev_forward_skb(dev, skb);
}

static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
				      struct sk_buff *skb)
{
	int ret = ____dev_forward_skb(dev, skb);

	if (likely(!ret)) {
		skb->dev = dev;
		ret = netif_rx(skb);
	}

	return ret;
}

static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
	int ret;

	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
		kfree_skb(skb);
		return -ENETDOWN;
	}

	skb->dev = dev;

	__this_cpu_inc(xmit_recursion);
	ret = dev_queue_xmit(skb);
	__this_cpu_dec(xmit_recursion);

	return ret;
}

static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* skb->mac_len is not set on normal egress */
	unsigned int mlen = skb->network_header - skb->mac_header;

	__skb_pull(skb, mlen);

	/* At ingress, the mac header has already been pulled once.
	 * At egress, skb_postpull_rcsum has to be done in case the
	 * skb originated from ingress (i.e. a forwarded skb) to
	 * ensure that rcsum starts at the net header.
	 */
	if (!skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
	skb_pop_mac_header(skb);
	skb_reset_mac_len(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* Verify that a link layer header is carried */
	if (unlikely(skb->mac_header >= skb->network_header)) {
		kfree_skb(skb);
		return -ERANGE;
	}

	bpf_push_mac_rcsum(skb);
	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
}

static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
			  u32 flags)
{
	if (dev_is_mac_header_xmit(dev))
		return __bpf_redirect_common(skb, dev, flags);
	else
		return __bpf_redirect_no_mac(skb, dev, flags);
}

BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
	struct net_device *dev;
	struct sk_buff *clone;
	int ret;

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return -EINVAL;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
	if (unlikely(!dev))
		return -EINVAL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (unlikely(!clone))
		return -ENOMEM;

	/* For direct write, we need to keep the invariant that the skbs
	 * we're dealing with need to be uncloned. Should uncloning fail
	 * here, we need to free the just generated clone to unclone once
	 * again.
	 */
	ret = bpf_try_make_head_writable(skb);
	if (unlikely(ret)) {
		kfree_skb(clone);
		return -ENOMEM;
	}

	return __bpf_redirect(clone, dev, flags);
}

static const struct bpf_func_proto bpf_clone_redirect_proto = {
	.func		= bpf_clone_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};

struct redirect_info {
	u32 ifindex;
	u32 flags;
	struct bpf_map *map;
	struct bpf_map *map_to_flush;
	unsigned long map_owner;
};

static DEFINE_PER_CPU(struct redirect_info, redirect_info);

BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);

	if (unlikely(flags & ~(BPF_F_INGRESS)))
		return TC_ACT_SHOT;

	ri->ifindex = ifindex;
	ri->flags = flags;

	return TC_ACT_REDIRECT;
}

int skb_do_redirect(struct sk_buff *skb)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct net_device *dev;

	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
	ri->ifindex = 0;
	if (unlikely(!dev)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return __bpf_redirect(skb, dev, ri->flags);
}

static const struct bpf_func_proto bpf_redirect_proto = {
	.func		= bpf_redirect,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
	   struct bpf_map *, map, u32, key, u64, flags)
{
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	/* If user passes invalid input drop the packet. */
	if (unlikely(flags))
		return SK_DROP;

	tcb->bpf.key = key;
	tcb->bpf.flags = flags;
	tcb->bpf.map = map;

	return SK_PASS;
}

struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
	struct sock *sk = NULL;

	if (tcb->bpf.map) {
		sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);

		tcb->bpf.key = 0;
		tcb->bpf.map = NULL;
	}

	return sk;
}

static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
	.func		= bpf_sk_redirect_map,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
	return task_get_classid(skb);
}

static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
	.func		= bpf_get_cgroup_classid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
{
	return dst_tclassid(skb);
}

static const struct bpf_func_proto bpf_get_route_realm_proto = {
	.func		= bpf_get_route_realm,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
{
	/* If skb_clear_hash() was called due to mangling, we can
	 * trigger SW recalculation here.
	 * Later access to hash can then use the inline skb->hash via
	 * context directly instead of calling this helper again.
	 */
	return skb_get_hash(skb);
}

static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
	.func		= bpf_get_hash_recalc,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
{
	/* After all direct packet write, this can be used once for
	 * triggering a lazy recalc on next skb_get_hash() invocation.
	 */
	skb_clear_hash(skb);
	return 0;
}

static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
	.func		= bpf_set_hash_invalid,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
{
	/* Set user specified hash as L4(+), so that it gets returned
	 * on skb_get_hash() call unless BPF prog later on triggers a
	 * skb_clear_hash().
	 */
	__skb_set_sw_hash(skb, hash, true);
	return 0;
}

static const struct bpf_func_proto bpf_set_hash_proto = {
	.func		= bpf_set_hash,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
	   u16, vlan_tci)
{
	int ret;

	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
		     vlan_proto != htons(ETH_P_8021AD)))
		vlan_proto = htons(ETH_P_8021Q);

	bpf_push_mac_rcsum(skb);
	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
	bpf_pull_mac_rcsum(skb);

	bpf_compute_data_pointers(skb);
	return ret;
}

const struct bpf_func_proto bpf_skb_vlan_push_proto = {
	.func		= bpf_skb_vlan_push,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);

BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
{
	int ret;

	bpf_push_mac_rcsum(skb);
	ret = skb_vlan_pop(skb);
	bpf_pull_mac_rcsum(skb);

	bpf_compute_data_pointers(skb);
	return ret;
}

const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
	.func		= bpf_skb_vlan_pop,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);

static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
	/* Caller already did skb_cow() with len as headroom,
	 * so no need to do it here.
	 */
	skb_push(skb, len);
	memmove(skb->data, skb->data + len, off);
	memset(skb->data + off, 0, len);

	/* No skb_postpush_rcsum(skb, skb->data + off, len)
	 * needed here as it does not change the skb->csum
	 * result for checksum complete when summing over
	 * zeroed blocks.
	 */
	return 0;
}

static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
	/* skb_ensure_writable() is not needed here, as we're
	 * already working on an uncloned skb.
2036 */ 2037 if (unlikely(!pskb_may_pull(skb, off + len))) 2038 return -ENOMEM; 2039 2040 skb_postpull_rcsum(skb, skb->data + off, len); 2041 memmove(skb->data + len, skb->data, off); 2042 __skb_pull(skb, len); 2043 2044 return 0; 2045 } 2046 2047 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) 2048 { 2049 bool trans_same = skb->transport_header == skb->network_header; 2050 int ret; 2051 2052 /* There's no need for __skb_push()/__skb_pull() pair to 2053 * get to the start of the mac header as we're guaranteed 2054 * to always start from here under eBPF. 2055 */ 2056 ret = bpf_skb_generic_push(skb, off, len); 2057 if (likely(!ret)) { 2058 skb->mac_header -= len; 2059 skb->network_header -= len; 2060 if (trans_same) 2061 skb->transport_header = skb->network_header; 2062 } 2063 2064 return ret; 2065 } 2066 2067 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) 2068 { 2069 bool trans_same = skb->transport_header == skb->network_header; 2070 int ret; 2071 2072 /* Same here, __skb_push()/__skb_pull() pair not needed. */ 2073 ret = bpf_skb_generic_pop(skb, off, len); 2074 if (likely(!ret)) { 2075 skb->mac_header += len; 2076 skb->network_header += len; 2077 if (trans_same) 2078 skb->transport_header = skb->network_header; 2079 } 2080 2081 return ret; 2082 } 2083 2084 static int bpf_skb_proto_4_to_6(struct sk_buff *skb) 2085 { 2086 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 2087 u32 off = skb_mac_header_len(skb); 2088 int ret; 2089 2090 ret = skb_cow(skb, len_diff); 2091 if (unlikely(ret < 0)) 2092 return ret; 2093 2094 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 2095 if (unlikely(ret < 0)) 2096 return ret; 2097 2098 if (skb_is_gso(skb)) { 2099 /* SKB_GSO_TCPV4 needs to be changed into 2100 * SKB_GSO_TCPV6. 2101 */ 2102 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { 2103 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; 2104 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; 2105 } 2106 2107 /* Due to IPv6 header, MSS needs to be downgraded. */ 2108 skb_shinfo(skb)->gso_size -= len_diff; 2109 /* Header must be checked, and gso_segs recomputed. */ 2110 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 2111 skb_shinfo(skb)->gso_segs = 0; 2112 } 2113 2114 skb->protocol = htons(ETH_P_IPV6); 2115 skb_clear_hash(skb); 2116 2117 return 0; 2118 } 2119 2120 static int bpf_skb_proto_6_to_4(struct sk_buff *skb) 2121 { 2122 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); 2123 u32 off = skb_mac_header_len(skb); 2124 int ret; 2125 2126 ret = skb_unclone(skb, GFP_ATOMIC); 2127 if (unlikely(ret < 0)) 2128 return ret; 2129 2130 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 2131 if (unlikely(ret < 0)) 2132 return ret; 2133 2134 if (skb_is_gso(skb)) { 2135 /* SKB_GSO_TCPV6 needs to be changed into 2136 * SKB_GSO_TCPV4. 2137 */ 2138 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { 2139 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; 2140 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; 2141 } 2142 2143 /* Due to IPv4 header, MSS can be upgraded. */ 2144 skb_shinfo(skb)->gso_size += len_diff; 2145 /* Header must be checked, and gso_segs recomputed. 
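 * (SKB_GSO_DODGY makes the stack revalidate the GSO metadata; gso_segs = 0 forces it to be recounted at segmentation time.)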
*/ 2146 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 2147 skb_shinfo(skb)->gso_segs = 0; 2148 } 2149 2150 skb->protocol = htons(ETH_P_IP); 2151 skb_clear_hash(skb); 2152 2153 return 0; 2154 } 2155 2156 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) 2157 { 2158 __be16 from_proto = skb->protocol; 2159 2160 if (from_proto == htons(ETH_P_IP) && 2161 to_proto == htons(ETH_P_IPV6)) 2162 return bpf_skb_proto_4_to_6(skb); 2163 2164 if (from_proto == htons(ETH_P_IPV6) && 2165 to_proto == htons(ETH_P_IP)) 2166 return bpf_skb_proto_6_to_4(skb); 2167 2168 return -ENOTSUPP; 2169 } 2170 2171 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, 2172 u64, flags) 2173 { 2174 int ret; 2175 2176 if (unlikely(flags)) 2177 return -EINVAL; 2178 2179 /* General idea is that this helper does the basic groundwork 2180 * needed for changing the protocol, and eBPF program fills the 2181 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() 2182 * and other helpers, rather than passing a raw buffer here. 2183 * 2184 * The rationale is to keep this minimal and without a need to 2185 * deal with raw packet data. F.e. even if we would pass buffers 2186 * here, the program still needs to call the bpf_lX_csum_replace() 2187 * helpers anyway. Plus, this way we keep also separation of 2188 * concerns, since f.e. bpf_skb_store_bytes() should only take 2189 * care of stores. 2190 * 2191 * Currently, additional options and extension header space are 2192 * not supported, but flags register is reserved so we can adapt 2193 * that. For offloads, we mark packet as dodgy, so that headers 2194 * need to be verified first. 2195 */ 2196 ret = bpf_skb_proto_xlat(skb, proto); 2197 bpf_compute_data_pointers(skb); 2198 return ret; 2199 } 2200 2201 static const struct bpf_func_proto bpf_skb_change_proto_proto = { 2202 .func = bpf_skb_change_proto, 2203 .gpl_only = false, 2204 .ret_type = RET_INTEGER, 2205 .arg1_type = ARG_PTR_TO_CTX, 2206 .arg2_type = ARG_ANYTHING, 2207 .arg3_type = ARG_ANYTHING, 2208 }; 2209 2210 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) 2211 { 2212 /* We only allow a restricted subset to be changed for now. */ 2213 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || 2214 !skb_pkt_type_ok(pkt_type))) 2215 return -EINVAL; 2216 2217 skb->pkt_type = pkt_type; 2218 return 0; 2219 } 2220 2221 static const struct bpf_func_proto bpf_skb_change_type_proto = { 2222 .func = bpf_skb_change_type, 2223 .gpl_only = false, 2224 .ret_type = RET_INTEGER, 2225 .arg1_type = ARG_PTR_TO_CTX, 2226 .arg2_type = ARG_ANYTHING, 2227 }; 2228 2229 static u32 bpf_skb_net_base_len(const struct sk_buff *skb) 2230 { 2231 switch (skb->protocol) { 2232 case htons(ETH_P_IP): 2233 return sizeof(struct iphdr); 2234 case htons(ETH_P_IPV6): 2235 return sizeof(struct ipv6hdr); 2236 default: 2237 return ~0U; 2238 } 2239 } 2240 2241 static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) 2242 { 2243 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); 2244 int ret; 2245 2246 ret = skb_cow(skb, len_diff); 2247 if (unlikely(ret < 0)) 2248 return ret; 2249 2250 ret = bpf_skb_net_hdr_push(skb, off, len_diff); 2251 if (unlikely(ret < 0)) 2252 return ret; 2253 2254 if (skb_is_gso(skb)) { 2255 /* Due to header grow, MSS needs to be downgraded. */ 2256 skb_shinfo(skb)->gso_size -= len_diff; 2257 /* Header must be checked, and gso_segs recomputed. 
*/ 2258 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 2259 skb_shinfo(skb)->gso_segs = 0; 2260 } 2261 2262 return 0; 2263 } 2264 2265 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) 2266 { 2267 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); 2268 int ret; 2269 2270 ret = skb_unclone(skb, GFP_ATOMIC); 2271 if (unlikely(ret < 0)) 2272 return ret; 2273 2274 ret = bpf_skb_net_hdr_pop(skb, off, len_diff); 2275 if (unlikely(ret < 0)) 2276 return ret; 2277 2278 if (skb_is_gso(skb)) { 2279 /* Due to header shrink, MSS can be upgraded. */ 2280 skb_shinfo(skb)->gso_size += len_diff; 2281 /* Header must be checked, and gso_segs recomputed. */ 2282 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 2283 skb_shinfo(skb)->gso_segs = 0; 2284 } 2285 2286 return 0; 2287 } 2288 2289 static u32 __bpf_skb_max_len(const struct sk_buff *skb) 2290 { 2291 return skb->dev->mtu + skb->dev->hard_header_len; 2292 } 2293 2294 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) 2295 { 2296 bool trans_same = skb->transport_header == skb->network_header; 2297 u32 len_cur, len_diff_abs = abs(len_diff); 2298 u32 len_min = bpf_skb_net_base_len(skb); 2299 u32 len_max = __bpf_skb_max_len(skb); 2300 __be16 proto = skb->protocol; 2301 bool shrink = len_diff < 0; 2302 int ret; 2303 2304 if (unlikely(len_diff_abs > 0xfffU)) 2305 return -EFAULT; 2306 if (unlikely(proto != htons(ETH_P_IP) && 2307 proto != htons(ETH_P_IPV6))) 2308 return -ENOTSUPP; 2309 2310 len_cur = skb->len - skb_network_offset(skb); 2311 if (skb_transport_header_was_set(skb) && !trans_same) 2312 len_cur = skb_network_header_len(skb); 2313 if ((shrink && (len_diff_abs >= len_cur || 2314 len_cur - len_diff_abs < len_min)) || 2315 (!shrink && (skb->len + len_diff_abs > len_max && 2316 !skb_is_gso(skb)))) 2317 return -ENOTSUPP; 2318 2319 ret = shrink ? 
bpf_skb_net_shrink(skb, len_diff_abs) : 2320 bpf_skb_net_grow(skb, len_diff_abs); 2321 2322 bpf_compute_data_pointers(skb); 2323 return ret; 2324 } 2325 2326 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, 2327 u32, mode, u64, flags) 2328 { 2329 if (unlikely(flags)) 2330 return -EINVAL; 2331 if (likely(mode == BPF_ADJ_ROOM_NET)) 2332 return bpf_skb_adjust_net(skb, len_diff); 2333 2334 return -ENOTSUPP; 2335 } 2336 2337 static const struct bpf_func_proto bpf_skb_adjust_room_proto = { 2338 .func = bpf_skb_adjust_room, 2339 .gpl_only = false, 2340 .ret_type = RET_INTEGER, 2341 .arg1_type = ARG_PTR_TO_CTX, 2342 .arg2_type = ARG_ANYTHING, 2343 .arg3_type = ARG_ANYTHING, 2344 .arg4_type = ARG_ANYTHING, 2345 }; 2346 2347 static u32 __bpf_skb_min_len(const struct sk_buff *skb) 2348 { 2349 u32 min_len = skb_network_offset(skb); 2350 2351 if (skb_transport_header_was_set(skb)) 2352 min_len = skb_transport_offset(skb); 2353 if (skb->ip_summed == CHECKSUM_PARTIAL) 2354 min_len = skb_checksum_start_offset(skb) + 2355 skb->csum_offset + sizeof(__sum16); 2356 return min_len; 2357 } 2358 2359 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) 2360 { 2361 unsigned int old_len = skb->len; 2362 int ret; 2363 2364 ret = __skb_grow_rcsum(skb, new_len); 2365 if (!ret) 2366 memset(skb->data + old_len, 0, new_len - old_len); 2367 return ret; 2368 } 2369 2370 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) 2371 { 2372 return __skb_trim_rcsum(skb, new_len); 2373 } 2374 2375 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, 2376 u64, flags) 2377 { 2378 u32 max_len = __bpf_skb_max_len(skb); 2379 u32 min_len = __bpf_skb_min_len(skb); 2380 int ret; 2381 2382 if (unlikely(flags || new_len > max_len || new_len < min_len)) 2383 return -EINVAL; 2384 if (skb->encapsulation) 2385 return -ENOTSUPP; 2386 2387 /* The basic idea of this helper is that it's performing the 2388 * needed work to either grow or trim an skb, and eBPF program 2389 * rewrites the rest via helpers like bpf_skb_store_bytes(), 2390 * bpf_lX_csum_replace() and others rather than passing a raw 2391 * buffer here. This one is a slow path helper and intended 2392 * for replies with control messages. 2393 * 2394 * Like in bpf_skb_change_proto(), we want to keep this rather 2395 * minimal and without protocol specifics so that we are able 2396 * to separate concerns as in bpf_skb_store_bytes() should only 2397 * be the one responsible for writing buffers. 2398 * 2399 * It's really expected to be a slow path operation here for 2400 * control message replies, so we're implicitly linearizing, 2401 * uncloning and drop offloads from the skb by this. 
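 * (__bpf_try_make_writable() with skb->len as length linearizes the skb and unclones it if needed; skb_gso_reset() below then drops the GSO state.)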
2402 */ 2403 ret = __bpf_try_make_writable(skb, skb->len); 2404 if (!ret) { 2405 if (new_len > skb->len) 2406 ret = bpf_skb_grow_rcsum(skb, new_len); 2407 else if (new_len < skb->len) 2408 ret = bpf_skb_trim_rcsum(skb, new_len); 2409 if (!ret && skb_is_gso(skb)) 2410 skb_gso_reset(skb); 2411 } 2412 2413 bpf_compute_data_pointers(skb); 2414 return ret; 2415 } 2416 2417 static const struct bpf_func_proto bpf_skb_change_tail_proto = { 2418 .func = bpf_skb_change_tail, 2419 .gpl_only = false, 2420 .ret_type = RET_INTEGER, 2421 .arg1_type = ARG_PTR_TO_CTX, 2422 .arg2_type = ARG_ANYTHING, 2423 .arg3_type = ARG_ANYTHING, 2424 }; 2425 2426 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, 2427 u64, flags) 2428 { 2429 u32 max_len = __bpf_skb_max_len(skb); 2430 u32 new_len = skb->len + head_room; 2431 int ret; 2432 2433 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || 2434 new_len < skb->len)) 2435 return -EINVAL; 2436 2437 ret = skb_cow(skb, head_room); 2438 if (likely(!ret)) { 2439 /* Idea for this helper is that we currently only 2440 * allow to expand on mac header. This means that 2441 * skb->protocol network header, etc, stay as is. 2442 * Compared to bpf_skb_change_tail(), we're more 2443 * flexible due to not needing to linearize or 2444 * reset GSO. Intention for this helper is to be 2445 * used by an L3 skb that needs to push mac header 2446 * for redirection into L2 device. 2447 */ 2448 __skb_push(skb, head_room); 2449 memset(skb->data, 0, head_room); 2450 skb_reset_mac_header(skb); 2451 } 2452 2453 bpf_compute_data_pointers(skb); 2454 return 0; 2455 } 2456 2457 static const struct bpf_func_proto bpf_skb_change_head_proto = { 2458 .func = bpf_skb_change_head, 2459 .gpl_only = false, 2460 .ret_type = RET_INTEGER, 2461 .arg1_type = ARG_PTR_TO_CTX, 2462 .arg2_type = ARG_ANYTHING, 2463 .arg3_type = ARG_ANYTHING, 2464 }; 2465 2466 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) 2467 { 2468 return xdp_data_meta_unsupported(xdp) ? 
0 : 2469 xdp->data - xdp->data_meta; 2470 } 2471 2472 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) 2473 { 2474 unsigned long metalen = xdp_get_metalen(xdp); 2475 void *data_start = xdp->data_hard_start + metalen; 2476 void *data = xdp->data + offset; 2477 2478 if (unlikely(data < data_start || 2479 data > xdp->data_end - ETH_HLEN)) 2480 return -EINVAL; 2481 2482 if (metalen) 2483 memmove(xdp->data_meta + offset, 2484 xdp->data_meta, metalen); 2485 xdp->data_meta += offset; 2486 xdp->data = data; 2487 2488 return 0; 2489 } 2490 2491 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { 2492 .func = bpf_xdp_adjust_head, 2493 .gpl_only = false, 2494 .ret_type = RET_INTEGER, 2495 .arg1_type = ARG_PTR_TO_CTX, 2496 .arg2_type = ARG_ANYTHING, 2497 }; 2498 2499 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) 2500 { 2501 void *meta = xdp->data_meta + offset; 2502 unsigned long metalen = xdp->data - meta; 2503 2504 if (xdp_data_meta_unsupported(xdp)) 2505 return -ENOTSUPP; 2506 if (unlikely(meta < xdp->data_hard_start || 2507 meta > xdp->data)) 2508 return -EINVAL; 2509 if (unlikely((metalen & (sizeof(__u32) - 1)) || 2510 (metalen > 32))) 2511 return -EACCES; 2512 2513 xdp->data_meta = meta; 2514 2515 return 0; 2516 } 2517 2518 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { 2519 .func = bpf_xdp_adjust_meta, 2520 .gpl_only = false, 2521 .ret_type = RET_INTEGER, 2522 .arg1_type = ARG_PTR_TO_CTX, 2523 .arg2_type = ARG_ANYTHING, 2524 }; 2525 2526 static int __bpf_tx_xdp(struct net_device *dev, 2527 struct bpf_map *map, 2528 struct xdp_buff *xdp, 2529 u32 index) 2530 { 2531 int err; 2532 2533 if (!dev->netdev_ops->ndo_xdp_xmit) { 2534 return -EOPNOTSUPP; 2535 } 2536 2537 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); 2538 if (err) 2539 return err; 2540 dev->netdev_ops->ndo_xdp_flush(dev); 2541 return 0; 2542 } 2543 2544 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, 2545 struct bpf_map *map, 2546 struct xdp_buff *xdp, 2547 u32 index) 2548 { 2549 int err; 2550 2551 if (map->map_type == BPF_MAP_TYPE_DEVMAP) { 2552 struct net_device *dev = fwd; 2553 2554 if (!dev->netdev_ops->ndo_xdp_xmit) 2555 return -EOPNOTSUPP; 2556 2557 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); 2558 if (err) 2559 return err; 2560 __dev_map_insert_ctx(map, index); 2561 2562 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { 2563 struct bpf_cpu_map_entry *rcpu = fwd; 2564 2565 err = cpu_map_enqueue(rcpu, xdp, dev_rx); 2566 if (err) 2567 return err; 2568 __cpu_map_insert_ctx(map, index); 2569 } 2570 return 0; 2571 } 2572 2573 void xdp_do_flush_map(void) 2574 { 2575 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2576 struct bpf_map *map = ri->map_to_flush; 2577 2578 ri->map_to_flush = NULL; 2579 if (map) { 2580 switch (map->map_type) { 2581 case BPF_MAP_TYPE_DEVMAP: 2582 __dev_map_flush(map); 2583 break; 2584 case BPF_MAP_TYPE_CPUMAP: 2585 __cpu_map_flush(map); 2586 break; 2587 default: 2588 break; 2589 } 2590 } 2591 } 2592 EXPORT_SYMBOL_GPL(xdp_do_flush_map); 2593 2594 static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) 2595 { 2596 switch (map->map_type) { 2597 case BPF_MAP_TYPE_DEVMAP: 2598 return __dev_map_lookup_elem(map, index); 2599 case BPF_MAP_TYPE_CPUMAP: 2600 return __cpu_map_lookup_elem(map, index); 2601 default: 2602 return NULL; 2603 } 2604 } 2605 2606 static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, 2607 unsigned long aux) 2608 { 2609 return (unsigned long)xdp_prog->aux != aux; 2610 } 
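/*
 * Example (illustrative sketch, not part of this file): the redirect
 * helpers work in two phases. The program calls bpf_redirect_map(),
 * which only records the map and index in the per-CPU redirect_info
 * above and returns XDP_REDIRECT; the driver then invokes
 * xdp_do_redirect() below to resolve the target and transmit. A
 * minimal XDP program using a devmap could look roughly like this
 * (map and section names are assumptions, written against the
 * samples/bpf-style helper declarations):
 *
 *	struct bpf_map_def SEC("maps") tx_port = {
 *		.type        = BPF_MAP_TYPE_DEVMAP,
 *		.key_size    = sizeof(int),
 *		.value_size  = sizeof(int),
 *		.max_entries = 64,
 *	};
 *
 *	SEC("xdp_redirect_map")
 *	int xdp_redirect_prog(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&tx_port, 0, 0);
 *	}
 *
 * The hidden map_owner argument noted further below is filled in by
 * the verifier, not by the program.
 */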
2611 2612 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, 2613 struct bpf_prog *xdp_prog) 2614 { 2615 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2616 unsigned long map_owner = ri->map_owner; 2617 struct bpf_map *map = ri->map; 2618 u32 index = ri->ifindex; 2619 void *fwd = NULL; 2620 int err; 2621 2622 ri->ifindex = 0; 2623 ri->map = NULL; 2624 ri->map_owner = 0; 2625 2626 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { 2627 err = -EFAULT; 2628 map = NULL; 2629 goto err; 2630 } 2631 2632 fwd = __xdp_map_lookup_elem(map, index); 2633 if (!fwd) { 2634 err = -EINVAL; 2635 goto err; 2636 } 2637 if (ri->map_to_flush && ri->map_to_flush != map) 2638 xdp_do_flush_map(); 2639 2640 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); 2641 if (unlikely(err)) 2642 goto err; 2643 2644 ri->map_to_flush = map; 2645 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); 2646 return 0; 2647 err: 2648 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); 2649 return err; 2650 } 2651 2652 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 2653 struct bpf_prog *xdp_prog) 2654 { 2655 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2656 struct net_device *fwd; 2657 u32 index = ri->ifindex; 2658 int err; 2659 2660 if (ri->map) 2661 return xdp_do_redirect_map(dev, xdp, xdp_prog); 2662 2663 fwd = dev_get_by_index_rcu(dev_net(dev), index); 2664 ri->ifindex = 0; 2665 if (unlikely(!fwd)) { 2666 err = -EINVAL; 2667 goto err; 2668 } 2669 2670 err = __bpf_tx_xdp(fwd, NULL, xdp, 0); 2671 if (unlikely(err)) 2672 goto err; 2673 2674 _trace_xdp_redirect(dev, xdp_prog, index); 2675 return 0; 2676 err: 2677 _trace_xdp_redirect_err(dev, xdp_prog, index, err); 2678 return err; 2679 } 2680 EXPORT_SYMBOL_GPL(xdp_do_redirect); 2681 2682 static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) 2683 { 2684 unsigned int len; 2685 2686 if (unlikely(!(fwd->flags & IFF_UP))) 2687 return -ENETDOWN; 2688 2689 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; 2690 if (skb->len > len) 2691 return -EMSGSIZE; 2692 2693 return 0; 2694 } 2695 2696 static int xdp_do_generic_redirect_map(struct net_device *dev, 2697 struct sk_buff *skb, 2698 struct bpf_prog *xdp_prog) 2699 { 2700 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2701 unsigned long map_owner = ri->map_owner; 2702 struct bpf_map *map = ri->map; 2703 struct net_device *fwd = NULL; 2704 u32 index = ri->ifindex; 2705 int err = 0; 2706 2707 ri->ifindex = 0; 2708 ri->map = NULL; 2709 ri->map_owner = 0; 2710 2711 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { 2712 err = -EFAULT; 2713 map = NULL; 2714 goto err; 2715 } 2716 fwd = __xdp_map_lookup_elem(map, index); 2717 if (unlikely(!fwd)) { 2718 err = -EINVAL; 2719 goto err; 2720 } 2721 2722 if (map->map_type == BPF_MAP_TYPE_DEVMAP) { 2723 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) 2724 goto err; 2725 skb->dev = fwd; 2726 } else { 2727 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ 2728 err = -EBADRQC; 2729 goto err; 2730 } 2731 2732 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); 2733 return 0; 2734 err: 2735 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); 2736 return err; 2737 } 2738 2739 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, 2740 struct bpf_prog *xdp_prog) 2741 { 2742 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2743 u32 index = ri->ifindex; 2744 struct net_device *fwd; 2745 int err = 0; 2746 2747 if (ri->map) 2748 return 
xdp_do_generic_redirect_map(dev, skb, xdp_prog); 2749 2750 ri->ifindex = 0; 2751 fwd = dev_get_by_index_rcu(dev_net(dev), index); 2752 if (unlikely(!fwd)) { 2753 err = -EINVAL; 2754 goto err; 2755 } 2756 2757 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) 2758 goto err; 2759 2760 skb->dev = fwd; 2761 _trace_xdp_redirect(dev, xdp_prog, index); 2762 return 0; 2763 err: 2764 _trace_xdp_redirect_err(dev, xdp_prog, index, err); 2765 return err; 2766 } 2767 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); 2768 2769 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) 2770 { 2771 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2772 2773 if (unlikely(flags)) 2774 return XDP_ABORTED; 2775 2776 ri->ifindex = ifindex; 2777 ri->flags = flags; 2778 ri->map = NULL; 2779 ri->map_owner = 0; 2780 2781 return XDP_REDIRECT; 2782 } 2783 2784 static const struct bpf_func_proto bpf_xdp_redirect_proto = { 2785 .func = bpf_xdp_redirect, 2786 .gpl_only = false, 2787 .ret_type = RET_INTEGER, 2788 .arg1_type = ARG_ANYTHING, 2789 .arg2_type = ARG_ANYTHING, 2790 }; 2791 2792 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, 2793 unsigned long, map_owner) 2794 { 2795 struct redirect_info *ri = this_cpu_ptr(&redirect_info); 2796 2797 if (unlikely(flags)) 2798 return XDP_ABORTED; 2799 2800 ri->ifindex = ifindex; 2801 ri->flags = flags; 2802 ri->map = map; 2803 ri->map_owner = map_owner; 2804 2805 return XDP_REDIRECT; 2806 } 2807 2808 /* Note, arg4 is hidden from users and populated by the verifier 2809 * with the right pointer. 2810 */ 2811 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { 2812 .func = bpf_xdp_redirect_map, 2813 .gpl_only = false, 2814 .ret_type = RET_INTEGER, 2815 .arg1_type = ARG_CONST_MAP_PTR, 2816 .arg2_type = ARG_ANYTHING, 2817 .arg3_type = ARG_ANYTHING, 2818 }; 2819 2820 bool bpf_helper_changes_pkt_data(void *func) 2821 { 2822 if (func == bpf_skb_vlan_push || 2823 func == bpf_skb_vlan_pop || 2824 func == bpf_skb_store_bytes || 2825 func == bpf_skb_change_proto || 2826 func == bpf_skb_change_head || 2827 func == bpf_skb_change_tail || 2828 func == bpf_skb_adjust_room || 2829 func == bpf_skb_pull_data || 2830 func == bpf_clone_redirect || 2831 func == bpf_l3_csum_replace || 2832 func == bpf_l4_csum_replace || 2833 func == bpf_xdp_adjust_head || 2834 func == bpf_xdp_adjust_meta) 2835 return true; 2836 2837 return false; 2838 } 2839 2840 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, 2841 unsigned long off, unsigned long len) 2842 { 2843 void *ptr = skb_header_pointer(skb, off, len, dst_buff); 2844 2845 if (unlikely(!ptr)) 2846 return len; 2847 if (ptr != dst_buff) 2848 memcpy(dst_buff, ptr, len); 2849 2850 return 0; 2851 } 2852 2853 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, 2854 u64, flags, void *, meta, u64, meta_size) 2855 { 2856 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 2857 2858 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 2859 return -EINVAL; 2860 if (unlikely(skb_size > skb->len)) 2861 return -EFAULT; 2862 2863 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, 2864 bpf_skb_copy); 2865 } 2866 2867 static const struct bpf_func_proto bpf_skb_event_output_proto = { 2868 .func = bpf_skb_event_output, 2869 .gpl_only = true, 2870 .ret_type = RET_INTEGER, 2871 .arg1_type = ARG_PTR_TO_CTX, 2872 .arg2_type = ARG_CONST_MAP_PTR, 2873 .arg3_type = ARG_ANYTHING, 2874 .arg4_type = ARG_PTR_TO_MEM, 2875 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 2876 }; 
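/*
 * Example (illustrative sketch, not part of this file): for skb
 * programs, bpf_perf_event_output() maps to bpf_skb_event_output()
 * above, and the upper 32 bits of the flags argument
 * (BPF_F_CTXLEN_MASK) select how many bytes of the packet get
 * appended after the caller-supplied metadata. A tc classifier could
 * sample the first 64 bytes of each packet roughly like this (map,
 * struct and section names are assumptions):
 *
 *	struct bpf_map_def SEC("maps") events = {
 *		.type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size    = sizeof(int),
 *		.value_size  = sizeof(__u32),
 *		.max_entries = 64,
 *	};
 *
 *	SEC("classifier")
 *	int tc_sample(struct __sk_buff *skb)
 *	{
 *		struct { __u32 cookie; __u32 pkt_len; } md = {
 *			.cookie  = 0xcafe,
 *			.pkt_len = skb->len,
 *		};
 *		__u64 sample = skb->len < 64 ? skb->len : 64;
 *		__u64 flags = BPF_F_CURRENT_CPU | (sample << 32);
 *
 *		bpf_perf_event_output(skb, &events, flags, &md, sizeof(md));
 *		return TC_ACT_OK;
 *	}
 *
 * bpf_skb_event_output() rejects a sample length larger than skb->len
 * with -EFAULT, hence the clamping above.
 */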
2877 2878 static unsigned short bpf_tunnel_key_af(u64 flags) 2879 { 2880 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; 2881 } 2882 2883 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, 2884 u32, size, u64, flags) 2885 { 2886 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 2887 u8 compat[sizeof(struct bpf_tunnel_key)]; 2888 void *to_orig = to; 2889 int err; 2890 2891 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { 2892 err = -EINVAL; 2893 goto err_clear; 2894 } 2895 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) { 2896 err = -EPROTO; 2897 goto err_clear; 2898 } 2899 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 2900 err = -EINVAL; 2901 switch (size) { 2902 case offsetof(struct bpf_tunnel_key, tunnel_label): 2903 case offsetof(struct bpf_tunnel_key, tunnel_ext): 2904 goto set_compat; 2905 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 2906 /* Fixup deprecated structure layouts here, so we have 2907 * a common path later on. 2908 */ 2909 if (ip_tunnel_info_af(info) != AF_INET) 2910 goto err_clear; 2911 set_compat: 2912 to = (struct bpf_tunnel_key *)compat; 2913 break; 2914 default: 2915 goto err_clear; 2916 } 2917 } 2918 2919 to->tunnel_id = be64_to_cpu(info->key.tun_id); 2920 to->tunnel_tos = info->key.tos; 2921 to->tunnel_ttl = info->key.ttl; 2922 2923 if (flags & BPF_F_TUNINFO_IPV6) { 2924 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 2925 sizeof(to->remote_ipv6)); 2926 to->tunnel_label = be32_to_cpu(info->key.label); 2927 } else { 2928 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 2929 } 2930 2931 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 2932 memcpy(to_orig, to, size); 2933 2934 return 0; 2935 err_clear: 2936 memset(to_orig, 0, size); 2937 return err; 2938 } 2939 2940 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 2941 .func = bpf_skb_get_tunnel_key, 2942 .gpl_only = false, 2943 .ret_type = RET_INTEGER, 2944 .arg1_type = ARG_PTR_TO_CTX, 2945 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 2946 .arg3_type = ARG_CONST_SIZE, 2947 .arg4_type = ARG_ANYTHING, 2948 }; 2949 2950 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) 2951 { 2952 const struct ip_tunnel_info *info = skb_tunnel_info(skb); 2953 int err; 2954 2955 if (unlikely(!info || 2956 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) { 2957 err = -ENOENT; 2958 goto err_clear; 2959 } 2960 if (unlikely(size < info->options_len)) { 2961 err = -ENOMEM; 2962 goto err_clear; 2963 } 2964 2965 ip_tunnel_info_opts_get(to, info); 2966 if (size > info->options_len) 2967 memset(to + info->options_len, 0, size - info->options_len); 2968 2969 return info->options_len; 2970 err_clear: 2971 memset(to, 0, size); 2972 return err; 2973 } 2974 2975 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { 2976 .func = bpf_skb_get_tunnel_opt, 2977 .gpl_only = false, 2978 .ret_type = RET_INTEGER, 2979 .arg1_type = ARG_PTR_TO_CTX, 2980 .arg2_type = ARG_PTR_TO_UNINIT_MEM, 2981 .arg3_type = ARG_CONST_SIZE, 2982 }; 2983 2984 static struct metadata_dst __percpu *md_dst; 2985 2986 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, 2987 const struct bpf_tunnel_key *, from, u32, size, u64, flags) 2988 { 2989 struct metadata_dst *md = this_cpu_ptr(md_dst); 2990 u8 compat[sizeof(struct bpf_tunnel_key)]; 2991 struct ip_tunnel_info *info; 2992 2993 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | 2994 BPF_F_DONT_FRAGMENT))) 2995 return -EINVAL; 2996 if (unlikely(size != sizeof(struct 
bpf_tunnel_key))) { 2997 switch (size) { 2998 case offsetof(struct bpf_tunnel_key, tunnel_label): 2999 case offsetof(struct bpf_tunnel_key, tunnel_ext): 3000 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 3001 /* Fixup deprecated structure layouts here, so we have 3002 * a common path later on. 3003 */ 3004 memcpy(compat, from, size); 3005 memset(compat + size, 0, sizeof(compat) - size); 3006 from = (const struct bpf_tunnel_key *) compat; 3007 break; 3008 default: 3009 return -EINVAL; 3010 } 3011 } 3012 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) || 3013 from->tunnel_ext)) 3014 return -EINVAL; 3015 3016 skb_dst_drop(skb); 3017 dst_hold((struct dst_entry *) md); 3018 skb_dst_set(skb, (struct dst_entry *) md); 3019 3020 info = &md->u.tun_info; 3021 info->mode = IP_TUNNEL_INFO_TX; 3022 3023 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; 3024 if (flags & BPF_F_DONT_FRAGMENT) 3025 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT; 3026 if (flags & BPF_F_ZERO_CSUM_TX) 3027 info->key.tun_flags &= ~TUNNEL_CSUM; 3028 3029 info->key.tun_id = cpu_to_be64(from->tunnel_id); 3030 info->key.tos = from->tunnel_tos; 3031 info->key.ttl = from->tunnel_ttl; 3032 3033 if (flags & BPF_F_TUNINFO_IPV6) { 3034 info->mode |= IP_TUNNEL_INFO_IPV6; 3035 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 3036 sizeof(from->remote_ipv6)); 3037 info->key.label = cpu_to_be32(from->tunnel_label) & 3038 IPV6_FLOWLABEL_MASK; 3039 } else { 3040 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 3041 } 3042 3043 return 0; 3044 } 3045 3046 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 3047 .func = bpf_skb_set_tunnel_key, 3048 .gpl_only = false, 3049 .ret_type = RET_INTEGER, 3050 .arg1_type = ARG_PTR_TO_CTX, 3051 .arg2_type = ARG_PTR_TO_MEM, 3052 .arg3_type = ARG_CONST_SIZE, 3053 .arg4_type = ARG_ANYTHING, 3054 }; 3055 3056 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, 3057 const u8 *, from, u32, size) 3058 { 3059 struct ip_tunnel_info *info = skb_tunnel_info(skb); 3060 const struct metadata_dst *md = this_cpu_ptr(md_dst); 3061 3062 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1)))) 3063 return -EINVAL; 3064 if (unlikely(size > IP_TUNNEL_OPTS_MAX)) 3065 return -ENOMEM; 3066 3067 ip_tunnel_info_opts_set(info, from, size); 3068 3069 return 0; 3070 } 3071 3072 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { 3073 .func = bpf_skb_set_tunnel_opt, 3074 .gpl_only = false, 3075 .ret_type = RET_INTEGER, 3076 .arg1_type = ARG_PTR_TO_CTX, 3077 .arg2_type = ARG_PTR_TO_MEM, 3078 .arg3_type = ARG_CONST_SIZE, 3079 }; 3080 3081 static const struct bpf_func_proto * 3082 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) 3083 { 3084 if (!md_dst) { 3085 struct metadata_dst __percpu *tmp; 3086 3087 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, 3088 METADATA_IP_TUNNEL, 3089 GFP_KERNEL); 3090 if (!tmp) 3091 return NULL; 3092 if (cmpxchg(&md_dst, NULL, tmp)) 3093 metadata_dst_free_percpu(tmp); 3094 } 3095 3096 switch (which) { 3097 case BPF_FUNC_skb_set_tunnel_key: 3098 return &bpf_skb_set_tunnel_key_proto; 3099 case BPF_FUNC_skb_set_tunnel_opt: 3100 return &bpf_skb_set_tunnel_opt_proto; 3101 default: 3102 return NULL; 3103 } 3104 } 3105 3106 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, 3107 u32, idx) 3108 { 3109 struct bpf_array *array = container_of(map, struct bpf_array, map); 3110 struct cgroup *cgrp; 3111 struct sock *sk; 3112 3113 sk = skb_to_full_sk(skb); 3114 if (!sk || !sk_fullsock(sk)) 
3115 return -ENOENT; 3116 if (unlikely(idx >= array->map.max_entries)) 3117 return -E2BIG; 3118 3119 cgrp = READ_ONCE(array->ptrs[idx]); 3120 if (unlikely(!cgrp)) 3121 return -EAGAIN; 3122 3123 return sk_under_cgroup_hierarchy(sk, cgrp); 3124 } 3125 3126 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { 3127 .func = bpf_skb_under_cgroup, 3128 .gpl_only = false, 3129 .ret_type = RET_INTEGER, 3130 .arg1_type = ARG_PTR_TO_CTX, 3131 .arg2_type = ARG_CONST_MAP_PTR, 3132 .arg3_type = ARG_ANYTHING, 3133 }; 3134 3135 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, 3136 unsigned long off, unsigned long len) 3137 { 3138 memcpy(dst_buff, src_buff + off, len); 3139 return 0; 3140 } 3141 3142 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, 3143 u64, flags, void *, meta, u64, meta_size) 3144 { 3145 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; 3146 3147 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 3148 return -EINVAL; 3149 if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) 3150 return -EFAULT; 3151 3152 return bpf_event_output(map, flags, meta, meta_size, xdp->data, 3153 xdp_size, bpf_xdp_copy); 3154 } 3155 3156 static const struct bpf_func_proto bpf_xdp_event_output_proto = { 3157 .func = bpf_xdp_event_output, 3158 .gpl_only = true, 3159 .ret_type = RET_INTEGER, 3160 .arg1_type = ARG_PTR_TO_CTX, 3161 .arg2_type = ARG_CONST_MAP_PTR, 3162 .arg3_type = ARG_ANYTHING, 3163 .arg4_type = ARG_PTR_TO_MEM, 3164 .arg5_type = ARG_CONST_SIZE_OR_ZERO, 3165 }; 3166 3167 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) 3168 { 3169 return skb->sk ? sock_gen_cookie(skb->sk) : 0; 3170 } 3171 3172 static const struct bpf_func_proto bpf_get_socket_cookie_proto = { 3173 .func = bpf_get_socket_cookie, 3174 .gpl_only = false, 3175 .ret_type = RET_INTEGER, 3176 .arg1_type = ARG_PTR_TO_CTX, 3177 }; 3178 3179 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) 3180 { 3181 struct sock *sk = sk_to_full_sk(skb->sk); 3182 kuid_t kuid; 3183 3184 if (!sk || !sk_fullsock(sk)) 3185 return overflowuid; 3186 kuid = sock_net_uid(sock_net(sk), sk); 3187 return from_kuid_munged(sock_net(sk)->user_ns, kuid); 3188 } 3189 3190 static const struct bpf_func_proto bpf_get_socket_uid_proto = { 3191 .func = bpf_get_socket_uid, 3192 .gpl_only = false, 3193 .ret_type = RET_INTEGER, 3194 .arg1_type = ARG_PTR_TO_CTX, 3195 }; 3196 3197 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, 3198 int, level, int, optname, char *, optval, int, optlen) 3199 { 3200 struct sock *sk = bpf_sock->sk; 3201 int ret = 0; 3202 int val; 3203 3204 if (!sk_fullsock(sk)) 3205 return -EINVAL; 3206 3207 if (level == SOL_SOCKET) { 3208 if (optlen != sizeof(int)) 3209 return -EINVAL; 3210 val = *((int *)optval); 3211 3212 /* Only some socketops are supported */ 3213 switch (optname) { 3214 case SO_RCVBUF: 3215 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 3216 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); 3217 break; 3218 case SO_SNDBUF: 3219 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 3220 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); 3221 break; 3222 case SO_MAX_PACING_RATE: 3223 sk->sk_max_pacing_rate = val; 3224 sk->sk_pacing_rate = min(sk->sk_pacing_rate, 3225 sk->sk_max_pacing_rate); 3226 break; 3227 case SO_PRIORITY: 3228 sk->sk_priority = val; 3229 break; 3230 case SO_RCVLOWAT: 3231 if (val < 0) 3232 val = INT_MAX; 3233 sk->sk_rcvlowat = val ? 
: 1; 3234 break; 3235 case SO_MARK: 3236 sk->sk_mark = val; 3237 break; 3238 default: 3239 ret = -EINVAL; 3240 } 3241 #ifdef CONFIG_INET 3242 #if IS_ENABLED(CONFIG_IPV6) 3243 } else if (level == SOL_IPV6) { 3244 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 3245 return -EINVAL; 3246 3247 val = *((int *)optval); 3248 /* Only some options are supported */ 3249 switch (optname) { 3250 case IPV6_TCLASS: 3251 if (val < -1 || val > 0xff) { 3252 ret = -EINVAL; 3253 } else { 3254 struct ipv6_pinfo *np = inet6_sk(sk); 3255 3256 if (val == -1) 3257 val = 0; 3258 np->tclass = val; 3259 } 3260 break; 3261 default: 3262 ret = -EINVAL; 3263 } 3264 #endif 3265 } else if (level == SOL_TCP && 3266 sk->sk_prot->setsockopt == tcp_setsockopt) { 3267 if (optname == TCP_CONGESTION) { 3268 char name[TCP_CA_NAME_MAX]; 3269 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; 3270 3271 strncpy(name, optval, min_t(long, optlen, 3272 TCP_CA_NAME_MAX-1)); 3273 name[TCP_CA_NAME_MAX-1] = 0; 3274 ret = tcp_set_congestion_control(sk, name, false, 3275 reinit); 3276 } else { 3277 struct tcp_sock *tp = tcp_sk(sk); 3278 3279 if (optlen != sizeof(int)) 3280 return -EINVAL; 3281 3282 val = *((int *)optval); 3283 /* Only some options are supported */ 3284 switch (optname) { 3285 case TCP_BPF_IW: 3286 if (val <= 0 || tp->data_segs_out > 0) 3287 ret = -EINVAL; 3288 else 3289 tp->snd_cwnd = val; 3290 break; 3291 case TCP_BPF_SNDCWND_CLAMP: 3292 if (val <= 0) { 3293 ret = -EINVAL; 3294 } else { 3295 tp->snd_cwnd_clamp = val; 3296 tp->snd_ssthresh = val; 3297 } 3298 break; 3299 default: 3300 ret = -EINVAL; 3301 } 3302 } 3303 #endif 3304 } else { 3305 ret = -EINVAL; 3306 } 3307 return ret; 3308 } 3309 3310 static const struct bpf_func_proto bpf_setsockopt_proto = { 3311 .func = bpf_setsockopt, 3312 .gpl_only = false, 3313 .ret_type = RET_INTEGER, 3314 .arg1_type = ARG_PTR_TO_CTX, 3315 .arg2_type = ARG_ANYTHING, 3316 .arg3_type = ARG_ANYTHING, 3317 .arg4_type = ARG_PTR_TO_MEM, 3318 .arg5_type = ARG_CONST_SIZE, 3319 }; 3320 3321 BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, 3322 int, level, int, optname, char *, optval, int, optlen) 3323 { 3324 struct sock *sk = bpf_sock->sk; 3325 3326 if (!sk_fullsock(sk)) 3327 goto err_clear; 3328 3329 #ifdef CONFIG_INET 3330 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { 3331 if (optname == TCP_CONGESTION) { 3332 struct inet_connection_sock *icsk = inet_csk(sk); 3333 3334 if (!icsk->icsk_ca_ops || optlen <= 1) 3335 goto err_clear; 3336 strncpy(optval, icsk->icsk_ca_ops->name, optlen); 3337 optval[optlen - 1] = 0; 3338 } else { 3339 goto err_clear; 3340 } 3341 #if IS_ENABLED(CONFIG_IPV6) 3342 } else if (level == SOL_IPV6) { 3343 struct ipv6_pinfo *np = inet6_sk(sk); 3344 3345 if (optlen != sizeof(int) || sk->sk_family != AF_INET6) 3346 goto err_clear; 3347 3348 /* Only some options are supported */ 3349 switch (optname) { 3350 case IPV6_TCLASS: 3351 *((int *)optval) = (int)np->tclass; 3352 break; 3353 default: 3354 goto err_clear; 3355 } 3356 #endif 3357 } else { 3358 goto err_clear; 3359 } 3360 return 0; 3361 #endif 3362 err_clear: 3363 memset(optval, 0, optlen); 3364 return -EINVAL; 3365 } 3366 3367 static const struct bpf_func_proto bpf_getsockopt_proto = { 3368 .func = bpf_getsockopt, 3369 .gpl_only = false, 3370 .ret_type = RET_INTEGER, 3371 .arg1_type = ARG_PTR_TO_CTX, 3372 .arg2_type = ARG_ANYTHING, 3373 .arg3_type = ARG_ANYTHING, 3374 .arg4_type = ARG_PTR_TO_UNINIT_MEM, 3375 .arg5_type = ARG_CONST_SIZE, 3376 }; 3377 3378 
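/*
 * Example (illustrative sketch, not part of this file):
 * bpf_setsockopt() above only handles the option subset listed in its
 * switch statements and returns -EINVAL for everything else. A
 * BPF_PROG_TYPE_SOCK_OPS program resizing the buffers of freshly
 * established TCP connections could look roughly like this (section
 * name and buffer size are assumptions, modelled on the samples/bpf
 * sockops programs):
 *
 *	SEC("sockops")
 *	int bpf_bufs(struct bpf_sock_ops *skops)
 *	{
 *		int bufsize = 150000;
 *
 *		switch (skops->op) {
 *		case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 *		case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 *			bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
 *				       &bufsize, sizeof(bufsize));
 *			bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
 *				       &bufsize, sizeof(bufsize));
 *			break;
 *		}
 *		return 1;
 *	}
 *
 * SO_SNDBUF/SO_RCVBUF go through the SOL_SOCKET branch above, which
 * doubles the value and enforces SOCK_MIN_SNDBUF/SOCK_MIN_RCVBUF just
 * like the regular setsockopt() path.
 */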
BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, 3379 int, argval) 3380 { 3381 struct sock *sk = bpf_sock->sk; 3382 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; 3383 3384 if (!sk_fullsock(sk)) 3385 return -EINVAL; 3386 3387 #ifdef CONFIG_INET 3388 if (val) 3389 tcp_sk(sk)->bpf_sock_ops_cb_flags = val; 3390 3391 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); 3392 #else 3393 return -EINVAL; 3394 #endif 3395 } 3396 3397 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { 3398 .func = bpf_sock_ops_cb_flags_set, 3399 .gpl_only = false, 3400 .ret_type = RET_INTEGER, 3401 .arg1_type = ARG_PTR_TO_CTX, 3402 .arg2_type = ARG_ANYTHING, 3403 }; 3404 3405 static const struct bpf_func_proto * 3406 bpf_base_func_proto(enum bpf_func_id func_id) 3407 { 3408 switch (func_id) { 3409 case BPF_FUNC_map_lookup_elem: 3410 return &bpf_map_lookup_elem_proto; 3411 case BPF_FUNC_map_update_elem: 3412 return &bpf_map_update_elem_proto; 3413 case BPF_FUNC_map_delete_elem: 3414 return &bpf_map_delete_elem_proto; 3415 case BPF_FUNC_get_prandom_u32: 3416 return &bpf_get_prandom_u32_proto; 3417 case BPF_FUNC_get_smp_processor_id: 3418 return &bpf_get_raw_smp_processor_id_proto; 3419 case BPF_FUNC_get_numa_node_id: 3420 return &bpf_get_numa_node_id_proto; 3421 case BPF_FUNC_tail_call: 3422 return &bpf_tail_call_proto; 3423 case BPF_FUNC_ktime_get_ns: 3424 return &bpf_ktime_get_ns_proto; 3425 case BPF_FUNC_trace_printk: 3426 if (capable(CAP_SYS_ADMIN)) 3427 return bpf_get_trace_printk_proto(); 3428 default: 3429 return NULL; 3430 } 3431 } 3432 3433 static const struct bpf_func_proto * 3434 sock_filter_func_proto(enum bpf_func_id func_id) 3435 { 3436 switch (func_id) { 3437 /* inet and inet6 sockets are created in a process 3438 * context so there is always a valid uid/gid 3439 */ 3440 case BPF_FUNC_get_current_uid_gid: 3441 return &bpf_get_current_uid_gid_proto; 3442 default: 3443 return bpf_base_func_proto(func_id); 3444 } 3445 } 3446 3447 static const struct bpf_func_proto * 3448 sk_filter_func_proto(enum bpf_func_id func_id) 3449 { 3450 switch (func_id) { 3451 case BPF_FUNC_skb_load_bytes: 3452 return &bpf_skb_load_bytes_proto; 3453 case BPF_FUNC_get_socket_cookie: 3454 return &bpf_get_socket_cookie_proto; 3455 case BPF_FUNC_get_socket_uid: 3456 return &bpf_get_socket_uid_proto; 3457 default: 3458 return bpf_base_func_proto(func_id); 3459 } 3460 } 3461 3462 static const struct bpf_func_proto * 3463 tc_cls_act_func_proto(enum bpf_func_id func_id) 3464 { 3465 switch (func_id) { 3466 case BPF_FUNC_skb_store_bytes: 3467 return &bpf_skb_store_bytes_proto; 3468 case BPF_FUNC_skb_load_bytes: 3469 return &bpf_skb_load_bytes_proto; 3470 case BPF_FUNC_skb_pull_data: 3471 return &bpf_skb_pull_data_proto; 3472 case BPF_FUNC_csum_diff: 3473 return &bpf_csum_diff_proto; 3474 case BPF_FUNC_csum_update: 3475 return &bpf_csum_update_proto; 3476 case BPF_FUNC_l3_csum_replace: 3477 return &bpf_l3_csum_replace_proto; 3478 case BPF_FUNC_l4_csum_replace: 3479 return &bpf_l4_csum_replace_proto; 3480 case BPF_FUNC_clone_redirect: 3481 return &bpf_clone_redirect_proto; 3482 case BPF_FUNC_get_cgroup_classid: 3483 return &bpf_get_cgroup_classid_proto; 3484 case BPF_FUNC_skb_vlan_push: 3485 return &bpf_skb_vlan_push_proto; 3486 case BPF_FUNC_skb_vlan_pop: 3487 return &bpf_skb_vlan_pop_proto; 3488 case BPF_FUNC_skb_change_proto: 3489 return &bpf_skb_change_proto_proto; 3490 case BPF_FUNC_skb_change_type: 3491 return &bpf_skb_change_type_proto; 3492 case BPF_FUNC_skb_adjust_room: 3493 return 
&bpf_skb_adjust_room_proto; 3494 case BPF_FUNC_skb_change_tail: 3495 return &bpf_skb_change_tail_proto; 3496 case BPF_FUNC_skb_get_tunnel_key: 3497 return &bpf_skb_get_tunnel_key_proto; 3498 case BPF_FUNC_skb_set_tunnel_key: 3499 return bpf_get_skb_set_tunnel_proto(func_id); 3500 case BPF_FUNC_skb_get_tunnel_opt: 3501 return &bpf_skb_get_tunnel_opt_proto; 3502 case BPF_FUNC_skb_set_tunnel_opt: 3503 return bpf_get_skb_set_tunnel_proto(func_id); 3504 case BPF_FUNC_redirect: 3505 return &bpf_redirect_proto; 3506 case BPF_FUNC_get_route_realm: 3507 return &bpf_get_route_realm_proto; 3508 case BPF_FUNC_get_hash_recalc: 3509 return &bpf_get_hash_recalc_proto; 3510 case BPF_FUNC_set_hash_invalid: 3511 return &bpf_set_hash_invalid_proto; 3512 case BPF_FUNC_set_hash: 3513 return &bpf_set_hash_proto; 3514 case BPF_FUNC_perf_event_output: 3515 return &bpf_skb_event_output_proto; 3516 case BPF_FUNC_get_smp_processor_id: 3517 return &bpf_get_smp_processor_id_proto; 3518 case BPF_FUNC_skb_under_cgroup: 3519 return &bpf_skb_under_cgroup_proto; 3520 case BPF_FUNC_get_socket_cookie: 3521 return &bpf_get_socket_cookie_proto; 3522 case BPF_FUNC_get_socket_uid: 3523 return &bpf_get_socket_uid_proto; 3524 default: 3525 return bpf_base_func_proto(func_id); 3526 } 3527 } 3528 3529 static const struct bpf_func_proto * 3530 xdp_func_proto(enum bpf_func_id func_id) 3531 { 3532 switch (func_id) { 3533 case BPF_FUNC_perf_event_output: 3534 return &bpf_xdp_event_output_proto; 3535 case BPF_FUNC_get_smp_processor_id: 3536 return &bpf_get_smp_processor_id_proto; 3537 case BPF_FUNC_csum_diff: 3538 return &bpf_csum_diff_proto; 3539 case BPF_FUNC_xdp_adjust_head: 3540 return &bpf_xdp_adjust_head_proto; 3541 case BPF_FUNC_xdp_adjust_meta: 3542 return &bpf_xdp_adjust_meta_proto; 3543 case BPF_FUNC_redirect: 3544 return &bpf_xdp_redirect_proto; 3545 case BPF_FUNC_redirect_map: 3546 return &bpf_xdp_redirect_map_proto; 3547 default: 3548 return bpf_base_func_proto(func_id); 3549 } 3550 } 3551 3552 static const struct bpf_func_proto * 3553 lwt_inout_func_proto(enum bpf_func_id func_id) 3554 { 3555 switch (func_id) { 3556 case BPF_FUNC_skb_load_bytes: 3557 return &bpf_skb_load_bytes_proto; 3558 case BPF_FUNC_skb_pull_data: 3559 return &bpf_skb_pull_data_proto; 3560 case BPF_FUNC_csum_diff: 3561 return &bpf_csum_diff_proto; 3562 case BPF_FUNC_get_cgroup_classid: 3563 return &bpf_get_cgroup_classid_proto; 3564 case BPF_FUNC_get_route_realm: 3565 return &bpf_get_route_realm_proto; 3566 case BPF_FUNC_get_hash_recalc: 3567 return &bpf_get_hash_recalc_proto; 3568 case BPF_FUNC_perf_event_output: 3569 return &bpf_skb_event_output_proto; 3570 case BPF_FUNC_get_smp_processor_id: 3571 return &bpf_get_smp_processor_id_proto; 3572 case BPF_FUNC_skb_under_cgroup: 3573 return &bpf_skb_under_cgroup_proto; 3574 default: 3575 return bpf_base_func_proto(func_id); 3576 } 3577 } 3578 3579 static const struct bpf_func_proto * 3580 sock_ops_func_proto(enum bpf_func_id func_id) 3581 { 3582 switch (func_id) { 3583 case BPF_FUNC_setsockopt: 3584 return &bpf_setsockopt_proto; 3585 case BPF_FUNC_getsockopt: 3586 return &bpf_getsockopt_proto; 3587 case BPF_FUNC_sock_ops_cb_flags_set: 3588 return &bpf_sock_ops_cb_flags_set_proto; 3589 case BPF_FUNC_sock_map_update: 3590 return &bpf_sock_map_update_proto; 3591 default: 3592 return bpf_base_func_proto(func_id); 3593 } 3594 } 3595 3596 static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) 3597 { 3598 switch (func_id) { 3599 case BPF_FUNC_skb_store_bytes: 3600 return 
&bpf_skb_store_bytes_proto; 3601 case BPF_FUNC_skb_load_bytes: 3602 return &bpf_skb_load_bytes_proto; 3603 case BPF_FUNC_skb_pull_data: 3604 return &bpf_skb_pull_data_proto; 3605 case BPF_FUNC_skb_change_tail: 3606 return &bpf_skb_change_tail_proto; 3607 case BPF_FUNC_skb_change_head: 3608 return &bpf_skb_change_head_proto; 3609 case BPF_FUNC_get_socket_cookie: 3610 return &bpf_get_socket_cookie_proto; 3611 case BPF_FUNC_get_socket_uid: 3612 return &bpf_get_socket_uid_proto; 3613 case BPF_FUNC_sk_redirect_map: 3614 return &bpf_sk_redirect_map_proto; 3615 default: 3616 return bpf_base_func_proto(func_id); 3617 } 3618 } 3619 3620 static const struct bpf_func_proto * 3621 lwt_xmit_func_proto(enum bpf_func_id func_id) 3622 { 3623 switch (func_id) { 3624 case BPF_FUNC_skb_get_tunnel_key: 3625 return &bpf_skb_get_tunnel_key_proto; 3626 case BPF_FUNC_skb_set_tunnel_key: 3627 return bpf_get_skb_set_tunnel_proto(func_id); 3628 case BPF_FUNC_skb_get_tunnel_opt: 3629 return &bpf_skb_get_tunnel_opt_proto; 3630 case BPF_FUNC_skb_set_tunnel_opt: 3631 return bpf_get_skb_set_tunnel_proto(func_id); 3632 case BPF_FUNC_redirect: 3633 return &bpf_redirect_proto; 3634 case BPF_FUNC_clone_redirect: 3635 return &bpf_clone_redirect_proto; 3636 case BPF_FUNC_skb_change_tail: 3637 return &bpf_skb_change_tail_proto; 3638 case BPF_FUNC_skb_change_head: 3639 return &bpf_skb_change_head_proto; 3640 case BPF_FUNC_skb_store_bytes: 3641 return &bpf_skb_store_bytes_proto; 3642 case BPF_FUNC_csum_update: 3643 return &bpf_csum_update_proto; 3644 case BPF_FUNC_l3_csum_replace: 3645 return &bpf_l3_csum_replace_proto; 3646 case BPF_FUNC_l4_csum_replace: 3647 return &bpf_l4_csum_replace_proto; 3648 case BPF_FUNC_set_hash_invalid: 3649 return &bpf_set_hash_invalid_proto; 3650 default: 3651 return lwt_inout_func_proto(func_id); 3652 } 3653 } 3654 3655 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, 3656 struct bpf_insn_access_aux *info) 3657 { 3658 const int size_default = sizeof(__u32); 3659 3660 if (off < 0 || off >= sizeof(struct __sk_buff)) 3661 return false; 3662 3663 /* The verifier guarantees that size > 0. */ 3664 if (off % size != 0) 3665 return false; 3666 3667 switch (off) { 3668 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 3669 if (off + size > offsetofend(struct __sk_buff, cb[4])) 3670 return false; 3671 break; 3672 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): 3673 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): 3674 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): 3675 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): 3676 case bpf_ctx_range(struct __sk_buff, data): 3677 case bpf_ctx_range(struct __sk_buff, data_meta): 3678 case bpf_ctx_range(struct __sk_buff, data_end): 3679 if (size != size_default) 3680 return false; 3681 break; 3682 default: 3683 /* Only narrow read access allowed for now. 
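 * Writes must still use the full 4-byte width.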
*/ 3684 if (type == BPF_WRITE) { 3685 if (size != size_default) 3686 return false; 3687 } else { 3688 bpf_ctx_record_field_size(info, size_default); 3689 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 3690 return false; 3691 } 3692 } 3693 3694 return true; 3695 } 3696 3697 static bool sk_filter_is_valid_access(int off, int size, 3698 enum bpf_access_type type, 3699 struct bpf_insn_access_aux *info) 3700 { 3701 switch (off) { 3702 case bpf_ctx_range(struct __sk_buff, tc_classid): 3703 case bpf_ctx_range(struct __sk_buff, data): 3704 case bpf_ctx_range(struct __sk_buff, data_meta): 3705 case bpf_ctx_range(struct __sk_buff, data_end): 3706 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3707 return false; 3708 } 3709 3710 if (type == BPF_WRITE) { 3711 switch (off) { 3712 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 3713 break; 3714 default: 3715 return false; 3716 } 3717 } 3718 3719 return bpf_skb_is_valid_access(off, size, type, info); 3720 } 3721 3722 static bool lwt_is_valid_access(int off, int size, 3723 enum bpf_access_type type, 3724 struct bpf_insn_access_aux *info) 3725 { 3726 switch (off) { 3727 case bpf_ctx_range(struct __sk_buff, tc_classid): 3728 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3729 case bpf_ctx_range(struct __sk_buff, data_meta): 3730 return false; 3731 } 3732 3733 if (type == BPF_WRITE) { 3734 switch (off) { 3735 case bpf_ctx_range(struct __sk_buff, mark): 3736 case bpf_ctx_range(struct __sk_buff, priority): 3737 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 3738 break; 3739 default: 3740 return false; 3741 } 3742 } 3743 3744 switch (off) { 3745 case bpf_ctx_range(struct __sk_buff, data): 3746 info->reg_type = PTR_TO_PACKET; 3747 break; 3748 case bpf_ctx_range(struct __sk_buff, data_end): 3749 info->reg_type = PTR_TO_PACKET_END; 3750 break; 3751 } 3752 3753 return bpf_skb_is_valid_access(off, size, type, info); 3754 } 3755 3756 static bool sock_filter_is_valid_access(int off, int size, 3757 enum bpf_access_type type, 3758 struct bpf_insn_access_aux *info) 3759 { 3760 if (type == BPF_WRITE) { 3761 switch (off) { 3762 case offsetof(struct bpf_sock, bound_dev_if): 3763 case offsetof(struct bpf_sock, mark): 3764 case offsetof(struct bpf_sock, priority): 3765 break; 3766 default: 3767 return false; 3768 } 3769 } 3770 3771 if (off < 0 || off + size > sizeof(struct bpf_sock)) 3772 return false; 3773 /* The verifier guarantees that size > 0. */ 3774 if (off % size != 0) 3775 return false; 3776 if (size != sizeof(__u32)) 3777 return false; 3778 3779 return true; 3780 } 3781 3782 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, 3783 const struct bpf_prog *prog, int drop_verdict) 3784 { 3785 struct bpf_insn *insn = insn_buf; 3786 3787 if (!direct_write) 3788 return 0; 3789 3790 /* if (!skb->cloned) 3791 * goto start; 3792 * 3793 * (Fast-path, otherwise approximation that we might be 3794 * a clone, do the rest in helper.) 
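 * R6 is used as scratch below because R6-R9 are preserved across the helper call, while R1-R5 are clobbered.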
3795 */ 3796 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); 3797 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); 3798 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); 3799 3800 /* ret = bpf_skb_pull_data(skb, 0); */ 3801 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); 3802 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); 3803 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 3804 BPF_FUNC_skb_pull_data); 3805 /* if (!ret) 3806 * goto restore; 3807 * return TC_ACT_SHOT; 3808 */ 3809 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); 3810 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); 3811 *insn++ = BPF_EXIT_INSN(); 3812 3813 /* restore: */ 3814 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); 3815 /* start: */ 3816 *insn++ = prog->insnsi[0]; 3817 3818 return insn - insn_buf; 3819 } 3820 3821 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, 3822 const struct bpf_prog *prog) 3823 { 3824 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); 3825 } 3826 3827 static bool tc_cls_act_is_valid_access(int off, int size, 3828 enum bpf_access_type type, 3829 struct bpf_insn_access_aux *info) 3830 { 3831 if (type == BPF_WRITE) { 3832 switch (off) { 3833 case bpf_ctx_range(struct __sk_buff, mark): 3834 case bpf_ctx_range(struct __sk_buff, tc_index): 3835 case bpf_ctx_range(struct __sk_buff, priority): 3836 case bpf_ctx_range(struct __sk_buff, tc_classid): 3837 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 3838 break; 3839 default: 3840 return false; 3841 } 3842 } 3843 3844 switch (off) { 3845 case bpf_ctx_range(struct __sk_buff, data): 3846 info->reg_type = PTR_TO_PACKET; 3847 break; 3848 case bpf_ctx_range(struct __sk_buff, data_meta): 3849 info->reg_type = PTR_TO_PACKET_META; 3850 break; 3851 case bpf_ctx_range(struct __sk_buff, data_end): 3852 info->reg_type = PTR_TO_PACKET_END; 3853 break; 3854 case bpf_ctx_range_till(struct __sk_buff, family, local_port): 3855 return false; 3856 } 3857 3858 return bpf_skb_is_valid_access(off, size, type, info); 3859 } 3860 3861 static bool __is_valid_xdp_access(int off, int size) 3862 { 3863 if (off < 0 || off >= sizeof(struct xdp_md)) 3864 return false; 3865 if (off % size != 0) 3866 return false; 3867 if (size != sizeof(__u32)) 3868 return false; 3869 3870 return true; 3871 } 3872 3873 static bool xdp_is_valid_access(int off, int size, 3874 enum bpf_access_type type, 3875 struct bpf_insn_access_aux *info) 3876 { 3877 if (type == BPF_WRITE) 3878 return false; 3879 3880 switch (off) { 3881 case offsetof(struct xdp_md, data): 3882 info->reg_type = PTR_TO_PACKET; 3883 break; 3884 case offsetof(struct xdp_md, data_meta): 3885 info->reg_type = PTR_TO_PACKET_META; 3886 break; 3887 case offsetof(struct xdp_md, data_end): 3888 info->reg_type = PTR_TO_PACKET_END; 3889 break; 3890 } 3891 3892 return __is_valid_xdp_access(off, size); 3893 } 3894 3895 void bpf_warn_invalid_xdp_action(u32 act) 3896 { 3897 const u32 act_max = XDP_REDIRECT; 3898 3899 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", 3900 act > act_max ? "Illegal" : "Driver unsupported", 3901 act); 3902 } 3903 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 3904 3905 static bool sock_ops_is_valid_access(int off, int size, 3906 enum bpf_access_type type, 3907 struct bpf_insn_access_aux *info) 3908 { 3909 const int size_default = sizeof(__u32); 3910 3911 if (off < 0 || off >= sizeof(struct bpf_sock_ops)) 3912 return false; 3913 3914 /* The verifier guarantees that size > 0. 
*/ 3915 if (off % size != 0) 3916 return false; 3917 3918 if (type == BPF_WRITE) { 3919 switch (off) { 3920 case offsetof(struct bpf_sock_ops, reply): 3921 case offsetof(struct bpf_sock_ops, sk_txhash): 3922 if (size != size_default) 3923 return false; 3924 break; 3925 default: 3926 return false; 3927 } 3928 } else { 3929 switch (off) { 3930 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, 3931 bytes_acked): 3932 if (size != sizeof(__u64)) 3933 return false; 3934 break; 3935 default: 3936 if (size != size_default) 3937 return false; 3938 break; 3939 } 3940 } 3941 3942 return true; 3943 } 3944 3945 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, 3946 const struct bpf_prog *prog) 3947 { 3948 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); 3949 } 3950 3951 static bool sk_skb_is_valid_access(int off, int size, 3952 enum bpf_access_type type, 3953 struct bpf_insn_access_aux *info) 3954 { 3955 switch (off) { 3956 case bpf_ctx_range(struct __sk_buff, tc_classid): 3957 case bpf_ctx_range(struct __sk_buff, data_meta): 3958 return false; 3959 } 3960 3961 if (type == BPF_WRITE) { 3962 switch (off) { 3963 case bpf_ctx_range(struct __sk_buff, tc_index): 3964 case bpf_ctx_range(struct __sk_buff, priority): 3965 break; 3966 default: 3967 return false; 3968 } 3969 } 3970 3971 switch (off) { 3972 case bpf_ctx_range(struct __sk_buff, mark): 3973 return false; 3974 case bpf_ctx_range(struct __sk_buff, data): 3975 info->reg_type = PTR_TO_PACKET; 3976 break; 3977 case bpf_ctx_range(struct __sk_buff, data_end): 3978 info->reg_type = PTR_TO_PACKET_END; 3979 break; 3980 } 3981 3982 return bpf_skb_is_valid_access(off, size, type, info); 3983 } 3984 3985 static u32 bpf_convert_ctx_access(enum bpf_access_type type, 3986 const struct bpf_insn *si, 3987 struct bpf_insn *insn_buf, 3988 struct bpf_prog *prog, u32 *target_size) 3989 { 3990 struct bpf_insn *insn = insn_buf; 3991 int off; 3992 3993 switch (si->off) { 3994 case offsetof(struct __sk_buff, len): 3995 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 3996 bpf_target_off(struct sk_buff, len, 4, 3997 target_size)); 3998 break; 3999 4000 case offsetof(struct __sk_buff, protocol): 4001 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 4002 bpf_target_off(struct sk_buff, protocol, 2, 4003 target_size)); 4004 break; 4005 4006 case offsetof(struct __sk_buff, vlan_proto): 4007 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 4008 bpf_target_off(struct sk_buff, vlan_proto, 2, 4009 target_size)); 4010 break; 4011 4012 case offsetof(struct __sk_buff, priority): 4013 if (type == BPF_WRITE) 4014 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, 4015 bpf_target_off(struct sk_buff, priority, 4, 4016 target_size)); 4017 else 4018 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 4019 bpf_target_off(struct sk_buff, priority, 4, 4020 target_size)); 4021 break; 4022 4023 case offsetof(struct __sk_buff, ingress_ifindex): 4024 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 4025 bpf_target_off(struct sk_buff, skb_iif, 4, 4026 target_size)); 4027 break; 4028 4029 case offsetof(struct __sk_buff, ifindex): 4030 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), 4031 si->dst_reg, si->src_reg, 4032 offsetof(struct sk_buff, dev)); 4033 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 4034 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, 4035 bpf_target_off(struct net_device, ifindex, 4, 4036 target_size)); 4037 break; 4038 4039 case offsetof(struct __sk_buff, hash): 4040 *insn++ = 

static u32 bpf_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct __sk_buff, len):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, len, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, protocol):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, protocol, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, vlan_proto):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, vlan_proto, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, priority):
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, priority, 4,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, priority, 4,
							     target_size));
		break;

	case offsetof(struct __sk_buff, ingress_ifindex):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, skb_iif, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct net_device, ifindex, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, hash):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, hash, 4,
						     target_size));
		break;

	case offsetof(struct __sk_buff, mark):
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, mark, 4,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, mark, 4,
							     target_size));
		break;

	case offsetof(struct __sk_buff, pkt_type):
		*target_size = 1;
		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
				      PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
#endif
		break;

	case offsetof(struct __sk_buff, queue_mapping):
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, queue_mapping, 2,
						     target_size));
		break;

	case offsetof(struct __sk_buff, vlan_present):
	case offsetof(struct __sk_buff, vlan_tci):
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, vlan_tci, 2,
						     target_size));
		if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
			*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
		}
		break;

	case offsetof(struct __sk_buff, cb[0]) ...
	     offsetofend(struct __sk_buff, cb[4]) - 1:
		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
			      offsetof(struct qdisc_skb_cb, data)) %
			     sizeof(__u64));

		prog->cb_access = 1;
		off = si->off;
		off -= offsetof(struct __sk_buff, cb[0]);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, data);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
					      si->src_reg, off);
		break;
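
	/* Illustrative note on the cb[0..4] scratch area handled above:
	 * the 20 bytes of __sk_buff.cb[] map onto the qdisc_skb_cb.data
	 * portion of skb->cb[], and prog->cb_access is set so that callers
	 * only need to save/restore (or clear) that area when a program
	 * actually touches it. A program might use it roughly like:
	 *
	 *	skb->cb[0] = 42;	// stash a per-packet value
	 *	...
	 *	if (skb->cb[0] == 42)	// read it back later in the program
	 *		return TC_ACT_OK;
	 */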

	case offsetof(struct __sk_buff, tc_classid):
		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);

		off = si->off;
		off -= offsetof(struct __sk_buff, tc_classid);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct qdisc_skb_cb, tc_classid);
		*target_size = 2;
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
					      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, data));
		break;

	case offsetof(struct __sk_buff, data_meta):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_meta);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct bpf_skb_data_end, data_meta);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, data_end):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_end);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct bpf_skb_data_end, data_end);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;

	case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, tc_index, 2,
							     target_size));
		else
			*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
					      bpf_target_off(struct sk_buff, tc_index, 2,
							     target_size));
#else
		*target_size = 2;
		if (type == BPF_WRITE)
			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
		else
			*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct __sk_buff, napi_id):
#if defined(CONFIG_NET_RX_BUSY_POLL)
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      bpf_target_off(struct sk_buff, napi_id, 4,
						     target_size));
		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
		*target_size = 4;
		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
		break;
	case offsetof(struct __sk_buff, family):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_family,
						     2, target_size));
		break;
	case offsetof(struct __sk_buff, remote_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_daddr,
						     4, target_size));
		break;
	case offsetof(struct __sk_buff, local_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_rcv_saddr,
						     4, target_size));
		break;
	case offsetof(struct __sk_buff, remote_ip6[0]) ...
	     offsetof(struct __sk_buff, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct __sk_buff, remote_ip6[0]);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;
	case offsetof(struct __sk_buff, local_ip6[0]) ...
	     offsetof(struct __sk_buff, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct __sk_buff, local_ip6[0]);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct __sk_buff, remote_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_dport,
						     2, target_size));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct __sk_buff, local_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct sock_common,
						     skc_num, 2, target_size));
		break;
	}

	return insn - insn_buf;
}

static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
					  const struct bpf_insn *si,
					  struct bpf_insn *insn_buf,
					  struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sock, bound_dev_if):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_bound_dev_if));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_bound_dev_if));
		break;

	case offsetof(struct bpf_sock, mark):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_mark));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_mark));
		break;

	case offsetof(struct bpf_sock, priority):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);

		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_priority));
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      offsetof(struct sock, sk_priority));
		break;

	case offsetof(struct bpf_sock, family):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
				      offsetof(struct sock, sk_family));
		break;

	case offsetof(struct bpf_sock, type):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sock, __sk_flags_offset));
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
		break;

	case offsetof(struct bpf_sock, protocol):
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
				      offsetof(struct sock, __sk_flags_offset));
		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
		break;
	}

	return insn - insn_buf;
}

static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct __sk_buff, ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_buff, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct net_device, ifindex, 4,
						     target_size));
		break;
	default:
		return bpf_convert_ctx_access(type, si, insn_buf, prog,
					      target_size);
	}

	return insn - insn_buf;
}
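
/* The XDP conversion below maps the UAPI struct xdp_md fields straight onto
 * the corresponding struct xdp_buff (and xdp_rxq_info) members; there is no
 * intermediate copy. Programs use data/data_end as bounded packet pointers,
 * roughly like this illustrative sketch (not from this file):
 *
 *	void *data     = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *
 *	if (data + sizeof(struct ethhdr) > data_end)
 *		return XDP_DROP;
 */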

static u32 xdp_convert_ctx_access(enum bpf_access_type type,
				  const struct bpf_insn *si,
				  struct bpf_insn *insn_buf,
				  struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct xdp_md, data):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data));
		break;
	case offsetof(struct xdp_md, data_meta):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data_meta));
		break;
	case offsetof(struct xdp_md, data_end):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, data_end));
		break;
	case offsetof(struct xdp_md, ingress_ifindex):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, rxq));
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
				      si->dst_reg, si->dst_reg,
				      offsetof(struct xdp_rxq_info, dev));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct net_device, ifindex));
		break;
	case offsetof(struct xdp_md, rx_queue_index):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
				      si->dst_reg, si->src_reg,
				      offsetof(struct xdp_buff, rxq));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct xdp_rxq_info,
					       queue_index));
		break;
	}

	return insn - insn_buf;
}

static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
				       const struct bpf_insn *si,
				       struct bpf_insn *insn_buf,
				       struct bpf_prog *prog,
				       u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct bpf_sock_ops, op) ...
	     offsetof(struct bpf_sock_ops, replylong[3]):
		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
			     FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
			     FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
		BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
			     FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
		off = si->off;
		off -= offsetof(struct bpf_sock_ops, op);
		off += offsetof(struct bpf_sock_ops_kern, op);
		if (type == BPF_WRITE)
			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      off);
		else
			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
					      off);
		break;

	case offsetof(struct bpf_sock_ops, family):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_family));
		break;

	case offsetof(struct bpf_sock_ops, remote_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_daddr));
		break;

	case offsetof(struct bpf_sock_ops, local_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_rcv_saddr));
		break;

	case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
	     offsetof(struct bpf_sock_ops, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
	     offsetof(struct bpf_sock_ops, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct bpf_sock_ops, remote_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_dport));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct bpf_sock_ops, local_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_num));
		break;

	case offsetof(struct bpf_sock_ops, is_fullsock):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern,
					      is_fullsock),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern,
					       is_fullsock));
		break;

	case offsetof(struct bpf_sock_ops, state):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_state));
		break;

	case offsetof(struct bpf_sock_ops, rtt_min):
		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
			     sizeof(struct minmax));
		BUILD_BUG_ON(sizeof(struct minmax) <
			     sizeof(struct minmax_sample));

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct bpf_sock_ops_kern, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct tcp_sock, rtt_min) +
				      FIELD_SIZEOF(struct minmax_sample, t));
		break;

/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			\
	do {								\
		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		\
			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
						struct bpf_sock_ops_kern, \
						is_fullsock),		\
				      si->dst_reg, si->src_reg,		\
				      offsetof(struct bpf_sock_ops_kern, \
					       is_fullsock));		\
		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
						struct bpf_sock_ops_kern, sk),\
				      si->dst_reg, si->src_reg,		\
				      offsetof(struct bpf_sock_ops_kern, sk));\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		\
						       OBJ_FIELD),	\
				      si->dst_reg, si->dst_reg,		\
				      offsetof(OBJ, OBJ_FIELD));	\
	} while (0)

/* Helper macro for adding write access to tcp_sock or sock fields.
 * The macro is called with two registers: dst_reg, which contains a pointer
 * to ctx (the context), and src_reg, which contains the value that should be
 * stored. However, we need an additional register since we cannot overwrite
 * dst_reg because it may be used later in the program.
 * Instead we "borrow" one of the other registers. We first save its value
 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
 * it at the end of the macro.
 */
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			\
	do {								\
		int reg = BPF_REG_9;					\
		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		\
			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
		if (si->dst_reg == reg || si->src_reg == reg)		\
			reg--;						\
		if (si->dst_reg == reg || si->src_reg == reg)		\
			reg--;						\
		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		\
				      offsetof(struct bpf_sock_ops_kern, \
					       temp));			\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
						struct bpf_sock_ops_kern, \
						is_fullsock),		\
				      reg, si->dst_reg,			\
				      offsetof(struct bpf_sock_ops_kern, \
					       is_fullsock));		\
		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		\
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			\
						struct bpf_sock_ops_kern, sk),\
				      reg, si->dst_reg,			\
				      offsetof(struct bpf_sock_ops_kern, sk));\
		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	\
				      reg, si->src_reg,			\
				      offsetof(OBJ, OBJ_FIELD));	\
		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		\
				      offsetof(struct bpf_sock_ops_kern, \
					       temp));			\
	} while (0)

#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	\
	do {								\
		if (TYPE == BPF_WRITE)					\
			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	\
		else							\
			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	\
	} while (0)
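
	/* Illustrative expansion (sketch): SOCK_OPS_GET_FIELD(snd_cwnd,
	 * snd_cwnd, struct tcp_sock) used below emits roughly
	 *
	 *	dst = ops->is_fullsock;
	 *	if (dst == 0)
	 *		goto out;	// leaves dst == 0 for non-full sockets
	 *	dst = ops->sk;
	 *	dst = ((struct tcp_sock *)dst)->snd_cwnd;
	 * out:
	 *
	 * so reads of tcp_sock fields quietly yield 0 when the callback runs
	 * on a request or timewait socket. The SET variant additionally
	 * spills a scratch register into bpf_sock_ops_kern::temp as described
	 * in the comment above.
	 */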

	case offsetof(struct bpf_sock_ops, snd_cwnd):
		SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, srtt_us):
		SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
		SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, snd_ssthresh):
		SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, rcv_nxt):
		SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, snd_nxt):
		SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, snd_una):
		SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, mss_cache):
		SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, ecn_flags):
		SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, rate_delivered):
		SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, rate_interval_us):
		SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, packets_out):
		SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, retrans_out):
		SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, total_retrans):
		SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, segs_in):
		SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, data_segs_in):
		SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, segs_out):
		SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, data_segs_out):
		SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, lost_out):
		SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, sacked_out):
		SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, sk_txhash):
		SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
					  struct sock, type);
		break;

	case offsetof(struct bpf_sock_ops, bytes_received):
		SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
				   struct tcp_sock);
		break;

	case offsetof(struct bpf_sock_ops, bytes_acked):
		SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
		break;

	}
	return insn - insn_buf;
}

static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct __sk_buff, data_end):
		off = si->off;
		off -= offsetof(struct __sk_buff, data_end);
		off += offsetof(struct sk_buff, cb);
		off += offsetof(struct tcp_skb_cb, bpf.data_end);
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
				      si->src_reg, off);
		break;
	default:
		return bpf_convert_ctx_access(type, si, insn_buf, prog,
					      target_size);
	}

	return insn - insn_buf;
}
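
/* Program-type plumbing: each BPF program type provides a bpf_verifier_ops
 * (helper lookup, context-access validation and conversion, optional
 * prologue generation) and a bpf_prog_ops (currently just the test_run hook
 * behind BPF_PROG_TEST_RUN). The tables below are expected to be wired up to
 * the program types through linux/bpf_types.h, e.g. (sketch):
 *
 *	BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
 */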

const struct bpf_verifier_ops sk_filter_verifier_ops = {
	.get_func_proto		= sk_filter_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops sk_filter_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
	.get_func_proto		= tc_cls_act_func_proto,
	.is_valid_access	= tc_cls_act_is_valid_access,
	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
};

const struct bpf_prog_ops tc_cls_act_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops xdp_verifier_ops = {
	.get_func_proto		= xdp_func_proto,
	.is_valid_access	= xdp_is_valid_access,
	.convert_ctx_access	= xdp_convert_ctx_access,
};

const struct bpf_prog_ops xdp_prog_ops = {
	.test_run		= bpf_prog_test_run_xdp,
};

const struct bpf_verifier_ops cg_skb_verifier_ops = {
	.get_func_proto		= sk_filter_func_proto,
	.is_valid_access	= sk_filter_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops cg_skb_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_inout_verifier_ops = {
	.get_func_proto		= lwt_inout_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_inout_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
	.get_func_proto		= lwt_xmit_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
	.gen_prologue		= tc_cls_act_prologue,
};

const struct bpf_prog_ops lwt_xmit_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops cg_sock_verifier_ops = {
	.get_func_proto		= sock_filter_func_proto,
	.is_valid_access	= sock_filter_is_valid_access,
	.convert_ctx_access	= sock_filter_convert_ctx_access,
};

const struct bpf_prog_ops cg_sock_prog_ops = {
};

const struct bpf_verifier_ops sock_ops_verifier_ops = {
	.get_func_proto		= sock_ops_func_proto,
	.is_valid_access	= sock_ops_is_valid_access,
	.convert_ctx_access	= sock_ops_convert_ctx_access,
};

const struct bpf_prog_ops sock_ops_prog_ops = {
};

const struct bpf_verifier_ops sk_skb_verifier_ops = {
	.get_func_proto		= sk_skb_func_proto,
	.is_valid_access	= sk_skb_is_valid_access,
	.convert_ctx_access	= sk_skb_convert_ctx_access,
	.gen_prologue		= sk_skb_prologue,
};

const struct bpf_prog_ops sk_skb_prog_ops = {
};

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
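
/* sk_detach_filter() above is what the SO_DETACH_FILTER (and SO_DETACH_BPF)
 * setsockopt ends up calling. Illustrative user-space usage (sketch, not
 * from this file):
 *
 *	int val = 0;
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)))
 *		perror("SO_DETACH_FILTER");
 */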

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that was originally attached, so no
	 * conversion/decode is needed anymore. eBPF programs that have no
	 * original program cannot be dumped through this.
	 */
	ret = -EACCES;
	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	ret = fprog->len;
	if (!len)
		/* User space only enquires the number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API returns the number of filter blocks. */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}
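
/* Illustrative user-space counterpart of sk_get_filter() (sketch, not from
 * this file): SO_GET_FILTER first reports the number of classic BPF blocks
 * when called with a zero length, then copies the blocks out:
 *
 *	struct sock_filter buf[BPF_MAXINSNS];
 *	socklen_t len = 0;
 *
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len);
 *	// len now holds the number of filter blocks (0 if none is attached)
 *	if (len && len <= BPF_MAXINSNS)
 *		getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, buf, &len);
 */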