1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/kernel.h> 3 #include <linux/skbuff.h> 4 #include <linux/export.h> 5 #include <linux/ip.h> 6 #include <linux/ipv6.h> 7 #include <linux/if_vlan.h> 8 #include <net/dsa.h> 9 #include <net/dst_metadata.h> 10 #include <net/ip.h> 11 #include <net/ipv6.h> 12 #include <net/gre.h> 13 #include <net/pptp.h> 14 #include <net/tipc.h> 15 #include <linux/igmp.h> 16 #include <linux/icmp.h> 17 #include <linux/sctp.h> 18 #include <linux/dccp.h> 19 #include <linux/if_tunnel.h> 20 #include <linux/if_pppox.h> 21 #include <linux/ppp_defs.h> 22 #include <linux/stddef.h> 23 #include <linux/if_ether.h> 24 #include <linux/mpls.h> 25 #include <linux/tcp.h> 26 #include <net/flow_dissector.h> 27 #include <scsi/fc/fc_fcoe.h> 28 #include <uapi/linux/batadv_packet.h> 29 #include <linux/bpf.h> 30 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 31 #include <net/netfilter/nf_conntrack_core.h> 32 #include <net/netfilter/nf_conntrack_labels.h> 33 #endif 34 35 static DEFINE_MUTEX(flow_dissector_mutex); 36 37 static void dissector_set_key(struct flow_dissector *flow_dissector, 38 enum flow_dissector_key_id key_id) 39 { 40 flow_dissector->used_keys |= (1 << key_id); 41 } 42 43 void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 44 const struct flow_dissector_key *key, 45 unsigned int key_count) 46 { 47 unsigned int i; 48 49 memset(flow_dissector, 0, sizeof(*flow_dissector)); 50 51 for (i = 0; i < key_count; i++, key++) { 52 /* User should make sure that every key target offset is withing 53 * boundaries of unsigned short. 54 */ 55 BUG_ON(key->offset > USHRT_MAX); 56 BUG_ON(dissector_uses_key(flow_dissector, 57 key->key_id)); 58 59 dissector_set_key(flow_dissector, key->key_id); 60 flow_dissector->offset[key->key_id] = key->offset; 61 } 62 63 /* Ensure that the dissector always includes control and basic key. 64 * That way we are able to avoid handling lack of these in fast path. 65 */ 66 BUG_ON(!dissector_uses_key(flow_dissector, 67 FLOW_DISSECTOR_KEY_CONTROL)); 68 BUG_ON(!dissector_uses_key(flow_dissector, 69 FLOW_DISSECTOR_KEY_BASIC)); 70 } 71 EXPORT_SYMBOL(skb_flow_dissector_init); 72 73 int skb_flow_dissector_prog_query(const union bpf_attr *attr, 74 union bpf_attr __user *uattr) 75 { 76 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 77 u32 prog_id, prog_cnt = 0, flags = 0; 78 struct bpf_prog *attached; 79 struct net *net; 80 81 if (attr->query.query_flags) 82 return -EINVAL; 83 84 net = get_net_ns_by_fd(attr->query.target_fd); 85 if (IS_ERR(net)) 86 return PTR_ERR(net); 87 88 rcu_read_lock(); 89 attached = rcu_dereference(net->flow_dissector_prog); 90 if (attached) { 91 prog_cnt = 1; 92 prog_id = attached->aux->id; 93 } 94 rcu_read_unlock(); 95 96 put_net(net); 97 98 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 99 return -EFAULT; 100 if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 101 return -EFAULT; 102 103 if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 104 return 0; 105 106 if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) 107 return -EFAULT; 108 109 return 0; 110 } 111 112 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, 113 struct bpf_prog *prog) 114 { 115 struct bpf_prog *attached; 116 struct net *net; 117 int ret = 0; 118 119 net = current->nsproxy->net_ns; 120 mutex_lock(&flow_dissector_mutex); 121 122 if (net == &init_net) { 123 /* BPF flow dissector in the root namespace overrides 124 * any per-net-namespace one. When attaching to root, 125 * make sure we don't have any BPF program attached 126 * to the non-root namespaces. 127 */ 128 struct net *ns; 129 130 for_each_net(ns) { 131 if (ns == &init_net) 132 continue; 133 if (rcu_access_pointer(ns->flow_dissector_prog)) { 134 ret = -EEXIST; 135 goto out; 136 } 137 } 138 } else { 139 /* Make sure root flow dissector is not attached 140 * when attaching to the non-root namespace. 141 */ 142 if (rcu_access_pointer(init_net.flow_dissector_prog)) { 143 ret = -EEXIST; 144 goto out; 145 } 146 } 147 148 attached = rcu_dereference_protected(net->flow_dissector_prog, 149 lockdep_is_held(&flow_dissector_mutex)); 150 if (attached == prog) { 151 /* The same program cannot be attached twice */ 152 ret = -EINVAL; 153 goto out; 154 } 155 rcu_assign_pointer(net->flow_dissector_prog, prog); 156 if (attached) 157 bpf_prog_put(attached); 158 out: 159 mutex_unlock(&flow_dissector_mutex); 160 return ret; 161 } 162 163 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) 164 { 165 struct bpf_prog *attached; 166 struct net *net; 167 168 net = current->nsproxy->net_ns; 169 mutex_lock(&flow_dissector_mutex); 170 attached = rcu_dereference_protected(net->flow_dissector_prog, 171 lockdep_is_held(&flow_dissector_mutex)); 172 if (!attached) { 173 mutex_unlock(&flow_dissector_mutex); 174 return -ENOENT; 175 } 176 RCU_INIT_POINTER(net->flow_dissector_prog, NULL); 177 bpf_prog_put(attached); 178 mutex_unlock(&flow_dissector_mutex); 179 return 0; 180 } 181 182 /** 183 * __skb_flow_get_ports - extract the upper layer ports and return them 184 * @skb: sk_buff to extract the ports from 185 * @thoff: transport header offset 186 * @ip_proto: protocol for which to get port offset 187 * @data: raw buffer pointer to the packet, if NULL use skb->data 188 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 189 * 190 * The function will try to retrieve the ports at offset thoff + poff where poff 191 * is the protocol port offset returned from proto_ports_offset 192 */ 193 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, 194 void *data, int hlen) 195 { 196 int poff = proto_ports_offset(ip_proto); 197 198 if (!data) { 199 data = skb->data; 200 hlen = skb_headlen(skb); 201 } 202 203 if (poff >= 0) { 204 __be32 *ports, _ports; 205 206 ports = __skb_header_pointer(skb, thoff + poff, 207 sizeof(_ports), data, hlen, &_ports); 208 if (ports) 209 return *ports; 210 } 211 212 return 0; 213 } 214 EXPORT_SYMBOL(__skb_flow_get_ports); 215 216 static bool icmp_has_id(u8 type) 217 { 218 switch (type) { 219 case ICMP_ECHO: 220 case ICMP_ECHOREPLY: 221 case ICMP_TIMESTAMP: 222 case ICMP_TIMESTAMPREPLY: 223 case ICMPV6_ECHO_REQUEST: 224 case ICMPV6_ECHO_REPLY: 225 return true; 226 } 227 228 return false; 229 } 230 231 /** 232 * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields 233 * @skb: sk_buff to extract from 234 * @key_icmp: struct flow_dissector_key_icmp to fill 235 * @data: raw buffer pointer to the packet 236 * @toff: offset to extract at 237 * @hlen: packet header length 238 */ 239 void skb_flow_get_icmp_tci(const struct sk_buff *skb, 240 struct flow_dissector_key_icmp *key_icmp, 241 void *data, int thoff, int hlen) 242 { 243 struct icmphdr *ih, _ih; 244 245 ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih); 246 if (!ih) 247 return; 248 249 key_icmp->type = ih->type; 250 key_icmp->code = ih->code; 251 252 /* As we use 0 to signal that the Id field is not present, 253 * avoid confusion with packets without such field 254 */ 255 if (icmp_has_id(ih->type)) 256 key_icmp->id = ih->un.echo.id ? : 1; 257 else 258 key_icmp->id = 0; 259 } 260 EXPORT_SYMBOL(skb_flow_get_icmp_tci); 261 262 /* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet 263 * using skb_flow_get_icmp_tci(). 264 */ 265 static void __skb_flow_dissect_icmp(const struct sk_buff *skb, 266 struct flow_dissector *flow_dissector, 267 void *target_container, 268 void *data, int thoff, int hlen) 269 { 270 struct flow_dissector_key_icmp *key_icmp; 271 272 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP)) 273 return; 274 275 key_icmp = skb_flow_dissector_target(flow_dissector, 276 FLOW_DISSECTOR_KEY_ICMP, 277 target_container); 278 279 skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); 280 } 281 282 void skb_flow_dissect_meta(const struct sk_buff *skb, 283 struct flow_dissector *flow_dissector, 284 void *target_container) 285 { 286 struct flow_dissector_key_meta *meta; 287 288 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META)) 289 return; 290 291 meta = skb_flow_dissector_target(flow_dissector, 292 FLOW_DISSECTOR_KEY_META, 293 target_container); 294 meta->ingress_ifindex = skb->skb_iif; 295 } 296 EXPORT_SYMBOL(skb_flow_dissect_meta); 297 298 static void 299 skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, 300 struct flow_dissector *flow_dissector, 301 void *target_container) 302 { 303 struct flow_dissector_key_control *ctrl; 304 305 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) 306 return; 307 308 ctrl = skb_flow_dissector_target(flow_dissector, 309 FLOW_DISSECTOR_KEY_ENC_CONTROL, 310 target_container); 311 ctrl->addr_type = type; 312 } 313 314 void 315 skb_flow_dissect_ct(const struct sk_buff *skb, 316 struct flow_dissector *flow_dissector, 317 void *target_container, 318 u16 *ctinfo_map, 319 size_t mapsize) 320 { 321 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 322 struct flow_dissector_key_ct *key; 323 enum ip_conntrack_info ctinfo; 324 struct nf_conn_labels *cl; 325 struct nf_conn *ct; 326 327 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT)) 328 return; 329 330 ct = nf_ct_get(skb, &ctinfo); 331 if (!ct) 332 return; 333 334 key = skb_flow_dissector_target(flow_dissector, 335 FLOW_DISSECTOR_KEY_CT, 336 target_container); 337 338 if (ctinfo < mapsize) 339 key->ct_state = ctinfo_map[ctinfo]; 340 #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) 341 key->ct_zone = ct->zone.id; 342 #endif 343 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 344 key->ct_mark = ct->mark; 345 #endif 346 347 cl = nf_ct_labels_find(ct); 348 if (cl) 349 memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels)); 350 #endif /* CONFIG_NF_CONNTRACK */ 351 } 352 EXPORT_SYMBOL(skb_flow_dissect_ct); 353 354 void 355 skb_flow_dissect_tunnel_info(const struct sk_buff *skb, 356 struct flow_dissector *flow_dissector, 357 void *target_container) 358 { 359 struct ip_tunnel_info *info; 360 struct ip_tunnel_key *key; 361 362 /* A quick check to see if there might be something to do. */ 363 if (!dissector_uses_key(flow_dissector, 364 FLOW_DISSECTOR_KEY_ENC_KEYID) && 365 !dissector_uses_key(flow_dissector, 366 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && 367 !dissector_uses_key(flow_dissector, 368 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && 369 !dissector_uses_key(flow_dissector, 370 FLOW_DISSECTOR_KEY_ENC_CONTROL) && 371 !dissector_uses_key(flow_dissector, 372 FLOW_DISSECTOR_KEY_ENC_PORTS) && 373 !dissector_uses_key(flow_dissector, 374 FLOW_DISSECTOR_KEY_ENC_IP) && 375 !dissector_uses_key(flow_dissector, 376 FLOW_DISSECTOR_KEY_ENC_OPTS)) 377 return; 378 379 info = skb_tunnel_info(skb); 380 if (!info) 381 return; 382 383 key = &info->key; 384 385 switch (ip_tunnel_info_af(info)) { 386 case AF_INET: 387 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, 388 flow_dissector, 389 target_container); 390 if (dissector_uses_key(flow_dissector, 391 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { 392 struct flow_dissector_key_ipv4_addrs *ipv4; 393 394 ipv4 = skb_flow_dissector_target(flow_dissector, 395 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 396 target_container); 397 ipv4->src = key->u.ipv4.src; 398 ipv4->dst = key->u.ipv4.dst; 399 } 400 break; 401 case AF_INET6: 402 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, 403 flow_dissector, 404 target_container); 405 if (dissector_uses_key(flow_dissector, 406 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { 407 struct flow_dissector_key_ipv6_addrs *ipv6; 408 409 ipv6 = skb_flow_dissector_target(flow_dissector, 410 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 411 target_container); 412 ipv6->src = key->u.ipv6.src; 413 ipv6->dst = key->u.ipv6.dst; 414 } 415 break; 416 } 417 418 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { 419 struct flow_dissector_key_keyid *keyid; 420 421 keyid = skb_flow_dissector_target(flow_dissector, 422 FLOW_DISSECTOR_KEY_ENC_KEYID, 423 target_container); 424 keyid->keyid = tunnel_id_to_key32(key->tun_id); 425 } 426 427 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { 428 struct flow_dissector_key_ports *tp; 429 430 tp = skb_flow_dissector_target(flow_dissector, 431 FLOW_DISSECTOR_KEY_ENC_PORTS, 432 target_container); 433 tp->src = key->tp_src; 434 tp->dst = key->tp_dst; 435 } 436 437 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { 438 struct flow_dissector_key_ip *ip; 439 440 ip = skb_flow_dissector_target(flow_dissector, 441 FLOW_DISSECTOR_KEY_ENC_IP, 442 target_container); 443 ip->tos = key->tos; 444 ip->ttl = key->ttl; 445 } 446 447 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { 448 struct flow_dissector_key_enc_opts *enc_opt; 449 450 enc_opt = skb_flow_dissector_target(flow_dissector, 451 FLOW_DISSECTOR_KEY_ENC_OPTS, 452 target_container); 453 454 if (info->options_len) { 455 enc_opt->len = info->options_len; 456 ip_tunnel_info_opts_get(enc_opt->data, info); 457 enc_opt->dst_opt_type = info->key.tun_flags & 458 TUNNEL_OPTIONS_PRESENT; 459 } 460 } 461 } 462 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); 463 464 static enum flow_dissect_ret 465 __skb_flow_dissect_mpls(const struct sk_buff *skb, 466 struct flow_dissector *flow_dissector, 467 void *target_container, void *data, int nhoff, int hlen) 468 { 469 struct flow_dissector_key_keyid *key_keyid; 470 struct mpls_label *hdr, _hdr[2]; 471 u32 entry, label; 472 473 if (!dissector_uses_key(flow_dissector, 474 FLOW_DISSECTOR_KEY_MPLS_ENTROPY) && 475 !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) 476 return FLOW_DISSECT_RET_OUT_GOOD; 477 478 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, 479 hlen, &_hdr); 480 if (!hdr) 481 return FLOW_DISSECT_RET_OUT_BAD; 482 483 entry = ntohl(hdr[0].entry); 484 label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; 485 486 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) { 487 struct flow_dissector_key_mpls *key_mpls; 488 489 key_mpls = skb_flow_dissector_target(flow_dissector, 490 FLOW_DISSECTOR_KEY_MPLS, 491 target_container); 492 key_mpls->mpls_label = label; 493 key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK) 494 >> MPLS_LS_TTL_SHIFT; 495 key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK) 496 >> MPLS_LS_TC_SHIFT; 497 key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK) 498 >> MPLS_LS_S_SHIFT; 499 } 500 501 if (label == MPLS_LABEL_ENTROPY) { 502 key_keyid = skb_flow_dissector_target(flow_dissector, 503 FLOW_DISSECTOR_KEY_MPLS_ENTROPY, 504 target_container); 505 key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK); 506 } 507 return FLOW_DISSECT_RET_OUT_GOOD; 508 } 509 510 static enum flow_dissect_ret 511 __skb_flow_dissect_arp(const struct sk_buff *skb, 512 struct flow_dissector *flow_dissector, 513 void *target_container, void *data, int nhoff, int hlen) 514 { 515 struct flow_dissector_key_arp *key_arp; 516 struct { 517 unsigned char ar_sha[ETH_ALEN]; 518 unsigned char ar_sip[4]; 519 unsigned char ar_tha[ETH_ALEN]; 520 unsigned char ar_tip[4]; 521 } *arp_eth, _arp_eth; 522 const struct arphdr *arp; 523 struct arphdr _arp; 524 525 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) 526 return FLOW_DISSECT_RET_OUT_GOOD; 527 528 arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, 529 hlen, &_arp); 530 if (!arp) 531 return FLOW_DISSECT_RET_OUT_BAD; 532 533 if (arp->ar_hrd != htons(ARPHRD_ETHER) || 534 arp->ar_pro != htons(ETH_P_IP) || 535 arp->ar_hln != ETH_ALEN || 536 arp->ar_pln != 4 || 537 (arp->ar_op != htons(ARPOP_REPLY) && 538 arp->ar_op != htons(ARPOP_REQUEST))) 539 return FLOW_DISSECT_RET_OUT_BAD; 540 541 arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), 542 sizeof(_arp_eth), data, 543 hlen, &_arp_eth); 544 if (!arp_eth) 545 return FLOW_DISSECT_RET_OUT_BAD; 546 547 key_arp = skb_flow_dissector_target(flow_dissector, 548 FLOW_DISSECTOR_KEY_ARP, 549 target_container); 550 551 memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip)); 552 memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip)); 553 554 /* Only store the lower byte of the opcode; 555 * this covers ARPOP_REPLY and ARPOP_REQUEST. 556 */ 557 key_arp->op = ntohs(arp->ar_op) & 0xff; 558 559 ether_addr_copy(key_arp->sha, arp_eth->ar_sha); 560 ether_addr_copy(key_arp->tha, arp_eth->ar_tha); 561 562 return FLOW_DISSECT_RET_OUT_GOOD; 563 } 564 565 static enum flow_dissect_ret 566 __skb_flow_dissect_gre(const struct sk_buff *skb, 567 struct flow_dissector_key_control *key_control, 568 struct flow_dissector *flow_dissector, 569 void *target_container, void *data, 570 __be16 *p_proto, int *p_nhoff, int *p_hlen, 571 unsigned int flags) 572 { 573 struct flow_dissector_key_keyid *key_keyid; 574 struct gre_base_hdr *hdr, _hdr; 575 int offset = 0; 576 u16 gre_ver; 577 578 hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), 579 data, *p_hlen, &_hdr); 580 if (!hdr) 581 return FLOW_DISSECT_RET_OUT_BAD; 582 583 /* Only look inside GRE without routing */ 584 if (hdr->flags & GRE_ROUTING) 585 return FLOW_DISSECT_RET_OUT_GOOD; 586 587 /* Only look inside GRE for version 0 and 1 */ 588 gre_ver = ntohs(hdr->flags & GRE_VERSION); 589 if (gre_ver > 1) 590 return FLOW_DISSECT_RET_OUT_GOOD; 591 592 *p_proto = hdr->protocol; 593 if (gre_ver) { 594 /* Version1 must be PPTP, and check the flags */ 595 if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) 596 return FLOW_DISSECT_RET_OUT_GOOD; 597 } 598 599 offset += sizeof(struct gre_base_hdr); 600 601 if (hdr->flags & GRE_CSUM) 602 offset += sizeof_field(struct gre_full_hdr, csum) + 603 sizeof_field(struct gre_full_hdr, reserved1); 604 605 if (hdr->flags & GRE_KEY) { 606 const __be32 *keyid; 607 __be32 _keyid; 608 609 keyid = __skb_header_pointer(skb, *p_nhoff + offset, 610 sizeof(_keyid), 611 data, *p_hlen, &_keyid); 612 if (!keyid) 613 return FLOW_DISSECT_RET_OUT_BAD; 614 615 if (dissector_uses_key(flow_dissector, 616 FLOW_DISSECTOR_KEY_GRE_KEYID)) { 617 key_keyid = skb_flow_dissector_target(flow_dissector, 618 FLOW_DISSECTOR_KEY_GRE_KEYID, 619 target_container); 620 if (gre_ver == 0) 621 key_keyid->keyid = *keyid; 622 else 623 key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; 624 } 625 offset += sizeof_field(struct gre_full_hdr, key); 626 } 627 628 if (hdr->flags & GRE_SEQ) 629 offset += sizeof_field(struct pptp_gre_header, seq); 630 631 if (gre_ver == 0) { 632 if (*p_proto == htons(ETH_P_TEB)) { 633 const struct ethhdr *eth; 634 struct ethhdr _eth; 635 636 eth = __skb_header_pointer(skb, *p_nhoff + offset, 637 sizeof(_eth), 638 data, *p_hlen, &_eth); 639 if (!eth) 640 return FLOW_DISSECT_RET_OUT_BAD; 641 *p_proto = eth->h_proto; 642 offset += sizeof(*eth); 643 644 /* Cap headers that we access via pointers at the 645 * end of the Ethernet header as our maximum alignment 646 * at that point is only 2 bytes. 647 */ 648 if (NET_IP_ALIGN) 649 *p_hlen = *p_nhoff + offset; 650 } 651 } else { /* version 1, must be PPTP */ 652 u8 _ppp_hdr[PPP_HDRLEN]; 653 u8 *ppp_hdr; 654 655 if (hdr->flags & GRE_ACK) 656 offset += sizeof_field(struct pptp_gre_header, ack); 657 658 ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, 659 sizeof(_ppp_hdr), 660 data, *p_hlen, _ppp_hdr); 661 if (!ppp_hdr) 662 return FLOW_DISSECT_RET_OUT_BAD; 663 664 switch (PPP_PROTOCOL(ppp_hdr)) { 665 case PPP_IP: 666 *p_proto = htons(ETH_P_IP); 667 break; 668 case PPP_IPV6: 669 *p_proto = htons(ETH_P_IPV6); 670 break; 671 default: 672 /* Could probably catch some more like MPLS */ 673 break; 674 } 675 676 offset += PPP_HDRLEN; 677 } 678 679 *p_nhoff += offset; 680 key_control->flags |= FLOW_DIS_ENCAPSULATION; 681 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 682 return FLOW_DISSECT_RET_OUT_GOOD; 683 684 return FLOW_DISSECT_RET_PROTO_AGAIN; 685 } 686 687 /** 688 * __skb_flow_dissect_batadv() - dissect batman-adv header 689 * @skb: sk_buff to with the batman-adv header 690 * @key_control: flow dissectors control key 691 * @data: raw buffer pointer to the packet, if NULL use skb->data 692 * @p_proto: pointer used to update the protocol to process next 693 * @p_nhoff: pointer used to update inner network header offset 694 * @hlen: packet header length 695 * @flags: any combination of FLOW_DISSECTOR_F_* 696 * 697 * ETH_P_BATMAN packets are tried to be dissected. Only 698 * &struct batadv_unicast packets are actually processed because they contain an 699 * inner ethernet header and are usually followed by actual network header. This 700 * allows the flow dissector to continue processing the packet. 701 * 702 * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found, 703 * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation, 704 * otherwise FLOW_DISSECT_RET_OUT_BAD 705 */ 706 static enum flow_dissect_ret 707 __skb_flow_dissect_batadv(const struct sk_buff *skb, 708 struct flow_dissector_key_control *key_control, 709 void *data, __be16 *p_proto, int *p_nhoff, int hlen, 710 unsigned int flags) 711 { 712 struct { 713 struct batadv_unicast_packet batadv_unicast; 714 struct ethhdr eth; 715 } *hdr, _hdr; 716 717 hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen, 718 &_hdr); 719 if (!hdr) 720 return FLOW_DISSECT_RET_OUT_BAD; 721 722 if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION) 723 return FLOW_DISSECT_RET_OUT_BAD; 724 725 if (hdr->batadv_unicast.packet_type != BATADV_UNICAST) 726 return FLOW_DISSECT_RET_OUT_BAD; 727 728 *p_proto = hdr->eth.h_proto; 729 *p_nhoff += sizeof(*hdr); 730 731 key_control->flags |= FLOW_DIS_ENCAPSULATION; 732 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 733 return FLOW_DISSECT_RET_OUT_GOOD; 734 735 return FLOW_DISSECT_RET_PROTO_AGAIN; 736 } 737 738 static void 739 __skb_flow_dissect_tcp(const struct sk_buff *skb, 740 struct flow_dissector *flow_dissector, 741 void *target_container, void *data, int thoff, int hlen) 742 { 743 struct flow_dissector_key_tcp *key_tcp; 744 struct tcphdr *th, _th; 745 746 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP)) 747 return; 748 749 th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th); 750 if (!th) 751 return; 752 753 if (unlikely(__tcp_hdrlen(th) < sizeof(_th))) 754 return; 755 756 key_tcp = skb_flow_dissector_target(flow_dissector, 757 FLOW_DISSECTOR_KEY_TCP, 758 target_container); 759 key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); 760 } 761 762 static void 763 __skb_flow_dissect_ports(const struct sk_buff *skb, 764 struct flow_dissector *flow_dissector, 765 void *target_container, void *data, int nhoff, 766 u8 ip_proto, int hlen) 767 { 768 enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; 769 struct flow_dissector_key_ports *key_ports; 770 771 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) 772 dissector_ports = FLOW_DISSECTOR_KEY_PORTS; 773 else if (dissector_uses_key(flow_dissector, 774 FLOW_DISSECTOR_KEY_PORTS_RANGE)) 775 dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; 776 777 if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) 778 return; 779 780 key_ports = skb_flow_dissector_target(flow_dissector, 781 dissector_ports, 782 target_container); 783 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, 784 data, hlen); 785 } 786 787 static void 788 __skb_flow_dissect_ipv4(const struct sk_buff *skb, 789 struct flow_dissector *flow_dissector, 790 void *target_container, void *data, const struct iphdr *iph) 791 { 792 struct flow_dissector_key_ip *key_ip; 793 794 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) 795 return; 796 797 key_ip = skb_flow_dissector_target(flow_dissector, 798 FLOW_DISSECTOR_KEY_IP, 799 target_container); 800 key_ip->tos = iph->tos; 801 key_ip->ttl = iph->ttl; 802 } 803 804 static void 805 __skb_flow_dissect_ipv6(const struct sk_buff *skb, 806 struct flow_dissector *flow_dissector, 807 void *target_container, void *data, const struct ipv6hdr *iph) 808 { 809 struct flow_dissector_key_ip *key_ip; 810 811 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) 812 return; 813 814 key_ip = skb_flow_dissector_target(flow_dissector, 815 FLOW_DISSECTOR_KEY_IP, 816 target_container); 817 key_ip->tos = ipv6_get_dsfield(iph); 818 key_ip->ttl = iph->hop_limit; 819 } 820 821 /* Maximum number of protocol headers that can be parsed in 822 * __skb_flow_dissect 823 */ 824 #define MAX_FLOW_DISSECT_HDRS 15 825 826 static bool skb_flow_dissect_allowed(int *num_hdrs) 827 { 828 ++*num_hdrs; 829 830 return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS); 831 } 832 833 static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, 834 struct flow_dissector *flow_dissector, 835 void *target_container) 836 { 837 struct flow_dissector_key_control *key_control; 838 struct flow_dissector_key_basic *key_basic; 839 struct flow_dissector_key_addrs *key_addrs; 840 struct flow_dissector_key_ports *key_ports; 841 struct flow_dissector_key_tags *key_tags; 842 843 key_control = skb_flow_dissector_target(flow_dissector, 844 FLOW_DISSECTOR_KEY_CONTROL, 845 target_container); 846 key_control->thoff = flow_keys->thoff; 847 if (flow_keys->is_frag) 848 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 849 if (flow_keys->is_first_frag) 850 key_control->flags |= FLOW_DIS_FIRST_FRAG; 851 if (flow_keys->is_encap) 852 key_control->flags |= FLOW_DIS_ENCAPSULATION; 853 854 key_basic = skb_flow_dissector_target(flow_dissector, 855 FLOW_DISSECTOR_KEY_BASIC, 856 target_container); 857 key_basic->n_proto = flow_keys->n_proto; 858 key_basic->ip_proto = flow_keys->ip_proto; 859 860 if (flow_keys->addr_proto == ETH_P_IP && 861 dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 862 key_addrs = skb_flow_dissector_target(flow_dissector, 863 FLOW_DISSECTOR_KEY_IPV4_ADDRS, 864 target_container); 865 key_addrs->v4addrs.src = flow_keys->ipv4_src; 866 key_addrs->v4addrs.dst = flow_keys->ipv4_dst; 867 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 868 } else if (flow_keys->addr_proto == ETH_P_IPV6 && 869 dissector_uses_key(flow_dissector, 870 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 871 key_addrs = skb_flow_dissector_target(flow_dissector, 872 FLOW_DISSECTOR_KEY_IPV6_ADDRS, 873 target_container); 874 memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src, 875 sizeof(key_addrs->v6addrs)); 876 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 877 } 878 879 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { 880 key_ports = skb_flow_dissector_target(flow_dissector, 881 FLOW_DISSECTOR_KEY_PORTS, 882 target_container); 883 key_ports->src = flow_keys->sport; 884 key_ports->dst = flow_keys->dport; 885 } 886 887 if (dissector_uses_key(flow_dissector, 888 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 889 key_tags = skb_flow_dissector_target(flow_dissector, 890 FLOW_DISSECTOR_KEY_FLOW_LABEL, 891 target_container); 892 key_tags->flow_label = ntohl(flow_keys->flow_label); 893 } 894 } 895 896 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, 897 __be16 proto, int nhoff, int hlen, unsigned int flags) 898 { 899 struct bpf_flow_keys *flow_keys = ctx->flow_keys; 900 u32 result; 901 902 /* Pass parameters to the BPF program */ 903 memset(flow_keys, 0, sizeof(*flow_keys)); 904 flow_keys->n_proto = proto; 905 flow_keys->nhoff = nhoff; 906 flow_keys->thoff = flow_keys->nhoff; 907 908 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != 909 (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); 910 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != 911 (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 912 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != 913 (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); 914 flow_keys->flags = flags; 915 916 preempt_disable(); 917 result = BPF_PROG_RUN(prog, ctx); 918 preempt_enable(); 919 920 flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); 921 flow_keys->thoff = clamp_t(u16, flow_keys->thoff, 922 flow_keys->nhoff, hlen); 923 924 return result == BPF_OK; 925 } 926 927 /** 928 * __skb_flow_dissect - extract the flow_keys struct and return it 929 * @net: associated network namespace, derived from @skb if NULL 930 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified 931 * @flow_dissector: list of keys to dissect 932 * @target_container: target structure to put dissected values into 933 * @data: raw buffer pointer to the packet, if NULL use skb->data 934 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol 935 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) 936 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 937 * @flags: flags that control the dissection process, e.g. 938 * FLOW_DISSECTOR_F_STOP_AT_ENCAP. 939 * 940 * The function will try to retrieve individual keys into target specified 941 * by flow_dissector from either the skbuff or a raw buffer specified by the 942 * rest parameters. 943 * 944 * Caller must take care of zeroing target container memory. 945 */ 946 bool __skb_flow_dissect(const struct net *net, 947 const struct sk_buff *skb, 948 struct flow_dissector *flow_dissector, 949 void *target_container, 950 void *data, __be16 proto, int nhoff, int hlen, 951 unsigned int flags) 952 { 953 struct flow_dissector_key_control *key_control; 954 struct flow_dissector_key_basic *key_basic; 955 struct flow_dissector_key_addrs *key_addrs; 956 struct flow_dissector_key_tags *key_tags; 957 struct flow_dissector_key_vlan *key_vlan; 958 struct bpf_prog *attached = NULL; 959 enum flow_dissect_ret fdret; 960 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; 961 int num_hdrs = 0; 962 u8 ip_proto = 0; 963 bool ret; 964 965 if (!data) { 966 data = skb->data; 967 proto = skb_vlan_tag_present(skb) ? 968 skb->vlan_proto : skb->protocol; 969 nhoff = skb_network_offset(skb); 970 hlen = skb_headlen(skb); 971 #if IS_ENABLED(CONFIG_NET_DSA) 972 if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && 973 proto == htons(ETH_P_XDSA))) { 974 const struct dsa_device_ops *ops; 975 int offset = 0; 976 977 ops = skb->dev->dsa_ptr->tag_ops; 978 if (ops->flow_dissect && 979 !ops->flow_dissect(skb, &proto, &offset)) { 980 hlen -= offset; 981 nhoff += offset; 982 } 983 } 984 #endif 985 } 986 987 /* It is ensured by skb_flow_dissector_init() that control key will 988 * be always present. 989 */ 990 key_control = skb_flow_dissector_target(flow_dissector, 991 FLOW_DISSECTOR_KEY_CONTROL, 992 target_container); 993 994 /* It is ensured by skb_flow_dissector_init() that basic key will 995 * be always present. 996 */ 997 key_basic = skb_flow_dissector_target(flow_dissector, 998 FLOW_DISSECTOR_KEY_BASIC, 999 target_container); 1000 1001 if (skb) { 1002 if (!net) { 1003 if (skb->dev) 1004 net = dev_net(skb->dev); 1005 else if (skb->sk) 1006 net = sock_net(skb->sk); 1007 } 1008 } 1009 1010 WARN_ON_ONCE(!net); 1011 if (net) { 1012 rcu_read_lock(); 1013 attached = rcu_dereference(init_net.flow_dissector_prog); 1014 1015 if (!attached) 1016 attached = rcu_dereference(net->flow_dissector_prog); 1017 1018 if (attached) { 1019 struct bpf_flow_keys flow_keys; 1020 struct bpf_flow_dissector ctx = { 1021 .flow_keys = &flow_keys, 1022 .data = data, 1023 .data_end = data + hlen, 1024 }; 1025 __be16 n_proto = proto; 1026 1027 if (skb) { 1028 ctx.skb = skb; 1029 /* we can't use 'proto' in the skb case 1030 * because it might be set to skb->vlan_proto 1031 * which has been pulled from the data 1032 */ 1033 n_proto = skb->protocol; 1034 } 1035 1036 ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, 1037 hlen, flags); 1038 __skb_flow_bpf_to_target(&flow_keys, flow_dissector, 1039 target_container); 1040 rcu_read_unlock(); 1041 return ret; 1042 } 1043 rcu_read_unlock(); 1044 } 1045 1046 if (dissector_uses_key(flow_dissector, 1047 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 1048 struct ethhdr *eth = eth_hdr(skb); 1049 struct flow_dissector_key_eth_addrs *key_eth_addrs; 1050 1051 key_eth_addrs = skb_flow_dissector_target(flow_dissector, 1052 FLOW_DISSECTOR_KEY_ETH_ADDRS, 1053 target_container); 1054 memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); 1055 } 1056 1057 proto_again: 1058 fdret = FLOW_DISSECT_RET_CONTINUE; 1059 1060 switch (proto) { 1061 case htons(ETH_P_IP): { 1062 const struct iphdr *iph; 1063 struct iphdr _iph; 1064 1065 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 1066 if (!iph || iph->ihl < 5) { 1067 fdret = FLOW_DISSECT_RET_OUT_BAD; 1068 break; 1069 } 1070 1071 nhoff += iph->ihl * 4; 1072 1073 ip_proto = iph->protocol; 1074 1075 if (dissector_uses_key(flow_dissector, 1076 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 1077 key_addrs = skb_flow_dissector_target(flow_dissector, 1078 FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1079 target_container); 1080 1081 memcpy(&key_addrs->v4addrs, &iph->saddr, 1082 sizeof(key_addrs->v4addrs)); 1083 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1084 } 1085 1086 if (ip_is_fragment(iph)) { 1087 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 1088 1089 if (iph->frag_off & htons(IP_OFFSET)) { 1090 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1091 break; 1092 } else { 1093 key_control->flags |= FLOW_DIS_FIRST_FRAG; 1094 if (!(flags & 1095 FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) { 1096 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1097 break; 1098 } 1099 } 1100 } 1101 1102 __skb_flow_dissect_ipv4(skb, flow_dissector, 1103 target_container, data, iph); 1104 1105 break; 1106 } 1107 case htons(ETH_P_IPV6): { 1108 const struct ipv6hdr *iph; 1109 struct ipv6hdr _iph; 1110 1111 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 1112 if (!iph) { 1113 fdret = FLOW_DISSECT_RET_OUT_BAD; 1114 break; 1115 } 1116 1117 ip_proto = iph->nexthdr; 1118 nhoff += sizeof(struct ipv6hdr); 1119 1120 if (dissector_uses_key(flow_dissector, 1121 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 1122 key_addrs = skb_flow_dissector_target(flow_dissector, 1123 FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1124 target_container); 1125 1126 memcpy(&key_addrs->v6addrs, &iph->saddr, 1127 sizeof(key_addrs->v6addrs)); 1128 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1129 } 1130 1131 if ((dissector_uses_key(flow_dissector, 1132 FLOW_DISSECTOR_KEY_FLOW_LABEL) || 1133 (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && 1134 ip6_flowlabel(iph)) { 1135 __be32 flow_label = ip6_flowlabel(iph); 1136 1137 if (dissector_uses_key(flow_dissector, 1138 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 1139 key_tags = skb_flow_dissector_target(flow_dissector, 1140 FLOW_DISSECTOR_KEY_FLOW_LABEL, 1141 target_container); 1142 key_tags->flow_label = ntohl(flow_label); 1143 } 1144 if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) { 1145 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1146 break; 1147 } 1148 } 1149 1150 __skb_flow_dissect_ipv6(skb, flow_dissector, 1151 target_container, data, iph); 1152 1153 break; 1154 } 1155 case htons(ETH_P_8021AD): 1156 case htons(ETH_P_8021Q): { 1157 const struct vlan_hdr *vlan = NULL; 1158 struct vlan_hdr _vlan; 1159 __be16 saved_vlan_tpid = proto; 1160 1161 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX && 1162 skb && skb_vlan_tag_present(skb)) { 1163 proto = skb->protocol; 1164 } else { 1165 vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), 1166 data, hlen, &_vlan); 1167 if (!vlan) { 1168 fdret = FLOW_DISSECT_RET_OUT_BAD; 1169 break; 1170 } 1171 1172 proto = vlan->h_vlan_encapsulated_proto; 1173 nhoff += sizeof(*vlan); 1174 } 1175 1176 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) { 1177 dissector_vlan = FLOW_DISSECTOR_KEY_VLAN; 1178 } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) { 1179 dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN; 1180 } else { 1181 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1182 break; 1183 } 1184 1185 if (dissector_uses_key(flow_dissector, dissector_vlan)) { 1186 key_vlan = skb_flow_dissector_target(flow_dissector, 1187 dissector_vlan, 1188 target_container); 1189 1190 if (!vlan) { 1191 key_vlan->vlan_id = skb_vlan_tag_get_id(skb); 1192 key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb); 1193 } else { 1194 key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & 1195 VLAN_VID_MASK; 1196 key_vlan->vlan_priority = 1197 (ntohs(vlan->h_vlan_TCI) & 1198 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 1199 } 1200 key_vlan->vlan_tpid = saved_vlan_tpid; 1201 } 1202 1203 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1204 break; 1205 } 1206 case htons(ETH_P_PPP_SES): { 1207 struct { 1208 struct pppoe_hdr hdr; 1209 __be16 proto; 1210 } *hdr, _hdr; 1211 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 1212 if (!hdr) { 1213 fdret = FLOW_DISSECT_RET_OUT_BAD; 1214 break; 1215 } 1216 1217 proto = hdr->proto; 1218 nhoff += PPPOE_SES_HLEN; 1219 switch (proto) { 1220 case htons(PPP_IP): 1221 proto = htons(ETH_P_IP); 1222 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1223 break; 1224 case htons(PPP_IPV6): 1225 proto = htons(ETH_P_IPV6); 1226 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1227 break; 1228 default: 1229 fdret = FLOW_DISSECT_RET_OUT_BAD; 1230 break; 1231 } 1232 break; 1233 } 1234 case htons(ETH_P_TIPC): { 1235 struct tipc_basic_hdr *hdr, _hdr; 1236 1237 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), 1238 data, hlen, &_hdr); 1239 if (!hdr) { 1240 fdret = FLOW_DISSECT_RET_OUT_BAD; 1241 break; 1242 } 1243 1244 if (dissector_uses_key(flow_dissector, 1245 FLOW_DISSECTOR_KEY_TIPC)) { 1246 key_addrs = skb_flow_dissector_target(flow_dissector, 1247 FLOW_DISSECTOR_KEY_TIPC, 1248 target_container); 1249 key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); 1250 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; 1251 } 1252 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1253 break; 1254 } 1255 1256 case htons(ETH_P_MPLS_UC): 1257 case htons(ETH_P_MPLS_MC): 1258 fdret = __skb_flow_dissect_mpls(skb, flow_dissector, 1259 target_container, data, 1260 nhoff, hlen); 1261 break; 1262 case htons(ETH_P_FCOE): 1263 if ((hlen - nhoff) < FCOE_HEADER_LEN) { 1264 fdret = FLOW_DISSECT_RET_OUT_BAD; 1265 break; 1266 } 1267 1268 nhoff += FCOE_HEADER_LEN; 1269 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1270 break; 1271 1272 case htons(ETH_P_ARP): 1273 case htons(ETH_P_RARP): 1274 fdret = __skb_flow_dissect_arp(skb, flow_dissector, 1275 target_container, data, 1276 nhoff, hlen); 1277 break; 1278 1279 case htons(ETH_P_BATMAN): 1280 fdret = __skb_flow_dissect_batadv(skb, key_control, data, 1281 &proto, &nhoff, hlen, flags); 1282 break; 1283 1284 default: 1285 fdret = FLOW_DISSECT_RET_OUT_BAD; 1286 break; 1287 } 1288 1289 /* Process result of proto processing */ 1290 switch (fdret) { 1291 case FLOW_DISSECT_RET_OUT_GOOD: 1292 goto out_good; 1293 case FLOW_DISSECT_RET_PROTO_AGAIN: 1294 if (skb_flow_dissect_allowed(&num_hdrs)) 1295 goto proto_again; 1296 goto out_good; 1297 case FLOW_DISSECT_RET_CONTINUE: 1298 case FLOW_DISSECT_RET_IPPROTO_AGAIN: 1299 break; 1300 case FLOW_DISSECT_RET_OUT_BAD: 1301 default: 1302 goto out_bad; 1303 } 1304 1305 ip_proto_again: 1306 fdret = FLOW_DISSECT_RET_CONTINUE; 1307 1308 switch (ip_proto) { 1309 case IPPROTO_GRE: 1310 fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, 1311 target_container, data, 1312 &proto, &nhoff, &hlen, flags); 1313 break; 1314 1315 case NEXTHDR_HOP: 1316 case NEXTHDR_ROUTING: 1317 case NEXTHDR_DEST: { 1318 u8 _opthdr[2], *opthdr; 1319 1320 if (proto != htons(ETH_P_IPV6)) 1321 break; 1322 1323 opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), 1324 data, hlen, &_opthdr); 1325 if (!opthdr) { 1326 fdret = FLOW_DISSECT_RET_OUT_BAD; 1327 break; 1328 } 1329 1330 ip_proto = opthdr[0]; 1331 nhoff += (opthdr[1] + 1) << 3; 1332 1333 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; 1334 break; 1335 } 1336 case NEXTHDR_FRAGMENT: { 1337 struct frag_hdr _fh, *fh; 1338 1339 if (proto != htons(ETH_P_IPV6)) 1340 break; 1341 1342 fh = __skb_header_pointer(skb, nhoff, sizeof(_fh), 1343 data, hlen, &_fh); 1344 1345 if (!fh) { 1346 fdret = FLOW_DISSECT_RET_OUT_BAD; 1347 break; 1348 } 1349 1350 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 1351 1352 nhoff += sizeof(_fh); 1353 ip_proto = fh->nexthdr; 1354 1355 if (!(fh->frag_off & htons(IP6_OFFSET))) { 1356 key_control->flags |= FLOW_DIS_FIRST_FRAG; 1357 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { 1358 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; 1359 break; 1360 } 1361 } 1362 1363 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1364 break; 1365 } 1366 case IPPROTO_IPIP: 1367 proto = htons(ETH_P_IP); 1368 1369 key_control->flags |= FLOW_DIS_ENCAPSULATION; 1370 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { 1371 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1372 break; 1373 } 1374 1375 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1376 break; 1377 1378 case IPPROTO_IPV6: 1379 proto = htons(ETH_P_IPV6); 1380 1381 key_control->flags |= FLOW_DIS_ENCAPSULATION; 1382 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { 1383 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1384 break; 1385 } 1386 1387 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1388 break; 1389 1390 1391 case IPPROTO_MPLS: 1392 proto = htons(ETH_P_MPLS_UC); 1393 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1394 break; 1395 1396 case IPPROTO_TCP: 1397 __skb_flow_dissect_tcp(skb, flow_dissector, target_container, 1398 data, nhoff, hlen); 1399 break; 1400 1401 case IPPROTO_ICMP: 1402 case IPPROTO_ICMPV6: 1403 __skb_flow_dissect_icmp(skb, flow_dissector, target_container, 1404 data, nhoff, hlen); 1405 break; 1406 1407 default: 1408 break; 1409 } 1410 1411 if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT)) 1412 __skb_flow_dissect_ports(skb, flow_dissector, target_container, 1413 data, nhoff, ip_proto, hlen); 1414 1415 /* Process result of IP proto processing */ 1416 switch (fdret) { 1417 case FLOW_DISSECT_RET_PROTO_AGAIN: 1418 if (skb_flow_dissect_allowed(&num_hdrs)) 1419 goto proto_again; 1420 break; 1421 case FLOW_DISSECT_RET_IPPROTO_AGAIN: 1422 if (skb_flow_dissect_allowed(&num_hdrs)) 1423 goto ip_proto_again; 1424 break; 1425 case FLOW_DISSECT_RET_OUT_GOOD: 1426 case FLOW_DISSECT_RET_CONTINUE: 1427 break; 1428 case FLOW_DISSECT_RET_OUT_BAD: 1429 default: 1430 goto out_bad; 1431 } 1432 1433 out_good: 1434 ret = true; 1435 1436 out: 1437 key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); 1438 key_basic->n_proto = proto; 1439 key_basic->ip_proto = ip_proto; 1440 1441 return ret; 1442 1443 out_bad: 1444 ret = false; 1445 goto out; 1446 } 1447 EXPORT_SYMBOL(__skb_flow_dissect); 1448 1449 static siphash_key_t hashrnd __read_mostly; 1450 static __always_inline void __flow_hash_secret_init(void) 1451 { 1452 net_get_random_once(&hashrnd, sizeof(hashrnd)); 1453 } 1454 1455 static const void *flow_keys_hash_start(const struct flow_keys *flow) 1456 { 1457 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); 1458 return &flow->FLOW_KEYS_HASH_START_FIELD; 1459 } 1460 1461 static inline size_t flow_keys_hash_length(const struct flow_keys *flow) 1462 { 1463 size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); 1464 1465 BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); 1466 1467 switch (flow->control.addr_type) { 1468 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1469 diff -= sizeof(flow->addrs.v4addrs); 1470 break; 1471 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1472 diff -= sizeof(flow->addrs.v6addrs); 1473 break; 1474 case FLOW_DISSECTOR_KEY_TIPC: 1475 diff -= sizeof(flow->addrs.tipckey); 1476 break; 1477 } 1478 return sizeof(*flow) - diff; 1479 } 1480 1481 __be32 flow_get_u32_src(const struct flow_keys *flow) 1482 { 1483 switch (flow->control.addr_type) { 1484 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1485 return flow->addrs.v4addrs.src; 1486 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1487 return (__force __be32)ipv6_addr_hash( 1488 &flow->addrs.v6addrs.src); 1489 case FLOW_DISSECTOR_KEY_TIPC: 1490 return flow->addrs.tipckey.key; 1491 default: 1492 return 0; 1493 } 1494 } 1495 EXPORT_SYMBOL(flow_get_u32_src); 1496 1497 __be32 flow_get_u32_dst(const struct flow_keys *flow) 1498 { 1499 switch (flow->control.addr_type) { 1500 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1501 return flow->addrs.v4addrs.dst; 1502 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1503 return (__force __be32)ipv6_addr_hash( 1504 &flow->addrs.v6addrs.dst); 1505 default: 1506 return 0; 1507 } 1508 } 1509 EXPORT_SYMBOL(flow_get_u32_dst); 1510 1511 /* Sort the source and destination IP (and the ports if the IP are the same), 1512 * to have consistent hash within the two directions 1513 */ 1514 static inline void __flow_hash_consistentify(struct flow_keys *keys) 1515 { 1516 int addr_diff, i; 1517 1518 switch (keys->control.addr_type) { 1519 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1520 addr_diff = (__force u32)keys->addrs.v4addrs.dst - 1521 (__force u32)keys->addrs.v4addrs.src; 1522 if ((addr_diff < 0) || 1523 (addr_diff == 0 && 1524 ((__force u16)keys->ports.dst < 1525 (__force u16)keys->ports.src))) { 1526 swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); 1527 swap(keys->ports.src, keys->ports.dst); 1528 } 1529 break; 1530 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1531 addr_diff = memcmp(&keys->addrs.v6addrs.dst, 1532 &keys->addrs.v6addrs.src, 1533 sizeof(keys->addrs.v6addrs.dst)); 1534 if ((addr_diff < 0) || 1535 (addr_diff == 0 && 1536 ((__force u16)keys->ports.dst < 1537 (__force u16)keys->ports.src))) { 1538 for (i = 0; i < 4; i++) 1539 swap(keys->addrs.v6addrs.src.s6_addr32[i], 1540 keys->addrs.v6addrs.dst.s6_addr32[i]); 1541 swap(keys->ports.src, keys->ports.dst); 1542 } 1543 break; 1544 } 1545 } 1546 1547 static inline u32 __flow_hash_from_keys(struct flow_keys *keys, 1548 const siphash_key_t *keyval) 1549 { 1550 u32 hash; 1551 1552 __flow_hash_consistentify(keys); 1553 1554 hash = siphash(flow_keys_hash_start(keys), 1555 flow_keys_hash_length(keys), keyval); 1556 if (!hash) 1557 hash = 1; 1558 1559 return hash; 1560 } 1561 1562 u32 flow_hash_from_keys(struct flow_keys *keys) 1563 { 1564 __flow_hash_secret_init(); 1565 return __flow_hash_from_keys(keys, &hashrnd); 1566 } 1567 EXPORT_SYMBOL(flow_hash_from_keys); 1568 1569 static inline u32 ___skb_get_hash(const struct sk_buff *skb, 1570 struct flow_keys *keys, 1571 const siphash_key_t *keyval) 1572 { 1573 skb_flow_dissect_flow_keys(skb, keys, 1574 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 1575 1576 return __flow_hash_from_keys(keys, keyval); 1577 } 1578 1579 struct _flow_keys_digest_data { 1580 __be16 n_proto; 1581 u8 ip_proto; 1582 u8 padding; 1583 __be32 ports; 1584 __be32 src; 1585 __be32 dst; 1586 }; 1587 1588 void make_flow_keys_digest(struct flow_keys_digest *digest, 1589 const struct flow_keys *flow) 1590 { 1591 struct _flow_keys_digest_data *data = 1592 (struct _flow_keys_digest_data *)digest; 1593 1594 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); 1595 1596 memset(digest, 0, sizeof(*digest)); 1597 1598 data->n_proto = flow->basic.n_proto; 1599 data->ip_proto = flow->basic.ip_proto; 1600 data->ports = flow->ports.ports; 1601 data->src = flow->addrs.v4addrs.src; 1602 data->dst = flow->addrs.v4addrs.dst; 1603 } 1604 EXPORT_SYMBOL(make_flow_keys_digest); 1605 1606 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; 1607 1608 u32 __skb_get_hash_symmetric(const struct sk_buff *skb) 1609 { 1610 struct flow_keys keys; 1611 1612 __flow_hash_secret_init(); 1613 1614 memset(&keys, 0, sizeof(keys)); 1615 __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, 1616 &keys, NULL, 0, 0, 0, 1617 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 1618 1619 return __flow_hash_from_keys(&keys, &hashrnd); 1620 } 1621 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); 1622 1623 /** 1624 * __skb_get_hash: calculate a flow hash 1625 * @skb: sk_buff to calculate flow hash from 1626 * 1627 * This function calculates a flow hash based on src/dst addresses 1628 * and src/dst port numbers. Sets hash in skb to non-zero hash value 1629 * on success, zero indicates no valid hash. Also, sets l4_hash in skb 1630 * if hash is a canonical 4-tuple hash over transport ports. 1631 */ 1632 void __skb_get_hash(struct sk_buff *skb) 1633 { 1634 struct flow_keys keys; 1635 u32 hash; 1636 1637 __flow_hash_secret_init(); 1638 1639 hash = ___skb_get_hash(skb, &keys, &hashrnd); 1640 1641 __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); 1642 } 1643 EXPORT_SYMBOL(__skb_get_hash); 1644 1645 __u32 skb_get_hash_perturb(const struct sk_buff *skb, 1646 const siphash_key_t *perturb) 1647 { 1648 struct flow_keys keys; 1649 1650 return ___skb_get_hash(skb, &keys, perturb); 1651 } 1652 EXPORT_SYMBOL(skb_get_hash_perturb); 1653 1654 u32 __skb_get_poff(const struct sk_buff *skb, void *data, 1655 const struct flow_keys_basic *keys, int hlen) 1656 { 1657 u32 poff = keys->control.thoff; 1658 1659 /* skip L4 headers for fragments after the first */ 1660 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) && 1661 !(keys->control.flags & FLOW_DIS_FIRST_FRAG)) 1662 return poff; 1663 1664 switch (keys->basic.ip_proto) { 1665 case IPPROTO_TCP: { 1666 /* access doff as u8 to avoid unaligned access */ 1667 const u8 *doff; 1668 u8 _doff; 1669 1670 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), 1671 data, hlen, &_doff); 1672 if (!doff) 1673 return poff; 1674 1675 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); 1676 break; 1677 } 1678 case IPPROTO_UDP: 1679 case IPPROTO_UDPLITE: 1680 poff += sizeof(struct udphdr); 1681 break; 1682 /* For the rest, we do not really care about header 1683 * extensions at this point for now. 1684 */ 1685 case IPPROTO_ICMP: 1686 poff += sizeof(struct icmphdr); 1687 break; 1688 case IPPROTO_ICMPV6: 1689 poff += sizeof(struct icmp6hdr); 1690 break; 1691 case IPPROTO_IGMP: 1692 poff += sizeof(struct igmphdr); 1693 break; 1694 case IPPROTO_DCCP: 1695 poff += sizeof(struct dccp_hdr); 1696 break; 1697 case IPPROTO_SCTP: 1698 poff += sizeof(struct sctphdr); 1699 break; 1700 } 1701 1702 return poff; 1703 } 1704 1705 /** 1706 * skb_get_poff - get the offset to the payload 1707 * @skb: sk_buff to get the payload offset from 1708 * 1709 * The function will get the offset to the payload as far as it could 1710 * be dissected. The main user is currently BPF, so that we can dynamically 1711 * truncate packets without needing to push actual payload to the user 1712 * space and can analyze headers only, instead. 1713 */ 1714 u32 skb_get_poff(const struct sk_buff *skb) 1715 { 1716 struct flow_keys_basic keys; 1717 1718 if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, 1719 NULL, 0, 0, 0, 0)) 1720 return 0; 1721 1722 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); 1723 } 1724 1725 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) 1726 { 1727 memset(keys, 0, sizeof(*keys)); 1728 1729 memcpy(&keys->addrs.v6addrs.src, &fl6->saddr, 1730 sizeof(keys->addrs.v6addrs.src)); 1731 memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr, 1732 sizeof(keys->addrs.v6addrs.dst)); 1733 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1734 keys->ports.src = fl6->fl6_sport; 1735 keys->ports.dst = fl6->fl6_dport; 1736 keys->keyid.keyid = fl6->fl6_gre_key; 1737 keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); 1738 keys->basic.ip_proto = fl6->flowi6_proto; 1739 1740 return flow_hash_from_keys(keys); 1741 } 1742 EXPORT_SYMBOL(__get_hash_from_flowi6); 1743 1744 static const struct flow_dissector_key flow_keys_dissector_keys[] = { 1745 { 1746 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1747 .offset = offsetof(struct flow_keys, control), 1748 }, 1749 { 1750 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1751 .offset = offsetof(struct flow_keys, basic), 1752 }, 1753 { 1754 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1755 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1756 }, 1757 { 1758 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1759 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1760 }, 1761 { 1762 .key_id = FLOW_DISSECTOR_KEY_TIPC, 1763 .offset = offsetof(struct flow_keys, addrs.tipckey), 1764 }, 1765 { 1766 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1767 .offset = offsetof(struct flow_keys, ports), 1768 }, 1769 { 1770 .key_id = FLOW_DISSECTOR_KEY_VLAN, 1771 .offset = offsetof(struct flow_keys, vlan), 1772 }, 1773 { 1774 .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, 1775 .offset = offsetof(struct flow_keys, tags), 1776 }, 1777 { 1778 .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, 1779 .offset = offsetof(struct flow_keys, keyid), 1780 }, 1781 }; 1782 1783 static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { 1784 { 1785 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1786 .offset = offsetof(struct flow_keys, control), 1787 }, 1788 { 1789 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1790 .offset = offsetof(struct flow_keys, basic), 1791 }, 1792 { 1793 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1794 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1795 }, 1796 { 1797 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1798 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1799 }, 1800 { 1801 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1802 .offset = offsetof(struct flow_keys, ports), 1803 }, 1804 }; 1805 1806 static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = { 1807 { 1808 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1809 .offset = offsetof(struct flow_keys, control), 1810 }, 1811 { 1812 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1813 .offset = offsetof(struct flow_keys, basic), 1814 }, 1815 }; 1816 1817 struct flow_dissector flow_keys_dissector __read_mostly; 1818 EXPORT_SYMBOL(flow_keys_dissector); 1819 1820 struct flow_dissector flow_keys_basic_dissector __read_mostly; 1821 EXPORT_SYMBOL(flow_keys_basic_dissector); 1822 1823 static int __init init_default_flow_dissectors(void) 1824 { 1825 skb_flow_dissector_init(&flow_keys_dissector, 1826 flow_keys_dissector_keys, 1827 ARRAY_SIZE(flow_keys_dissector_keys)); 1828 skb_flow_dissector_init(&flow_keys_dissector_symmetric, 1829 flow_keys_dissector_symmetric_keys, 1830 ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); 1831 skb_flow_dissector_init(&flow_keys_basic_dissector, 1832 flow_keys_basic_dissector_keys, 1833 ARRAY_SIZE(flow_keys_basic_dissector_keys)); 1834 return 0; 1835 } 1836 1837 core_initcall(init_default_flow_dissectors); 1838