// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/dsa.h>
#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
#include <linux/bpf.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_labels.h>
#endif
#include <linux/bpf-netns.h>

static void dissector_set_key(struct flow_dissector *flow_dissector,
			      enum flow_dissector_key_id key_id)
{
	flow_dissector->used_keys |= (1 << key_id);
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is within
		 * boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
		BUG_ON(dissector_uses_key(flow_dissector,
					  key->key_id));

		dissector_set_key(flow_dissector, key->key_id);
		flow_dissector->offset[key->key_id] = key->offset;
	}

	/* Ensure that the dissector always includes control and basic key.
	 * That way we are able to avoid handling lack of these in fast path.
	 */
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_CONTROL));
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_BASIC));
}
EXPORT_SYMBOL(skb_flow_dissector_init);
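
/* Illustrative usage sketch (not part of this file; it mirrors the
 * default key tables at the bottom): a caller describes where each
 * dissected key should land in its container struct, then initializes
 * the dissector once:
 *
 *	static const struct flow_dissector_key my_keys[] = {
 *		{
 *			.key_id = FLOW_DISSECTOR_KEY_CONTROL,
 *			.offset = offsetof(struct flow_keys, control),
 *		},
 *		{
 *			.key_id = FLOW_DISSECTOR_KEY_BASIC,
 *			.offset = offsetof(struct flow_keys, basic),
 *		},
 *	};
 *	static struct flow_dissector my_dissector;
 *
 *	skb_flow_dissector_init(&my_dissector, my_keys, ARRAY_SIZE(my_keys));
 *
 * CONTROL and BASIC must always be among the keys; see the BUG_ON()s
 * above.
 */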

#ifdef CONFIG_BPF_SYSCALL
int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
{
	enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
	struct bpf_prog *attached;

	if (net == &init_net) {
		/* BPF flow dissector in the root namespace overrides
		 * any per-net-namespace one. When attaching to root,
		 * make sure we don't have any BPF program attached
		 * to the non-root namespaces.
		 */
		struct net *ns;

		for_each_net(ns) {
			if (ns == &init_net)
				continue;
			if (rcu_access_pointer(ns->bpf.progs[type]))
				return -EEXIST;
		}
	} else {
		/* Make sure root flow dissector is not attached
		 * when attaching to the non-root namespace.
		 */
		if (rcu_access_pointer(init_net.bpf.progs[type]))
			return -EEXIST;
	}

	attached = rcu_dereference_protected(net->bpf.progs[type],
					     lockdep_is_held(&netns_bpf_mutex));
	if (attached == prog)
		/* The same program cannot be attached twice */
		return -EINVAL;

	rcu_assign_pointer(net->bpf.progs[type], prog);
	if (attached)
		bpf_prog_put(attached);
	return 0;
}
#endif /* CONFIG_BPF_SYSCALL */

/**
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
{
	int poff = proto_ports_offset(ip_proto);

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	if (poff >= 0) {
		__be32 *ports, _ports;

		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
		if (ports)
			return *ports;
	}

	return 0;
}
EXPORT_SYMBOL(__skb_flow_get_ports);
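
/* Note on the return value above: the __be32 packs the transport header
 * bytes as they appear on the wire - source port in the first two bytes,
 * destination port in the next two - which matches the layout of
 * struct flow_dissector_key_ports, so it can be assigned to ->ports
 * directly (as __skb_flow_dissect_ports() later in this file does).
 */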

static bool icmp_has_id(u8 type)
{
	switch (type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
	case ICMP_TIMESTAMP:
	case ICMP_TIMESTAMPREPLY:
	case ICMPV6_ECHO_REQUEST:
	case ICMPV6_ECHO_REPLY:
		return true;
	}

	return false;
}

/**
 * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
 * @skb: sk_buff to extract from
 * @key_icmp: struct flow_dissector_key_icmp to fill
 * @data: raw buffer pointer to the packet
 * @thoff: offset to extract at
 * @hlen: packet header length
 */
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
			   struct flow_dissector_key_icmp *key_icmp,
			   void *data, int thoff, int hlen)
{
	struct icmphdr *ih, _ih;

	ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
	if (!ih)
		return;

	key_icmp->type = ih->type;
	key_icmp->code = ih->code;

	/* As we use 0 to signal that the Id field is not present,
	 * avoid confusion with packets without such field
	 */
	if (icmp_has_id(ih->type))
		key_icmp->id = ih->un.echo.id ? : 1;
	else
		key_icmp->id = 0;
}
EXPORT_SYMBOL(skb_flow_get_icmp_tci);

/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
 * using skb_flow_get_icmp_tci().
 */
static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
				    struct flow_dissector *flow_dissector,
				    void *target_container,
				    void *data, int thoff, int hlen)
{
	struct flow_dissector_key_icmp *key_icmp;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
		return;

	key_icmp = skb_flow_dissector_target(flow_dissector,
					     FLOW_DISSECTOR_KEY_ICMP,
					     target_container);

	skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
}

void skb_flow_dissect_meta(const struct sk_buff *skb,
			   struct flow_dissector *flow_dissector,
			   void *target_container)
{
	struct flow_dissector_key_meta *meta;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META))
		return;

	meta = skb_flow_dissector_target(flow_dissector,
					 FLOW_DISSECTOR_KEY_META,
					 target_container);
	meta->ingress_ifindex = skb->skb_iif;
}
EXPORT_SYMBOL(skb_flow_dissect_meta);

static void
skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
				   struct flow_dissector *flow_dissector,
				   void *target_container)
{
	struct flow_dissector_key_control *ctrl;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
		return;

	ctrl = skb_flow_dissector_target(flow_dissector,
					 FLOW_DISSECTOR_KEY_ENC_CONTROL,
					 target_container);
	ctrl->addr_type = type;
}

void
skb_flow_dissect_ct(const struct sk_buff *skb,
		    struct flow_dissector *flow_dissector,
		    void *target_container,
		    u16 *ctinfo_map,
		    size_t mapsize)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	struct flow_dissector_key_ct *key;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_labels *cl;
	struct nf_conn *ct;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT))
		return;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return;

	key = skb_flow_dissector_target(flow_dissector,
					FLOW_DISSECTOR_KEY_CT,
					target_container);

	if (ctinfo < mapsize)
		key->ct_state = ctinfo_map[ctinfo];
#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
	key->ct_zone = ct->zone.id;
#endif
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	key->ct_mark = ct->mark;
#endif

	cl = nf_ct_labels_find(ct);
	if (cl)
		memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels));
#endif /* CONFIG_NF_CONNTRACK */
}
EXPORT_SYMBOL(skb_flow_dissect_ct);
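
/* Dissect tunnel metadata attached to the skb (see skb_tunnel_info()),
 * e.g. by tunnel devices operating in metadata (collect_md) mode; the
 * outer headers themselves are no longer part of the packet data here.
 */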
void
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
			     struct flow_dissector *flow_dissector,
			     void *target_container)
{
	struct ip_tunnel_info *info;
	struct ip_tunnel_key *key;

	/* A quick check to see if there might be something to do. */
	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_KEYID) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IP) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_OPTS))
		return;

	info = skb_tunnel_info(skb);
	if (!info)
		return;

	key = &info->key;

	switch (ip_tunnel_info_af(info)) {
	case AF_INET:
		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						   flow_dissector,
						   target_container);
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
			struct flow_dissector_key_ipv4_addrs *ipv4;

			ipv4 = skb_flow_dissector_target(flow_dissector,
							 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
							 target_container);
			ipv4->src = key->u.ipv4.src;
			ipv4->dst = key->u.ipv4.dst;
		}
		break;
	case AF_INET6:
		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						   flow_dissector,
						   target_container);
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
			struct flow_dissector_key_ipv6_addrs *ipv6;

			ipv6 = skb_flow_dissector_target(flow_dissector,
							 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
							 target_container);
			ipv6->src = key->u.ipv6.src;
			ipv6->dst = key->u.ipv6.dst;
		}
		break;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *keyid;

		keyid = skb_flow_dissector_target(flow_dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  target_container);
		keyid->keyid = tunnel_id_to_key32(key->tun_id);
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *tp;

		tp = skb_flow_dissector_target(flow_dissector,
					       FLOW_DISSECTOR_KEY_ENC_PORTS,
					       target_container);
		tp->src = key->tp_src;
		tp->dst = key->tp_dst;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_dissector_key_ip *ip;

		ip = skb_flow_dissector_target(flow_dissector,
					       FLOW_DISSECTOR_KEY_ENC_IP,
					       target_container);
		ip->tos = key->tos;
		ip->ttl = key->ttl;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
		struct flow_dissector_key_enc_opts *enc_opt;

		enc_opt = skb_flow_dissector_target(flow_dissector,
						    FLOW_DISSECTOR_KEY_ENC_OPTS,
						    target_container);

		if (info->options_len) {
			enc_opt->len = info->options_len;
			ip_tunnel_info_opts_get(enc_opt->data, info);
			enc_opt->dst_opt_type = info->key.tun_flags &
						TUNNEL_OPTIONS_PRESENT;
		}
	}
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);

static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, int nhoff, int hlen,
			int lse_index, bool *entropy_label)
{
	struct mpls_label *hdr, _hdr;
	u32 entry, label, bos;

	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
	    !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
		return FLOW_DISSECT_RET_OUT_GOOD;

	if (lse_index >= FLOW_DIS_MPLS_MAX)
		return FLOW_DISSECT_RET_OUT_GOOD;

	hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
				   hlen, &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	entry = ntohl(hdr->entry);
	label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
	bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
		struct flow_dissector_key_mpls *key_mpls;
		struct flow_dissector_mpls_lse *lse;

		key_mpls = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_MPLS,
						     target_container);
		lse = &key_mpls->ls[lse_index];

		lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
		lse->mpls_bos = bos;
		lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
		lse->mpls_label = label;
		dissector_set_mpls_lse(key_mpls, lse_index);
	}

	if (*entropy_label &&
	    dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
		struct flow_dissector_key_keyid *key_keyid;

		key_keyid = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
						      target_container);
		key_keyid->keyid = cpu_to_be32(label);
	}

	*entropy_label = label == MPLS_LABEL_ENTROPY;
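
	/* A label without the bottom-of-stack bit means another LSE
	 * follows, so ask the caller to dissect the next entry (bounded
	 * by FLOW_DIS_MPLS_MAX above and by MAX_FLOW_DISSECT_HDRS in
	 * __skb_flow_dissect()).
	 */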
	return bos ? FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN;
}

static enum flow_dissect_ret
__skb_flow_dissect_arp(const struct sk_buff *skb,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data, int nhoff, int hlen)
{
	struct flow_dissector_key_arp *key_arp;
	struct {
		unsigned char ar_sha[ETH_ALEN];
		unsigned char ar_sip[4];
		unsigned char ar_tha[ETH_ALEN];
		unsigned char ar_tip[4];
	} *arp_eth, _arp_eth;
	const struct arphdr *arp;
	struct arphdr _arp;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
		return FLOW_DISSECT_RET_OUT_GOOD;

	arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
				   hlen, &_arp);
	if (!arp)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_hln != ETH_ALEN ||
	    arp->ar_pln != 4 ||
	    (arp->ar_op != htons(ARPOP_REPLY) &&
	     arp->ar_op != htons(ARPOP_REQUEST)))
		return FLOW_DISSECT_RET_OUT_BAD;

	arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
				       sizeof(_arp_eth), data,
				       hlen, &_arp_eth);
	if (!arp_eth)
		return FLOW_DISSECT_RET_OUT_BAD;

	key_arp = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_ARP,
					    target_container);

	memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
	memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));

	/* Only store the lower byte of the opcode;
	 * this covers ARPOP_REPLY and ARPOP_REQUEST.
	 */
	key_arp->op = ntohs(arp->ar_op) & 0xff;

	ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
	ether_addr_copy(key_arp->tha, arp_eth->ar_tha);

	return FLOW_DISSECT_RET_OUT_GOOD;
}
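
/* GRE (RFC 2784/2890) keeps its optional fields in a fixed order right
 * after the base header: checksum + reserved, then key, then sequence
 * number; the offset accounting below relies on that ordering.
 */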

static enum flow_dissect_ret
__skb_flow_dissect_gre(const struct sk_buff *skb,
		       struct flow_dissector_key_control *key_control,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data,
		       __be16 *p_proto, int *p_nhoff, int *p_hlen,
		       unsigned int flags)
{
	struct flow_dissector_key_keyid *key_keyid;
	struct gre_base_hdr *hdr, _hdr;
	int offset = 0;
	u16 gre_ver;

	hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
				   data, *p_hlen, &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	/* Only look inside GRE without routing */
	if (hdr->flags & GRE_ROUTING)
		return FLOW_DISSECT_RET_OUT_GOOD;

	/* Only look inside GRE for version 0 and 1 */
	gre_ver = ntohs(hdr->flags & GRE_VERSION);
	if (gre_ver > 1)
		return FLOW_DISSECT_RET_OUT_GOOD;

	*p_proto = hdr->protocol;
	if (gre_ver) {
		/* Version1 must be PPTP, and check the flags */
		if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
			return FLOW_DISSECT_RET_OUT_GOOD;
	}

	offset += sizeof(struct gre_base_hdr);

	if (hdr->flags & GRE_CSUM)
		offset += sizeof_field(struct gre_full_hdr, csum) +
			  sizeof_field(struct gre_full_hdr, reserved1);

	if (hdr->flags & GRE_KEY) {
		const __be32 *keyid;
		__be32 _keyid;

		keyid = __skb_header_pointer(skb, *p_nhoff + offset,
					     sizeof(_keyid),
					     data, *p_hlen, &_keyid);
		if (!keyid)
			return FLOW_DISSECT_RET_OUT_BAD;

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
			key_keyid = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_GRE_KEYID,
							      target_container);
			if (gre_ver == 0)
				key_keyid->keyid = *keyid;
			else
				key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
		}
		offset += sizeof_field(struct gre_full_hdr, key);
	}

	if (hdr->flags & GRE_SEQ)
		offset += sizeof_field(struct pptp_gre_header, seq);

	if (gre_ver == 0) {
		if (*p_proto == htons(ETH_P_TEB)) {
			const struct ethhdr *eth;
			struct ethhdr _eth;

			eth = __skb_header_pointer(skb, *p_nhoff + offset,
						   sizeof(_eth),
						   data, *p_hlen, &_eth);
			if (!eth)
				return FLOW_DISSECT_RET_OUT_BAD;
			*p_proto = eth->h_proto;
			offset += sizeof(*eth);

			/* Cap headers that we access via pointers at the
			 * end of the Ethernet header as our maximum alignment
			 * at that point is only 2 bytes.
			 */
			if (NET_IP_ALIGN)
				*p_hlen = *p_nhoff + offset;
		}
	} else { /* version 1, must be PPTP */
		u8 _ppp_hdr[PPP_HDRLEN];
		u8 *ppp_hdr;

		if (hdr->flags & GRE_ACK)
			offset += sizeof_field(struct pptp_gre_header, ack);

		ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
					       sizeof(_ppp_hdr),
					       data, *p_hlen, _ppp_hdr);
		if (!ppp_hdr)
			return FLOW_DISSECT_RET_OUT_BAD;

		switch (PPP_PROTOCOL(ppp_hdr)) {
		case PPP_IP:
			*p_proto = htons(ETH_P_IP);
			break;
		case PPP_IPV6:
			*p_proto = htons(ETH_P_IPV6);
			break;
		default:
			/* Could probably catch some more like MPLS */
			break;
		}

		offset += PPP_HDRLEN;
	}

	*p_nhoff += offset;
	key_control->flags |= FLOW_DIS_ENCAPSULATION;
	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
		return FLOW_DISSECT_RET_OUT_GOOD;

	return FLOW_DISSECT_RET_PROTO_AGAIN;
}

/**
 * __skb_flow_dissect_batadv() - dissect batman-adv header
 * @skb: sk_buff with the batman-adv header
 * @key_control: flow dissectors control key
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @p_proto: pointer used to update the protocol to process next
 * @p_nhoff: pointer used to update inner network header offset
 * @hlen: packet header length
 * @flags: any combination of FLOW_DISSECTOR_F_*
 *
 * An attempt is made to dissect ETH_P_BATMAN packets. Only
 * &struct batadv_unicast packets are actually processed because they contain an
 * inner ethernet header and are usually followed by the actual network header.
 * This allows the flow dissector to continue processing the packet.
 *
 * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
 * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
 * otherwise FLOW_DISSECT_RET_OUT_BAD
 */
static enum flow_dissect_ret
__skb_flow_dissect_batadv(const struct sk_buff *skb,
			  struct flow_dissector_key_control *key_control,
			  void *data, __be16 *p_proto, int *p_nhoff, int hlen,
			  unsigned int flags)
{
	struct {
		struct batadv_unicast_packet batadv_unicast;
		struct ethhdr eth;
	} *hdr, _hdr;

	hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
				   &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
		return FLOW_DISSECT_RET_OUT_BAD;

	*p_proto = hdr->eth.h_proto;
	*p_nhoff += sizeof(*hdr);

	key_control->flags |= FLOW_DIS_ENCAPSULATION;
	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
		return FLOW_DISSECT_RET_OUT_GOOD;

	return FLOW_DISSECT_RET_PROTO_AGAIN;
}

static void
__skb_flow_dissect_tcp(const struct sk_buff *skb,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data, int thoff, int hlen)
{
	struct flow_dissector_key_tcp *key_tcp;
	struct tcphdr *th, _th;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP))
		return;

	th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th);
	if (!th)
		return;

	if (unlikely(__tcp_hdrlen(th) < sizeof(_th)))
		return;

	key_tcp = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_TCP,
					    target_container);
	key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF));
}
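
/* Dissect the transport ports into FLOW_DISSECTOR_KEY_PORTS or, failing
 * that, FLOW_DISSECTOR_KEY_PORTS_RANGE; both share the same key layout,
 * and PORTS takes precedence when a dissector requests both.
 */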
static void
__skb_flow_dissect_ports(const struct sk_buff *skb,
			 struct flow_dissector *flow_dissector,
			 void *target_container, void *data, int nhoff,
			 u8 ip_proto, int hlen)
{
	enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
	struct flow_dissector_key_ports *key_ports;

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
		dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
	else if (dissector_uses_key(flow_dissector,
				    FLOW_DISSECTOR_KEY_PORTS_RANGE))
		dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;

	if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
		return;

	key_ports = skb_flow_dissector_target(flow_dissector,
					      dissector_ports,
					      target_container);
	key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
						data, hlen);
}

static void
__skb_flow_dissect_ipv4(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, const struct iphdr *iph)
{
	struct flow_dissector_key_ip *key_ip;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
		return;

	key_ip = skb_flow_dissector_target(flow_dissector,
					   FLOW_DISSECTOR_KEY_IP,
					   target_container);
	key_ip->tos = iph->tos;
	key_ip->ttl = iph->ttl;
}

static void
__skb_flow_dissect_ipv6(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, const struct ipv6hdr *iph)
{
	struct flow_dissector_key_ip *key_ip;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
		return;

	key_ip = skb_flow_dissector_target(flow_dissector,
					   FLOW_DISSECTOR_KEY_IP,
					   target_container);
	key_ip->tos = ipv6_get_dsfield(iph);
	key_ip->ttl = iph->hop_limit;
}

/* Maximum number of protocol headers that can be parsed in
 * __skb_flow_dissect
 */
#define MAX_FLOW_DISSECT_HDRS	15

static bool skb_flow_dissect_allowed(int *num_hdrs)
{
	++*num_hdrs;

	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
}

static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
				     struct flow_dissector *flow_dissector,
				     void *target_container)
{
	struct flow_dissector_key_ports *key_ports = NULL;
	struct flow_dissector_key_control *key_control;
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_tags *key_tags;

	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);
	key_control->thoff = flow_keys->thoff;
	if (flow_keys->is_frag)
		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
	if (flow_keys->is_first_frag)
		key_control->flags |= FLOW_DIS_FIRST_FRAG;
	if (flow_keys->is_encap)
		key_control->flags |= FLOW_DIS_ENCAPSULATION;

	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
	key_basic->n_proto = flow_keys->n_proto;
	key_basic->ip_proto = flow_keys->ip_proto;

	if (flow_keys->addr_proto == ETH_P_IP &&
	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		key_addrs = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						      target_container);
		key_addrs->v4addrs.src = flow_keys->ipv4_src;
		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
		   dissector_uses_key(flow_dissector,
				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
		key_addrs = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						      target_container);
		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
		       sizeof(key_addrs->v6addrs));
		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
	else if (dissector_uses_key(flow_dissector,
				    FLOW_DISSECTOR_KEY_PORTS_RANGE))
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS_RANGE,
						      target_container);

	if (key_ports) {
		key_ports->src = flow_keys->sport;
		key_ports->dst = flow_keys->dport;
	}

	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
		key_tags = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_FLOW_LABEL,
						     target_container);
		key_tags->flow_label = ntohl(flow_keys->flow_label);
	}
}
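
/* Run a BPF flow-dissector program. The program writes its results into
 * ctx->flow_keys; nhoff/thoff are clamped afterwards so that a misbehaving
 * program cannot report offsets outside of [nhoff, hlen].
 */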
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
		      __be16 proto, int nhoff, int hlen, unsigned int flags)
{
	struct bpf_flow_keys *flow_keys = ctx->flow_keys;
	u32 result;

	/* Pass parameters to the BPF program */
	memset(flow_keys, 0, sizeof(*flow_keys));
	flow_keys->n_proto = proto;
	flow_keys->nhoff = nhoff;
	flow_keys->thoff = flow_keys->nhoff;

	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
		     (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
		     (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
		     (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
	flow_keys->flags = flags;

	result = bpf_prog_run_pin_on_cpu(prog, ctx);

	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
				   flow_keys->nhoff, hlen);

	return result == BPF_OK;
}

/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @net: associated network namespace, derived from @skb if NULL
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow_dissector: list of keys to dissect
 * @target_container: target structure to put dissected values into
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 * @flags: flags that control the dissection process, e.g.
 *         FLOW_DISSECTOR_F_STOP_AT_ENCAP.
 *
 * The function will try to retrieve individual keys into target specified
 * by flow_dissector from either the skbuff or a raw buffer specified by the
 * rest parameters.
 *
 * Caller must take care of zeroing target container memory.
 */
bool __skb_flow_dissect(const struct net *net,
			const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container,
			void *data, __be16 proto, int nhoff, int hlen,
			unsigned int flags)
{
	struct flow_dissector_key_control *key_control;
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_tags *key_tags;
	struct flow_dissector_key_vlan *key_vlan;
	struct bpf_prog *attached = NULL;
	enum flow_dissect_ret fdret;
	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
	bool mpls_el = false;
	int mpls_lse = 0;
	int num_hdrs = 0;
	u8 ip_proto = 0;
	bool ret;

	if (!data) {
		data = skb->data;
		proto = skb_vlan_tag_present(skb) ?
			 skb->vlan_proto : skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
#if IS_ENABLED(CONFIG_NET_DSA)
		if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) &&
			     proto == htons(ETH_P_XDSA))) {
			const struct dsa_device_ops *ops;
			int offset = 0;

			ops = skb->dev->dsa_ptr->tag_ops;
			if (ops->flow_dissect &&
			    !ops->flow_dissect(skb, &proto, &offset)) {
				hlen -= offset;
				nhoff += offset;
			}
		}
#endif
	}

	/* It is ensured by skb_flow_dissector_init() that control key will
	 * be always present.
	 */
	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);

	/* It is ensured by skb_flow_dissector_init() that basic key will
	 * be always present.
	 */
	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);

	if (skb) {
		if (!net) {
			if (skb->dev)
				net = dev_net(skb->dev);
			else if (skb->sk)
				net = sock_net(skb->sk);
		}
	}

	WARN_ON_ONCE(!net);
	if (net) {
		enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;

		rcu_read_lock();
		attached = rcu_dereference(init_net.bpf.progs[type]);

		if (!attached)
			attached = rcu_dereference(net->bpf.progs[type]);

		if (attached) {
			struct bpf_flow_keys flow_keys;
			struct bpf_flow_dissector ctx = {
				.flow_keys = &flow_keys,
				.data = data,
				.data_end = data + hlen,
			};
			__be16 n_proto = proto;

			if (skb) {
				ctx.skb = skb;
				/* we can't use 'proto' in the skb case
				 * because it might be set to skb->vlan_proto
				 * which has been pulled from the data
				 */
				n_proto = skb->protocol;
			}

			ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
					       hlen, flags);
			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
						 target_container);
			rcu_read_unlock();
			return ret;
		}
		rcu_read_unlock();
	}

	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct ethhdr *eth = eth_hdr(skb);
		struct flow_dissector_key_eth_addrs *key_eth_addrs;

		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
							  target_container);
		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
	}

proto_again:
	fdret = FLOW_DISSECT_RET_CONTINUE;

	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;

		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph || iph->ihl < 5) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
							      target_container);

			memcpy(&key_addrs->v4addrs, &iph->saddr,
			       sizeof(key_addrs->v4addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}
		if (ip_is_fragment(iph)) {
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;

			if (iph->frag_off & htons(IP_OFFSET)) {
				fdret = FLOW_DISSECT_RET_OUT_GOOD;
				break;
			} else {
				key_control->flags |= FLOW_DIS_FIRST_FRAG;
				if (!(flags &
				      FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
					fdret = FLOW_DISSECT_RET_OUT_GOOD;
					break;
				}
			}
		}

		__skb_flow_dissect_ipv4(skb, flow_dissector,
					target_container, data, iph);

		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;

		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
							      target_container);

			memcpy(&key_addrs->v6addrs, &iph->saddr,
			       sizeof(key_addrs->v6addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		}

		if ((dissector_uses_key(flow_dissector,
					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
		    ip6_flowlabel(iph)) {
			__be32 flow_label = ip6_flowlabel(iph);

			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
				key_tags = skb_flow_dissector_target(flow_dissector,
								     FLOW_DISSECTOR_KEY_FLOW_LABEL,
								     target_container);
				key_tags->flow_label = ntohl(flow_label);
			}
			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
				fdret = FLOW_DISSECT_RET_OUT_GOOD;
				break;
			}
		}

		__skb_flow_dissect_ipv6(skb, flow_dissector,
					target_container, data, iph);

		break;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan = NULL;
		struct vlan_hdr _vlan;
		__be16 saved_vlan_tpid = proto;

		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
		    skb && skb_vlan_tag_present(skb)) {
			proto = skb->protocol;
		} else {
			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
						    data, hlen, &_vlan);
			if (!vlan) {
				fdret = FLOW_DISSECT_RET_OUT_BAD;
				break;
			}

			proto = vlan->h_vlan_encapsulated_proto;
			nhoff += sizeof(*vlan);
		}

		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
			dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
		} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
			dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
		} else {
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		}

		if (dissector_uses_key(flow_dissector, dissector_vlan)) {
			key_vlan = skb_flow_dissector_target(flow_dissector,
							     dissector_vlan,
							     target_container);

			if (!vlan) {
				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
				key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
			} else {
				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
					VLAN_VID_MASK;
				key_vlan->vlan_priority =
					(ntohs(vlan->h_vlan_TCI) &
					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
			}
			key_vlan->vlan_tpid = saved_vlan_tpid;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;
	}
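	/* PPPoE session frames carry a 2-byte PPP protocol field right
	 * after the PPPoE header; map PPP_IP/PPP_IPV6 back to their
	 * Ethernet protocol numbers and dissect the inner packet.
	 */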
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			proto = htons(ETH_P_IP);
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		case htons(PPP_IPV6):
			proto = htons(ETH_P_IPV6);
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		default:
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}
		break;
	}
	case htons(ETH_P_TIPC): {
		struct tipc_basic_hdr *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
					   data, hlen, &_hdr);
		if (!hdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_TIPC)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_TIPC,
							      target_container);
			key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
		}
		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;
	}

	case htons(ETH_P_MPLS_UC):
	case htons(ETH_P_MPLS_MC):
		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
						target_container, data,
						nhoff, hlen, mpls_lse,
						&mpls_el);
		nhoff += sizeof(struct mpls_label);
		mpls_lse++;
		break;
	case htons(ETH_P_FCOE):
		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		nhoff += FCOE_HEADER_LEN;
		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;

	case htons(ETH_P_ARP):
	case htons(ETH_P_RARP):
		fdret = __skb_flow_dissect_arp(skb, flow_dissector,
					       target_container, data,
					       nhoff, hlen);
		break;

	case htons(ETH_P_BATMAN):
		fdret = __skb_flow_dissect_batadv(skb, key_control, data,
						  &proto, &nhoff, hlen, flags);
		break;

	default:
		fdret = FLOW_DISSECT_RET_OUT_BAD;
		break;
	}

	/* Process result of proto processing */
	switch (fdret) {
	case FLOW_DISSECT_RET_OUT_GOOD:
		goto out_good;
	case FLOW_DISSECT_RET_PROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto proto_again;
		goto out_good;
	case FLOW_DISSECT_RET_CONTINUE:
	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
		break;
	case FLOW_DISSECT_RET_OUT_BAD:
	default:
		goto out_bad;
	}

ip_proto_again:
	fdret = FLOW_DISSECT_RET_CONTINUE;

	switch (ip_proto) {
	case IPPROTO_GRE:
		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
					       target_container, data,
					       &proto, &nhoff, &hlen, flags);
		break;

	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_DEST: {
		u8 _opthdr[2], *opthdr;

		if (proto != htons(ETH_P_IPV6))
			break;

		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
					      data, hlen, &_opthdr);
		if (!opthdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		ip_proto = opthdr[0];
		nhoff += (opthdr[1] + 1) << 3;

		fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
		break;
	}
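	/* IPv6 fragment header: as with IPv4, only the first fragment
	 * carries the transport header, and dissection continues past it
	 * only when FLOW_DISSECTOR_F_PARSE_1ST_FRAG is set.
	 */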
	case NEXTHDR_FRAGMENT: {
		struct frag_hdr _fh, *fh;

		if (proto != htons(ETH_P_IPV6))
			break;

		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
					  data, hlen, &_fh);

		if (!fh) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		key_control->flags |= FLOW_DIS_IS_FRAGMENT;

		nhoff += sizeof(_fh);
		ip_proto = fh->nexthdr;

		if (!(fh->frag_off & htons(IP6_OFFSET))) {
			key_control->flags |= FLOW_DIS_FIRST_FRAG;
			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
				fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
				break;
			}
		}

		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);

		key_control->flags |= FLOW_DIS_ENCAPSULATION;
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
			fdret = FLOW_DISSECT_RET_OUT_GOOD;
			break;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);

		key_control->flags |= FLOW_DIS_ENCAPSULATION;
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
			fdret = FLOW_DISSECT_RET_OUT_GOOD;
			break;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_MPLS:
		proto = htons(ETH_P_MPLS_UC);
		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_TCP:
		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
				       data, nhoff, hlen);
		break;

	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		__skb_flow_dissect_icmp(skb, flow_dissector, target_container,
					data, nhoff, hlen);
		break;

	default:
		break;
	}

	if (!(key_control->flags & FLOW_DIS_IS_FRAGMENT))
		__skb_flow_dissect_ports(skb, flow_dissector, target_container,
					 data, nhoff, ip_proto, hlen);

	/* Process result of IP proto processing */
	switch (fdret) {
	case FLOW_DISSECT_RET_PROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto proto_again;
		break;
	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto ip_proto_again;
		break;
	case FLOW_DISSECT_RET_OUT_GOOD:
	case FLOW_DISSECT_RET_CONTINUE:
		break;
	case FLOW_DISSECT_RET_OUT_BAD:
	default:
		goto out_bad;
	}
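
	/* Both exit paths funnel through out:, which records the final
	 * transport offset and the basic protocol numbers even for
	 * truncated or unrecognized packets.
	 */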
out_good:
	ret = true;

out:
	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
	key_basic->n_proto = proto;
	key_basic->ip_proto = ip_proto;

	return ret;

out_bad:
	ret = false;
	goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);

static siphash_key_t hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

static const void *flow_keys_hash_start(const struct flow_keys *flow)
{
	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
	return &flow->FLOW_KEYS_HASH_START_FIELD;
}

static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);

	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		diff -= sizeof(flow->addrs.v4addrs);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		diff -= sizeof(flow->addrs.v6addrs);
		break;
	case FLOW_DISSECTOR_KEY_TIPC:
		diff -= sizeof(flow->addrs.tipckey);
		break;
	}
	return sizeof(*flow) - diff;
}
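
/* Note: 'addrs' must be the last member of struct flow_keys; the hash
 * input runs from FLOW_KEYS_HASH_START_FIELD to the end of the used part
 * of the address union, so unused address bytes never influence the hash.
 */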

__be32 flow_get_u32_src(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.src;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.src);
	case FLOW_DISSECTOR_KEY_TIPC:
		return flow->addrs.tipckey.key;
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_src);

__be32 flow_get_u32_dst(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.dst;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.dst);
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_dst);

/* Sort the source and destination IP (and the ports if the IPs are the same),
 * to have a consistent hash within the two directions.
 */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
			    (__force u32)keys->addrs.v4addrs.src;
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
}

static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
					const siphash_key_t *keyval)
{
	u32 hash;

	__flow_hash_consistentify(keys);

	hash = siphash(flow_keys_hash_start(keys),
		       flow_keys_hash_length(keys), keyval);
	if (!hash)
		hash = 1;

	return hash;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
	__flow_hash_secret_init();
	return __flow_hash_from_keys(keys, &hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);

static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys,
				  const siphash_key_t *keyval)
{
	skb_flow_dissect_flow_keys(skb, keys,
				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(keys, keyval);
}

struct _flow_keys_digest_data {
	__be16	n_proto;
	u8	ip_proto;
	u8	padding;
	__be32	ports;
	__be32	src;
	__be32	dst;
};

void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *data =
	    (struct _flow_keys_digest_data *)digest;

	BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));

	data->n_proto = flow->basic.n_proto;
	data->ip_proto = flow->basic.ip_proto;
	data->ports = flow->ports.ports;
	data->src = flow->addrs.v4addrs.src;
	data->dst = flow->addrs.v4addrs.dst;
}
EXPORT_SYMBOL(make_flow_keys_digest);

static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;

u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
{
	struct flow_keys keys;

	__flow_hash_secret_init();

	memset(&keys, 0, sizeof(keys));
	__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
			   &keys, NULL, 0, 0, 0,
			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(&keys, &hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);

/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
 * and src/dst port numbers. Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_hash(struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 hash;

	__flow_hash_secret_init();

	hash = ___skb_get_hash(skb, &keys, &hashrnd);

	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);

__u32 skb_get_hash_perturb(const struct sk_buff *skb,
			   const siphash_key_t *perturb)
{
	struct flow_keys keys;

	return ___skb_get_hash(skb, &keys, perturb);
}
EXPORT_SYMBOL(skb_get_hash_perturb);

u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys_basic *keys, int hlen)
{
	u32 poff = keys->control.thoff;

	/* skip L4 headers for fragments after the first */
	if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
	    !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
		return poff;

	switch (keys->basic.ip_proto) {
	case IPPROTO_TCP: {
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;

		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
			return poff;

		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}

/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected. The main user is currently BPF, so that we can
 * dynamically truncate packets without needing to push the actual
 * payload to user space, analyzing only headers instead.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys_basic keys;

	if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
					      NULL, 0, 0, 0, 0))
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}

__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
{
	memset(keys, 0, sizeof(*keys));

	memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
	       sizeof(keys->addrs.v6addrs.src));
	memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
	       sizeof(keys->addrs.v6addrs.dst));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->ports.src = fl6->fl6_sport;
	keys->ports.dst = fl6->fl6_dport;
	keys->keyid.keyid = fl6->fl6_gre_key;
	keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
	keys->basic.ip_proto = fl6->flowi6_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi6);
static const struct flow_dissector_key flow_keys_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC,
		.offset = offsetof(struct flow_keys, addrs.tipckey),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
};

static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
};

static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
};

struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

struct flow_dissector flow_keys_basic_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_basic_dissector);

static int __init init_default_flow_dissectors(void)
{
	skb_flow_dissector_init(&flow_keys_dissector,
				flow_keys_dissector_keys,
				ARRAY_SIZE(flow_keys_dissector_keys));
	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
				flow_keys_dissector_symmetric_keys,
				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
	skb_flow_dissector_init(&flow_keys_basic_dissector,
				flow_keys_basic_dissector_keys,
				ARRAY_SIZE(flow_keys_basic_dissector_keys));
	return 0;
}
core_initcall(init_default_flow_dissectors);