1 #include <linux/kernel.h> 2 #include <linux/skbuff.h> 3 #include <linux/export.h> 4 #include <linux/ip.h> 5 #include <linux/ipv6.h> 6 #include <linux/if_vlan.h> 7 #include <net/ip.h> 8 #include <net/ipv6.h> 9 #include <net/gre.h> 10 #include <net/pptp.h> 11 #include <linux/igmp.h> 12 #include <linux/icmp.h> 13 #include <linux/sctp.h> 14 #include <linux/dccp.h> 15 #include <linux/if_tunnel.h> 16 #include <linux/if_pppox.h> 17 #include <linux/ppp_defs.h> 18 #include <linux/stddef.h> 19 #include <linux/if_ether.h> 20 #include <linux/mpls.h> 21 #include <net/flow_dissector.h> 22 #include <scsi/fc/fc_fcoe.h> 23 24 static void dissector_set_key(struct flow_dissector *flow_dissector, 25 enum flow_dissector_key_id key_id) 26 { 27 flow_dissector->used_keys |= (1 << key_id); 28 } 29 30 void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 31 const struct flow_dissector_key *key, 32 unsigned int key_count) 33 { 34 unsigned int i; 35 36 memset(flow_dissector, 0, sizeof(*flow_dissector)); 37 38 for (i = 0; i < key_count; i++, key++) { 39 /* User should make sure that every key target offset is withing 40 * boundaries of unsigned short. 41 */ 42 BUG_ON(key->offset > USHRT_MAX); 43 BUG_ON(dissector_uses_key(flow_dissector, 44 key->key_id)); 45 46 dissector_set_key(flow_dissector, key->key_id); 47 flow_dissector->offset[key->key_id] = key->offset; 48 } 49 50 /* Ensure that the dissector always includes control and basic key. 51 * That way we are able to avoid handling lack of these in fast path. 52 */ 53 BUG_ON(!dissector_uses_key(flow_dissector, 54 FLOW_DISSECTOR_KEY_CONTROL)); 55 BUG_ON(!dissector_uses_key(flow_dissector, 56 FLOW_DISSECTOR_KEY_BASIC)); 57 } 58 EXPORT_SYMBOL(skb_flow_dissector_init); 59 60 /** 61 * skb_flow_get_be16 - extract be16 entity 62 * @skb: sk_buff to extract from 63 * @poff: offset to extract at 64 * @data: raw buffer pointer to the packet 65 * @hlen: packet header length 66 * 67 * The function will try to retrieve a be32 entity at 68 * offset poff 69 */ 70 static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, 71 void *data, int hlen) 72 { 73 __be16 *u, _u; 74 75 u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u); 76 if (u) 77 return *u; 78 79 return 0; 80 } 81 82 /** 83 * __skb_flow_get_ports - extract the upper layer ports and return them 84 * @skb: sk_buff to extract the ports from 85 * @thoff: transport header offset 86 * @ip_proto: protocol for which to get port offset 87 * @data: raw buffer pointer to the packet, if NULL use skb->data 88 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 89 * 90 * The function will try to retrieve the ports at offset thoff + poff where poff 91 * is the protocol port offset returned from proto_ports_offset 92 */ 93 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, 94 void *data, int hlen) 95 { 96 int poff = proto_ports_offset(ip_proto); 97 98 if (!data) { 99 data = skb->data; 100 hlen = skb_headlen(skb); 101 } 102 103 if (poff >= 0) { 104 __be32 *ports, _ports; 105 106 ports = __skb_header_pointer(skb, thoff + poff, 107 sizeof(_ports), data, hlen, &_ports); 108 if (ports) 109 return *ports; 110 } 111 112 return 0; 113 } 114 EXPORT_SYMBOL(__skb_flow_get_ports); 115 116 /** 117 * __skb_flow_dissect - extract the flow_keys struct and return it 118 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified 119 * @flow_dissector: list of keys to dissect 120 * @target_container: target structure to put dissected values into 121 * @data: raw buffer pointer to the packet, if NULL use skb->data 122 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol 123 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) 124 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 125 * 126 * The function will try to retrieve individual keys into target specified 127 * by flow_dissector from either the skbuff or a raw buffer specified by the 128 * rest parameters. 129 * 130 * Caller must take care of zeroing target container memory. 131 */ 132 bool __skb_flow_dissect(const struct sk_buff *skb, 133 struct flow_dissector *flow_dissector, 134 void *target_container, 135 void *data, __be16 proto, int nhoff, int hlen, 136 unsigned int flags) 137 { 138 struct flow_dissector_key_control *key_control; 139 struct flow_dissector_key_basic *key_basic; 140 struct flow_dissector_key_addrs *key_addrs; 141 struct flow_dissector_key_arp *key_arp; 142 struct flow_dissector_key_ports *key_ports; 143 struct flow_dissector_key_icmp *key_icmp; 144 struct flow_dissector_key_tags *key_tags; 145 struct flow_dissector_key_vlan *key_vlan; 146 struct flow_dissector_key_keyid *key_keyid; 147 bool skip_vlan = false; 148 u8 ip_proto = 0; 149 bool ret; 150 151 if (!data) { 152 data = skb->data; 153 proto = skb_vlan_tag_present(skb) ? 154 skb->vlan_proto : skb->protocol; 155 nhoff = skb_network_offset(skb); 156 hlen = skb_headlen(skb); 157 } 158 159 /* It is ensured by skb_flow_dissector_init() that control key will 160 * be always present. 161 */ 162 key_control = skb_flow_dissector_target(flow_dissector, 163 FLOW_DISSECTOR_KEY_CONTROL, 164 target_container); 165 166 /* It is ensured by skb_flow_dissector_init() that basic key will 167 * be always present. 168 */ 169 key_basic = skb_flow_dissector_target(flow_dissector, 170 FLOW_DISSECTOR_KEY_BASIC, 171 target_container); 172 173 if (dissector_uses_key(flow_dissector, 174 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 175 struct ethhdr *eth = eth_hdr(skb); 176 struct flow_dissector_key_eth_addrs *key_eth_addrs; 177 178 key_eth_addrs = skb_flow_dissector_target(flow_dissector, 179 FLOW_DISSECTOR_KEY_ETH_ADDRS, 180 target_container); 181 memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); 182 } 183 184 again: 185 switch (proto) { 186 case htons(ETH_P_IP): { 187 const struct iphdr *iph; 188 struct iphdr _iph; 189 ip: 190 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 191 if (!iph || iph->ihl < 5) 192 goto out_bad; 193 nhoff += iph->ihl * 4; 194 195 ip_proto = iph->protocol; 196 197 if (dissector_uses_key(flow_dissector, 198 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 199 key_addrs = skb_flow_dissector_target(flow_dissector, 200 FLOW_DISSECTOR_KEY_IPV4_ADDRS, 201 target_container); 202 203 memcpy(&key_addrs->v4addrs, &iph->saddr, 204 sizeof(key_addrs->v4addrs)); 205 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 206 } 207 208 if (ip_is_fragment(iph)) { 209 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 210 211 if (iph->frag_off & htons(IP_OFFSET)) { 212 goto out_good; 213 } else { 214 key_control->flags |= FLOW_DIS_FIRST_FRAG; 215 if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) 216 goto out_good; 217 } 218 } 219 220 if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) 221 goto out_good; 222 223 break; 224 } 225 case htons(ETH_P_IPV6): { 226 const struct ipv6hdr *iph; 227 struct ipv6hdr _iph; 228 229 ipv6: 230 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 231 if (!iph) 232 goto out_bad; 233 234 ip_proto = iph->nexthdr; 235 nhoff += sizeof(struct ipv6hdr); 236 237 if (dissector_uses_key(flow_dissector, 238 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 239 key_addrs = skb_flow_dissector_target(flow_dissector, 240 FLOW_DISSECTOR_KEY_IPV6_ADDRS, 241 target_container); 242 243 memcpy(&key_addrs->v6addrs, &iph->saddr, 244 sizeof(key_addrs->v6addrs)); 245 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 246 } 247 248 if ((dissector_uses_key(flow_dissector, 249 FLOW_DISSECTOR_KEY_FLOW_LABEL) || 250 (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && 251 ip6_flowlabel(iph)) { 252 __be32 flow_label = ip6_flowlabel(iph); 253 254 if (dissector_uses_key(flow_dissector, 255 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 256 key_tags = skb_flow_dissector_target(flow_dissector, 257 FLOW_DISSECTOR_KEY_FLOW_LABEL, 258 target_container); 259 key_tags->flow_label = ntohl(flow_label); 260 } 261 if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) 262 goto out_good; 263 } 264 265 if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) 266 goto out_good; 267 268 break; 269 } 270 case htons(ETH_P_8021AD): 271 case htons(ETH_P_8021Q): { 272 const struct vlan_hdr *vlan; 273 struct vlan_hdr _vlan; 274 bool vlan_tag_present = skb && skb_vlan_tag_present(skb); 275 276 if (vlan_tag_present) 277 proto = skb->protocol; 278 279 if (!vlan_tag_present || eth_type_vlan(skb->protocol)) { 280 vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), 281 data, hlen, &_vlan); 282 if (!vlan) 283 goto out_bad; 284 proto = vlan->h_vlan_encapsulated_proto; 285 nhoff += sizeof(*vlan); 286 if (skip_vlan) 287 goto again; 288 } 289 290 skip_vlan = true; 291 if (dissector_uses_key(flow_dissector, 292 FLOW_DISSECTOR_KEY_VLAN)) { 293 key_vlan = skb_flow_dissector_target(flow_dissector, 294 FLOW_DISSECTOR_KEY_VLAN, 295 target_container); 296 297 if (vlan_tag_present) { 298 key_vlan->vlan_id = skb_vlan_tag_get_id(skb); 299 key_vlan->vlan_priority = 300 (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); 301 } else { 302 key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & 303 VLAN_VID_MASK; 304 key_vlan->vlan_priority = 305 (ntohs(vlan->h_vlan_TCI) & 306 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 307 } 308 } 309 310 goto again; 311 } 312 case htons(ETH_P_PPP_SES): { 313 struct { 314 struct pppoe_hdr hdr; 315 __be16 proto; 316 } *hdr, _hdr; 317 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 318 if (!hdr) 319 goto out_bad; 320 proto = hdr->proto; 321 nhoff += PPPOE_SES_HLEN; 322 switch (proto) { 323 case htons(PPP_IP): 324 goto ip; 325 case htons(PPP_IPV6): 326 goto ipv6; 327 default: 328 goto out_bad; 329 } 330 } 331 case htons(ETH_P_TIPC): { 332 struct { 333 __be32 pre[3]; 334 __be32 srcnode; 335 } *hdr, _hdr; 336 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 337 if (!hdr) 338 goto out_bad; 339 340 if (dissector_uses_key(flow_dissector, 341 FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { 342 key_addrs = skb_flow_dissector_target(flow_dissector, 343 FLOW_DISSECTOR_KEY_TIPC_ADDRS, 344 target_container); 345 key_addrs->tipcaddrs.srcnode = hdr->srcnode; 346 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; 347 } 348 goto out_good; 349 } 350 351 case htons(ETH_P_MPLS_UC): 352 case htons(ETH_P_MPLS_MC): { 353 struct mpls_label *hdr, _hdr[2]; 354 mpls: 355 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, 356 hlen, &_hdr); 357 if (!hdr) 358 goto out_bad; 359 360 if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >> 361 MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) { 362 if (dissector_uses_key(flow_dissector, 363 FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { 364 key_keyid = skb_flow_dissector_target(flow_dissector, 365 FLOW_DISSECTOR_KEY_MPLS_ENTROPY, 366 target_container); 367 key_keyid->keyid = hdr[1].entry & 368 htonl(MPLS_LS_LABEL_MASK); 369 } 370 371 goto out_good; 372 } 373 374 goto out_good; 375 } 376 377 case htons(ETH_P_FCOE): 378 if ((hlen - nhoff) < FCOE_HEADER_LEN) 379 goto out_bad; 380 381 nhoff += FCOE_HEADER_LEN; 382 goto out_good; 383 384 case htons(ETH_P_ARP): 385 case htons(ETH_P_RARP): { 386 struct { 387 unsigned char ar_sha[ETH_ALEN]; 388 unsigned char ar_sip[4]; 389 unsigned char ar_tha[ETH_ALEN]; 390 unsigned char ar_tip[4]; 391 } *arp_eth, _arp_eth; 392 const struct arphdr *arp; 393 struct arphdr *_arp; 394 395 arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, 396 hlen, &_arp); 397 if (!arp) 398 goto out_bad; 399 400 if (arp->ar_hrd != htons(ARPHRD_ETHER) || 401 arp->ar_pro != htons(ETH_P_IP) || 402 arp->ar_hln != ETH_ALEN || 403 arp->ar_pln != 4 || 404 (arp->ar_op != htons(ARPOP_REPLY) && 405 arp->ar_op != htons(ARPOP_REQUEST))) 406 goto out_bad; 407 408 arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), 409 sizeof(_arp_eth), data, 410 hlen, 411 &_arp_eth); 412 if (!arp_eth) 413 goto out_bad; 414 415 if (dissector_uses_key(flow_dissector, 416 FLOW_DISSECTOR_KEY_ARP)) { 417 418 key_arp = skb_flow_dissector_target(flow_dissector, 419 FLOW_DISSECTOR_KEY_ARP, 420 target_container); 421 422 memcpy(&key_arp->sip, arp_eth->ar_sip, 423 sizeof(key_arp->sip)); 424 memcpy(&key_arp->tip, arp_eth->ar_tip, 425 sizeof(key_arp->tip)); 426 427 /* Only store the lower byte of the opcode; 428 * this covers ARPOP_REPLY and ARPOP_REQUEST. 429 */ 430 key_arp->op = ntohs(arp->ar_op) & 0xff; 431 432 ether_addr_copy(key_arp->sha, arp_eth->ar_sha); 433 ether_addr_copy(key_arp->tha, arp_eth->ar_tha); 434 } 435 436 goto out_good; 437 } 438 439 default: 440 goto out_bad; 441 } 442 443 ip_proto_again: 444 switch (ip_proto) { 445 case IPPROTO_GRE: { 446 struct gre_base_hdr *hdr, _hdr; 447 u16 gre_ver; 448 int offset = 0; 449 450 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 451 if (!hdr) 452 goto out_bad; 453 454 /* Only look inside GRE without routing */ 455 if (hdr->flags & GRE_ROUTING) 456 break; 457 458 /* Only look inside GRE for version 0 and 1 */ 459 gre_ver = ntohs(hdr->flags & GRE_VERSION); 460 if (gre_ver > 1) 461 break; 462 463 proto = hdr->protocol; 464 if (gre_ver) { 465 /* Version1 must be PPTP, and check the flags */ 466 if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) 467 break; 468 } 469 470 offset += sizeof(struct gre_base_hdr); 471 472 if (hdr->flags & GRE_CSUM) 473 offset += sizeof(((struct gre_full_hdr *)0)->csum) + 474 sizeof(((struct gre_full_hdr *)0)->reserved1); 475 476 if (hdr->flags & GRE_KEY) { 477 const __be32 *keyid; 478 __be32 _keyid; 479 480 keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), 481 data, hlen, &_keyid); 482 if (!keyid) 483 goto out_bad; 484 485 if (dissector_uses_key(flow_dissector, 486 FLOW_DISSECTOR_KEY_GRE_KEYID)) { 487 key_keyid = skb_flow_dissector_target(flow_dissector, 488 FLOW_DISSECTOR_KEY_GRE_KEYID, 489 target_container); 490 if (gre_ver == 0) 491 key_keyid->keyid = *keyid; 492 else 493 key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; 494 } 495 offset += sizeof(((struct gre_full_hdr *)0)->key); 496 } 497 498 if (hdr->flags & GRE_SEQ) 499 offset += sizeof(((struct pptp_gre_header *)0)->seq); 500 501 if (gre_ver == 0) { 502 if (proto == htons(ETH_P_TEB)) { 503 const struct ethhdr *eth; 504 struct ethhdr _eth; 505 506 eth = __skb_header_pointer(skb, nhoff + offset, 507 sizeof(_eth), 508 data, hlen, &_eth); 509 if (!eth) 510 goto out_bad; 511 proto = eth->h_proto; 512 offset += sizeof(*eth); 513 514 /* Cap headers that we access via pointers at the 515 * end of the Ethernet header as our maximum alignment 516 * at that point is only 2 bytes. 517 */ 518 if (NET_IP_ALIGN) 519 hlen = (nhoff + offset); 520 } 521 } else { /* version 1, must be PPTP */ 522 u8 _ppp_hdr[PPP_HDRLEN]; 523 u8 *ppp_hdr; 524 525 if (hdr->flags & GRE_ACK) 526 offset += sizeof(((struct pptp_gre_header *)0)->ack); 527 528 ppp_hdr = __skb_header_pointer(skb, nhoff + offset, 529 sizeof(_ppp_hdr), 530 data, hlen, _ppp_hdr); 531 if (!ppp_hdr) 532 goto out_bad; 533 534 switch (PPP_PROTOCOL(ppp_hdr)) { 535 case PPP_IP: 536 proto = htons(ETH_P_IP); 537 break; 538 case PPP_IPV6: 539 proto = htons(ETH_P_IPV6); 540 break; 541 default: 542 /* Could probably catch some more like MPLS */ 543 break; 544 } 545 546 offset += PPP_HDRLEN; 547 } 548 549 nhoff += offset; 550 key_control->flags |= FLOW_DIS_ENCAPSULATION; 551 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 552 goto out_good; 553 554 goto again; 555 } 556 case NEXTHDR_HOP: 557 case NEXTHDR_ROUTING: 558 case NEXTHDR_DEST: { 559 u8 _opthdr[2], *opthdr; 560 561 if (proto != htons(ETH_P_IPV6)) 562 break; 563 564 opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), 565 data, hlen, &_opthdr); 566 if (!opthdr) 567 goto out_bad; 568 569 ip_proto = opthdr[0]; 570 nhoff += (opthdr[1] + 1) << 3; 571 572 goto ip_proto_again; 573 } 574 case NEXTHDR_FRAGMENT: { 575 struct frag_hdr _fh, *fh; 576 577 if (proto != htons(ETH_P_IPV6)) 578 break; 579 580 fh = __skb_header_pointer(skb, nhoff, sizeof(_fh), 581 data, hlen, &_fh); 582 583 if (!fh) 584 goto out_bad; 585 586 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 587 588 nhoff += sizeof(_fh); 589 ip_proto = fh->nexthdr; 590 591 if (!(fh->frag_off & htons(IP6_OFFSET))) { 592 key_control->flags |= FLOW_DIS_FIRST_FRAG; 593 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) 594 goto ip_proto_again; 595 } 596 goto out_good; 597 } 598 case IPPROTO_IPIP: 599 proto = htons(ETH_P_IP); 600 601 key_control->flags |= FLOW_DIS_ENCAPSULATION; 602 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 603 goto out_good; 604 605 goto ip; 606 case IPPROTO_IPV6: 607 proto = htons(ETH_P_IPV6); 608 609 key_control->flags |= FLOW_DIS_ENCAPSULATION; 610 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 611 goto out_good; 612 613 goto ipv6; 614 case IPPROTO_MPLS: 615 proto = htons(ETH_P_MPLS_UC); 616 goto mpls; 617 default: 618 break; 619 } 620 621 if (dissector_uses_key(flow_dissector, 622 FLOW_DISSECTOR_KEY_PORTS)) { 623 key_ports = skb_flow_dissector_target(flow_dissector, 624 FLOW_DISSECTOR_KEY_PORTS, 625 target_container); 626 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, 627 data, hlen); 628 } 629 630 if (dissector_uses_key(flow_dissector, 631 FLOW_DISSECTOR_KEY_ICMP)) { 632 key_icmp = skb_flow_dissector_target(flow_dissector, 633 FLOW_DISSECTOR_KEY_ICMP, 634 target_container); 635 key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen); 636 } 637 638 out_good: 639 ret = true; 640 641 key_control->thoff = (u16)nhoff; 642 out: 643 key_basic->n_proto = proto; 644 key_basic->ip_proto = ip_proto; 645 646 return ret; 647 648 out_bad: 649 ret = false; 650 key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); 651 goto out; 652 } 653 EXPORT_SYMBOL(__skb_flow_dissect); 654 655 static u32 hashrnd __read_mostly; 656 static __always_inline void __flow_hash_secret_init(void) 657 { 658 net_get_random_once(&hashrnd, sizeof(hashrnd)); 659 } 660 661 static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, 662 u32 keyval) 663 { 664 return jhash2(words, length, keyval); 665 } 666 667 static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) 668 { 669 const void *p = flow; 670 671 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); 672 return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); 673 } 674 675 static inline size_t flow_keys_hash_length(const struct flow_keys *flow) 676 { 677 size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); 678 BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); 679 BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != 680 sizeof(*flow) - sizeof(flow->addrs)); 681 682 switch (flow->control.addr_type) { 683 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 684 diff -= sizeof(flow->addrs.v4addrs); 685 break; 686 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 687 diff -= sizeof(flow->addrs.v6addrs); 688 break; 689 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 690 diff -= sizeof(flow->addrs.tipcaddrs); 691 break; 692 } 693 return (sizeof(*flow) - diff) / sizeof(u32); 694 } 695 696 __be32 flow_get_u32_src(const struct flow_keys *flow) 697 { 698 switch (flow->control.addr_type) { 699 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 700 return flow->addrs.v4addrs.src; 701 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 702 return (__force __be32)ipv6_addr_hash( 703 &flow->addrs.v6addrs.src); 704 case FLOW_DISSECTOR_KEY_TIPC_ADDRS: 705 return flow->addrs.tipcaddrs.srcnode; 706 default: 707 return 0; 708 } 709 } 710 EXPORT_SYMBOL(flow_get_u32_src); 711 712 __be32 flow_get_u32_dst(const struct flow_keys *flow) 713 { 714 switch (flow->control.addr_type) { 715 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 716 return flow->addrs.v4addrs.dst; 717 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 718 return (__force __be32)ipv6_addr_hash( 719 &flow->addrs.v6addrs.dst); 720 default: 721 return 0; 722 } 723 } 724 EXPORT_SYMBOL(flow_get_u32_dst); 725 726 static inline void __flow_hash_consistentify(struct flow_keys *keys) 727 { 728 int addr_diff, i; 729 730 switch (keys->control.addr_type) { 731 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 732 addr_diff = (__force u32)keys->addrs.v4addrs.dst - 733 (__force u32)keys->addrs.v4addrs.src; 734 if ((addr_diff < 0) || 735 (addr_diff == 0 && 736 ((__force u16)keys->ports.dst < 737 (__force u16)keys->ports.src))) { 738 swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); 739 swap(keys->ports.src, keys->ports.dst); 740 } 741 break; 742 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 743 addr_diff = memcmp(&keys->addrs.v6addrs.dst, 744 &keys->addrs.v6addrs.src, 745 sizeof(keys->addrs.v6addrs.dst)); 746 if ((addr_diff < 0) || 747 (addr_diff == 0 && 748 ((__force u16)keys->ports.dst < 749 (__force u16)keys->ports.src))) { 750 for (i = 0; i < 4; i++) 751 swap(keys->addrs.v6addrs.src.s6_addr32[i], 752 keys->addrs.v6addrs.dst.s6_addr32[i]); 753 swap(keys->ports.src, keys->ports.dst); 754 } 755 break; 756 } 757 } 758 759 static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) 760 { 761 u32 hash; 762 763 __flow_hash_consistentify(keys); 764 765 hash = __flow_hash_words(flow_keys_hash_start(keys), 766 flow_keys_hash_length(keys), keyval); 767 if (!hash) 768 hash = 1; 769 770 return hash; 771 } 772 773 u32 flow_hash_from_keys(struct flow_keys *keys) 774 { 775 __flow_hash_secret_init(); 776 return __flow_hash_from_keys(keys, hashrnd); 777 } 778 EXPORT_SYMBOL(flow_hash_from_keys); 779 780 static inline u32 ___skb_get_hash(const struct sk_buff *skb, 781 struct flow_keys *keys, u32 keyval) 782 { 783 skb_flow_dissect_flow_keys(skb, keys, 784 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 785 786 return __flow_hash_from_keys(keys, keyval); 787 } 788 789 struct _flow_keys_digest_data { 790 __be16 n_proto; 791 u8 ip_proto; 792 u8 padding; 793 __be32 ports; 794 __be32 src; 795 __be32 dst; 796 }; 797 798 void make_flow_keys_digest(struct flow_keys_digest *digest, 799 const struct flow_keys *flow) 800 { 801 struct _flow_keys_digest_data *data = 802 (struct _flow_keys_digest_data *)digest; 803 804 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); 805 806 memset(digest, 0, sizeof(*digest)); 807 808 data->n_proto = flow->basic.n_proto; 809 data->ip_proto = flow->basic.ip_proto; 810 data->ports = flow->ports.ports; 811 data->src = flow->addrs.v4addrs.src; 812 data->dst = flow->addrs.v4addrs.dst; 813 } 814 EXPORT_SYMBOL(make_flow_keys_digest); 815 816 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; 817 818 u32 __skb_get_hash_symmetric(const struct sk_buff *skb) 819 { 820 struct flow_keys keys; 821 822 __flow_hash_secret_init(); 823 824 memset(&keys, 0, sizeof(keys)); 825 __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 826 NULL, 0, 0, 0, 827 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 828 829 return __flow_hash_from_keys(&keys, hashrnd); 830 } 831 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); 832 833 /** 834 * __skb_get_hash: calculate a flow hash 835 * @skb: sk_buff to calculate flow hash from 836 * 837 * This function calculates a flow hash based on src/dst addresses 838 * and src/dst port numbers. Sets hash in skb to non-zero hash value 839 * on success, zero indicates no valid hash. Also, sets l4_hash in skb 840 * if hash is a canonical 4-tuple hash over transport ports. 841 */ 842 void __skb_get_hash(struct sk_buff *skb) 843 { 844 struct flow_keys keys; 845 u32 hash; 846 847 __flow_hash_secret_init(); 848 849 hash = ___skb_get_hash(skb, &keys, hashrnd); 850 851 __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); 852 } 853 EXPORT_SYMBOL(__skb_get_hash); 854 855 __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) 856 { 857 struct flow_keys keys; 858 859 return ___skb_get_hash(skb, &keys, perturb); 860 } 861 EXPORT_SYMBOL(skb_get_hash_perturb); 862 863 __u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) 864 { 865 struct flow_keys keys; 866 867 memset(&keys, 0, sizeof(keys)); 868 869 memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, 870 sizeof(keys.addrs.v6addrs.src)); 871 memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, 872 sizeof(keys.addrs.v6addrs.dst)); 873 keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 874 keys.ports.src = fl6->fl6_sport; 875 keys.ports.dst = fl6->fl6_dport; 876 keys.keyid.keyid = fl6->fl6_gre_key; 877 keys.tags.flow_label = (__force u32)fl6->flowlabel; 878 keys.basic.ip_proto = fl6->flowi6_proto; 879 880 __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), 881 flow_keys_have_l4(&keys)); 882 883 return skb->hash; 884 } 885 EXPORT_SYMBOL(__skb_get_hash_flowi6); 886 887 __u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) 888 { 889 struct flow_keys keys; 890 891 memset(&keys, 0, sizeof(keys)); 892 893 keys.addrs.v4addrs.src = fl4->saddr; 894 keys.addrs.v4addrs.dst = fl4->daddr; 895 keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 896 keys.ports.src = fl4->fl4_sport; 897 keys.ports.dst = fl4->fl4_dport; 898 keys.keyid.keyid = fl4->fl4_gre_key; 899 keys.basic.ip_proto = fl4->flowi4_proto; 900 901 __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), 902 flow_keys_have_l4(&keys)); 903 904 return skb->hash; 905 } 906 EXPORT_SYMBOL(__skb_get_hash_flowi4); 907 908 u32 __skb_get_poff(const struct sk_buff *skb, void *data, 909 const struct flow_keys *keys, int hlen) 910 { 911 u32 poff = keys->control.thoff; 912 913 /* skip L4 headers for fragments after the first */ 914 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) && 915 !(keys->control.flags & FLOW_DIS_FIRST_FRAG)) 916 return poff; 917 918 switch (keys->basic.ip_proto) { 919 case IPPROTO_TCP: { 920 /* access doff as u8 to avoid unaligned access */ 921 const u8 *doff; 922 u8 _doff; 923 924 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), 925 data, hlen, &_doff); 926 if (!doff) 927 return poff; 928 929 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); 930 break; 931 } 932 case IPPROTO_UDP: 933 case IPPROTO_UDPLITE: 934 poff += sizeof(struct udphdr); 935 break; 936 /* For the rest, we do not really care about header 937 * extensions at this point for now. 938 */ 939 case IPPROTO_ICMP: 940 poff += sizeof(struct icmphdr); 941 break; 942 case IPPROTO_ICMPV6: 943 poff += sizeof(struct icmp6hdr); 944 break; 945 case IPPROTO_IGMP: 946 poff += sizeof(struct igmphdr); 947 break; 948 case IPPROTO_DCCP: 949 poff += sizeof(struct dccp_hdr); 950 break; 951 case IPPROTO_SCTP: 952 poff += sizeof(struct sctphdr); 953 break; 954 } 955 956 return poff; 957 } 958 959 /** 960 * skb_get_poff - get the offset to the payload 961 * @skb: sk_buff to get the payload offset from 962 * 963 * The function will get the offset to the payload as far as it could 964 * be dissected. The main user is currently BPF, so that we can dynamically 965 * truncate packets without needing to push actual payload to the user 966 * space and can analyze headers only, instead. 967 */ 968 u32 skb_get_poff(const struct sk_buff *skb) 969 { 970 struct flow_keys keys; 971 972 if (!skb_flow_dissect_flow_keys(skb, &keys, 0)) 973 return 0; 974 975 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); 976 } 977 978 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) 979 { 980 memset(keys, 0, sizeof(*keys)); 981 982 memcpy(&keys->addrs.v6addrs.src, &fl6->saddr, 983 sizeof(keys->addrs.v6addrs.src)); 984 memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr, 985 sizeof(keys->addrs.v6addrs.dst)); 986 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 987 keys->ports.src = fl6->fl6_sport; 988 keys->ports.dst = fl6->fl6_dport; 989 keys->keyid.keyid = fl6->fl6_gre_key; 990 keys->tags.flow_label = (__force u32)fl6->flowlabel; 991 keys->basic.ip_proto = fl6->flowi6_proto; 992 993 return flow_hash_from_keys(keys); 994 } 995 EXPORT_SYMBOL(__get_hash_from_flowi6); 996 997 __u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys) 998 { 999 memset(keys, 0, sizeof(*keys)); 1000 1001 keys->addrs.v4addrs.src = fl4->saddr; 1002 keys->addrs.v4addrs.dst = fl4->daddr; 1003 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1004 keys->ports.src = fl4->fl4_sport; 1005 keys->ports.dst = fl4->fl4_dport; 1006 keys->keyid.keyid = fl4->fl4_gre_key; 1007 keys->basic.ip_proto = fl4->flowi4_proto; 1008 1009 return flow_hash_from_keys(keys); 1010 } 1011 EXPORT_SYMBOL(__get_hash_from_flowi4); 1012 1013 static const struct flow_dissector_key flow_keys_dissector_keys[] = { 1014 { 1015 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1016 .offset = offsetof(struct flow_keys, control), 1017 }, 1018 { 1019 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1020 .offset = offsetof(struct flow_keys, basic), 1021 }, 1022 { 1023 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1024 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1025 }, 1026 { 1027 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1028 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1029 }, 1030 { 1031 .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, 1032 .offset = offsetof(struct flow_keys, addrs.tipcaddrs), 1033 }, 1034 { 1035 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1036 .offset = offsetof(struct flow_keys, ports), 1037 }, 1038 { 1039 .key_id = FLOW_DISSECTOR_KEY_VLAN, 1040 .offset = offsetof(struct flow_keys, vlan), 1041 }, 1042 { 1043 .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, 1044 .offset = offsetof(struct flow_keys, tags), 1045 }, 1046 { 1047 .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, 1048 .offset = offsetof(struct flow_keys, keyid), 1049 }, 1050 }; 1051 1052 static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { 1053 { 1054 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1055 .offset = offsetof(struct flow_keys, control), 1056 }, 1057 { 1058 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1059 .offset = offsetof(struct flow_keys, basic), 1060 }, 1061 { 1062 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1063 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1064 }, 1065 { 1066 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1067 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1068 }, 1069 { 1070 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1071 .offset = offsetof(struct flow_keys, ports), 1072 }, 1073 }; 1074 1075 static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { 1076 { 1077 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1078 .offset = offsetof(struct flow_keys, control), 1079 }, 1080 { 1081 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1082 .offset = offsetof(struct flow_keys, basic), 1083 }, 1084 }; 1085 1086 struct flow_dissector flow_keys_dissector __read_mostly; 1087 EXPORT_SYMBOL(flow_keys_dissector); 1088 1089 struct flow_dissector flow_keys_buf_dissector __read_mostly; 1090 1091 static int __init init_default_flow_dissectors(void) 1092 { 1093 skb_flow_dissector_init(&flow_keys_dissector, 1094 flow_keys_dissector_keys, 1095 ARRAY_SIZE(flow_keys_dissector_keys)); 1096 skb_flow_dissector_init(&flow_keys_dissector_symmetric, 1097 flow_keys_dissector_symmetric_keys, 1098 ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); 1099 skb_flow_dissector_init(&flow_keys_buf_dissector, 1100 flow_keys_buf_dissector_keys, 1101 ARRAY_SIZE(flow_keys_buf_dissector_keys)); 1102 return 0; 1103 } 1104 1105 core_initcall(init_default_flow_dissectors); 1106