// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/dsa.h>
#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
#include <net/pptp.h>
#include <net/tipc.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/stddef.h>
#include <linux/if_ether.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
#include <linux/bpf.h>

static DEFINE_MUTEX(flow_dissector_mutex);

static void dissector_set_key(struct flow_dissector *flow_dissector,
			      enum flow_dissector_key_id key_id)
{
	flow_dissector->used_keys |= (1 << key_id);
}

void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count)
{
	unsigned int i;

	memset(flow_dissector, 0, sizeof(*flow_dissector));

	for (i = 0; i < key_count; i++, key++) {
		/* User should make sure that every key target offset is within
		 * the boundaries of unsigned short.
		 */
		BUG_ON(key->offset > USHRT_MAX);
		BUG_ON(dissector_uses_key(flow_dissector,
					  key->key_id));

		dissector_set_key(flow_dissector, key->key_id);
		flow_dissector->offset[key->key_id] = key->offset;
	}

	/* Ensure that the dissector always includes the control and basic
	 * keys. That way we are able to avoid handling their absence in the
	 * fast path.
	 */
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_CONTROL));
	BUG_ON(!dissector_uses_key(flow_dissector,
				   FLOW_DISSECTOR_KEY_BASIC));
}
EXPORT_SYMBOL(skb_flow_dissector_init);
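
/*
 * Illustrative sketch only (not built here): a caller describes which keys
 * it wants and where each lands in its own container struct, then hands the
 * table to skb_flow_dissector_init(). "struct my_keys" and "my_dissector"
 * are hypothetical names; the real in-tree examples are the
 * flow_keys_dissector_keys tables at the end of this file.
 *
 *	static const struct flow_dissector_key my_keys_spec[] = {
 *		{
 *			.key_id = FLOW_DISSECTOR_KEY_CONTROL,
 *			.offset = offsetof(struct my_keys, control),
 *		},
 *		{
 *			.key_id = FLOW_DISSECTOR_KEY_BASIC,
 *			.offset = offsetof(struct my_keys, basic),
 *		},
 *	};
 *	static struct flow_dissector my_dissector __read_mostly;
 *
 *	skb_flow_dissector_init(&my_dissector, my_keys_spec,
 *				ARRAY_SIZE(my_keys_spec));
 */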

int skb_flow_dissector_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	u32 prog_id, prog_cnt = 0, flags = 0;
	struct bpf_prog *attached;
	struct net *net;

	if (attr->query.query_flags)
		return -EINVAL;

	net = get_net_ns_by_fd(attr->query.target_fd);
	if (IS_ERR(net))
		return PTR_ERR(net);

	rcu_read_lock();
	attached = rcu_dereference(net->flow_dissector_prog);
	if (attached) {
		prog_cnt = 1;
		prog_id = attached->aux->id;
	}
	rcu_read_unlock();

	put_net(net);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;

	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
		return -EFAULT;

	return 0;
}

int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
				       struct bpf_prog *prog)
{
	struct bpf_prog *attached;
	struct net *net;

	net = current->nsproxy->net_ns;
	mutex_lock(&flow_dissector_mutex);
	attached = rcu_dereference_protected(net->flow_dissector_prog,
					     lockdep_is_held(&flow_dissector_mutex));
	if (attached) {
		/* Only one BPF program can be attached at a time */
		mutex_unlock(&flow_dissector_mutex);
		return -EEXIST;
	}
	rcu_assign_pointer(net->flow_dissector_prog, prog);
	mutex_unlock(&flow_dissector_mutex);
	return 0;
}

int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
{
	struct bpf_prog *attached;
	struct net *net;

	net = current->nsproxy->net_ns;
	mutex_lock(&flow_dissector_mutex);
	attached = rcu_dereference_protected(net->flow_dissector_prog,
					     lockdep_is_held(&flow_dissector_mutex));
	if (!attached) {
		mutex_unlock(&flow_dissector_mutex);
		return -ENOENT;
	}
	bpf_prog_put(attached);
	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
	mutex_unlock(&flow_dissector_mutex);
	return 0;
}

/**
 * skb_flow_get_be16 - extract be16 entity
 * @skb: sk_buff to extract from
 * @poff: offset to extract at
 * @data: raw buffer pointer to the packet
 * @hlen: packet header length
 *
 * The function will try to retrieve a be16 entity at
 * offset poff
 */
static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
				void *data, int hlen)
{
	__be16 *u, _u;

	u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
	if (u)
		return *u;

	return 0;
}

/**
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the ports at offset thoff + poff where poff
 * is the protocol port offset returned from proto_ports_offset
 */
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
{
	int poff = proto_ports_offset(ip_proto);

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	if (poff >= 0) {
		__be32 *ports, _ports;

		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
		if (ports)
			return *ports;
	}

	return 0;
}
EXPORT_SYMBOL(__skb_flow_get_ports);
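
/*
 * Illustrative sketch only: the returned __be32 carries the source and
 * destination ports back to back in wire order, which is why it can be
 * stored straight into struct flow_dissector_key_ports (a union of one
 * __be32 over a src/dst __be16 pair):
 *
 *	struct flow_dissector_key_ports tp;
 *
 *	tp.ports = __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0);
 *	... tp.src and tp.dst now hold the individual port numbers ...
 */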

static void
skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
				   struct flow_dissector *flow_dissector,
				   void *target_container)
{
	struct flow_dissector_key_control *ctrl;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
		return;

	ctrl = skb_flow_dissector_target(flow_dissector,
					 FLOW_DISSECTOR_KEY_ENC_CONTROL,
					 target_container);
	ctrl->addr_type = type;
}

void
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
			     struct flow_dissector *flow_dissector,
			     void *target_container)
{
	struct ip_tunnel_info *info;
	struct ip_tunnel_key *key;

	/* A quick check to see if there might be something to do. */
	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_KEYID) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_IP) &&
	    !dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_ENC_OPTS))
		return;

	info = skb_tunnel_info(skb);
	if (!info)
		return;

	key = &info->key;

	switch (ip_tunnel_info_af(info)) {
	case AF_INET:
		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						   flow_dissector,
						   target_container);
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
			struct flow_dissector_key_ipv4_addrs *ipv4;

			ipv4 = skb_flow_dissector_target(flow_dissector,
							 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
							 target_container);
			ipv4->src = key->u.ipv4.src;
			ipv4->dst = key->u.ipv4.dst;
		}
		break;
	case AF_INET6:
		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						   flow_dissector,
						   target_container);
		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
			struct flow_dissector_key_ipv6_addrs *ipv6;

			ipv6 = skb_flow_dissector_target(flow_dissector,
							 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
							 target_container);
			ipv6->src = key->u.ipv6.src;
			ipv6->dst = key->u.ipv6.dst;
		}
		break;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_dissector_key_keyid *keyid;

		keyid = skb_flow_dissector_target(flow_dissector,
						  FLOW_DISSECTOR_KEY_ENC_KEYID,
						  target_container);
		keyid->keyid = tunnel_id_to_key32(key->tun_id);
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		struct flow_dissector_key_ports *tp;

		tp = skb_flow_dissector_target(flow_dissector,
					       FLOW_DISSECTOR_KEY_ENC_PORTS,
					       target_container);
		tp->src = key->tp_src;
		tp->dst = key->tp_dst;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_dissector_key_ip *ip;

		ip = skb_flow_dissector_target(flow_dissector,
					       FLOW_DISSECTOR_KEY_ENC_IP,
					       target_container);
		ip->tos = key->tos;
		ip->ttl = key->ttl;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
		struct flow_dissector_key_enc_opts *enc_opt;

		enc_opt = skb_flow_dissector_target(flow_dissector,
						    FLOW_DISSECTOR_KEY_ENC_OPTS,
						    target_container);

		if (info->options_len) {
			enc_opt->len = info->options_len;
			ip_tunnel_info_opts_get(enc_opt->data, info);
			enc_opt->dst_opt_type = info->key.tun_flags &
						TUNNEL_OPTIONS_PRESENT;
		}
	}
}
EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
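
/*
 * Note: skb_tunnel_info() reads decapsulation metadata (struct
 * ip_tunnel_info) left on the skb by a metadata-mode tunnel device; nothing
 * here re-parses outer headers from packet data. An illustrative caller,
 * e.g. a classifier matching on the outer tunnel, would simply run this
 * after the regular dissection pass ("my_dissector"/"my_keys" being the
 * hypothetical names from the sketch above):
 *
 *	skb_flow_dissect_tunnel_info(skb, &my_dissector, &my_keys);
 */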

static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, int nhoff, int hlen)
{
	struct flow_dissector_key_keyid *key_keyid;
	struct mpls_label *hdr, _hdr[2];
	u32 entry, label;

	if (!dissector_uses_key(flow_dissector,
				FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
	    !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
		return FLOW_DISSECT_RET_OUT_GOOD;

	hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
				   hlen, &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	entry = ntohl(hdr[0].entry);
	label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
		struct flow_dissector_key_mpls *key_mpls;

		key_mpls = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_MPLS,
						     target_container);
		key_mpls->mpls_label = label;
		key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
			>> MPLS_LS_TTL_SHIFT;
		key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
			>> MPLS_LS_TC_SHIFT;
		key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
			>> MPLS_LS_S_SHIFT;
	}

	if (label == MPLS_LABEL_ENTROPY) {
		key_keyid = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
						      target_container);
		key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
	}
	return FLOW_DISSECT_RET_OUT_GOOD;
}
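
/*
 * For reference, each MPLS label stack entry is one 32-bit word (RFC 3032):
 * bits 31-12 carry the label, bits 11-9 the traffic class, bit 8 the
 * bottom-of-stack flag and bits 7-0 the TTL. For example, after ntohl():
 *
 *	entry = 0x00011340
 *	label = entry >> 12        = 17
 *	tc    = (entry >> 9) & 0x7 = 1
 *	bos   = (entry >> 8) & 0x1 = 1
 *	ttl   = entry & 0xff       = 64
 */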

static enum flow_dissect_ret
__skb_flow_dissect_arp(const struct sk_buff *skb,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data, int nhoff, int hlen)
{
	struct flow_dissector_key_arp *key_arp;
	struct {
		unsigned char ar_sha[ETH_ALEN];
		unsigned char ar_sip[4];
		unsigned char ar_tha[ETH_ALEN];
		unsigned char ar_tip[4];
	} *arp_eth, _arp_eth;
	const struct arphdr *arp;
	struct arphdr _arp;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
		return FLOW_DISSECT_RET_OUT_GOOD;

	arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
				   hlen, &_arp);
	if (!arp)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_hln != ETH_ALEN ||
	    arp->ar_pln != 4 ||
	    (arp->ar_op != htons(ARPOP_REPLY) &&
	     arp->ar_op != htons(ARPOP_REQUEST)))
		return FLOW_DISSECT_RET_OUT_BAD;

	arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
				       sizeof(_arp_eth), data,
				       hlen, &_arp_eth);
	if (!arp_eth)
		return FLOW_DISSECT_RET_OUT_BAD;

	key_arp = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_ARP,
					    target_container);

	memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
	memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));

	/* Only store the lower byte of the opcode;
	 * this covers ARPOP_REPLY and ARPOP_REQUEST.
	 */
	key_arp->op = ntohs(arp->ar_op) & 0xff;

	ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
	ether_addr_copy(key_arp->tha, arp_eth->ar_tha);

	return FLOW_DISSECT_RET_OUT_GOOD;
}

static enum flow_dissect_ret
__skb_flow_dissect_gre(const struct sk_buff *skb,
		       struct flow_dissector_key_control *key_control,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data,
		       __be16 *p_proto, int *p_nhoff, int *p_hlen,
		       unsigned int flags)
{
	struct flow_dissector_key_keyid *key_keyid;
	struct gre_base_hdr *hdr, _hdr;
	int offset = 0;
	u16 gre_ver;

	hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
				   data, *p_hlen, &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	/* Only look inside GRE without routing */
	if (hdr->flags & GRE_ROUTING)
		return FLOW_DISSECT_RET_OUT_GOOD;

	/* Only look inside GRE for version 0 and 1 */
	gre_ver = ntohs(hdr->flags & GRE_VERSION);
	if (gre_ver > 1)
		return FLOW_DISSECT_RET_OUT_GOOD;

	*p_proto = hdr->protocol;
	if (gre_ver) {
		/* Version 1 must be PPTP, and check the flags */
		if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
			return FLOW_DISSECT_RET_OUT_GOOD;
	}

	offset += sizeof(struct gre_base_hdr);

	if (hdr->flags & GRE_CSUM)
		offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
			  FIELD_SIZEOF(struct gre_full_hdr, reserved1);

	if (hdr->flags & GRE_KEY) {
		const __be32 *keyid;
		__be32 _keyid;

		keyid = __skb_header_pointer(skb, *p_nhoff + offset,
					     sizeof(_keyid),
					     data, *p_hlen, &_keyid);
		if (!keyid)
			return FLOW_DISSECT_RET_OUT_BAD;

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
			key_keyid = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_GRE_KEYID,
							      target_container);
			if (gre_ver == 0)
				key_keyid->keyid = *keyid;
			else
				key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
		}
		offset += FIELD_SIZEOF(struct gre_full_hdr, key);
	}

	if (hdr->flags & GRE_SEQ)
		offset += FIELD_SIZEOF(struct pptp_gre_header, seq);

	if (gre_ver == 0) {
		if (*p_proto == htons(ETH_P_TEB)) {
			const struct ethhdr *eth;
			struct ethhdr _eth;

			eth = __skb_header_pointer(skb, *p_nhoff + offset,
						   sizeof(_eth),
						   data, *p_hlen, &_eth);
			if (!eth)
				return FLOW_DISSECT_RET_OUT_BAD;
			*p_proto = eth->h_proto;
			offset += sizeof(*eth);

			/* Cap headers that we access via pointers at the
			 * end of the Ethernet header as our maximum alignment
			 * at that point is only 2 bytes.
			 */
			if (NET_IP_ALIGN)
				*p_hlen = *p_nhoff + offset;
		}
	} else { /* version 1, must be PPTP */
		u8 _ppp_hdr[PPP_HDRLEN];
		u8 *ppp_hdr;

		if (hdr->flags & GRE_ACK)
			offset += FIELD_SIZEOF(struct pptp_gre_header, ack);

		ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
					       sizeof(_ppp_hdr),
					       data, *p_hlen, _ppp_hdr);
		if (!ppp_hdr)
			return FLOW_DISSECT_RET_OUT_BAD;

		switch (PPP_PROTOCOL(ppp_hdr)) {
		case PPP_IP:
			*p_proto = htons(ETH_P_IP);
			break;
		case PPP_IPV6:
			*p_proto = htons(ETH_P_IPV6);
			break;
		default:
			/* Could probably catch some more like MPLS */
			break;
		}

		offset += PPP_HDRLEN;
	}

	*p_nhoff += offset;
	key_control->flags |= FLOW_DIS_ENCAPSULATION;
	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
		return FLOW_DISSECT_RET_OUT_GOOD;

	return FLOW_DISSECT_RET_PROTO_AGAIN;
}
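
/*
 * The running "offset" above mirrors the GRE wire layout: optional fields
 * follow the 4-byte base header in a fixed order and are present only when
 * the corresponding flag bit is set. E.g. for a version 0 header with
 * GRE_KEY | GRE_SEQ set (and GRE_CSUM clear), the inner protocol starts at:
 *
 *	sizeof(struct gre_base_hdr)	 4 bytes
 *	+ key				 4 bytes
 *	+ seq				 4 bytes
 *					12 bytes total
 */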

/**
 * __skb_flow_dissect_batadv() - dissect batman-adv header
 * @skb: sk_buff with the batman-adv header
 * @key_control: flow dissectors control key
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @p_proto: pointer used to update the protocol to process next
 * @p_nhoff: pointer used to update inner network header offset
 * @hlen: packet header length
 * @flags: any combination of FLOW_DISSECTOR_F_*
 *
 * Tries to dissect ETH_P_BATMAN packets. Only &struct batadv_unicast packets
 * are actually processed, because they contain an inner ethernet header and
 * are usually followed by the actual network header. This allows the flow
 * dissector to continue processing the packet.
 *
 * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
 * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
 * otherwise FLOW_DISSECT_RET_OUT_BAD
 */
static enum flow_dissect_ret
__skb_flow_dissect_batadv(const struct sk_buff *skb,
			  struct flow_dissector_key_control *key_control,
			  void *data, __be16 *p_proto, int *p_nhoff, int hlen,
			  unsigned int flags)
{
	struct {
		struct batadv_unicast_packet batadv_unicast;
		struct ethhdr eth;
	} *hdr, _hdr;

	hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
				   &_hdr);
	if (!hdr)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
		return FLOW_DISSECT_RET_OUT_BAD;

	if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
		return FLOW_DISSECT_RET_OUT_BAD;

	*p_proto = hdr->eth.h_proto;
	*p_nhoff += sizeof(*hdr);

	key_control->flags |= FLOW_DIS_ENCAPSULATION;
	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
		return FLOW_DISSECT_RET_OUT_GOOD;

	return FLOW_DISSECT_RET_PROTO_AGAIN;
}

static void
__skb_flow_dissect_tcp(const struct sk_buff *skb,
		       struct flow_dissector *flow_dissector,
		       void *target_container, void *data, int thoff, int hlen)
{
	struct flow_dissector_key_tcp *key_tcp;
	struct tcphdr *th, _th;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP))
		return;

	th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th);
	if (!th)
		return;

	if (unlikely(__tcp_hdrlen(th) < sizeof(_th)))
		return;

	key_tcp = skb_flow_dissector_target(flow_dissector,
					    FLOW_DISSECTOR_KEY_TCP,
					    target_container);
	key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF));
}

static void
__skb_flow_dissect_ipv4(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, const struct iphdr *iph)
{
	struct flow_dissector_key_ip *key_ip;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
		return;

	key_ip = skb_flow_dissector_target(flow_dissector,
					   FLOW_DISSECTOR_KEY_IP,
					   target_container);
	key_ip->tos = iph->tos;
	key_ip->ttl = iph->ttl;
}

static void
__skb_flow_dissect_ipv6(const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container, void *data, const struct ipv6hdr *iph)
{
	struct flow_dissector_key_ip *key_ip;

	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
		return;

	key_ip = skb_flow_dissector_target(flow_dissector,
					   FLOW_DISSECTOR_KEY_IP,
					   target_container);
	key_ip->tos = ipv6_get_dsfield(iph);
	key_ip->ttl = iph->hop_limit;
}
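
/*
 * A note on the htons(0x0FFF) mask in __skb_flow_dissect_tcp(): bytes 12-13
 * of the TCP header hold the 4-bit data offset followed by 3 reserved bits
 * and the NS, CWR, ECE, URG, ACK, PSH, RST, SYN and FIN flags. Masking the
 * wire-order halfword with 0x0FFF drops the data offset nibble and keeps the
 * remaining 12 bits, so e.g. a plain SYN yields key_tcp->flags ==
 * htons(0x0002).
 */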

/* Maximum number of protocol headers that can be parsed in
 * __skb_flow_dissect
 */
#define MAX_FLOW_DISSECT_HDRS	15

static bool skb_flow_dissect_allowed(int *num_hdrs)
{
	++*num_hdrs;

	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
}

static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
				     struct flow_dissector *flow_dissector,
				     void *target_container)
{
	struct flow_dissector_key_control *key_control;
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_ports *key_ports;

	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);
	key_control->thoff = flow_keys->thoff;
	if (flow_keys->is_frag)
		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
	if (flow_keys->is_first_frag)
		key_control->flags |= FLOW_DIS_FIRST_FRAG;
	if (flow_keys->is_encap)
		key_control->flags |= FLOW_DIS_ENCAPSULATION;

	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
	key_basic->n_proto = flow_keys->n_proto;
	key_basic->ip_proto = flow_keys->ip_proto;

	if (flow_keys->addr_proto == ETH_P_IP &&
	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		key_addrs = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
						      target_container);
		key_addrs->v4addrs.src = flow_keys->ipv4_src;
		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
		   dissector_uses_key(flow_dissector,
				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
		key_addrs = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
						      target_container);
		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
		       sizeof(key_addrs->v6addrs));
		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
		key_ports->src = flow_keys->sport;
		key_ports->dst = flow_keys->dport;
	}
}

bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
		      __be16 proto, int nhoff, int hlen)
{
	struct bpf_flow_keys *flow_keys = ctx->flow_keys;
	u32 result;

	/* Pass parameters to the BPF program */
	memset(flow_keys, 0, sizeof(*flow_keys));
	flow_keys->n_proto = proto;
	flow_keys->nhoff = nhoff;
	flow_keys->thoff = flow_keys->nhoff;

	preempt_disable();
	result = BPF_PROG_RUN(prog, ctx);
	preempt_enable();

	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
				   flow_keys->nhoff, hlen);

	return result == BPF_OK;
}
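
/*
 * The clamps above distrust the BPF program's output: whatever it wrote,
 * nhoff is forced into [initial nhoff, hlen] and thoff into [nhoff, hlen].
 * For instance, with nhoff = 14 and hlen = 54, a program that left
 * thoff = 0 would be corrected to thoff = 14, and one that claimed
 * thoff = 200 would be capped at 54.
 */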

/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @net: associated network namespace, derived from @skb if NULL
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow_dissector: list of keys to dissect
 * @target_container: target structure to put dissected values into
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 * @flags: flags that control the dissection process, e.g.
 *         FLOW_DISSECTOR_F_STOP_AT_L3.
 *
 * The function will try to retrieve individual keys into target specified
 * by flow_dissector from either the skbuff or a raw buffer specified by the
 * rest parameters.
 *
 * Caller must take care of zeroing target container memory.
 */
bool __skb_flow_dissect(const struct net *net,
			const struct sk_buff *skb,
			struct flow_dissector *flow_dissector,
			void *target_container,
			void *data, __be16 proto, int nhoff, int hlen,
			unsigned int flags)
{
	struct flow_dissector_key_control *key_control;
	struct flow_dissector_key_basic *key_basic;
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_ports *key_ports;
	struct flow_dissector_key_icmp *key_icmp;
	struct flow_dissector_key_tags *key_tags;
	struct flow_dissector_key_vlan *key_vlan;
	struct bpf_prog *attached = NULL;
	enum flow_dissect_ret fdret;
	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
	int num_hdrs = 0;
	u8 ip_proto = 0;
	bool ret;

	if (!data) {
		data = skb->data;
		proto = skb_vlan_tag_present(skb) ?
			skb->vlan_proto : skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
#if IS_ENABLED(CONFIG_NET_DSA)
		if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
			const struct dsa_device_ops *ops;
			int offset;

			ops = skb->dev->dsa_ptr->tag_ops;
			if (ops->flow_dissect &&
			    !ops->flow_dissect(skb, &proto, &offset)) {
				hlen -= offset;
				nhoff += offset;
			}
		}
#endif
	}

	/* It is ensured by skb_flow_dissector_init() that the control key
	 * will always be present.
	 */
	key_control = skb_flow_dissector_target(flow_dissector,
						FLOW_DISSECTOR_KEY_CONTROL,
						target_container);

	/* It is ensured by skb_flow_dissector_init() that the basic key
	 * will always be present.
	 */
	key_basic = skb_flow_dissector_target(flow_dissector,
					      FLOW_DISSECTOR_KEY_BASIC,
					      target_container);
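
	/* If a BPF flow dissector program is attached to the resolved netns,
	 * it takes precedence over the C dissection below: its bpf_flow_keys
	 * output is translated into the caller's keys and we return early.
	 */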
	if (skb) {
		if (!net) {
			if (skb->dev)
				net = dev_net(skb->dev);
			else if (skb->sk)
				net = sock_net(skb->sk);
		}
	}

	WARN_ON_ONCE(!net);
	if (net) {
		rcu_read_lock();
		attached = rcu_dereference(net->flow_dissector_prog);

		if (attached) {
			struct bpf_flow_keys flow_keys;
			struct bpf_flow_dissector ctx = {
				.flow_keys = &flow_keys,
				.data = data,
				.data_end = data + hlen,
			};
			__be16 n_proto = proto;

			if (skb) {
				ctx.skb = skb;
				/* we can't use 'proto' in the skb case
				 * because it might be set to skb->vlan_proto
				 * which has been pulled from the data
				 */
				n_proto = skb->protocol;
			}

			ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
					       hlen);
			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
						 target_container);
			rcu_read_unlock();
			return ret;
		}
		rcu_read_unlock();
	}

	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct ethhdr *eth = eth_hdr(skb);
		struct flow_dissector_key_eth_addrs *key_eth_addrs;

		key_eth_addrs = skb_flow_dissector_target(flow_dissector,
							  FLOW_DISSECTOR_KEY_ETH_ADDRS,
							  target_container);
		memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
	}
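
	/* From here on the dissector runs as a small state machine: each
	 * L2/L3 protocol handler sets fdret and either jumps back to
	 * proto_again with an updated proto/nhoff, falls through to the
	 * ip_proto switch, or bails out. skb_flow_dissect_allowed() bounds
	 * the number of iterations at MAX_FLOW_DISSECT_HDRS.
	 */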

proto_again:
	fdret = FLOW_DISSECT_RET_CONTINUE;

	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;

		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph || iph->ihl < 5) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
							      target_container);

			memcpy(&key_addrs->v4addrs, &iph->saddr,
			       sizeof(key_addrs->v4addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		}

		if (ip_is_fragment(iph)) {
			key_control->flags |= FLOW_DIS_IS_FRAGMENT;

			if (iph->frag_off & htons(IP_OFFSET)) {
				fdret = FLOW_DISSECT_RET_OUT_GOOD;
				break;
			} else {
				key_control->flags |= FLOW_DIS_FIRST_FRAG;
				if (!(flags &
				      FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
					fdret = FLOW_DISSECT_RET_OUT_GOOD;
					break;
				}
			}
		}

		__skb_flow_dissect_ipv4(skb, flow_dissector,
					target_container, data, iph);

		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
			fdret = FLOW_DISSECT_RET_OUT_GOOD;
			break;
		}

		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;

		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		ip_proto = iph->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
							      target_container);

			memcpy(&key_addrs->v6addrs, &iph->saddr,
			       sizeof(key_addrs->v6addrs));
			key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		}

		if ((dissector_uses_key(flow_dissector,
					FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
		     (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
		    ip6_flowlabel(iph)) {
			__be32 flow_label = ip6_flowlabel(iph);

			if (dissector_uses_key(flow_dissector,
					       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
				key_tags = skb_flow_dissector_target(flow_dissector,
								     FLOW_DISSECTOR_KEY_FLOW_LABEL,
								     target_container);
				key_tags->flow_label = ntohl(flow_label);
			}
			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
				fdret = FLOW_DISSECT_RET_OUT_GOOD;
				break;
			}
		}

		__skb_flow_dissect_ipv6(skb, flow_dissector,
					target_container, data, iph);

		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
			fdret = FLOW_DISSECT_RET_OUT_GOOD;

		break;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan = NULL;
		struct vlan_hdr _vlan;
		__be16 saved_vlan_tpid = proto;

		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
		    skb && skb_vlan_tag_present(skb)) {
			proto = skb->protocol;
		} else {
			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
						    data, hlen, &_vlan);
			if (!vlan) {
				fdret = FLOW_DISSECT_RET_OUT_BAD;
				break;
			}

			proto = vlan->h_vlan_encapsulated_proto;
			nhoff += sizeof(*vlan);
		}

		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
			dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
		} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
			dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
		} else {
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		}

		if (dissector_uses_key(flow_dissector, dissector_vlan)) {
			key_vlan = skb_flow_dissector_target(flow_dissector,
							     dissector_vlan,
							     target_container);

			if (!vlan) {
				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
				key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
			} else {
				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
					VLAN_VID_MASK;
				key_vlan->vlan_priority =
					(ntohs(vlan->h_vlan_TCI) &
					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
			}
			key_vlan->vlan_tpid = saved_vlan_tpid;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;
	}
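
	/* Note the dissector_vlan progression above: the outermost tag fills
	 * FLOW_DISSECTOR_KEY_VLAN, the second one FLOW_DISSECTOR_KEY_CVLAN,
	 * and any deeper tags are skipped over without being recorded.
	 */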
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			proto = htons(ETH_P_IP);
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		case htons(PPP_IPV6):
			proto = htons(ETH_P_IPV6);
			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
			break;
		default:
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}
		break;
	}
	case htons(ETH_P_TIPC): {
		struct tipc_basic_hdr *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
					   data, hlen, &_hdr);
		if (!hdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		if (dissector_uses_key(flow_dissector,
				       FLOW_DISSECTOR_KEY_TIPC)) {
			key_addrs = skb_flow_dissector_target(flow_dissector,
							      FLOW_DISSECTOR_KEY_TIPC,
							      target_container);
			key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
		}
		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;
	}

	case htons(ETH_P_MPLS_UC):
	case htons(ETH_P_MPLS_MC):
		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
						target_container, data,
						nhoff, hlen);
		break;
	case htons(ETH_P_FCOE):
		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		nhoff += FCOE_HEADER_LEN;
		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;

	case htons(ETH_P_ARP):
	case htons(ETH_P_RARP):
		fdret = __skb_flow_dissect_arp(skb, flow_dissector,
					       target_container, data,
					       nhoff, hlen);
		break;

	case htons(ETH_P_BATMAN):
		fdret = __skb_flow_dissect_batadv(skb, key_control, data,
						  &proto, &nhoff, hlen, flags);
		break;

	default:
		fdret = FLOW_DISSECT_RET_OUT_BAD;
		break;
	}

	/* Process result of proto processing */
	switch (fdret) {
	case FLOW_DISSECT_RET_OUT_GOOD:
		goto out_good;
	case FLOW_DISSECT_RET_PROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto proto_again;
		goto out_good;
	case FLOW_DISSECT_RET_CONTINUE:
	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
		break;
	case FLOW_DISSECT_RET_OUT_BAD:
	default:
		goto out_bad;
	}

ip_proto_again:
	fdret = FLOW_DISSECT_RET_CONTINUE;

	switch (ip_proto) {
	case IPPROTO_GRE:
		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
					       target_container, data,
					       &proto, &nhoff, &hlen, flags);
		break;

	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_DEST: {
		u8 _opthdr[2], *opthdr;

		if (proto != htons(ETH_P_IPV6))
			break;

		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
					      data, hlen, &_opthdr);
		if (!opthdr) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		ip_proto = opthdr[0];
		nhoff += (opthdr[1] + 1) << 3;

		fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
		break;
	}
	case NEXTHDR_FRAGMENT: {
		struct frag_hdr _fh, *fh;

		if (proto != htons(ETH_P_IPV6))
			break;

		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
					  data, hlen, &_fh);

		if (!fh) {
			fdret = FLOW_DISSECT_RET_OUT_BAD;
			break;
		}

		key_control->flags |= FLOW_DIS_IS_FRAGMENT;

		nhoff += sizeof(_fh);
		ip_proto = fh->nexthdr;

		if (!(fh->frag_off & htons(IP6_OFFSET))) {
			key_control->flags |= FLOW_DIS_FIRST_FRAG;
			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
				fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
				break;
			}
		}

		fdret = FLOW_DISSECT_RET_OUT_GOOD;
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);

		key_control->flags |= FLOW_DIS_ENCAPSULATION;
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
			fdret = FLOW_DISSECT_RET_OUT_GOOD;
			break;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);

		key_control->flags |= FLOW_DIS_ENCAPSULATION;
		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
			fdret = FLOW_DISSECT_RET_OUT_GOOD;
			break;
		}

		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_MPLS:
		proto = htons(ETH_P_MPLS_UC);
		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
		break;

	case IPPROTO_TCP:
		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
				       data, nhoff, hlen);
		break;

	default:
		break;
	}
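
	/* At this point nhoff points at the innermost transport header the
	 * walk reached, so the generic port/ICMP extraction below reads from
	 * there. Ports are skipped for fragments, where they would be
	 * meaningless (or absent entirely in non-first fragments).
	 */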
	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
	    !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
		key_ports = skb_flow_dissector_target(flow_dissector,
						      FLOW_DISSECTOR_KEY_PORTS,
						      target_container);
		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
							data, hlen);
	}

	if (dissector_uses_key(flow_dissector,
			       FLOW_DISSECTOR_KEY_ICMP)) {
		key_icmp = skb_flow_dissector_target(flow_dissector,
						     FLOW_DISSECTOR_KEY_ICMP,
						     target_container);
		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
	}

	/* Process result of IP proto processing */
	switch (fdret) {
	case FLOW_DISSECT_RET_PROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto proto_again;
		break;
	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
		if (skb_flow_dissect_allowed(&num_hdrs))
			goto ip_proto_again;
		break;
	case FLOW_DISSECT_RET_OUT_GOOD:
	case FLOW_DISSECT_RET_CONTINUE:
		break;
	case FLOW_DISSECT_RET_OUT_BAD:
	default:
		goto out_bad;
	}

out_good:
	ret = true;

out:
	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
	key_basic->n_proto = proto;
	key_basic->ip_proto = ip_proto;

	return ret;

out_bad:
	ret = false;
	goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);

static u32 hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
					     u32 keyval)
{
	return jhash2(words, length, keyval);
}

static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
{
	const void *p = flow;

	BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
	return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
}

static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
	size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);

	BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
	BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
		     sizeof(*flow) - sizeof(flow->addrs));

	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		diff -= sizeof(flow->addrs.v4addrs);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		diff -= sizeof(flow->addrs.v6addrs);
		break;
	case FLOW_DISSECTOR_KEY_TIPC:
		diff -= sizeof(flow->addrs.tipckey);
		break;
	}
	return (sizeof(*flow) - diff) / sizeof(u32);
}
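
/*
 * The two helpers above define the byte window that gets hashed: it starts
 * at FLOW_KEYS_HASH_OFFSET (the basic key, i.e. everything after control)
 * and ends after only as much of the trailing addrs union as addr_type says
 * is valid, so unused address bytes never perturb the hash. The
 * BUILD_BUG_ON()s guarantee the window is whole u32 words, as jhash2()
 * requires.
 */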

__be32 flow_get_u32_src(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.src;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.src);
	case FLOW_DISSECTOR_KEY_TIPC:
		return flow->addrs.tipckey.key;
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_src);

__be32 flow_get_u32_dst(const struct flow_keys *flow)
{
	switch (flow->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		return flow->addrs.v4addrs.dst;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		return (__force __be32)ipv6_addr_hash(
			&flow->addrs.v6addrs.dst);
	default:
		return 0;
	}
}
EXPORT_SYMBOL(flow_get_u32_dst);

static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
	int addr_diff, i;

	switch (keys->control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		addr_diff = (__force u32)keys->addrs.v4addrs.dst -
			    (__force u32)keys->addrs.v4addrs.src;
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		addr_diff = memcmp(&keys->addrs.v6addrs.dst,
				   &keys->addrs.v6addrs.src,
				   sizeof(keys->addrs.v6addrs.dst));
		if ((addr_diff < 0) ||
		    (addr_diff == 0 &&
		     ((__force u16)keys->ports.dst <
		      (__force u16)keys->ports.src))) {
			for (i = 0; i < 4; i++)
				swap(keys->addrs.v6addrs.src.s6_addr32[i],
				     keys->addrs.v6addrs.dst.s6_addr32[i]);
			swap(keys->ports.src, keys->ports.dst);
		}
		break;
	}
}

static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	u32 hash;

	__flow_hash_consistentify(keys);

	hash = __flow_hash_words(flow_keys_hash_start(keys),
				 flow_keys_hash_length(keys), keyval);
	if (!hash)
		hash = 1;

	return hash;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
	__flow_hash_secret_init();
	return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);

static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	skb_flow_dissect_flow_keys(skb, keys,
				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(keys, keyval);
}

struct _flow_keys_digest_data {
	__be16	n_proto;
	u8	ip_proto;
	u8	padding;
	__be32	ports;
	__be32	src;
	__be32	dst;
};

void make_flow_keys_digest(struct flow_keys_digest *digest,
			   const struct flow_keys *flow)
{
	struct _flow_keys_digest_data *data =
		(struct _flow_keys_digest_data *)digest;

	BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));

	memset(digest, 0, sizeof(*digest));

	data->n_proto = flow->basic.n_proto;
	data->ip_proto = flow->basic.ip_proto;
	data->ports = flow->ports.ports;
	data->src = flow->addrs.v4addrs.src;
	data->dst = flow->addrs.v4addrs.dst;
}
EXPORT_SYMBOL(make_flow_keys_digest);

static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;

u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
{
	struct flow_keys keys;

	__flow_hash_secret_init();

	memset(&keys, 0, sizeof(keys));
	__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
			   &keys, NULL, 0, 0, 0,
			   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	return __flow_hash_from_keys(&keys, hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
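
/*
 * __flow_hash_consistentify() already orders addresses and ports, but the
 * full flow_keys_dissector also hashes direction-sensitive extras such as
 * the IPv6 flow label and VLAN tag. The _symmetric variant restricts
 * itself to control/basic/addresses/ports so that the hash of A:p1 -> B:p2
 * matches that of B:p2 -> A:p1, which is what callers that fan out both
 * directions of a connection to one consumer want.
 */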

/**
 * __skb_get_hash: calculate a flow hash
 * @skb: sk_buff to calculate flow hash from
 *
 * This function calculates a flow hash based on src/dst addresses
 * and src/dst port numbers. Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_hash(struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 hash;

	__flow_hash_secret_init();

	hash = ___skb_get_hash(skb, &keys, hashrnd);

	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);

__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys keys;

	return ___skb_get_hash(skb, &keys, perturb);
}
EXPORT_SYMBOL(skb_get_hash_perturb);

u32 __skb_get_poff(const struct sk_buff *skb, void *data,
		   const struct flow_keys_basic *keys, int hlen)
{
	u32 poff = keys->control.thoff;

	/* skip L4 headers for fragments after the first */
	if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
	    !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
		return poff;

	switch (keys->basic.ip_proto) {
	case IPPROTO_TCP: {
		/* access doff as u8 to avoid unaligned access */
		const u8 *doff;
		u8 _doff;

		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
					    data, hlen, &_doff);
		if (!doff)
			return poff;

		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}
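
/*
 * The TCP branch above reads byte 12 of the TCP header, whose high nibble is
 * the data offset in 32-bit words. (*doff & 0xF0) >> 2 is a shortcut for
 * ((*doff >> 4) * 4), i.e. the header length in bytes: for a header with no
 * options, *doff = 0x50, so (0x50 & 0xF0) >> 2 = 20.
 */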

/**
 * skb_get_poff - get the offset to the payload
 * @skb: sk_buff to get the payload offset from
 *
 * The function will get the offset to the payload as far as it could
 * be dissected. The main user is currently BPF, so that we can dynamically
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 */
u32 skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys_basic keys;

	if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
					      NULL, 0, 0, 0, 0))
		return 0;

	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}

__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
{
	memset(keys, 0, sizeof(*keys));

	memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
	       sizeof(keys->addrs.v6addrs.src));
	memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
	       sizeof(keys->addrs.v6addrs.dst));
	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	keys->ports.src = fl6->fl6_sport;
	keys->ports.dst = fl6->fl6_dport;
	keys->keyid.keyid = fl6->fl6_gre_key;
	keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
	keys->basic.ip_proto = fl6->flowi6_proto;

	return flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(__get_hash_from_flowi6);

static const struct flow_dissector_key flow_keys_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC,
		.offset = offsetof(struct flow_keys, addrs.tipckey),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
};

static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
};

static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
};

struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

struct flow_dissector flow_keys_basic_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_basic_dissector);

static int __init init_default_flow_dissectors(void)
{
	skb_flow_dissector_init(&flow_keys_dissector,
				flow_keys_dissector_keys,
				ARRAY_SIZE(flow_keys_dissector_keys));
	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
				flow_keys_dissector_symmetric_keys,
				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
	skb_flow_dissector_init(&flow_keys_basic_dissector,
				flow_keys_basic_dissector_keys,
				ARRAY_SIZE(flow_keys_basic_dissector_keys));
	return 0;
}

core_initcall(init_default_flow_dissectors);
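
/*
 * Illustrative sketch only: with the default dissectors registered above, a
 * typical consumer never calls __skb_flow_dissect() directly; it either asks
 * for the cached skb hash or dissects into a struct flow_keys via the inline
 * helpers from <linux/skbuff.h>:
 *
 *	u32 hash = skb_get_hash(skb);
 *
 *	struct flow_keys keys;
 *	memset(&keys, 0, sizeof(keys));
 *	skb_flow_dissect_flow_keys(skb, &keys, 0);
 */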