1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/kernel.h> 3 #include <linux/skbuff.h> 4 #include <linux/export.h> 5 #include <linux/ip.h> 6 #include <linux/ipv6.h> 7 #include <linux/if_vlan.h> 8 #include <net/dsa.h> 9 #include <net/dst_metadata.h> 10 #include <net/ip.h> 11 #include <net/ipv6.h> 12 #include <net/gre.h> 13 #include <net/pptp.h> 14 #include <net/tipc.h> 15 #include <linux/igmp.h> 16 #include <linux/icmp.h> 17 #include <linux/sctp.h> 18 #include <linux/dccp.h> 19 #include <linux/if_tunnel.h> 20 #include <linux/if_pppox.h> 21 #include <linux/ppp_defs.h> 22 #include <linux/stddef.h> 23 #include <linux/if_ether.h> 24 #include <linux/mpls.h> 25 #include <linux/tcp.h> 26 #include <net/flow_dissector.h> 27 #include <scsi/fc/fc_fcoe.h> 28 #include <uapi/linux/batadv_packet.h> 29 #include <linux/bpf.h> 30 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 31 #include <net/netfilter/nf_conntrack_core.h> 32 #include <net/netfilter/nf_conntrack_labels.h> 33 #endif 34 35 static DEFINE_MUTEX(flow_dissector_mutex); 36 37 static void dissector_set_key(struct flow_dissector *flow_dissector, 38 enum flow_dissector_key_id key_id) 39 { 40 flow_dissector->used_keys |= (1 << key_id); 41 } 42 43 void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 44 const struct flow_dissector_key *key, 45 unsigned int key_count) 46 { 47 unsigned int i; 48 49 memset(flow_dissector, 0, sizeof(*flow_dissector)); 50 51 for (i = 0; i < key_count; i++, key++) { 52 /* User should make sure that every key target offset is withing 53 * boundaries of unsigned short. 54 */ 55 BUG_ON(key->offset > USHRT_MAX); 56 BUG_ON(dissector_uses_key(flow_dissector, 57 key->key_id)); 58 59 dissector_set_key(flow_dissector, key->key_id); 60 flow_dissector->offset[key->key_id] = key->offset; 61 } 62 63 /* Ensure that the dissector always includes control and basic key. 64 * That way we are able to avoid handling lack of these in fast path. 65 */ 66 BUG_ON(!dissector_uses_key(flow_dissector, 67 FLOW_DISSECTOR_KEY_CONTROL)); 68 BUG_ON(!dissector_uses_key(flow_dissector, 69 FLOW_DISSECTOR_KEY_BASIC)); 70 } 71 EXPORT_SYMBOL(skb_flow_dissector_init); 72 73 int skb_flow_dissector_prog_query(const union bpf_attr *attr, 74 union bpf_attr __user *uattr) 75 { 76 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 77 u32 prog_id, prog_cnt = 0, flags = 0; 78 struct bpf_prog *attached; 79 struct net *net; 80 81 if (attr->query.query_flags) 82 return -EINVAL; 83 84 net = get_net_ns_by_fd(attr->query.target_fd); 85 if (IS_ERR(net)) 86 return PTR_ERR(net); 87 88 rcu_read_lock(); 89 attached = rcu_dereference(net->flow_dissector_prog); 90 if (attached) { 91 prog_cnt = 1; 92 prog_id = attached->aux->id; 93 } 94 rcu_read_unlock(); 95 96 put_net(net); 97 98 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 99 return -EFAULT; 100 if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 101 return -EFAULT; 102 103 if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 104 return 0; 105 106 if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) 107 return -EFAULT; 108 109 return 0; 110 } 111 112 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, 113 struct bpf_prog *prog) 114 { 115 struct bpf_prog *attached; 116 struct net *net; 117 int ret = 0; 118 119 net = current->nsproxy->net_ns; 120 mutex_lock(&flow_dissector_mutex); 121 122 if (net == &init_net) { 123 /* BPF flow dissector in the root namespace overrides 124 * any per-net-namespace one. When attaching to root, 125 * make sure we don't have any BPF program attached 126 * to the non-root namespaces. 127 */ 128 struct net *ns; 129 130 for_each_net(ns) { 131 if (ns == &init_net) 132 continue; 133 if (rcu_access_pointer(ns->flow_dissector_prog)) { 134 ret = -EEXIST; 135 goto out; 136 } 137 } 138 } else { 139 /* Make sure root flow dissector is not attached 140 * when attaching to the non-root namespace. 141 */ 142 if (rcu_access_pointer(init_net.flow_dissector_prog)) { 143 ret = -EEXIST; 144 goto out; 145 } 146 } 147 148 attached = rcu_dereference_protected(net->flow_dissector_prog, 149 lockdep_is_held(&flow_dissector_mutex)); 150 if (attached == prog) { 151 /* The same program cannot be attached twice */ 152 ret = -EINVAL; 153 goto out; 154 } 155 rcu_assign_pointer(net->flow_dissector_prog, prog); 156 if (attached) 157 bpf_prog_put(attached); 158 out: 159 mutex_unlock(&flow_dissector_mutex); 160 return ret; 161 } 162 163 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) 164 { 165 struct bpf_prog *attached; 166 struct net *net; 167 168 net = current->nsproxy->net_ns; 169 mutex_lock(&flow_dissector_mutex); 170 attached = rcu_dereference_protected(net->flow_dissector_prog, 171 lockdep_is_held(&flow_dissector_mutex)); 172 if (!attached) { 173 mutex_unlock(&flow_dissector_mutex); 174 return -ENOENT; 175 } 176 RCU_INIT_POINTER(net->flow_dissector_prog, NULL); 177 bpf_prog_put(attached); 178 mutex_unlock(&flow_dissector_mutex); 179 return 0; 180 } 181 182 /** 183 * __skb_flow_get_ports - extract the upper layer ports and return them 184 * @skb: sk_buff to extract the ports from 185 * @thoff: transport header offset 186 * @ip_proto: protocol for which to get port offset 187 * @data: raw buffer pointer to the packet, if NULL use skb->data 188 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 189 * 190 * The function will try to retrieve the ports at offset thoff + poff where poff 191 * is the protocol port offset returned from proto_ports_offset 192 */ 193 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, 194 void *data, int hlen) 195 { 196 int poff = proto_ports_offset(ip_proto); 197 198 if (!data) { 199 data = skb->data; 200 hlen = skb_headlen(skb); 201 } 202 203 if (poff >= 0) { 204 __be32 *ports, _ports; 205 206 ports = __skb_header_pointer(skb, thoff + poff, 207 sizeof(_ports), data, hlen, &_ports); 208 if (ports) 209 return *ports; 210 } 211 212 return 0; 213 } 214 EXPORT_SYMBOL(__skb_flow_get_ports); 215 216 static bool icmp_has_id(u8 type) 217 { 218 switch (type) { 219 case ICMP_ECHO: 220 case ICMP_ECHOREPLY: 221 case ICMP_TIMESTAMP: 222 case ICMP_TIMESTAMPREPLY: 223 case ICMPV6_ECHO_REQUEST: 224 case ICMPV6_ECHO_REPLY: 225 return true; 226 } 227 228 return false; 229 } 230 231 /** 232 * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields 233 * @skb: sk_buff to extract from 234 * @key_icmp: struct flow_dissector_key_icmp to fill 235 * @data: raw buffer pointer to the packet 236 * @toff: offset to extract at 237 * @hlen: packet header length 238 */ 239 void skb_flow_get_icmp_tci(const struct sk_buff *skb, 240 struct flow_dissector_key_icmp *key_icmp, 241 void *data, int thoff, int hlen) 242 { 243 struct icmphdr *ih, _ih; 244 245 ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih); 246 if (!ih) 247 return; 248 249 key_icmp->type = ih->type; 250 key_icmp->code = ih->code; 251 252 /* As we use 0 to signal that the Id field is not present, 253 * avoid confusion with packets without such field 254 */ 255 if (icmp_has_id(ih->type)) 256 key_icmp->id = ih->un.echo.id ? : 1; 257 else 258 key_icmp->id = 0; 259 } 260 EXPORT_SYMBOL(skb_flow_get_icmp_tci); 261 262 /* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet 263 * using skb_flow_get_icmp_tci(). 264 */ 265 static void __skb_flow_dissect_icmp(const struct sk_buff *skb, 266 struct flow_dissector *flow_dissector, 267 void *target_container, 268 void *data, int thoff, int hlen) 269 { 270 struct flow_dissector_key_icmp *key_icmp; 271 272 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP)) 273 return; 274 275 key_icmp = skb_flow_dissector_target(flow_dissector, 276 FLOW_DISSECTOR_KEY_ICMP, 277 target_container); 278 279 skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); 280 } 281 282 void skb_flow_dissect_meta(const struct sk_buff *skb, 283 struct flow_dissector *flow_dissector, 284 void *target_container) 285 { 286 struct flow_dissector_key_meta *meta; 287 288 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META)) 289 return; 290 291 meta = skb_flow_dissector_target(flow_dissector, 292 FLOW_DISSECTOR_KEY_META, 293 target_container); 294 meta->ingress_ifindex = skb->skb_iif; 295 } 296 EXPORT_SYMBOL(skb_flow_dissect_meta); 297 298 static void 299 skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, 300 struct flow_dissector *flow_dissector, 301 void *target_container) 302 { 303 struct flow_dissector_key_control *ctrl; 304 305 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) 306 return; 307 308 ctrl = skb_flow_dissector_target(flow_dissector, 309 FLOW_DISSECTOR_KEY_ENC_CONTROL, 310 target_container); 311 ctrl->addr_type = type; 312 } 313 314 void 315 skb_flow_dissect_ct(const struct sk_buff *skb, 316 struct flow_dissector *flow_dissector, 317 void *target_container, 318 u16 *ctinfo_map, 319 size_t mapsize) 320 { 321 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 322 struct flow_dissector_key_ct *key; 323 enum ip_conntrack_info ctinfo; 324 struct nf_conn_labels *cl; 325 struct nf_conn *ct; 326 327 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT)) 328 return; 329 330 ct = nf_ct_get(skb, &ctinfo); 331 if (!ct) 332 return; 333 334 key = skb_flow_dissector_target(flow_dissector, 335 FLOW_DISSECTOR_KEY_CT, 336 target_container); 337 338 if (ctinfo < mapsize) 339 key->ct_state = ctinfo_map[ctinfo]; 340 #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) 341 key->ct_zone = ct->zone.id; 342 #endif 343 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 344 key->ct_mark = ct->mark; 345 #endif 346 347 cl = nf_ct_labels_find(ct); 348 if (cl) 349 memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels)); 350 #endif /* CONFIG_NF_CONNTRACK */ 351 } 352 EXPORT_SYMBOL(skb_flow_dissect_ct); 353 354 void 355 skb_flow_dissect_tunnel_info(const struct sk_buff *skb, 356 struct flow_dissector *flow_dissector, 357 void *target_container) 358 { 359 struct ip_tunnel_info *info; 360 struct ip_tunnel_key *key; 361 362 /* A quick check to see if there might be something to do. */ 363 if (!dissector_uses_key(flow_dissector, 364 FLOW_DISSECTOR_KEY_ENC_KEYID) && 365 !dissector_uses_key(flow_dissector, 366 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && 367 !dissector_uses_key(flow_dissector, 368 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && 369 !dissector_uses_key(flow_dissector, 370 FLOW_DISSECTOR_KEY_ENC_CONTROL) && 371 !dissector_uses_key(flow_dissector, 372 FLOW_DISSECTOR_KEY_ENC_PORTS) && 373 !dissector_uses_key(flow_dissector, 374 FLOW_DISSECTOR_KEY_ENC_IP) && 375 !dissector_uses_key(flow_dissector, 376 FLOW_DISSECTOR_KEY_ENC_OPTS)) 377 return; 378 379 info = skb_tunnel_info(skb); 380 if (!info) 381 return; 382 383 key = &info->key; 384 385 switch (ip_tunnel_info_af(info)) { 386 case AF_INET: 387 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, 388 flow_dissector, 389 target_container); 390 if (dissector_uses_key(flow_dissector, 391 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { 392 struct flow_dissector_key_ipv4_addrs *ipv4; 393 394 ipv4 = skb_flow_dissector_target(flow_dissector, 395 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, 396 target_container); 397 ipv4->src = key->u.ipv4.src; 398 ipv4->dst = key->u.ipv4.dst; 399 } 400 break; 401 case AF_INET6: 402 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, 403 flow_dissector, 404 target_container); 405 if (dissector_uses_key(flow_dissector, 406 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { 407 struct flow_dissector_key_ipv6_addrs *ipv6; 408 409 ipv6 = skb_flow_dissector_target(flow_dissector, 410 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, 411 target_container); 412 ipv6->src = key->u.ipv6.src; 413 ipv6->dst = key->u.ipv6.dst; 414 } 415 break; 416 } 417 418 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { 419 struct flow_dissector_key_keyid *keyid; 420 421 keyid = skb_flow_dissector_target(flow_dissector, 422 FLOW_DISSECTOR_KEY_ENC_KEYID, 423 target_container); 424 keyid->keyid = tunnel_id_to_key32(key->tun_id); 425 } 426 427 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { 428 struct flow_dissector_key_ports *tp; 429 430 tp = skb_flow_dissector_target(flow_dissector, 431 FLOW_DISSECTOR_KEY_ENC_PORTS, 432 target_container); 433 tp->src = key->tp_src; 434 tp->dst = key->tp_dst; 435 } 436 437 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { 438 struct flow_dissector_key_ip *ip; 439 440 ip = skb_flow_dissector_target(flow_dissector, 441 FLOW_DISSECTOR_KEY_ENC_IP, 442 target_container); 443 ip->tos = key->tos; 444 ip->ttl = key->ttl; 445 } 446 447 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { 448 struct flow_dissector_key_enc_opts *enc_opt; 449 450 enc_opt = skb_flow_dissector_target(flow_dissector, 451 FLOW_DISSECTOR_KEY_ENC_OPTS, 452 target_container); 453 454 if (info->options_len) { 455 enc_opt->len = info->options_len; 456 ip_tunnel_info_opts_get(enc_opt->data, info); 457 enc_opt->dst_opt_type = info->key.tun_flags & 458 TUNNEL_OPTIONS_PRESENT; 459 } 460 } 461 } 462 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); 463 464 static enum flow_dissect_ret 465 __skb_flow_dissect_mpls(const struct sk_buff *skb, 466 struct flow_dissector *flow_dissector, 467 void *target_container, void *data, int nhoff, int hlen) 468 { 469 struct flow_dissector_key_keyid *key_keyid; 470 struct mpls_label *hdr, _hdr[2]; 471 u32 entry, label; 472 473 if (!dissector_uses_key(flow_dissector, 474 FLOW_DISSECTOR_KEY_MPLS_ENTROPY) && 475 !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) 476 return FLOW_DISSECT_RET_OUT_GOOD; 477 478 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, 479 hlen, &_hdr); 480 if (!hdr) 481 return FLOW_DISSECT_RET_OUT_BAD; 482 483 entry = ntohl(hdr[0].entry); 484 label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; 485 486 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) { 487 struct flow_dissector_key_mpls *key_mpls; 488 489 key_mpls = skb_flow_dissector_target(flow_dissector, 490 FLOW_DISSECTOR_KEY_MPLS, 491 target_container); 492 key_mpls->mpls_label = label; 493 key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK) 494 >> MPLS_LS_TTL_SHIFT; 495 key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK) 496 >> MPLS_LS_TC_SHIFT; 497 key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK) 498 >> MPLS_LS_S_SHIFT; 499 } 500 501 if (label == MPLS_LABEL_ENTROPY) { 502 key_keyid = skb_flow_dissector_target(flow_dissector, 503 FLOW_DISSECTOR_KEY_MPLS_ENTROPY, 504 target_container); 505 key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK); 506 } 507 return FLOW_DISSECT_RET_OUT_GOOD; 508 } 509 510 static enum flow_dissect_ret 511 __skb_flow_dissect_arp(const struct sk_buff *skb, 512 struct flow_dissector *flow_dissector, 513 void *target_container, void *data, int nhoff, int hlen) 514 { 515 struct flow_dissector_key_arp *key_arp; 516 struct { 517 unsigned char ar_sha[ETH_ALEN]; 518 unsigned char ar_sip[4]; 519 unsigned char ar_tha[ETH_ALEN]; 520 unsigned char ar_tip[4]; 521 } *arp_eth, _arp_eth; 522 const struct arphdr *arp; 523 struct arphdr _arp; 524 525 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) 526 return FLOW_DISSECT_RET_OUT_GOOD; 527 528 arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, 529 hlen, &_arp); 530 if (!arp) 531 return FLOW_DISSECT_RET_OUT_BAD; 532 533 if (arp->ar_hrd != htons(ARPHRD_ETHER) || 534 arp->ar_pro != htons(ETH_P_IP) || 535 arp->ar_hln != ETH_ALEN || 536 arp->ar_pln != 4 || 537 (arp->ar_op != htons(ARPOP_REPLY) && 538 arp->ar_op != htons(ARPOP_REQUEST))) 539 return FLOW_DISSECT_RET_OUT_BAD; 540 541 arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), 542 sizeof(_arp_eth), data, 543 hlen, &_arp_eth); 544 if (!arp_eth) 545 return FLOW_DISSECT_RET_OUT_BAD; 546 547 key_arp = skb_flow_dissector_target(flow_dissector, 548 FLOW_DISSECTOR_KEY_ARP, 549 target_container); 550 551 memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip)); 552 memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip)); 553 554 /* Only store the lower byte of the opcode; 555 * this covers ARPOP_REPLY and ARPOP_REQUEST. 556 */ 557 key_arp->op = ntohs(arp->ar_op) & 0xff; 558 559 ether_addr_copy(key_arp->sha, arp_eth->ar_sha); 560 ether_addr_copy(key_arp->tha, arp_eth->ar_tha); 561 562 return FLOW_DISSECT_RET_OUT_GOOD; 563 } 564 565 static enum flow_dissect_ret 566 __skb_flow_dissect_gre(const struct sk_buff *skb, 567 struct flow_dissector_key_control *key_control, 568 struct flow_dissector *flow_dissector, 569 void *target_container, void *data, 570 __be16 *p_proto, int *p_nhoff, int *p_hlen, 571 unsigned int flags) 572 { 573 struct flow_dissector_key_keyid *key_keyid; 574 struct gre_base_hdr *hdr, _hdr; 575 int offset = 0; 576 u16 gre_ver; 577 578 hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), 579 data, *p_hlen, &_hdr); 580 if (!hdr) 581 return FLOW_DISSECT_RET_OUT_BAD; 582 583 /* Only look inside GRE without routing */ 584 if (hdr->flags & GRE_ROUTING) 585 return FLOW_DISSECT_RET_OUT_GOOD; 586 587 /* Only look inside GRE for version 0 and 1 */ 588 gre_ver = ntohs(hdr->flags & GRE_VERSION); 589 if (gre_ver > 1) 590 return FLOW_DISSECT_RET_OUT_GOOD; 591 592 *p_proto = hdr->protocol; 593 if (gre_ver) { 594 /* Version1 must be PPTP, and check the flags */ 595 if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) 596 return FLOW_DISSECT_RET_OUT_GOOD; 597 } 598 599 offset += sizeof(struct gre_base_hdr); 600 601 if (hdr->flags & GRE_CSUM) 602 offset += FIELD_SIZEOF(struct gre_full_hdr, csum) + 603 FIELD_SIZEOF(struct gre_full_hdr, reserved1); 604 605 if (hdr->flags & GRE_KEY) { 606 const __be32 *keyid; 607 __be32 _keyid; 608 609 keyid = __skb_header_pointer(skb, *p_nhoff + offset, 610 sizeof(_keyid), 611 data, *p_hlen, &_keyid); 612 if (!keyid) 613 return FLOW_DISSECT_RET_OUT_BAD; 614 615 if (dissector_uses_key(flow_dissector, 616 FLOW_DISSECTOR_KEY_GRE_KEYID)) { 617 key_keyid = skb_flow_dissector_target(flow_dissector, 618 FLOW_DISSECTOR_KEY_GRE_KEYID, 619 target_container); 620 if (gre_ver == 0) 621 key_keyid->keyid = *keyid; 622 else 623 key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; 624 } 625 offset += FIELD_SIZEOF(struct gre_full_hdr, key); 626 } 627 628 if (hdr->flags & GRE_SEQ) 629 offset += FIELD_SIZEOF(struct pptp_gre_header, seq); 630 631 if (gre_ver == 0) { 632 if (*p_proto == htons(ETH_P_TEB)) { 633 const struct ethhdr *eth; 634 struct ethhdr _eth; 635 636 eth = __skb_header_pointer(skb, *p_nhoff + offset, 637 sizeof(_eth), 638 data, *p_hlen, &_eth); 639 if (!eth) 640 return FLOW_DISSECT_RET_OUT_BAD; 641 *p_proto = eth->h_proto; 642 offset += sizeof(*eth); 643 644 /* Cap headers that we access via pointers at the 645 * end of the Ethernet header as our maximum alignment 646 * at that point is only 2 bytes. 647 */ 648 if (NET_IP_ALIGN) 649 *p_hlen = *p_nhoff + offset; 650 } 651 } else { /* version 1, must be PPTP */ 652 u8 _ppp_hdr[PPP_HDRLEN]; 653 u8 *ppp_hdr; 654 655 if (hdr->flags & GRE_ACK) 656 offset += FIELD_SIZEOF(struct pptp_gre_header, ack); 657 658 ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, 659 sizeof(_ppp_hdr), 660 data, *p_hlen, _ppp_hdr); 661 if (!ppp_hdr) 662 return FLOW_DISSECT_RET_OUT_BAD; 663 664 switch (PPP_PROTOCOL(ppp_hdr)) { 665 case PPP_IP: 666 *p_proto = htons(ETH_P_IP); 667 break; 668 case PPP_IPV6: 669 *p_proto = htons(ETH_P_IPV6); 670 break; 671 default: 672 /* Could probably catch some more like MPLS */ 673 break; 674 } 675 676 offset += PPP_HDRLEN; 677 } 678 679 *p_nhoff += offset; 680 key_control->flags |= FLOW_DIS_ENCAPSULATION; 681 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 682 return FLOW_DISSECT_RET_OUT_GOOD; 683 684 return FLOW_DISSECT_RET_PROTO_AGAIN; 685 } 686 687 /** 688 * __skb_flow_dissect_batadv() - dissect batman-adv header 689 * @skb: sk_buff to with the batman-adv header 690 * @key_control: flow dissectors control key 691 * @data: raw buffer pointer to the packet, if NULL use skb->data 692 * @p_proto: pointer used to update the protocol to process next 693 * @p_nhoff: pointer used to update inner network header offset 694 * @hlen: packet header length 695 * @flags: any combination of FLOW_DISSECTOR_F_* 696 * 697 * ETH_P_BATMAN packets are tried to be dissected. Only 698 * &struct batadv_unicast packets are actually processed because they contain an 699 * inner ethernet header and are usually followed by actual network header. This 700 * allows the flow dissector to continue processing the packet. 701 * 702 * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found, 703 * FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation, 704 * otherwise FLOW_DISSECT_RET_OUT_BAD 705 */ 706 static enum flow_dissect_ret 707 __skb_flow_dissect_batadv(const struct sk_buff *skb, 708 struct flow_dissector_key_control *key_control, 709 void *data, __be16 *p_proto, int *p_nhoff, int hlen, 710 unsigned int flags) 711 { 712 struct { 713 struct batadv_unicast_packet batadv_unicast; 714 struct ethhdr eth; 715 } *hdr, _hdr; 716 717 hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen, 718 &_hdr); 719 if (!hdr) 720 return FLOW_DISSECT_RET_OUT_BAD; 721 722 if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION) 723 return FLOW_DISSECT_RET_OUT_BAD; 724 725 if (hdr->batadv_unicast.packet_type != BATADV_UNICAST) 726 return FLOW_DISSECT_RET_OUT_BAD; 727 728 *p_proto = hdr->eth.h_proto; 729 *p_nhoff += sizeof(*hdr); 730 731 key_control->flags |= FLOW_DIS_ENCAPSULATION; 732 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) 733 return FLOW_DISSECT_RET_OUT_GOOD; 734 735 return FLOW_DISSECT_RET_PROTO_AGAIN; 736 } 737 738 static void 739 __skb_flow_dissect_tcp(const struct sk_buff *skb, 740 struct flow_dissector *flow_dissector, 741 void *target_container, void *data, int thoff, int hlen) 742 { 743 struct flow_dissector_key_tcp *key_tcp; 744 struct tcphdr *th, _th; 745 746 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP)) 747 return; 748 749 th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th); 750 if (!th) 751 return; 752 753 if (unlikely(__tcp_hdrlen(th) < sizeof(_th))) 754 return; 755 756 key_tcp = skb_flow_dissector_target(flow_dissector, 757 FLOW_DISSECTOR_KEY_TCP, 758 target_container); 759 key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF)); 760 } 761 762 static void 763 __skb_flow_dissect_ipv4(const struct sk_buff *skb, 764 struct flow_dissector *flow_dissector, 765 void *target_container, void *data, const struct iphdr *iph) 766 { 767 struct flow_dissector_key_ip *key_ip; 768 769 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) 770 return; 771 772 key_ip = skb_flow_dissector_target(flow_dissector, 773 FLOW_DISSECTOR_KEY_IP, 774 target_container); 775 key_ip->tos = iph->tos; 776 key_ip->ttl = iph->ttl; 777 } 778 779 static void 780 __skb_flow_dissect_ipv6(const struct sk_buff *skb, 781 struct flow_dissector *flow_dissector, 782 void *target_container, void *data, const struct ipv6hdr *iph) 783 { 784 struct flow_dissector_key_ip *key_ip; 785 786 if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP)) 787 return; 788 789 key_ip = skb_flow_dissector_target(flow_dissector, 790 FLOW_DISSECTOR_KEY_IP, 791 target_container); 792 key_ip->tos = ipv6_get_dsfield(iph); 793 key_ip->ttl = iph->hop_limit; 794 } 795 796 /* Maximum number of protocol headers that can be parsed in 797 * __skb_flow_dissect 798 */ 799 #define MAX_FLOW_DISSECT_HDRS 15 800 801 static bool skb_flow_dissect_allowed(int *num_hdrs) 802 { 803 ++*num_hdrs; 804 805 return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS); 806 } 807 808 static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, 809 struct flow_dissector *flow_dissector, 810 void *target_container) 811 { 812 struct flow_dissector_key_control *key_control; 813 struct flow_dissector_key_basic *key_basic; 814 struct flow_dissector_key_addrs *key_addrs; 815 struct flow_dissector_key_ports *key_ports; 816 struct flow_dissector_key_tags *key_tags; 817 818 key_control = skb_flow_dissector_target(flow_dissector, 819 FLOW_DISSECTOR_KEY_CONTROL, 820 target_container); 821 key_control->thoff = flow_keys->thoff; 822 if (flow_keys->is_frag) 823 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 824 if (flow_keys->is_first_frag) 825 key_control->flags |= FLOW_DIS_FIRST_FRAG; 826 if (flow_keys->is_encap) 827 key_control->flags |= FLOW_DIS_ENCAPSULATION; 828 829 key_basic = skb_flow_dissector_target(flow_dissector, 830 FLOW_DISSECTOR_KEY_BASIC, 831 target_container); 832 key_basic->n_proto = flow_keys->n_proto; 833 key_basic->ip_proto = flow_keys->ip_proto; 834 835 if (flow_keys->addr_proto == ETH_P_IP && 836 dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 837 key_addrs = skb_flow_dissector_target(flow_dissector, 838 FLOW_DISSECTOR_KEY_IPV4_ADDRS, 839 target_container); 840 key_addrs->v4addrs.src = flow_keys->ipv4_src; 841 key_addrs->v4addrs.dst = flow_keys->ipv4_dst; 842 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 843 } else if (flow_keys->addr_proto == ETH_P_IPV6 && 844 dissector_uses_key(flow_dissector, 845 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 846 key_addrs = skb_flow_dissector_target(flow_dissector, 847 FLOW_DISSECTOR_KEY_IPV6_ADDRS, 848 target_container); 849 memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src, 850 sizeof(key_addrs->v6addrs)); 851 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 852 } 853 854 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { 855 key_ports = skb_flow_dissector_target(flow_dissector, 856 FLOW_DISSECTOR_KEY_PORTS, 857 target_container); 858 key_ports->src = flow_keys->sport; 859 key_ports->dst = flow_keys->dport; 860 } 861 862 if (dissector_uses_key(flow_dissector, 863 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 864 key_tags = skb_flow_dissector_target(flow_dissector, 865 FLOW_DISSECTOR_KEY_FLOW_LABEL, 866 target_container); 867 key_tags->flow_label = ntohl(flow_keys->flow_label); 868 } 869 } 870 871 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, 872 __be16 proto, int nhoff, int hlen, unsigned int flags) 873 { 874 struct bpf_flow_keys *flow_keys = ctx->flow_keys; 875 u32 result; 876 877 /* Pass parameters to the BPF program */ 878 memset(flow_keys, 0, sizeof(*flow_keys)); 879 flow_keys->n_proto = proto; 880 flow_keys->nhoff = nhoff; 881 flow_keys->thoff = flow_keys->nhoff; 882 883 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != 884 (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); 885 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != 886 (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 887 BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != 888 (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); 889 flow_keys->flags = flags; 890 891 preempt_disable(); 892 result = BPF_PROG_RUN(prog, ctx); 893 preempt_enable(); 894 895 flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); 896 flow_keys->thoff = clamp_t(u16, flow_keys->thoff, 897 flow_keys->nhoff, hlen); 898 899 return result == BPF_OK; 900 } 901 902 /** 903 * __skb_flow_dissect - extract the flow_keys struct and return it 904 * @net: associated network namespace, derived from @skb if NULL 905 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified 906 * @flow_dissector: list of keys to dissect 907 * @target_container: target structure to put dissected values into 908 * @data: raw buffer pointer to the packet, if NULL use skb->data 909 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol 910 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) 911 * @hlen: packet header length, if @data is NULL use skb_headlen(skb) 912 * @flags: flags that control the dissection process, e.g. 913 * FLOW_DISSECTOR_F_STOP_AT_ENCAP. 914 * 915 * The function will try to retrieve individual keys into target specified 916 * by flow_dissector from either the skbuff or a raw buffer specified by the 917 * rest parameters. 918 * 919 * Caller must take care of zeroing target container memory. 920 */ 921 bool __skb_flow_dissect(const struct net *net, 922 const struct sk_buff *skb, 923 struct flow_dissector *flow_dissector, 924 void *target_container, 925 void *data, __be16 proto, int nhoff, int hlen, 926 unsigned int flags) 927 { 928 struct flow_dissector_key_control *key_control; 929 struct flow_dissector_key_basic *key_basic; 930 struct flow_dissector_key_addrs *key_addrs; 931 struct flow_dissector_key_ports *key_ports; 932 struct flow_dissector_key_tags *key_tags; 933 struct flow_dissector_key_vlan *key_vlan; 934 struct bpf_prog *attached = NULL; 935 enum flow_dissect_ret fdret; 936 enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; 937 int num_hdrs = 0; 938 u8 ip_proto = 0; 939 bool ret; 940 941 if (!data) { 942 data = skb->data; 943 proto = skb_vlan_tag_present(skb) ? 944 skb->vlan_proto : skb->protocol; 945 nhoff = skb_network_offset(skb); 946 hlen = skb_headlen(skb); 947 #if IS_ENABLED(CONFIG_NET_DSA) 948 if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { 949 const struct dsa_device_ops *ops; 950 int offset; 951 952 ops = skb->dev->dsa_ptr->tag_ops; 953 if (ops->flow_dissect && 954 !ops->flow_dissect(skb, &proto, &offset)) { 955 hlen -= offset; 956 nhoff += offset; 957 } 958 } 959 #endif 960 } 961 962 /* It is ensured by skb_flow_dissector_init() that control key will 963 * be always present. 964 */ 965 key_control = skb_flow_dissector_target(flow_dissector, 966 FLOW_DISSECTOR_KEY_CONTROL, 967 target_container); 968 969 /* It is ensured by skb_flow_dissector_init() that basic key will 970 * be always present. 971 */ 972 key_basic = skb_flow_dissector_target(flow_dissector, 973 FLOW_DISSECTOR_KEY_BASIC, 974 target_container); 975 976 if (skb) { 977 if (!net) { 978 if (skb->dev) 979 net = dev_net(skb->dev); 980 else if (skb->sk) 981 net = sock_net(skb->sk); 982 } 983 } 984 985 WARN_ON_ONCE(!net); 986 if (net) { 987 rcu_read_lock(); 988 attached = rcu_dereference(init_net.flow_dissector_prog); 989 990 if (!attached) 991 attached = rcu_dereference(net->flow_dissector_prog); 992 993 if (attached) { 994 struct bpf_flow_keys flow_keys; 995 struct bpf_flow_dissector ctx = { 996 .flow_keys = &flow_keys, 997 .data = data, 998 .data_end = data + hlen, 999 }; 1000 __be16 n_proto = proto; 1001 1002 if (skb) { 1003 ctx.skb = skb; 1004 /* we can't use 'proto' in the skb case 1005 * because it might be set to skb->vlan_proto 1006 * which has been pulled from the data 1007 */ 1008 n_proto = skb->protocol; 1009 } 1010 1011 ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, 1012 hlen, flags); 1013 __skb_flow_bpf_to_target(&flow_keys, flow_dissector, 1014 target_container); 1015 rcu_read_unlock(); 1016 return ret; 1017 } 1018 rcu_read_unlock(); 1019 } 1020 1021 if (dissector_uses_key(flow_dissector, 1022 FLOW_DISSECTOR_KEY_ETH_ADDRS)) { 1023 struct ethhdr *eth = eth_hdr(skb); 1024 struct flow_dissector_key_eth_addrs *key_eth_addrs; 1025 1026 key_eth_addrs = skb_flow_dissector_target(flow_dissector, 1027 FLOW_DISSECTOR_KEY_ETH_ADDRS, 1028 target_container); 1029 memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); 1030 } 1031 1032 proto_again: 1033 fdret = FLOW_DISSECT_RET_CONTINUE; 1034 1035 switch (proto) { 1036 case htons(ETH_P_IP): { 1037 const struct iphdr *iph; 1038 struct iphdr _iph; 1039 1040 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 1041 if (!iph || iph->ihl < 5) { 1042 fdret = FLOW_DISSECT_RET_OUT_BAD; 1043 break; 1044 } 1045 1046 nhoff += iph->ihl * 4; 1047 1048 ip_proto = iph->protocol; 1049 1050 if (dissector_uses_key(flow_dissector, 1051 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { 1052 key_addrs = skb_flow_dissector_target(flow_dissector, 1053 FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1054 target_container); 1055 1056 memcpy(&key_addrs->v4addrs, &iph->saddr, 1057 sizeof(key_addrs->v4addrs)); 1058 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1059 } 1060 1061 if (ip_is_fragment(iph)) { 1062 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 1063 1064 if (iph->frag_off & htons(IP_OFFSET)) { 1065 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1066 break; 1067 } else { 1068 key_control->flags |= FLOW_DIS_FIRST_FRAG; 1069 if (!(flags & 1070 FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) { 1071 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1072 break; 1073 } 1074 } 1075 } 1076 1077 __skb_flow_dissect_ipv4(skb, flow_dissector, 1078 target_container, data, iph); 1079 1080 break; 1081 } 1082 case htons(ETH_P_IPV6): { 1083 const struct ipv6hdr *iph; 1084 struct ipv6hdr _iph; 1085 1086 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 1087 if (!iph) { 1088 fdret = FLOW_DISSECT_RET_OUT_BAD; 1089 break; 1090 } 1091 1092 ip_proto = iph->nexthdr; 1093 nhoff += sizeof(struct ipv6hdr); 1094 1095 if (dissector_uses_key(flow_dissector, 1096 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 1097 key_addrs = skb_flow_dissector_target(flow_dissector, 1098 FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1099 target_container); 1100 1101 memcpy(&key_addrs->v6addrs, &iph->saddr, 1102 sizeof(key_addrs->v6addrs)); 1103 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1104 } 1105 1106 if ((dissector_uses_key(flow_dissector, 1107 FLOW_DISSECTOR_KEY_FLOW_LABEL) || 1108 (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && 1109 ip6_flowlabel(iph)) { 1110 __be32 flow_label = ip6_flowlabel(iph); 1111 1112 if (dissector_uses_key(flow_dissector, 1113 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 1114 key_tags = skb_flow_dissector_target(flow_dissector, 1115 FLOW_DISSECTOR_KEY_FLOW_LABEL, 1116 target_container); 1117 key_tags->flow_label = ntohl(flow_label); 1118 } 1119 if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) { 1120 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1121 break; 1122 } 1123 } 1124 1125 __skb_flow_dissect_ipv6(skb, flow_dissector, 1126 target_container, data, iph); 1127 1128 break; 1129 } 1130 case htons(ETH_P_8021AD): 1131 case htons(ETH_P_8021Q): { 1132 const struct vlan_hdr *vlan = NULL; 1133 struct vlan_hdr _vlan; 1134 __be16 saved_vlan_tpid = proto; 1135 1136 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX && 1137 skb && skb_vlan_tag_present(skb)) { 1138 proto = skb->protocol; 1139 } else { 1140 vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), 1141 data, hlen, &_vlan); 1142 if (!vlan) { 1143 fdret = FLOW_DISSECT_RET_OUT_BAD; 1144 break; 1145 } 1146 1147 proto = vlan->h_vlan_encapsulated_proto; 1148 nhoff += sizeof(*vlan); 1149 } 1150 1151 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) { 1152 dissector_vlan = FLOW_DISSECTOR_KEY_VLAN; 1153 } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) { 1154 dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN; 1155 } else { 1156 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1157 break; 1158 } 1159 1160 if (dissector_uses_key(flow_dissector, dissector_vlan)) { 1161 key_vlan = skb_flow_dissector_target(flow_dissector, 1162 dissector_vlan, 1163 target_container); 1164 1165 if (!vlan) { 1166 key_vlan->vlan_id = skb_vlan_tag_get_id(skb); 1167 key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb); 1168 } else { 1169 key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & 1170 VLAN_VID_MASK; 1171 key_vlan->vlan_priority = 1172 (ntohs(vlan->h_vlan_TCI) & 1173 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 1174 } 1175 key_vlan->vlan_tpid = saved_vlan_tpid; 1176 } 1177 1178 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1179 break; 1180 } 1181 case htons(ETH_P_PPP_SES): { 1182 struct { 1183 struct pppoe_hdr hdr; 1184 __be16 proto; 1185 } *hdr, _hdr; 1186 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); 1187 if (!hdr) { 1188 fdret = FLOW_DISSECT_RET_OUT_BAD; 1189 break; 1190 } 1191 1192 proto = hdr->proto; 1193 nhoff += PPPOE_SES_HLEN; 1194 switch (proto) { 1195 case htons(PPP_IP): 1196 proto = htons(ETH_P_IP); 1197 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1198 break; 1199 case htons(PPP_IPV6): 1200 proto = htons(ETH_P_IPV6); 1201 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1202 break; 1203 default: 1204 fdret = FLOW_DISSECT_RET_OUT_BAD; 1205 break; 1206 } 1207 break; 1208 } 1209 case htons(ETH_P_TIPC): { 1210 struct tipc_basic_hdr *hdr, _hdr; 1211 1212 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), 1213 data, hlen, &_hdr); 1214 if (!hdr) { 1215 fdret = FLOW_DISSECT_RET_OUT_BAD; 1216 break; 1217 } 1218 1219 if (dissector_uses_key(flow_dissector, 1220 FLOW_DISSECTOR_KEY_TIPC)) { 1221 key_addrs = skb_flow_dissector_target(flow_dissector, 1222 FLOW_DISSECTOR_KEY_TIPC, 1223 target_container); 1224 key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); 1225 key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; 1226 } 1227 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1228 break; 1229 } 1230 1231 case htons(ETH_P_MPLS_UC): 1232 case htons(ETH_P_MPLS_MC): 1233 fdret = __skb_flow_dissect_mpls(skb, flow_dissector, 1234 target_container, data, 1235 nhoff, hlen); 1236 break; 1237 case htons(ETH_P_FCOE): 1238 if ((hlen - nhoff) < FCOE_HEADER_LEN) { 1239 fdret = FLOW_DISSECT_RET_OUT_BAD; 1240 break; 1241 } 1242 1243 nhoff += FCOE_HEADER_LEN; 1244 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1245 break; 1246 1247 case htons(ETH_P_ARP): 1248 case htons(ETH_P_RARP): 1249 fdret = __skb_flow_dissect_arp(skb, flow_dissector, 1250 target_container, data, 1251 nhoff, hlen); 1252 break; 1253 1254 case htons(ETH_P_BATMAN): 1255 fdret = __skb_flow_dissect_batadv(skb, key_control, data, 1256 &proto, &nhoff, hlen, flags); 1257 break; 1258 1259 default: 1260 fdret = FLOW_DISSECT_RET_OUT_BAD; 1261 break; 1262 } 1263 1264 /* Process result of proto processing */ 1265 switch (fdret) { 1266 case FLOW_DISSECT_RET_OUT_GOOD: 1267 goto out_good; 1268 case FLOW_DISSECT_RET_PROTO_AGAIN: 1269 if (skb_flow_dissect_allowed(&num_hdrs)) 1270 goto proto_again; 1271 goto out_good; 1272 case FLOW_DISSECT_RET_CONTINUE: 1273 case FLOW_DISSECT_RET_IPPROTO_AGAIN: 1274 break; 1275 case FLOW_DISSECT_RET_OUT_BAD: 1276 default: 1277 goto out_bad; 1278 } 1279 1280 ip_proto_again: 1281 fdret = FLOW_DISSECT_RET_CONTINUE; 1282 1283 switch (ip_proto) { 1284 case IPPROTO_GRE: 1285 fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, 1286 target_container, data, 1287 &proto, &nhoff, &hlen, flags); 1288 break; 1289 1290 case NEXTHDR_HOP: 1291 case NEXTHDR_ROUTING: 1292 case NEXTHDR_DEST: { 1293 u8 _opthdr[2], *opthdr; 1294 1295 if (proto != htons(ETH_P_IPV6)) 1296 break; 1297 1298 opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), 1299 data, hlen, &_opthdr); 1300 if (!opthdr) { 1301 fdret = FLOW_DISSECT_RET_OUT_BAD; 1302 break; 1303 } 1304 1305 ip_proto = opthdr[0]; 1306 nhoff += (opthdr[1] + 1) << 3; 1307 1308 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; 1309 break; 1310 } 1311 case NEXTHDR_FRAGMENT: { 1312 struct frag_hdr _fh, *fh; 1313 1314 if (proto != htons(ETH_P_IPV6)) 1315 break; 1316 1317 fh = __skb_header_pointer(skb, nhoff, sizeof(_fh), 1318 data, hlen, &_fh); 1319 1320 if (!fh) { 1321 fdret = FLOW_DISSECT_RET_OUT_BAD; 1322 break; 1323 } 1324 1325 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 1326 1327 nhoff += sizeof(_fh); 1328 ip_proto = fh->nexthdr; 1329 1330 if (!(fh->frag_off & htons(IP6_OFFSET))) { 1331 key_control->flags |= FLOW_DIS_FIRST_FRAG; 1332 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { 1333 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; 1334 break; 1335 } 1336 } 1337 1338 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1339 break; 1340 } 1341 case IPPROTO_IPIP: 1342 proto = htons(ETH_P_IP); 1343 1344 key_control->flags |= FLOW_DIS_ENCAPSULATION; 1345 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { 1346 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1347 break; 1348 } 1349 1350 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1351 break; 1352 1353 case IPPROTO_IPV6: 1354 proto = htons(ETH_P_IPV6); 1355 1356 key_control->flags |= FLOW_DIS_ENCAPSULATION; 1357 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { 1358 fdret = FLOW_DISSECT_RET_OUT_GOOD; 1359 break; 1360 } 1361 1362 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1363 break; 1364 1365 1366 case IPPROTO_MPLS: 1367 proto = htons(ETH_P_MPLS_UC); 1368 fdret = FLOW_DISSECT_RET_PROTO_AGAIN; 1369 break; 1370 1371 case IPPROTO_TCP: 1372 __skb_flow_dissect_tcp(skb, flow_dissector, target_container, 1373 data, nhoff, hlen); 1374 break; 1375 1376 case IPPROTO_ICMP: 1377 case IPPROTO_ICMPV6: 1378 __skb_flow_dissect_icmp(skb, flow_dissector, target_container, 1379 data, nhoff, hlen); 1380 break; 1381 1382 default: 1383 break; 1384 } 1385 1386 if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && 1387 !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { 1388 key_ports = skb_flow_dissector_target(flow_dissector, 1389 FLOW_DISSECTOR_KEY_PORTS, 1390 target_container); 1391 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, 1392 data, hlen); 1393 } 1394 1395 /* Process result of IP proto processing */ 1396 switch (fdret) { 1397 case FLOW_DISSECT_RET_PROTO_AGAIN: 1398 if (skb_flow_dissect_allowed(&num_hdrs)) 1399 goto proto_again; 1400 break; 1401 case FLOW_DISSECT_RET_IPPROTO_AGAIN: 1402 if (skb_flow_dissect_allowed(&num_hdrs)) 1403 goto ip_proto_again; 1404 break; 1405 case FLOW_DISSECT_RET_OUT_GOOD: 1406 case FLOW_DISSECT_RET_CONTINUE: 1407 break; 1408 case FLOW_DISSECT_RET_OUT_BAD: 1409 default: 1410 goto out_bad; 1411 } 1412 1413 out_good: 1414 ret = true; 1415 1416 out: 1417 key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); 1418 key_basic->n_proto = proto; 1419 key_basic->ip_proto = ip_proto; 1420 1421 return ret; 1422 1423 out_bad: 1424 ret = false; 1425 goto out; 1426 } 1427 EXPORT_SYMBOL(__skb_flow_dissect); 1428 1429 static siphash_key_t hashrnd __read_mostly; 1430 static __always_inline void __flow_hash_secret_init(void) 1431 { 1432 net_get_random_once(&hashrnd, sizeof(hashrnd)); 1433 } 1434 1435 static const void *flow_keys_hash_start(const struct flow_keys *flow) 1436 { 1437 BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); 1438 return &flow->FLOW_KEYS_HASH_START_FIELD; 1439 } 1440 1441 static inline size_t flow_keys_hash_length(const struct flow_keys *flow) 1442 { 1443 size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); 1444 1445 BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); 1446 1447 switch (flow->control.addr_type) { 1448 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1449 diff -= sizeof(flow->addrs.v4addrs); 1450 break; 1451 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1452 diff -= sizeof(flow->addrs.v6addrs); 1453 break; 1454 case FLOW_DISSECTOR_KEY_TIPC: 1455 diff -= sizeof(flow->addrs.tipckey); 1456 break; 1457 } 1458 return sizeof(*flow) - diff; 1459 } 1460 1461 __be32 flow_get_u32_src(const struct flow_keys *flow) 1462 { 1463 switch (flow->control.addr_type) { 1464 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1465 return flow->addrs.v4addrs.src; 1466 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1467 return (__force __be32)ipv6_addr_hash( 1468 &flow->addrs.v6addrs.src); 1469 case FLOW_DISSECTOR_KEY_TIPC: 1470 return flow->addrs.tipckey.key; 1471 default: 1472 return 0; 1473 } 1474 } 1475 EXPORT_SYMBOL(flow_get_u32_src); 1476 1477 __be32 flow_get_u32_dst(const struct flow_keys *flow) 1478 { 1479 switch (flow->control.addr_type) { 1480 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1481 return flow->addrs.v4addrs.dst; 1482 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1483 return (__force __be32)ipv6_addr_hash( 1484 &flow->addrs.v6addrs.dst); 1485 default: 1486 return 0; 1487 } 1488 } 1489 EXPORT_SYMBOL(flow_get_u32_dst); 1490 1491 /* Sort the source and destination IP (and the ports if the IP are the same), 1492 * to have consistent hash within the two directions 1493 */ 1494 static inline void __flow_hash_consistentify(struct flow_keys *keys) 1495 { 1496 int addr_diff, i; 1497 1498 switch (keys->control.addr_type) { 1499 case FLOW_DISSECTOR_KEY_IPV4_ADDRS: 1500 addr_diff = (__force u32)keys->addrs.v4addrs.dst - 1501 (__force u32)keys->addrs.v4addrs.src; 1502 if ((addr_diff < 0) || 1503 (addr_diff == 0 && 1504 ((__force u16)keys->ports.dst < 1505 (__force u16)keys->ports.src))) { 1506 swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); 1507 swap(keys->ports.src, keys->ports.dst); 1508 } 1509 break; 1510 case FLOW_DISSECTOR_KEY_IPV6_ADDRS: 1511 addr_diff = memcmp(&keys->addrs.v6addrs.dst, 1512 &keys->addrs.v6addrs.src, 1513 sizeof(keys->addrs.v6addrs.dst)); 1514 if ((addr_diff < 0) || 1515 (addr_diff == 0 && 1516 ((__force u16)keys->ports.dst < 1517 (__force u16)keys->ports.src))) { 1518 for (i = 0; i < 4; i++) 1519 swap(keys->addrs.v6addrs.src.s6_addr32[i], 1520 keys->addrs.v6addrs.dst.s6_addr32[i]); 1521 swap(keys->ports.src, keys->ports.dst); 1522 } 1523 break; 1524 } 1525 } 1526 1527 static inline u32 __flow_hash_from_keys(struct flow_keys *keys, 1528 const siphash_key_t *keyval) 1529 { 1530 u32 hash; 1531 1532 __flow_hash_consistentify(keys); 1533 1534 hash = siphash(flow_keys_hash_start(keys), 1535 flow_keys_hash_length(keys), keyval); 1536 if (!hash) 1537 hash = 1; 1538 1539 return hash; 1540 } 1541 1542 u32 flow_hash_from_keys(struct flow_keys *keys) 1543 { 1544 __flow_hash_secret_init(); 1545 return __flow_hash_from_keys(keys, &hashrnd); 1546 } 1547 EXPORT_SYMBOL(flow_hash_from_keys); 1548 1549 static inline u32 ___skb_get_hash(const struct sk_buff *skb, 1550 struct flow_keys *keys, 1551 const siphash_key_t *keyval) 1552 { 1553 skb_flow_dissect_flow_keys(skb, keys, 1554 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 1555 1556 return __flow_hash_from_keys(keys, keyval); 1557 } 1558 1559 struct _flow_keys_digest_data { 1560 __be16 n_proto; 1561 u8 ip_proto; 1562 u8 padding; 1563 __be32 ports; 1564 __be32 src; 1565 __be32 dst; 1566 }; 1567 1568 void make_flow_keys_digest(struct flow_keys_digest *digest, 1569 const struct flow_keys *flow) 1570 { 1571 struct _flow_keys_digest_data *data = 1572 (struct _flow_keys_digest_data *)digest; 1573 1574 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); 1575 1576 memset(digest, 0, sizeof(*digest)); 1577 1578 data->n_proto = flow->basic.n_proto; 1579 data->ip_proto = flow->basic.ip_proto; 1580 data->ports = flow->ports.ports; 1581 data->src = flow->addrs.v4addrs.src; 1582 data->dst = flow->addrs.v4addrs.dst; 1583 } 1584 EXPORT_SYMBOL(make_flow_keys_digest); 1585 1586 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; 1587 1588 u32 __skb_get_hash_symmetric(const struct sk_buff *skb) 1589 { 1590 struct flow_keys keys; 1591 1592 __flow_hash_secret_init(); 1593 1594 memset(&keys, 0, sizeof(keys)); 1595 __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, 1596 &keys, NULL, 0, 0, 0, 1597 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); 1598 1599 return __flow_hash_from_keys(&keys, &hashrnd); 1600 } 1601 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); 1602 1603 /** 1604 * __skb_get_hash: calculate a flow hash 1605 * @skb: sk_buff to calculate flow hash from 1606 * 1607 * This function calculates a flow hash based on src/dst addresses 1608 * and src/dst port numbers. Sets hash in skb to non-zero hash value 1609 * on success, zero indicates no valid hash. Also, sets l4_hash in skb 1610 * if hash is a canonical 4-tuple hash over transport ports. 1611 */ 1612 void __skb_get_hash(struct sk_buff *skb) 1613 { 1614 struct flow_keys keys; 1615 u32 hash; 1616 1617 __flow_hash_secret_init(); 1618 1619 hash = ___skb_get_hash(skb, &keys, &hashrnd); 1620 1621 __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); 1622 } 1623 EXPORT_SYMBOL(__skb_get_hash); 1624 1625 __u32 skb_get_hash_perturb(const struct sk_buff *skb, 1626 const siphash_key_t *perturb) 1627 { 1628 struct flow_keys keys; 1629 1630 return ___skb_get_hash(skb, &keys, perturb); 1631 } 1632 EXPORT_SYMBOL(skb_get_hash_perturb); 1633 1634 u32 __skb_get_poff(const struct sk_buff *skb, void *data, 1635 const struct flow_keys_basic *keys, int hlen) 1636 { 1637 u32 poff = keys->control.thoff; 1638 1639 /* skip L4 headers for fragments after the first */ 1640 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) && 1641 !(keys->control.flags & FLOW_DIS_FIRST_FRAG)) 1642 return poff; 1643 1644 switch (keys->basic.ip_proto) { 1645 case IPPROTO_TCP: { 1646 /* access doff as u8 to avoid unaligned access */ 1647 const u8 *doff; 1648 u8 _doff; 1649 1650 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), 1651 data, hlen, &_doff); 1652 if (!doff) 1653 return poff; 1654 1655 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); 1656 break; 1657 } 1658 case IPPROTO_UDP: 1659 case IPPROTO_UDPLITE: 1660 poff += sizeof(struct udphdr); 1661 break; 1662 /* For the rest, we do not really care about header 1663 * extensions at this point for now. 1664 */ 1665 case IPPROTO_ICMP: 1666 poff += sizeof(struct icmphdr); 1667 break; 1668 case IPPROTO_ICMPV6: 1669 poff += sizeof(struct icmp6hdr); 1670 break; 1671 case IPPROTO_IGMP: 1672 poff += sizeof(struct igmphdr); 1673 break; 1674 case IPPROTO_DCCP: 1675 poff += sizeof(struct dccp_hdr); 1676 break; 1677 case IPPROTO_SCTP: 1678 poff += sizeof(struct sctphdr); 1679 break; 1680 } 1681 1682 return poff; 1683 } 1684 1685 /** 1686 * skb_get_poff - get the offset to the payload 1687 * @skb: sk_buff to get the payload offset from 1688 * 1689 * The function will get the offset to the payload as far as it could 1690 * be dissected. The main user is currently BPF, so that we can dynamically 1691 * truncate packets without needing to push actual payload to the user 1692 * space and can analyze headers only, instead. 1693 */ 1694 u32 skb_get_poff(const struct sk_buff *skb) 1695 { 1696 struct flow_keys_basic keys; 1697 1698 if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, 1699 NULL, 0, 0, 0, 0)) 1700 return 0; 1701 1702 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); 1703 } 1704 1705 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) 1706 { 1707 memset(keys, 0, sizeof(*keys)); 1708 1709 memcpy(&keys->addrs.v6addrs.src, &fl6->saddr, 1710 sizeof(keys->addrs.v6addrs.src)); 1711 memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr, 1712 sizeof(keys->addrs.v6addrs.dst)); 1713 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1714 keys->ports.src = fl6->fl6_sport; 1715 keys->ports.dst = fl6->fl6_dport; 1716 keys->keyid.keyid = fl6->fl6_gre_key; 1717 keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); 1718 keys->basic.ip_proto = fl6->flowi6_proto; 1719 1720 return flow_hash_from_keys(keys); 1721 } 1722 EXPORT_SYMBOL(__get_hash_from_flowi6); 1723 1724 static const struct flow_dissector_key flow_keys_dissector_keys[] = { 1725 { 1726 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1727 .offset = offsetof(struct flow_keys, control), 1728 }, 1729 { 1730 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1731 .offset = offsetof(struct flow_keys, basic), 1732 }, 1733 { 1734 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1735 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1736 }, 1737 { 1738 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1739 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1740 }, 1741 { 1742 .key_id = FLOW_DISSECTOR_KEY_TIPC, 1743 .offset = offsetof(struct flow_keys, addrs.tipckey), 1744 }, 1745 { 1746 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1747 .offset = offsetof(struct flow_keys, ports), 1748 }, 1749 { 1750 .key_id = FLOW_DISSECTOR_KEY_VLAN, 1751 .offset = offsetof(struct flow_keys, vlan), 1752 }, 1753 { 1754 .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, 1755 .offset = offsetof(struct flow_keys, tags), 1756 }, 1757 { 1758 .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, 1759 .offset = offsetof(struct flow_keys, keyid), 1760 }, 1761 }; 1762 1763 static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { 1764 { 1765 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1766 .offset = offsetof(struct flow_keys, control), 1767 }, 1768 { 1769 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1770 .offset = offsetof(struct flow_keys, basic), 1771 }, 1772 { 1773 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, 1774 .offset = offsetof(struct flow_keys, addrs.v4addrs), 1775 }, 1776 { 1777 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, 1778 .offset = offsetof(struct flow_keys, addrs.v6addrs), 1779 }, 1780 { 1781 .key_id = FLOW_DISSECTOR_KEY_PORTS, 1782 .offset = offsetof(struct flow_keys, ports), 1783 }, 1784 }; 1785 1786 static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = { 1787 { 1788 .key_id = FLOW_DISSECTOR_KEY_CONTROL, 1789 .offset = offsetof(struct flow_keys, control), 1790 }, 1791 { 1792 .key_id = FLOW_DISSECTOR_KEY_BASIC, 1793 .offset = offsetof(struct flow_keys, basic), 1794 }, 1795 }; 1796 1797 struct flow_dissector flow_keys_dissector __read_mostly; 1798 EXPORT_SYMBOL(flow_keys_dissector); 1799 1800 struct flow_dissector flow_keys_basic_dissector __read_mostly; 1801 EXPORT_SYMBOL(flow_keys_basic_dissector); 1802 1803 static int __init init_default_flow_dissectors(void) 1804 { 1805 skb_flow_dissector_init(&flow_keys_dissector, 1806 flow_keys_dissector_keys, 1807 ARRAY_SIZE(flow_keys_dissector_keys)); 1808 skb_flow_dissector_init(&flow_keys_dissector_symmetric, 1809 flow_keys_dissector_symmetric_keys, 1810 ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); 1811 skb_flow_dissector_init(&flow_keys_basic_dissector, 1812 flow_keys_basic_dissector_keys, 1813 ARRAY_SIZE(flow_keys_basic_dissector_keys)); 1814 return 0; 1815 } 1816 1817 core_initcall(init_default_flow_dissectors); 1818