/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
		     unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
		if (is_v6 && addr->atype == IPVL_IPV6 &&
		    ipv6_addr_equal(&addr->ip6addr, iaddr))
			return addr;
		else if (!is_v6 && addr->atype == IPVL_IPV4 &&
			 addr->ip4addr.s_addr ==
				((struct in_addr *)iaddr)->s_addr)
			return addr;
	}
	return NULL;
}

void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}
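/* Deletion below only unhashes the entry. Lookups walk the chain under
 * rcu_read_lock(), so callers are expected to free the ipvl_addr only
 * after an RCU grace period (e.g. via kfree_rcu()).
 */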
void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;

	list_for_each_entry(addr, &ipvlan->addrs, anode) {
		if ((is_v6 && addr->atype == IPVL_IPV6 &&
		     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
		    (!is_v6 && addr->atype == IPVL_IPV4 &&
		     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
			return addr;
	}
	return NULL;
}

bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;

	ASSERT_RTNL();

	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
			return true;
	}
	return false;
}

static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation pkts need different treatment */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			struct icmp6hdr *icmph;

			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
				return NULL;

			ip6h = ipv6_hdr(skb);
			icmph = (struct icmp6hdr *)(ip6h + 1);

			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
				/* Need to access the ipv6 address in body */
				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
						+ sizeof(struct in6_addr))))
					return NULL;

				ip6h = ipv6_hdr(skb);
				icmph = (struct icmp6hdr *)(ip6h + 1);
			}

			*type = IPVL_ICMPV6;
			lyr3h = icmph;
		}
		break;
	}
	default:
		return NULL;
	}

	return lyr3h;
}

unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}
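/* Deferred multicast/broadcast delivery: drain the per-port backlog
 * that ipvlan_multicast_enqueue() fills, clone each skb once per slave
 * whose mac-filter bit matches, and finally transmit the original on
 * the master (TX path) or free it (RX path).
 */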
void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
	}
}

static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need the
	 * device-up check as well as the skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		skb->dev = dev;
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}
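/* Map an L3 header (as classified by ipvlan_get_L3_hdr()) to the
 * owning slave's ipvl_addr, matching on the source or destination
 * address as requested. ARP and Neighbour Solicitation carry the
 * address of interest in the payload rather than the IP header,
 * hence the extra cases.
 */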
static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	if (addr_type == IPVL_IPV6) {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
	} else if (addr_type == IPVL_ICMPV6) {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that the Neighbour Solicitation ICMPv6 packets
		 * are handled to avoid DAD issues.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
	} else if (addr_type == IPVL_IPV4) {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
	} else if (addr_type == IPVL_ARP) {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
	}

	return addr;
}

static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->ifindex,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.flowi4_mark = skb->mark,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}

static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	int err, ret = NET_XMIT_DROP;
	struct flowi6 fl6 = {
		.flowi6_oif = dev->ifindex,
		.daddr = ip6h->daddr,
		.saddr = ip6h->saddr,
		.flowi6_flags = FLOWI_FLAG_ANYSRC,
		.flowlabel = ip6_flowinfo(ip6h),
		.flowi6_mark = skb->mark,
		.flowi6_proto = ip6h->nexthdr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		ret = dst->error;
		dst_release(dst);
		goto err;
	}
	skb_dst_set(skb, dst);
	err = ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
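/* TX path for l3/l3s modes: the slave has no L2 identity of its own,
 * so the Ethernet header is stripped and the packet is re-routed and
 * sent via ip_local_out()/ip6_local_out() in the namespace of the
 * master device (the skb has already crossed namespaces by this point).
 */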
static int ipvlan_process_outbound(struct sk_buff *skb)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* In this mode we don't care about multicast and broadcast traffic */
	if (is_multicast_ether_addr(ethh->h_dest)) {
		pr_warn_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
		goto out;
	}

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * still carry an L2 header, which needs to be discarded before the
	 * packet is processed further in the net-ns of the main-device.
	 */
	if (skb_mac_header_was_set(skb)) {
		skb_pull(skb, sizeof(*ethh));
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}

static void ipvlan_multicast_enqueue(struct ipvl_port *port,
				     struct sk_buff *skb, bool tx_pkt)
{
	if (skb->protocol == htons(ETH_P_PAUSE)) {
		kfree_skb(skb);
		return;
	}

	/* Record whether the deferred packet came from the TX or the RX
	 * path; inferring this from the mac-addresses on the packet would
	 * lead to erroneous decisions. (This would be the case with
	 * loopback-mode on the master device or hair-pin mode on the
	 * switch.)
	 */
	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;

	spin_lock(&port->backlog.lock);
	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
		if (skb->dev)
			dev_hold(skb->dev);
		__skb_queue_tail(&port->backlog, skb);
		spin_unlock(&port->backlog.lock);
		schedule_work(&port->wq);
	} else {
		spin_unlock(&port->backlog.lock);
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
	}
}

static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	if (!ipvlan_is_vepa(ipvlan->port)) {
		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
		if (addr) {
			if (ipvlan_is_private(ipvlan->port)) {
				consume_skb(skb);
				return NET_XMIT_DROP;
			}
			return ipvlan_rcv_frame(addr, &skb, true);
		}
	}
out:
	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return ipvlan_process_outbound(skb);
}
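/* TX path for l2 mode: a destination mac equal to the source mac means
 * the frame is addressed to the shared master mac (all slaves use it),
 * so it is delivered to a sibling slave or forwarded to the master;
 * multicast is deferred to the backlog, and everything else egresses
 * through the master device.
 */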
static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (!ipvlan_is_vepa(ipvlan->port) &&
	    ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
			if (addr) {
				if (ipvlan_is_private(ipvlan->port)) {
					consume_skb(skb);
					return NET_XMIT_DROP;
				}
				return ipvlan_rcv_frame(addr, &skb, true);
			}
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* Packet definitely does not belong to any of the
		 * virtual devices, but the dest is local. So forward
		 * the skb to the main-dev. At the RX side we just return
		 * RX_HANDLER_PASS for it to be processed further on the
		 * stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		ipvlan_skb_crossing_ns(skb, NULL);
		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
		return NET_XMIT_SUCCESS;
	}

	skb->dev = ipvlan->phy_dev;
	return dev_queue_xmit(skb);
}

int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
	case IPVLAN_MODE_L3S:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
		  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}

static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, pskb, false);

out:
	return ret;
}
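/* RX path for l2 mode: multicast frames that did not originate from a
 * local slave are cloned onto the backlog for per-slave delivery while
 * the original is passed up to the master; unicast frames are handled
 * exactly like l3 mode.
 */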
static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;

	if (is_multicast_ether_addr(eth->h_dest)) {
		if (ipvlan_external_frame(skb, port)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			/* External frames are queued for device local
			 * distribution, but a copy is given to the master
			 * straight away to avoid sending duplicates later
			 * when the work-queue processes this frame. This is
			 * achieved by returning RX_HANDLER_PASS.
			 */
			if (nskb) {
				ipvlan_skb_crossing_ns(nskb, NULL);
				ipvlan_multicast_enqueue(port, nskb, false);
			}
		}
	} else {
		/* Perform like l3 mode for non-multicast packets */
		ret = ipvlan_handle_mode_l3(pskb, port);
	}

	return ret;
}

rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);

	if (!port)
		return RX_HANDLER_PASS;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_handle_mode_l2(pskb, port);
	case IPVLAN_MODE_L3:
		return ipvlan_handle_mode_l3(pskb, port);
	case IPVLAN_MODE_L3S:
		return RX_HANDLER_PASS;
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
		  port->mode);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}

static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_addr *addr = NULL;
	struct ipvl_port *port;
	void *lyr3h;
	int addr_type;

	if (!dev || !netif_is_ipvlan_port(dev))
		goto out;

	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		goto out;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
out:
	return addr;
}

struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr;
	struct net_device *sdev;

	addr = ipvlan_skb_to_addr(skb, dev);
	if (!addr)
		goto out;

	sdev = addr->master->dev;
	switch (proto) {
	case AF_INET:
	{
		int err;
		struct iphdr *ip4h = ip_hdr(skb);

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			goto out;
		break;
	}
	case AF_INET6:
	{
		struct dst_entry *dst;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int flags = RT6_LOOKUP_F_HAS_SADDR;
		struct flowi6 fl6 = {
			.flowi6_iif = sdev->ifindex,
			.daddr = ip6h->daddr,
			.saddr = ip6h->saddr,
			.flowlabel = ip6_flowinfo(ip6h),
			.flowi6_mark = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
		skb_dst_set(skb, dst);
		break;
	}
	default:
		break;
	}

out:
	return skb;
}

unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr;
	unsigned int len;

	addr = ipvlan_skb_to_addr(skb, skb->dev);
	if (!addr)
		goto out;

	skb->dev = addr->master->dev;
	len = skb->len + ETH_HLEN;
	ipvlan_count_rx(addr->master, len, true, false);
out:
	return NF_ACCEPT;
}
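/* NOTE: ipvlan_handle_frame() is meant to be attached to the master
 * via netdev_rx_handler_register(), while ipvlan_l3_rcv() and
 * ipvlan_nf_input() back the l3s mode (the l3mdev receive hook and an
 * NF_INET_LOCAL_IN netfilter hook, respectively); the registration
 * sites live elsewhere in the ipvlan driver.
 */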