/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
		     unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
		if (is_v6 && addr->atype == IPVL_IPV6 &&
		    ipv6_addr_equal(&addr->ip6addr, iaddr))
			return addr;
		else if (!is_v6 && addr->atype == IPVL_IPV4 &&
			 addr->ip4addr.s_addr ==
				((struct in_addr *)iaddr)->s_addr)
			return addr;
	}
	return NULL;
}

void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}
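/* NOTE (editor's illustration, not part of the original driver): lookup
 * and insertion above must fold a given address into the same bucket of
 * port->hlhead[]. For an IPv4 address this amounts to:
 *
 *	hash = jhash_1word(addr->ip4addr.s_addr, ipvlan_jhash_secret) &
 *	       IPVLAN_HASH_MASK;
 *	hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
 *
 * The per-boot random ipvlan_jhash_secret keeps the bucket distribution
 * unpredictable to remote senders.
 */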
void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;

	list_for_each_entry(addr, &ipvlan->addrs, anode) {
		if ((is_v6 && addr->atype == IPVL_IPV6 &&
		     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
		    (!is_v6 && addr->atype == IPVL_IPV4 &&
		     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
			return addr;
	}
	return NULL;
}

bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;

	ASSERT_RTNL();

	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
			return true;
	}
	return false;
}

static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation pkts need different treatment */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			struct icmp6hdr *icmph;

			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
				return NULL;

			ip6h = ipv6_hdr(skb);
			icmph = (struct icmp6hdr *)(ip6h + 1);

			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
				/* Need to access the ipv6 address in body */
				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
						+ sizeof(struct in6_addr))))
					return NULL;

				ip6h = ipv6_hdr(skb);
				icmph = (struct icmp6hdr *)(ip6h + 1);
			}

			*type = IPVL_ICMPV6;
			lyr3h = icmph;
		}
		break;
	}
	default:
		return NULL;
	}

	return lyr3h;
}

unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}
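/* NOTE (editor's note, added for clarity): ipvlan_mac_hash() hashes the
 * last four octets of the MAC (addr + 2) because multicast MACs carry a
 * fixed prefix (01:00:5e:... for IPv4, 33:33:... for IPv6) while the
 * group-dependent bits all live in the trailing octets; the leading
 * bytes would add no entropy to the filter hash.
 */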
void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
	}
}

static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need the
	 * device-up check as well as the skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}
	ipvlan_skb_crossing_ns(skb, dev);

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}
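/* NOTE (editor's note, added for clarity): the return value of
 * ipvlan_rcv_frame() encodes who now owns the skb. For slave-to-slave
 * traffic (local == true) the skb is re-injected via dev_forward_skb()
 * and RX_HANDLER_CONSUMED is reported; for packets arriving through the
 * master (local == false) only skb->dev is retargeted and
 * RX_HANDLER_ANOTHER asks the stack to re-run rx-handler processing for
 * the new device.
 */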
353 */ 354 ndmh = (struct nd_msg *)lyr3h; 355 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { 356 i6addr = &ndmh->target; 357 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 358 } 359 } else if (addr_type == IPVL_IPV4) { 360 struct iphdr *ip4h; 361 __be32 *i4addr; 362 363 ip4h = (struct iphdr *)lyr3h; 364 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; 365 addr = ipvlan_ht_addr_lookup(port, i4addr, false); 366 } else if (addr_type == IPVL_ARP) { 367 struct arphdr *arph; 368 unsigned char *arp_ptr; 369 __be32 dip; 370 371 arph = (struct arphdr *)lyr3h; 372 arp_ptr = (unsigned char *)(arph + 1); 373 if (use_dest) 374 arp_ptr += (2 * port->dev->addr_len) + 4; 375 else 376 arp_ptr += port->dev->addr_len; 377 378 memcpy(&dip, arp_ptr, 4); 379 addr = ipvlan_ht_addr_lookup(port, &dip, false); 380 } 381 382 return addr; 383 } 384 385 static int ipvlan_process_v4_outbound(struct sk_buff *skb) 386 { 387 const struct iphdr *ip4h = ip_hdr(skb); 388 struct net_device *dev = skb->dev; 389 struct net *net = dev_net(dev); 390 struct rtable *rt; 391 int err, ret = NET_XMIT_DROP; 392 struct flowi4 fl4 = { 393 .flowi4_oif = dev->ifindex, 394 .flowi4_tos = RT_TOS(ip4h->tos), 395 .flowi4_flags = FLOWI_FLAG_ANYSRC, 396 .daddr = ip4h->daddr, 397 .saddr = ip4h->saddr, 398 }; 399 400 rt = ip_route_output_flow(net, &fl4, NULL); 401 if (IS_ERR(rt)) 402 goto err; 403 404 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { 405 ip_rt_put(rt); 406 goto err; 407 } 408 skb_dst_set(skb, &rt->dst); 409 err = ip_local_out(net, skb->sk, skb); 410 if (unlikely(net_xmit_eval(err))) 411 dev->stats.tx_errors++; 412 else 413 ret = NET_XMIT_SUCCESS; 414 goto out; 415 err: 416 dev->stats.tx_errors++; 417 kfree_skb(skb); 418 out: 419 return ret; 420 } 421 422 static int ipvlan_process_v6_outbound(struct sk_buff *skb) 423 { 424 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 425 struct net_device *dev = skb->dev; 426 struct net *net = dev_net(dev); 427 struct dst_entry *dst; 428 int err, ret = NET_XMIT_DROP; 429 struct flowi6 fl6 = { 430 .flowi6_oif = dev->ifindex, 431 .daddr = ip6h->daddr, 432 .saddr = ip6h->saddr, 433 .flowi6_flags = FLOWI_FLAG_ANYSRC, 434 .flowlabel = ip6_flowinfo(ip6h), 435 .flowi6_mark = skb->mark, 436 .flowi6_proto = ip6h->nexthdr, 437 }; 438 439 dst = ip6_route_output(net, NULL, &fl6); 440 if (dst->error) { 441 ret = dst->error; 442 dst_release(dst); 443 goto err; 444 } 445 skb_dst_set(skb, dst); 446 err = ip6_local_out(net, skb->sk, skb); 447 if (unlikely(net_xmit_eval(err))) 448 dev->stats.tx_errors++; 449 else 450 ret = NET_XMIT_SUCCESS; 451 goto out; 452 err: 453 dev->stats.tx_errors++; 454 kfree_skb(skb); 455 out: 456 return ret; 457 } 458 459 static int ipvlan_process_outbound(struct sk_buff *skb) 460 { 461 struct ethhdr *ethh = eth_hdr(skb); 462 int ret = NET_XMIT_DROP; 463 464 /* In this mode we dont care about multicast and broadcast traffic */ 465 if (is_multicast_ether_addr(ethh->h_dest)) { 466 pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", 467 ntohs(skb->protocol)); 468 kfree_skb(skb); 469 goto out; 470 } 471 472 /* The ipvlan is a pseudo-L2 device, so the packets that we receive 473 * will have L2; which need to discarded and processed further 474 * in the net-ns of the main-device. 
475 */ 476 if (skb_mac_header_was_set(skb)) { 477 skb_pull(skb, sizeof(*ethh)); 478 skb->mac_header = (typeof(skb->mac_header))~0U; 479 skb_reset_network_header(skb); 480 } 481 482 if (skb->protocol == htons(ETH_P_IPV6)) 483 ret = ipvlan_process_v6_outbound(skb); 484 else if (skb->protocol == htons(ETH_P_IP)) 485 ret = ipvlan_process_v4_outbound(skb); 486 else { 487 pr_warn_ratelimited("Dropped outbound packet type=%x\n", 488 ntohs(skb->protocol)); 489 kfree_skb(skb); 490 } 491 out: 492 return ret; 493 } 494 495 static void ipvlan_multicast_enqueue(struct ipvl_port *port, 496 struct sk_buff *skb, bool tx_pkt) 497 { 498 if (skb->protocol == htons(ETH_P_PAUSE)) { 499 kfree_skb(skb); 500 return; 501 } 502 503 /* Record that the deferred packet is from TX or RX path. By 504 * looking at mac-addresses on packet will lead to erronus decisions. 505 * (This would be true for a loopback-mode on master device or a 506 * hair-pin mode of the switch.) 507 */ 508 IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; 509 510 spin_lock(&port->backlog.lock); 511 if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { 512 if (skb->dev) 513 dev_hold(skb->dev); 514 __skb_queue_tail(&port->backlog, skb); 515 spin_unlock(&port->backlog.lock); 516 schedule_work(&port->wq); 517 } else { 518 spin_unlock(&port->backlog.lock); 519 atomic_long_inc(&skb->dev->rx_dropped); 520 kfree_skb(skb); 521 } 522 } 523 524 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) 525 { 526 const struct ipvl_dev *ipvlan = netdev_priv(dev); 527 void *lyr3h; 528 struct ipvl_addr *addr; 529 int addr_type; 530 531 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); 532 if (!lyr3h) 533 goto out; 534 535 if (!ipvlan_is_vepa(ipvlan->port)) { 536 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 537 if (addr) { 538 if (ipvlan_is_private(ipvlan->port)) { 539 consume_skb(skb); 540 return NET_XMIT_DROP; 541 } 542 return ipvlan_rcv_frame(addr, &skb, true); 543 } 544 } 545 out: 546 ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); 547 return ipvlan_process_outbound(skb); 548 } 549 550 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) 551 { 552 const struct ipvl_dev *ipvlan = netdev_priv(dev); 553 struct ethhdr *eth = eth_hdr(skb); 554 struct ipvl_addr *addr; 555 void *lyr3h; 556 int addr_type; 557 558 if (!ipvlan_is_vepa(ipvlan->port) && 559 ether_addr_equal(eth->h_dest, eth->h_source)) { 560 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); 561 if (lyr3h) { 562 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 563 if (addr) { 564 if (ipvlan_is_private(ipvlan->port)) { 565 consume_skb(skb); 566 return NET_XMIT_DROP; 567 } 568 return ipvlan_rcv_frame(addr, &skb, true); 569 } 570 } 571 skb = skb_share_check(skb, GFP_ATOMIC); 572 if (!skb) 573 return NET_XMIT_DROP; 574 575 /* Packet definitely does not belong to any of the 576 * virtual devices, but the dest is local. So forward 577 * the skb for the main-dev. At the RX side we just return 578 * RX_PASS for it to be processed further on the stack. 
579 */ 580 return dev_forward_skb(ipvlan->phy_dev, skb); 581 582 } else if (is_multicast_ether_addr(eth->h_dest)) { 583 ipvlan_skb_crossing_ns(skb, NULL); 584 ipvlan_multicast_enqueue(ipvlan->port, skb, true); 585 return NET_XMIT_SUCCESS; 586 } 587 588 ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); 589 return dev_queue_xmit(skb); 590 } 591 592 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) 593 { 594 struct ipvl_dev *ipvlan = netdev_priv(dev); 595 struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); 596 597 if (!port) 598 goto out; 599 600 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) 601 goto out; 602 603 switch(port->mode) { 604 case IPVLAN_MODE_L2: 605 return ipvlan_xmit_mode_l2(skb, dev); 606 case IPVLAN_MODE_L3: 607 case IPVLAN_MODE_L3S: 608 return ipvlan_xmit_mode_l3(skb, dev); 609 } 610 611 /* Should not reach here */ 612 WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", 613 port->mode); 614 out: 615 kfree_skb(skb); 616 return NET_XMIT_DROP; 617 } 618 619 static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) 620 { 621 struct ethhdr *eth = eth_hdr(skb); 622 struct ipvl_addr *addr; 623 void *lyr3h; 624 int addr_type; 625 626 if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { 627 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 628 if (!lyr3h) 629 return true; 630 631 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); 632 if (addr) 633 return false; 634 } 635 636 return true; 637 } 638 639 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, 640 struct ipvl_port *port) 641 { 642 void *lyr3h; 643 int addr_type; 644 struct ipvl_addr *addr; 645 struct sk_buff *skb = *pskb; 646 rx_handler_result_t ret = RX_HANDLER_PASS; 647 648 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 649 if (!lyr3h) 650 goto out; 651 652 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 653 if (addr) 654 ret = ipvlan_rcv_frame(addr, pskb, false); 655 656 out: 657 return ret; 658 } 659 660 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, 661 struct ipvl_port *port) 662 { 663 struct sk_buff *skb = *pskb; 664 struct ethhdr *eth = eth_hdr(skb); 665 rx_handler_result_t ret = RX_HANDLER_PASS; 666 void *lyr3h; 667 int addr_type; 668 669 if (is_multicast_ether_addr(eth->h_dest)) { 670 if (ipvlan_external_frame(skb, port)) { 671 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 672 673 /* External frames are queued for device local 674 * distribution, but a copy is given to master 675 * straight away to avoid sending duplicates later 676 * when work-queue processes this frame. This is 677 * achieved by returning RX_HANDLER_PASS. 
678 */ 679 if (nskb) { 680 ipvlan_skb_crossing_ns(nskb, NULL); 681 ipvlan_multicast_enqueue(port, nskb, false); 682 } 683 } 684 } else { 685 struct ipvl_addr *addr; 686 687 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 688 if (!lyr3h) 689 return ret; 690 691 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 692 if (addr) 693 ret = ipvlan_rcv_frame(addr, pskb, false); 694 } 695 696 return ret; 697 } 698 699 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) 700 { 701 struct sk_buff *skb = *pskb; 702 struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); 703 704 if (!port) 705 return RX_HANDLER_PASS; 706 707 switch (port->mode) { 708 case IPVLAN_MODE_L2: 709 return ipvlan_handle_mode_l2(pskb, port); 710 case IPVLAN_MODE_L3: 711 return ipvlan_handle_mode_l3(pskb, port); 712 case IPVLAN_MODE_L3S: 713 return RX_HANDLER_PASS; 714 } 715 716 /* Should not reach here */ 717 WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", 718 port->mode); 719 kfree_skb(skb); 720 return RX_HANDLER_CONSUMED; 721 } 722 723 static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, 724 struct net_device *dev) 725 { 726 struct ipvl_addr *addr = NULL; 727 struct ipvl_port *port; 728 void *lyr3h; 729 int addr_type; 730 731 if (!dev || !netif_is_ipvlan_port(dev)) 732 goto out; 733 734 port = ipvlan_port_get_rcu(dev); 735 if (!port || port->mode != IPVLAN_MODE_L3S) 736 goto out; 737 738 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 739 if (!lyr3h) 740 goto out; 741 742 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 743 out: 744 return addr; 745 } 746 747 struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, 748 u16 proto) 749 { 750 struct ipvl_addr *addr; 751 struct net_device *sdev; 752 753 addr = ipvlan_skb_to_addr(skb, dev); 754 if (!addr) 755 goto out; 756 757 sdev = addr->master->dev; 758 switch (proto) { 759 case AF_INET: 760 { 761 int err; 762 struct iphdr *ip4h = ip_hdr(skb); 763 764 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, 765 ip4h->tos, sdev); 766 if (unlikely(err)) 767 goto out; 768 break; 769 } 770 case AF_INET6: 771 { 772 struct dst_entry *dst; 773 struct ipv6hdr *ip6h = ipv6_hdr(skb); 774 int flags = RT6_LOOKUP_F_HAS_SADDR; 775 struct flowi6 fl6 = { 776 .flowi6_iif = sdev->ifindex, 777 .daddr = ip6h->daddr, 778 .saddr = ip6h->saddr, 779 .flowlabel = ip6_flowinfo(ip6h), 780 .flowi6_mark = skb->mark, 781 .flowi6_proto = ip6h->nexthdr, 782 }; 783 784 skb_dst_drop(skb); 785 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); 786 skb_dst_set(skb, dst); 787 break; 788 } 789 default: 790 break; 791 } 792 793 out: 794 return skb; 795 } 796 797 unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, 798 const struct nf_hook_state *state) 799 { 800 struct ipvl_addr *addr; 801 unsigned int len; 802 803 addr = ipvlan_skb_to_addr(skb, skb->dev); 804 if (!addr) 805 goto out; 806 807 skb->dev = addr->master->dev; 808 len = skb->len + ETH_HLEN; 809 ipvlan_count_rx(addr->master, len, true, false); 810 out: 811 return NF_ACCEPT; 812 } 813