/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
		     unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
		if (is_v6 && addr->atype == IPVL_IPV6 &&
		    ipv6_addr_equal(&addr->ip6addr, iaddr))
			return addr;
		else if (!is_v6 && addr->atype == IPVL_IPV4 &&
			 addr->ip4addr.s_addr ==
				((struct in_addr *)iaddr)->s_addr)
			return addr;
	}
	return NULL;
}

void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}
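
/* The per-port hash table above is shared by IPv4 and IPv6 addresses;
 * addr->atype disambiguates the two families on lookup. Entries are
 * added only if currently unhashed, and the RCU hlist primitives keep
 * the lookup path lock-free.
 */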

void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;

	list_for_each_entry(addr, &ipvlan->addrs, anode) {
		if ((is_v6 && addr->atype == IPVL_IPV6 &&
		     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
		    (!is_v6 && addr->atype == IPVL_IPV4 &&
		     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
			return addr;
	}
	return NULL;
}

bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;

	ASSERT_RTNL();

	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
			return true;
	}
	return false;
}

static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, sizeof(*arph))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation pkts need different treatment */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			*type = IPVL_ICMPV6;
			lyr3h = ip6h + 1;
		}
		break;
	}
	default:
		return NULL;
	}

	return lyr3h;
}

unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}
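
/* Multicast and broadcast frames are not delivered inline. The rx/tx
 * paths enqueue them on port->backlog (see ipvlan_multicast_enqueue()
 * below) and this worker drains the backlog, cloning one skb per slave
 * whose mac_filters bit matches the destination address.
 */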

void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
	}
}

static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need to have
	 * device-up check as well as skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}
	ipvlan_skb_crossing_ns(skb, dev);

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}
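
/* Map a parsed L3 header (from ipvlan_get_L3_hdr()) to a slave address.
 * use_dest selects which address is matched: the destination for local
 * delivery decisions, or the source when the caller (e.g.
 * ipvlan_external_frame()) wants to know whether a frame originated
 * from one of the local slaves. For ARP, the relevant IP is pulled out
 * of the payload; for Neighbour Solicitations, the NS target is used so
 * that DAD probes (which carry an unspecified source) still resolve.
 */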

static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	if (addr_type == IPVL_IPV6) {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
	} else if (addr_type == IPVL_ICMPV6) {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that the Neighbor Solicitation ICMPv6 packets
		 * are handled to avoid DAD issues.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
	} else if (addr_type == IPVL_IPV4) {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
	} else if (addr_type == IPVL_ARP) {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
	}

	return addr;
}

static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->ifindex,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}

static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	int err, ret = NET_XMIT_DROP;
	struct flowi6 fl6 = {
		.flowi6_iif = dev->ifindex,
		.daddr = ip6h->daddr,
		.saddr = ip6h->saddr,
		.flowi6_flags = FLOWI_FLAG_ANYSRC,
		.flowlabel = ip6_flowinfo(ip6h),
		.flowi6_mark = skb->mark,
		.flowi6_proto = ip6h->nexthdr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		ret = dst->error;
		dst_release(dst);
		goto err;
	}
	skb_dst_set(skb, dst);
	err = ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
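
/* Both outbound helpers above re-resolve the route in the master's
 * namespace and inject the packet via ip_local_out()/ip6_local_out(),
 * so slave traffic always leaves through the master's routing tables.
 * Note the asymmetry between the two flows: the IPv4 lookup keys on
 * flowi4_oif while the IPv6 one fills flowi6_iif with the same ifindex.
 */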

static int ipvlan_process_outbound(struct sk_buff *skb)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* In this mode we don't care about multicast and broadcast traffic */
	if (is_multicast_ether_addr(ethh->h_dest)) {
		pr_warn_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
		goto out;
	}

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * will have L2; which needs to be discarded and processed further
	 * in the net-ns of the main-device.
	 */
	if (skb_mac_header_was_set(skb)) {
		skb_pull(skb, sizeof(*ethh));
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}

static void ipvlan_multicast_enqueue(struct ipvl_port *port,
				     struct sk_buff *skb, bool tx_pkt)
{
	if (skb->protocol == htons(ETH_P_PAUSE)) {
		kfree_skb(skb);
		return;
	}

	/* Record whether the deferred packet is from the TX or the RX path.
	 * Deciding this by looking at the mac-addresses on the packet would
	 * lead to erroneous decisions. (This would be true for a
	 * loopback-mode on the master device or a hair-pin mode of the
	 * switch.)
	 */
	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;

	spin_lock(&port->backlog.lock);
	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
		if (skb->dev)
			dev_hold(skb->dev);
		__skb_queue_tail(&port->backlog, skb);
		spin_unlock(&port->backlog.lock);
		schedule_work(&port->wq);
	} else {
		spin_unlock(&port->backlog.lock);
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
	}
}

static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
	if (addr)
		return ipvlan_rcv_frame(addr, &skb, true);

out:
	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return ipvlan_process_outbound(skb);
}
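
/* L2-mode transmit: frames whose destination matches a local slave are
 * looped back with ipvlan_rcv_frame(), multicasts are deferred to the
 * work-queue, and everything else is handed to the master device via
 * dev_queue_xmit() after crossing into its netns.
 */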

static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h,
						  addr_type, true);
			if (addr)
				return ipvlan_rcv_frame(addr, &skb, true);
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* Packet definitely does not belong to any of the
		 * virtual devices, but the dest is local. So forward
		 * the skb for the main-dev. At the RX side we just return
		 * RX_HANDLER_PASS for it to be processed further on the
		 * stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		ipvlan_skb_crossing_ns(skb, NULL);
		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
		return NET_XMIT_SUCCESS;
	}

	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return dev_queue_xmit(skb);
}

int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
	case IPVLAN_MODE_L3S:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
		  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}

static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, pskb, false);

out:
	return ret;
}
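
/* L2-mode receive: multicasts from external sources are cloned onto the
 * backlog for per-slave distribution while the original skb passes up
 * the master's stack (RX_HANDLER_PASS); unicast frames are matched on
 * the destination address and, on a hit, redirected to the owning
 * slave via ipvlan_rcv_frame().
 */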

static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;
	void *lyr3h;
	int addr_type;

	if (is_multicast_ether_addr(eth->h_dest)) {
		if (ipvlan_external_frame(skb, port)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			/* External frames are queued for device local
			 * distribution, but a copy is given to the master
			 * straight away to avoid sending duplicates later
			 * when the work-queue processes this frame. This is
			 * achieved by returning RX_HANDLER_PASS.
			 */
			if (nskb) {
				ipvlan_skb_crossing_ns(nskb, NULL);
				ipvlan_multicast_enqueue(port, nskb, false);
			}
		}
	} else {
		struct ipvl_addr *addr;

		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
		if (!lyr3h)
			return ret;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
		if (addr)
			ret = ipvlan_rcv_frame(addr, pskb, false);
	}

	return ret;
}

rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);

	if (!port)
		return RX_HANDLER_PASS;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_handle_mode_l2(pskb, port);
	case IPVLAN_MODE_L3:
		return ipvlan_handle_mode_l3(pskb, port);
	case IPVLAN_MODE_L3S:
		return RX_HANDLER_PASS;
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
		  port->mode);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}

static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_addr *addr = NULL;
	struct ipvl_port *port;
	void *lyr3h;
	int addr_type;

	if (!dev || !netif_is_ipvlan_port(dev))
		goto out;

	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		goto out;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
out:
	return addr;
}

struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr;
	struct net_device *sdev;

	addr = ipvlan_skb_to_addr(skb, dev);
	if (!addr)
		goto out;

	sdev = addr->master->dev;
	switch (proto) {
	case AF_INET:
	{
		int err;
		struct iphdr *ip4h = ip_hdr(skb);

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			goto out;
		break;
	}
	case AF_INET6:
	{
		struct dst_entry *dst;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int flags = RT6_LOOKUP_F_HAS_SADDR;
		struct flowi6 fl6 = {
			.flowi6_iif = sdev->ifindex,
			.daddr = ip6h->daddr,
			.saddr = ip6h->saddr,
			.flowlabel = ip6_flowinfo(ip6h),
			.flowi6_mark = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
		skb_dst_set(skb, dst);
		break;
	}
	default:
		break;
	}

out:
	return skb;
}

unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr;
	unsigned int len;

	addr = ipvlan_skb_to_addr(skb, skb->dev);
	if (!addr)
		goto out;

	skb->dev = addr->master->dev;
	len = skb->len + ETH_HLEN;
	ipvlan_count_rx(addr->master, len, true, false);
out:
	return NF_ACCEPT;
}