1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of the GNU General Public License as 5 * published by the Free Software Foundation; either version 2 of 6 * the License, or (at your option) any later version. 7 * 8 */ 9 10 #include "ipvlan.h" 11 12 static u32 ipvlan_jhash_secret __read_mostly; 13 14 void ipvlan_init_secret(void) 15 { 16 net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret)); 17 } 18 19 void ipvlan_count_rx(const struct ipvl_dev *ipvlan, 20 unsigned int len, bool success, bool mcast) 21 { 22 if (likely(success)) { 23 struct ipvl_pcpu_stats *pcptr; 24 25 pcptr = this_cpu_ptr(ipvlan->pcpu_stats); 26 u64_stats_update_begin(&pcptr->syncp); 27 pcptr->rx_pkts++; 28 pcptr->rx_bytes += len; 29 if (mcast) 30 pcptr->rx_mcast++; 31 u64_stats_update_end(&pcptr->syncp); 32 } else { 33 this_cpu_inc(ipvlan->pcpu_stats->rx_errs); 34 } 35 } 36 EXPORT_SYMBOL_GPL(ipvlan_count_rx); 37 38 #if IS_ENABLED(CONFIG_IPV6) 39 static u8 ipvlan_get_v6_hash(const void *iaddr) 40 { 41 const struct in6_addr *ip6_addr = iaddr; 42 43 return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) & 44 IPVLAN_HASH_MASK; 45 } 46 #else 47 static u8 ipvlan_get_v6_hash(const void *iaddr) 48 { 49 return 0; 50 } 51 #endif 52 53 static u8 ipvlan_get_v4_hash(const void *iaddr) 54 { 55 const struct in_addr *ip4_addr = iaddr; 56 57 return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) & 58 IPVLAN_HASH_MASK; 59 } 60 61 static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr) 62 { 63 if (!is_v6 && addr->atype == IPVL_IPV4) { 64 struct in_addr *i4addr = (struct in_addr *)iaddr; 65 66 return addr->ip4addr.s_addr == i4addr->s_addr; 67 #if IS_ENABLED(CONFIG_IPV6) 68 } else if (is_v6 && addr->atype == IPVL_IPV6) { 69 struct in6_addr *i6addr = (struct in6_addr *)iaddr; 70 71 return ipv6_addr_equal(&addr->ip6addr, i6addr); 72 #endif 73 } 74 75 return false; 76 } 77 78 static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, 79 const void *iaddr, bool is_v6) 80 { 81 struct ipvl_addr *addr; 82 u8 hash; 83 84 hash = is_v6 ? ipvlan_get_v6_hash(iaddr) : 85 ipvlan_get_v4_hash(iaddr); 86 hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) 87 if (addr_equal(is_v6, addr, iaddr)) 88 return addr; 89 return NULL; 90 } 91 92 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) 93 { 94 struct ipvl_port *port = ipvlan->port; 95 u8 hash; 96 97 hash = (addr->atype == IPVL_IPV6) ? 98 ipvlan_get_v6_hash(&addr->ip6addr) : 99 ipvlan_get_v4_hash(&addr->ip4addr); 100 if (hlist_unhashed(&addr->hlnode)) 101 hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); 102 } 103 104 void ipvlan_ht_addr_del(struct ipvl_addr *addr) 105 { 106 hlist_del_init_rcu(&addr->hlnode); 107 } 108 109 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, 110 const void *iaddr, bool is_v6) 111 { 112 struct ipvl_addr *addr, *ret = NULL; 113 114 rcu_read_lock(); 115 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { 116 if (addr_equal(is_v6, addr, iaddr)) { 117 ret = addr; 118 break; 119 } 120 } 121 rcu_read_unlock(); 122 return ret; 123 } 124 125 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) 126 { 127 struct ipvl_dev *ipvlan; 128 bool ret = false; 129 130 rcu_read_lock(); 131 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { 132 if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { 133 ret = true; 134 break; 135 } 136 } 137 rcu_read_unlock(); 138 return ret; 139 } 140 141 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) 142 { 143 void *lyr3h = NULL; 144 145 switch (skb->protocol) { 146 case htons(ETH_P_ARP): { 147 struct arphdr *arph; 148 149 if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev)))) 150 return NULL; 151 152 arph = arp_hdr(skb); 153 *type = IPVL_ARP; 154 lyr3h = arph; 155 break; 156 } 157 case htons(ETH_P_IP): { 158 u32 pktlen; 159 struct iphdr *ip4h; 160 161 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h)))) 162 return NULL; 163 164 ip4h = ip_hdr(skb); 165 pktlen = ntohs(ip4h->tot_len); 166 if (ip4h->ihl < 5 || ip4h->version != 4) 167 return NULL; 168 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4)) 169 return NULL; 170 171 *type = IPVL_IPV4; 172 lyr3h = ip4h; 173 break; 174 } 175 #if IS_ENABLED(CONFIG_IPV6) 176 case htons(ETH_P_IPV6): { 177 struct ipv6hdr *ip6h; 178 179 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h)))) 180 return NULL; 181 182 ip6h = ipv6_hdr(skb); 183 if (ip6h->version != 6) 184 return NULL; 185 186 *type = IPVL_IPV6; 187 lyr3h = ip6h; 188 /* Only Neighbour Solicitation pkts need different treatment */ 189 if (ipv6_addr_any(&ip6h->saddr) && 190 ip6h->nexthdr == NEXTHDR_ICMP) { 191 struct icmp6hdr *icmph; 192 193 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)))) 194 return NULL; 195 196 ip6h = ipv6_hdr(skb); 197 icmph = (struct icmp6hdr *)(ip6h + 1); 198 199 if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { 200 /* Need to access the ipv6 address in body */ 201 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph) 202 + sizeof(struct in6_addr)))) 203 return NULL; 204 205 ip6h = ipv6_hdr(skb); 206 icmph = (struct icmp6hdr *)(ip6h + 1); 207 } 208 209 *type = IPVL_ICMPV6; 210 lyr3h = icmph; 211 } 212 break; 213 } 214 #endif 215 default: 216 return NULL; 217 } 218 219 return lyr3h; 220 } 221 222 unsigned int ipvlan_mac_hash(const unsigned char *addr) 223 { 224 u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2), 225 ipvlan_jhash_secret); 226 227 return hash & IPVLAN_MAC_FILTER_MASK; 228 } 229 230 void ipvlan_process_multicast(struct work_struct *work) 231 { 232 struct ipvl_port *port = container_of(work, struct ipvl_port, wq); 233 struct ethhdr *ethh; 234 struct ipvl_dev *ipvlan; 235 struct sk_buff *skb, *nskb; 236 struct sk_buff_head list; 237 unsigned int len; 238 unsigned int mac_hash; 239 int ret; 240 u8 pkt_type; 241 bool tx_pkt; 242 243 __skb_queue_head_init(&list); 244 245 spin_lock_bh(&port->backlog.lock); 246 skb_queue_splice_tail_init(&port->backlog, &list); 247 spin_unlock_bh(&port->backlog.lock); 248 249 while ((skb = __skb_dequeue(&list)) != NULL) { 250 struct net_device *dev = skb->dev; 251 bool consumed = false; 252 253 ethh = eth_hdr(skb); 254 tx_pkt = IPVL_SKB_CB(skb)->tx_pkt; 255 mac_hash = ipvlan_mac_hash(ethh->h_dest); 256 257 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) 258 pkt_type = PACKET_BROADCAST; 259 else 260 pkt_type = PACKET_MULTICAST; 261 262 rcu_read_lock(); 263 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { 264 if (tx_pkt && (ipvlan->dev == skb->dev)) 265 continue; 266 if (!test_bit(mac_hash, ipvlan->mac_filters)) 267 continue; 268 if (!(ipvlan->dev->flags & IFF_UP)) 269 continue; 270 ret = NET_RX_DROP; 271 len = skb->len + ETH_HLEN; 272 nskb = skb_clone(skb, GFP_ATOMIC); 273 local_bh_disable(); 274 if (nskb) { 275 consumed = true; 276 nskb->pkt_type = pkt_type; 277 nskb->dev = ipvlan->dev; 278 if (tx_pkt) 279 ret = dev_forward_skb(ipvlan->dev, nskb); 280 else 281 ret = netif_rx(nskb); 282 } 283 ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); 284 local_bh_enable(); 285 } 286 rcu_read_unlock(); 287 288 if (tx_pkt) { 289 /* If the packet originated here, send it out. */ 290 skb->dev = port->dev; 291 skb->pkt_type = pkt_type; 292 dev_queue_xmit(skb); 293 } else { 294 if (consumed) 295 consume_skb(skb); 296 else 297 kfree_skb(skb); 298 } 299 if (dev) 300 dev_put(dev); 301 } 302 } 303 304 static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev) 305 { 306 bool xnet = true; 307 308 if (dev) 309 xnet = !net_eq(dev_net(skb->dev), dev_net(dev)); 310 311 skb_scrub_packet(skb, xnet); 312 if (dev) 313 skb->dev = dev; 314 } 315 316 static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb, 317 bool local) 318 { 319 struct ipvl_dev *ipvlan = addr->master; 320 struct net_device *dev = ipvlan->dev; 321 unsigned int len; 322 rx_handler_result_t ret = RX_HANDLER_CONSUMED; 323 bool success = false; 324 struct sk_buff *skb = *pskb; 325 326 len = skb->len + ETH_HLEN; 327 /* Only packets exchanged between two local slaves need to have 328 * device-up check as well as skb-share check. 329 */ 330 if (local) { 331 if (unlikely(!(dev->flags & IFF_UP))) { 332 kfree_skb(skb); 333 goto out; 334 } 335 336 skb = skb_share_check(skb, GFP_ATOMIC); 337 if (!skb) 338 goto out; 339 340 *pskb = skb; 341 } 342 343 if (local) { 344 skb->pkt_type = PACKET_HOST; 345 if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS) 346 success = true; 347 } else { 348 skb->dev = dev; 349 ret = RX_HANDLER_ANOTHER; 350 success = true; 351 } 352 353 out: 354 ipvlan_count_rx(ipvlan, len, success, false); 355 return ret; 356 } 357 358 static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, 359 void *lyr3h, int addr_type, 360 bool use_dest) 361 { 362 struct ipvl_addr *addr = NULL; 363 364 switch (addr_type) { 365 #if IS_ENABLED(CONFIG_IPV6) 366 case IPVL_IPV6: { 367 struct ipv6hdr *ip6h; 368 struct in6_addr *i6addr; 369 370 ip6h = (struct ipv6hdr *)lyr3h; 371 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr; 372 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 373 break; 374 } 375 case IPVL_ICMPV6: { 376 struct nd_msg *ndmh; 377 struct in6_addr *i6addr; 378 379 /* Make sure that the NeighborSolicitation ICMPv6 packets 380 * are handled to avoid DAD issue. 381 */ 382 ndmh = (struct nd_msg *)lyr3h; 383 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) { 384 i6addr = &ndmh->target; 385 addr = ipvlan_ht_addr_lookup(port, i6addr, true); 386 } 387 break; 388 } 389 #endif 390 case IPVL_IPV4: { 391 struct iphdr *ip4h; 392 __be32 *i4addr; 393 394 ip4h = (struct iphdr *)lyr3h; 395 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr; 396 addr = ipvlan_ht_addr_lookup(port, i4addr, false); 397 break; 398 } 399 case IPVL_ARP: { 400 struct arphdr *arph; 401 unsigned char *arp_ptr; 402 __be32 dip; 403 404 arph = (struct arphdr *)lyr3h; 405 arp_ptr = (unsigned char *)(arph + 1); 406 if (use_dest) 407 arp_ptr += (2 * port->dev->addr_len) + 4; 408 else 409 arp_ptr += port->dev->addr_len; 410 411 memcpy(&dip, arp_ptr, 4); 412 addr = ipvlan_ht_addr_lookup(port, &dip, false); 413 break; 414 } 415 } 416 417 return addr; 418 } 419 420 static int ipvlan_process_v4_outbound(struct sk_buff *skb) 421 { 422 const struct iphdr *ip4h = ip_hdr(skb); 423 struct net_device *dev = skb->dev; 424 struct net *net = dev_net(dev); 425 struct rtable *rt; 426 int err, ret = NET_XMIT_DROP; 427 struct flowi4 fl4 = { 428 .flowi4_oif = dev->ifindex, 429 .flowi4_tos = RT_TOS(ip4h->tos), 430 .flowi4_flags = FLOWI_FLAG_ANYSRC, 431 .flowi4_mark = skb->mark, 432 .daddr = ip4h->daddr, 433 .saddr = ip4h->saddr, 434 }; 435 436 rt = ip_route_output_flow(net, &fl4, NULL); 437 if (IS_ERR(rt)) 438 goto err; 439 440 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { 441 ip_rt_put(rt); 442 goto err; 443 } 444 skb_dst_set(skb, &rt->dst); 445 err = ip_local_out(net, skb->sk, skb); 446 if (unlikely(net_xmit_eval(err))) 447 dev->stats.tx_errors++; 448 else 449 ret = NET_XMIT_SUCCESS; 450 goto out; 451 err: 452 dev->stats.tx_errors++; 453 kfree_skb(skb); 454 out: 455 return ret; 456 } 457 458 #if IS_ENABLED(CONFIG_IPV6) 459 static int ipvlan_process_v6_outbound(struct sk_buff *skb) 460 { 461 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 462 struct net_device *dev = skb->dev; 463 struct net *net = dev_net(dev); 464 struct dst_entry *dst; 465 int err, ret = NET_XMIT_DROP; 466 struct flowi6 fl6 = { 467 .flowi6_oif = dev->ifindex, 468 .daddr = ip6h->daddr, 469 .saddr = ip6h->saddr, 470 .flowi6_flags = FLOWI_FLAG_ANYSRC, 471 .flowlabel = ip6_flowinfo(ip6h), 472 .flowi6_mark = skb->mark, 473 .flowi6_proto = ip6h->nexthdr, 474 }; 475 476 dst = ip6_route_output(net, NULL, &fl6); 477 if (dst->error) { 478 ret = dst->error; 479 dst_release(dst); 480 goto err; 481 } 482 skb_dst_set(skb, dst); 483 err = ip6_local_out(net, skb->sk, skb); 484 if (unlikely(net_xmit_eval(err))) 485 dev->stats.tx_errors++; 486 else 487 ret = NET_XMIT_SUCCESS; 488 goto out; 489 err: 490 dev->stats.tx_errors++; 491 kfree_skb(skb); 492 out: 493 return ret; 494 } 495 #else 496 static int ipvlan_process_v6_outbound(struct sk_buff *skb) 497 { 498 return NET_XMIT_DROP; 499 } 500 #endif 501 502 static int ipvlan_process_outbound(struct sk_buff *skb) 503 { 504 struct ethhdr *ethh = eth_hdr(skb); 505 int ret = NET_XMIT_DROP; 506 507 /* In this mode we dont care about multicast and broadcast traffic */ 508 if (is_multicast_ether_addr(ethh->h_dest)) { 509 pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", 510 ntohs(skb->protocol)); 511 kfree_skb(skb); 512 goto out; 513 } 514 515 /* The ipvlan is a pseudo-L2 device, so the packets that we receive 516 * will have L2; which need to discarded and processed further 517 * in the net-ns of the main-device. 518 */ 519 if (skb_mac_header_was_set(skb)) { 520 skb_pull(skb, sizeof(*ethh)); 521 skb->mac_header = (typeof(skb->mac_header))~0U; 522 skb_reset_network_header(skb); 523 } 524 525 if (skb->protocol == htons(ETH_P_IPV6)) 526 ret = ipvlan_process_v6_outbound(skb); 527 else if (skb->protocol == htons(ETH_P_IP)) 528 ret = ipvlan_process_v4_outbound(skb); 529 else { 530 pr_warn_ratelimited("Dropped outbound packet type=%x\n", 531 ntohs(skb->protocol)); 532 kfree_skb(skb); 533 } 534 out: 535 return ret; 536 } 537 538 static void ipvlan_multicast_enqueue(struct ipvl_port *port, 539 struct sk_buff *skb, bool tx_pkt) 540 { 541 if (skb->protocol == htons(ETH_P_PAUSE)) { 542 kfree_skb(skb); 543 return; 544 } 545 546 /* Record that the deferred packet is from TX or RX path. By 547 * looking at mac-addresses on packet will lead to erronus decisions. 548 * (This would be true for a loopback-mode on master device or a 549 * hair-pin mode of the switch.) 550 */ 551 IPVL_SKB_CB(skb)->tx_pkt = tx_pkt; 552 553 spin_lock(&port->backlog.lock); 554 if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { 555 if (skb->dev) 556 dev_hold(skb->dev); 557 __skb_queue_tail(&port->backlog, skb); 558 spin_unlock(&port->backlog.lock); 559 schedule_work(&port->wq); 560 } else { 561 spin_unlock(&port->backlog.lock); 562 atomic_long_inc(&skb->dev->rx_dropped); 563 kfree_skb(skb); 564 } 565 } 566 567 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) 568 { 569 const struct ipvl_dev *ipvlan = netdev_priv(dev); 570 void *lyr3h; 571 struct ipvl_addr *addr; 572 int addr_type; 573 574 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); 575 if (!lyr3h) 576 goto out; 577 578 if (!ipvlan_is_vepa(ipvlan->port)) { 579 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 580 if (addr) { 581 if (ipvlan_is_private(ipvlan->port)) { 582 consume_skb(skb); 583 return NET_XMIT_DROP; 584 } 585 return ipvlan_rcv_frame(addr, &skb, true); 586 } 587 } 588 out: 589 ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev); 590 return ipvlan_process_outbound(skb); 591 } 592 593 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) 594 { 595 const struct ipvl_dev *ipvlan = netdev_priv(dev); 596 struct ethhdr *eth = eth_hdr(skb); 597 struct ipvl_addr *addr; 598 void *lyr3h; 599 int addr_type; 600 601 if (!ipvlan_is_vepa(ipvlan->port) && 602 ether_addr_equal(eth->h_dest, eth->h_source)) { 603 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type); 604 if (lyr3h) { 605 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true); 606 if (addr) { 607 if (ipvlan_is_private(ipvlan->port)) { 608 consume_skb(skb); 609 return NET_XMIT_DROP; 610 } 611 return ipvlan_rcv_frame(addr, &skb, true); 612 } 613 } 614 skb = skb_share_check(skb, GFP_ATOMIC); 615 if (!skb) 616 return NET_XMIT_DROP; 617 618 /* Packet definitely does not belong to any of the 619 * virtual devices, but the dest is local. So forward 620 * the skb for the main-dev. At the RX side we just return 621 * RX_PASS for it to be processed further on the stack. 622 */ 623 return dev_forward_skb(ipvlan->phy_dev, skb); 624 625 } else if (is_multicast_ether_addr(eth->h_dest)) { 626 ipvlan_skb_crossing_ns(skb, NULL); 627 ipvlan_multicast_enqueue(ipvlan->port, skb, true); 628 return NET_XMIT_SUCCESS; 629 } 630 631 skb->dev = ipvlan->phy_dev; 632 return dev_queue_xmit(skb); 633 } 634 635 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) 636 { 637 struct ipvl_dev *ipvlan = netdev_priv(dev); 638 struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev); 639 640 if (!port) 641 goto out; 642 643 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) 644 goto out; 645 646 switch(port->mode) { 647 case IPVLAN_MODE_L2: 648 return ipvlan_xmit_mode_l2(skb, dev); 649 case IPVLAN_MODE_L3: 650 case IPVLAN_MODE_L3S: 651 return ipvlan_xmit_mode_l3(skb, dev); 652 } 653 654 /* Should not reach here */ 655 WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", 656 port->mode); 657 out: 658 kfree_skb(skb); 659 return NET_XMIT_DROP; 660 } 661 662 static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port) 663 { 664 struct ethhdr *eth = eth_hdr(skb); 665 struct ipvl_addr *addr; 666 void *lyr3h; 667 int addr_type; 668 669 if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) { 670 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 671 if (!lyr3h) 672 return true; 673 674 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false); 675 if (addr) 676 return false; 677 } 678 679 return true; 680 } 681 682 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb, 683 struct ipvl_port *port) 684 { 685 void *lyr3h; 686 int addr_type; 687 struct ipvl_addr *addr; 688 struct sk_buff *skb = *pskb; 689 rx_handler_result_t ret = RX_HANDLER_PASS; 690 691 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 692 if (!lyr3h) 693 goto out; 694 695 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 696 if (addr) 697 ret = ipvlan_rcv_frame(addr, pskb, false); 698 699 out: 700 return ret; 701 } 702 703 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, 704 struct ipvl_port *port) 705 { 706 struct sk_buff *skb = *pskb; 707 struct ethhdr *eth = eth_hdr(skb); 708 rx_handler_result_t ret = RX_HANDLER_PASS; 709 710 if (is_multicast_ether_addr(eth->h_dest)) { 711 if (ipvlan_external_frame(skb, port)) { 712 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 713 714 /* External frames are queued for device local 715 * distribution, but a copy is given to master 716 * straight away to avoid sending duplicates later 717 * when work-queue processes this frame. This is 718 * achieved by returning RX_HANDLER_PASS. 719 */ 720 if (nskb) { 721 ipvlan_skb_crossing_ns(nskb, NULL); 722 ipvlan_multicast_enqueue(port, nskb, false); 723 } 724 } 725 } else { 726 /* Perform like l3 mode for non-multicast packet */ 727 ret = ipvlan_handle_mode_l3(pskb, port); 728 } 729 730 return ret; 731 } 732 733 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) 734 { 735 struct sk_buff *skb = *pskb; 736 struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev); 737 738 if (!port) 739 return RX_HANDLER_PASS; 740 741 switch (port->mode) { 742 case IPVLAN_MODE_L2: 743 return ipvlan_handle_mode_l2(pskb, port); 744 case IPVLAN_MODE_L3: 745 return ipvlan_handle_mode_l3(pskb, port); 746 case IPVLAN_MODE_L3S: 747 return RX_HANDLER_PASS; 748 } 749 750 /* Should not reach here */ 751 WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", 752 port->mode); 753 kfree_skb(skb); 754 return RX_HANDLER_CONSUMED; 755 } 756 757 static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, 758 struct net_device *dev) 759 { 760 struct ipvl_addr *addr = NULL; 761 struct ipvl_port *port; 762 void *lyr3h; 763 int addr_type; 764 765 if (!dev || !netif_is_ipvlan_port(dev)) 766 goto out; 767 768 port = ipvlan_port_get_rcu(dev); 769 if (!port || port->mode != IPVLAN_MODE_L3S) 770 goto out; 771 772 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); 773 if (!lyr3h) 774 goto out; 775 776 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); 777 out: 778 return addr; 779 } 780 781 struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, 782 u16 proto) 783 { 784 struct ipvl_addr *addr; 785 struct net_device *sdev; 786 787 addr = ipvlan_skb_to_addr(skb, dev); 788 if (!addr) 789 goto out; 790 791 sdev = addr->master->dev; 792 switch (proto) { 793 case AF_INET: 794 { 795 int err; 796 struct iphdr *ip4h = ip_hdr(skb); 797 798 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, 799 ip4h->tos, sdev); 800 if (unlikely(err)) 801 goto out; 802 break; 803 } 804 #if IS_ENABLED(CONFIG_IPV6) 805 case AF_INET6: 806 { 807 struct dst_entry *dst; 808 struct ipv6hdr *ip6h = ipv6_hdr(skb); 809 int flags = RT6_LOOKUP_F_HAS_SADDR; 810 struct flowi6 fl6 = { 811 .flowi6_iif = sdev->ifindex, 812 .daddr = ip6h->daddr, 813 .saddr = ip6h->saddr, 814 .flowlabel = ip6_flowinfo(ip6h), 815 .flowi6_mark = skb->mark, 816 .flowi6_proto = ip6h->nexthdr, 817 }; 818 819 skb_dst_drop(skb); 820 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, 821 skb, flags); 822 skb_dst_set(skb, dst); 823 break; 824 } 825 #endif 826 default: 827 break; 828 } 829 830 out: 831 return skb; 832 } 833 834 unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, 835 const struct nf_hook_state *state) 836 { 837 struct ipvl_addr *addr; 838 unsigned int len; 839 840 addr = ipvlan_skb_to_addr(skb, skb->dev); 841 if (!addr) 842 goto out; 843 844 skb->dev = addr->master->dev; 845 len = skb->len + ETH_HLEN; 846 ipvlan_count_rx(addr->master, len, true, false); 847 out: 848 return NF_ACCEPT; 849 } 850