10e6fbc5bSPravin B Shelar /* 20e6fbc5bSPravin B Shelar * Copyright (c) 2013 Nicira, Inc. 30e6fbc5bSPravin B Shelar * 40e6fbc5bSPravin B Shelar * This program is free software; you can redistribute it and/or 50e6fbc5bSPravin B Shelar * modify it under the terms of version 2 of the GNU General Public 60e6fbc5bSPravin B Shelar * License as published by the Free Software Foundation. 70e6fbc5bSPravin B Shelar * 80e6fbc5bSPravin B Shelar * This program is distributed in the hope that it will be useful, but 90e6fbc5bSPravin B Shelar * WITHOUT ANY WARRANTY; without even the implied warranty of 100e6fbc5bSPravin B Shelar * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 110e6fbc5bSPravin B Shelar * General Public License for more details. 120e6fbc5bSPravin B Shelar * 130e6fbc5bSPravin B Shelar * You should have received a copy of the GNU General Public License 140e6fbc5bSPravin B Shelar * along with this program; if not, write to the Free Software 150e6fbc5bSPravin B Shelar * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 160e6fbc5bSPravin B Shelar * 02110-1301, USA 170e6fbc5bSPravin B Shelar */ 180e6fbc5bSPravin B Shelar 190e6fbc5bSPravin B Shelar #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 200e6fbc5bSPravin B Shelar 210e6fbc5bSPravin B Shelar #include <linux/types.h> 220e6fbc5bSPravin B Shelar #include <linux/kernel.h> 230e6fbc5bSPravin B Shelar #include <linux/skbuff.h> 240e6fbc5bSPravin B Shelar #include <linux/netdevice.h> 250e6fbc5bSPravin B Shelar #include <linux/in.h> 260e6fbc5bSPravin B Shelar #include <linux/if_arp.h> 270e6fbc5bSPravin B Shelar #include <linux/mroute.h> 280e6fbc5bSPravin B Shelar #include <linux/init.h> 290e6fbc5bSPravin B Shelar #include <linux/in6.h> 300e6fbc5bSPravin B Shelar #include <linux/inetdevice.h> 310e6fbc5bSPravin B Shelar #include <linux/netfilter_ipv4.h> 320e6fbc5bSPravin B Shelar #include <linux/etherdevice.h> 330e6fbc5bSPravin B Shelar #include <linux/if_ether.h> 340e6fbc5bSPravin B Shelar #include <linux/if_vlan.h> 350e6fbc5bSPravin B Shelar 360e6fbc5bSPravin B Shelar #include <net/ip.h> 370e6fbc5bSPravin B Shelar #include <net/icmp.h> 380e6fbc5bSPravin B Shelar #include <net/protocol.h> 390e6fbc5bSPravin B Shelar #include <net/ip_tunnels.h> 400e6fbc5bSPravin B Shelar #include <net/arp.h> 410e6fbc5bSPravin B Shelar #include <net/checksum.h> 420e6fbc5bSPravin B Shelar #include <net/dsfield.h> 430e6fbc5bSPravin B Shelar #include <net/inet_ecn.h> 440e6fbc5bSPravin B Shelar #include <net/xfrm.h> 450e6fbc5bSPravin B Shelar #include <net/net_namespace.h> 460e6fbc5bSPravin B Shelar #include <net/netns/generic.h> 470e6fbc5bSPravin B Shelar #include <net/rtnetlink.h> 480e6fbc5bSPravin B Shelar 49aad88724SEric Dumazet int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, 500e6fbc5bSPravin B Shelar __be32 src, __be32 dst, __u8 proto, 51963a88b3SNicolas Dichtel __u8 tos, __u8 ttl, __be16 df, bool xnet) 520e6fbc5bSPravin B Shelar { 530e6fbc5bSPravin B Shelar int pkt_len = skb->len; 540e6fbc5bSPravin B Shelar struct iphdr *iph; 550e6fbc5bSPravin B Shelar int err; 560e6fbc5bSPravin B Shelar 57963a88b3SNicolas Dichtel skb_scrub_packet(skb, xnet); 58963a88b3SNicolas Dichtel 597539fadcSTom Herbert skb_clear_hash(skb); 600e6fbc5bSPravin B Shelar skb_dst_set(skb, &rt->dst); 610e6fbc5bSPravin B Shelar memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 620e6fbc5bSPravin B Shelar 630e6fbc5bSPravin B Shelar /* Push down and install the IP header. */ 6478a3694dSSteffen Klassert skb_push(skb, sizeof(struct iphdr)); 650e6fbc5bSPravin B Shelar skb_reset_network_header(skb); 660e6fbc5bSPravin B Shelar 670e6fbc5bSPravin B Shelar iph = ip_hdr(skb); 680e6fbc5bSPravin B Shelar 690e6fbc5bSPravin B Shelar iph->version = 4; 700e6fbc5bSPravin B Shelar iph->ihl = sizeof(struct iphdr) >> 2; 710e6fbc5bSPravin B Shelar iph->frag_off = df; 720e6fbc5bSPravin B Shelar iph->protocol = proto; 730e6fbc5bSPravin B Shelar iph->tos = tos; 740e6fbc5bSPravin B Shelar iph->daddr = dst; 750e6fbc5bSPravin B Shelar iph->saddr = src; 760e6fbc5bSPravin B Shelar iph->ttl = ttl; 77926a882fSHannes Frederic Sowa __ip_select_ident(dev_net(rt->dst.dev), iph, 78926a882fSHannes Frederic Sowa skb_shinfo(skb)->gso_segs ?: 1); 790e6fbc5bSPravin B Shelar 80aad88724SEric Dumazet err = ip_local_out_sk(sk, skb); 810e6fbc5bSPravin B Shelar if (unlikely(net_xmit_eval(err))) 820e6fbc5bSPravin B Shelar pkt_len = 0; 830e6fbc5bSPravin B Shelar return pkt_len; 840e6fbc5bSPravin B Shelar } 850e6fbc5bSPravin B Shelar EXPORT_SYMBOL_GPL(iptunnel_xmit); 863d7b46cdSPravin B Shelar 873d7b46cdSPravin B Shelar int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) 883d7b46cdSPravin B Shelar { 893d7b46cdSPravin B Shelar if (unlikely(!pskb_may_pull(skb, hdr_len))) 903d7b46cdSPravin B Shelar return -ENOMEM; 913d7b46cdSPravin B Shelar 923d7b46cdSPravin B Shelar skb_pull_rcsum(skb, hdr_len); 933d7b46cdSPravin B Shelar 943d7b46cdSPravin B Shelar if (inner_proto == htons(ETH_P_TEB)) { 951245dfc8SLi RongQing struct ethhdr *eh; 963d7b46cdSPravin B Shelar 973d7b46cdSPravin B Shelar if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) 983d7b46cdSPravin B Shelar return -ENOMEM; 993d7b46cdSPravin B Shelar 1001245dfc8SLi RongQing eh = (struct ethhdr *)skb->data; 101d181ddcaSAlexander Duyck if (likely(eth_proto_is_802_3(eh->h_proto))) 1023d7b46cdSPravin B Shelar skb->protocol = eh->h_proto; 1033d7b46cdSPravin B Shelar else 1043d7b46cdSPravin B Shelar skb->protocol = htons(ETH_P_802_2); 1053d7b46cdSPravin B Shelar 1063d7b46cdSPravin B Shelar } else { 1073d7b46cdSPravin B Shelar skb->protocol = inner_proto; 1083d7b46cdSPravin B Shelar } 1093d7b46cdSPravin B Shelar 1103d7b46cdSPravin B Shelar nf_reset(skb); 1113d7b46cdSPravin B Shelar secpath_reset(skb); 1127539fadcSTom Herbert skb_clear_hash_if_not_l4(skb); 113fbd02dd4SPravin B Shelar skb_dst_drop(skb); 1143d7b46cdSPravin B Shelar skb->vlan_tci = 0; 1153d7b46cdSPravin B Shelar skb_set_queue_mapping(skb, 0); 1163d7b46cdSPravin B Shelar skb->pkt_type = PACKET_HOST; 1173d7b46cdSPravin B Shelar return 0; 1183d7b46cdSPravin B Shelar } 1193d7b46cdSPravin B Shelar EXPORT_SYMBOL_GPL(iptunnel_pull_header); 1202d26f0a3SEric Dumazet 1212d26f0a3SEric Dumazet struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, 1222d26f0a3SEric Dumazet bool csum_help, 1232d26f0a3SEric Dumazet int gso_type_mask) 1242d26f0a3SEric Dumazet { 1252d26f0a3SEric Dumazet int err; 1262d26f0a3SEric Dumazet 1272d26f0a3SEric Dumazet if (likely(!skb->encapsulation)) { 1282d26f0a3SEric Dumazet skb_reset_inner_headers(skb); 1292d26f0a3SEric Dumazet skb->encapsulation = 1; 1302d26f0a3SEric Dumazet } 1312d26f0a3SEric Dumazet 1322d26f0a3SEric Dumazet if (skb_is_gso(skb)) { 1332d26f0a3SEric Dumazet err = skb_unclone(skb, GFP_ATOMIC); 1342d26f0a3SEric Dumazet if (unlikely(err)) 1352d26f0a3SEric Dumazet goto error; 1362d26f0a3SEric Dumazet skb_shinfo(skb)->gso_type |= gso_type_mask; 1372d26f0a3SEric Dumazet return skb; 1382d26f0a3SEric Dumazet } 1392d26f0a3SEric Dumazet 1407e2b10c1STom Herbert /* If packet is not gso and we are resolving any partial checksum, 1417e2b10c1STom Herbert * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL 1427e2b10c1STom Herbert * on the outer header without confusing devices that implement 1437e2b10c1STom Herbert * NETIF_F_IP_CSUM with encapsulation. 1447e2b10c1STom Herbert */ 1457e2b10c1STom Herbert if (csum_help) 1467e2b10c1STom Herbert skb->encapsulation = 0; 1477e2b10c1STom Herbert 1482d26f0a3SEric Dumazet if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) { 1492d26f0a3SEric Dumazet err = skb_checksum_help(skb); 1502d26f0a3SEric Dumazet if (unlikely(err)) 1512d26f0a3SEric Dumazet goto error; 1522d26f0a3SEric Dumazet } else if (skb->ip_summed != CHECKSUM_PARTIAL) 1532d26f0a3SEric Dumazet skb->ip_summed = CHECKSUM_NONE; 1542d26f0a3SEric Dumazet 1552d26f0a3SEric Dumazet return skb; 1562d26f0a3SEric Dumazet error: 1572d26f0a3SEric Dumazet kfree_skb(skb); 1582d26f0a3SEric Dumazet return ERR_PTR(err); 1592d26f0a3SEric Dumazet } 1602d26f0a3SEric Dumazet EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); 161ebe44f35SDavid S. Miller 162ebe44f35SDavid S. Miller /* Often modified stats are per cpu, other are shared (netdev->stats) */ 163ebe44f35SDavid S. Miller struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, 164ebe44f35SDavid S. Miller struct rtnl_link_stats64 *tot) 165ebe44f35SDavid S. Miller { 166ebe44f35SDavid S. Miller int i; 167ebe44f35SDavid S. Miller 168c24a5964SAlexander Duyck netdev_stats_to_stats64(tot, &dev->stats); 169c24a5964SAlexander Duyck 170ebe44f35SDavid S. Miller for_each_possible_cpu(i) { 171ebe44f35SDavid S. Miller const struct pcpu_sw_netstats *tstats = 172ebe44f35SDavid S. Miller per_cpu_ptr(dev->tstats, i); 173ebe44f35SDavid S. Miller u64 rx_packets, rx_bytes, tx_packets, tx_bytes; 174ebe44f35SDavid S. Miller unsigned int start; 175ebe44f35SDavid S. Miller 176ebe44f35SDavid S. Miller do { 17757a7744eSEric W. Biederman start = u64_stats_fetch_begin_irq(&tstats->syncp); 178ebe44f35SDavid S. Miller rx_packets = tstats->rx_packets; 179ebe44f35SDavid S. Miller tx_packets = tstats->tx_packets; 180ebe44f35SDavid S. Miller rx_bytes = tstats->rx_bytes; 181ebe44f35SDavid S. Miller tx_bytes = tstats->tx_bytes; 18257a7744eSEric W. Biederman } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); 183ebe44f35SDavid S. Miller 184ebe44f35SDavid S. Miller tot->rx_packets += rx_packets; 185ebe44f35SDavid S. Miller tot->tx_packets += tx_packets; 186ebe44f35SDavid S. Miller tot->rx_bytes += rx_bytes; 187ebe44f35SDavid S. Miller tot->tx_bytes += tx_bytes; 188ebe44f35SDavid S. Miller } 189ebe44f35SDavid S. Miller 190ebe44f35SDavid S. Miller return tot; 191ebe44f35SDavid S. Miller } 192ebe44f35SDavid S. Miller EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); 1933093fbe7SThomas Graf 1943093fbe7SThomas Graf static const struct nla_policy ip_tun_policy[IP_TUN_MAX + 1] = { 1953093fbe7SThomas Graf [IP_TUN_ID] = { .type = NLA_U64 }, 1963093fbe7SThomas Graf [IP_TUN_DST] = { .type = NLA_U32 }, 1973093fbe7SThomas Graf [IP_TUN_SRC] = { .type = NLA_U32 }, 1983093fbe7SThomas Graf [IP_TUN_TTL] = { .type = NLA_U8 }, 1993093fbe7SThomas Graf [IP_TUN_TOS] = { .type = NLA_U8 }, 2003093fbe7SThomas Graf [IP_TUN_SPORT] = { .type = NLA_U16 }, 2013093fbe7SThomas Graf [IP_TUN_DPORT] = { .type = NLA_U16 }, 2023093fbe7SThomas Graf [IP_TUN_FLAGS] = { .type = NLA_U16 }, 2033093fbe7SThomas Graf }; 2043093fbe7SThomas Graf 2053093fbe7SThomas Graf static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, 2063093fbe7SThomas Graf struct lwtunnel_state **ts) 2073093fbe7SThomas Graf { 2083093fbe7SThomas Graf struct ip_tunnel_info *tun_info; 2093093fbe7SThomas Graf struct lwtunnel_state *new_state; 2103093fbe7SThomas Graf struct nlattr *tb[IP_TUN_MAX + 1]; 2113093fbe7SThomas Graf int err; 2123093fbe7SThomas Graf 2133093fbe7SThomas Graf err = nla_parse_nested(tb, IP_TUN_MAX, attr, ip_tun_policy); 2143093fbe7SThomas Graf if (err < 0) 2153093fbe7SThomas Graf return err; 2163093fbe7SThomas Graf 2173093fbe7SThomas Graf new_state = lwtunnel_state_alloc(sizeof(*tun_info)); 2183093fbe7SThomas Graf if (!new_state) 2193093fbe7SThomas Graf return -ENOMEM; 2203093fbe7SThomas Graf 2213093fbe7SThomas Graf new_state->type = LWTUNNEL_ENCAP_IP; 2223093fbe7SThomas Graf 2233093fbe7SThomas Graf tun_info = lwt_tun_info(new_state); 2243093fbe7SThomas Graf 2253093fbe7SThomas Graf if (tb[IP_TUN_ID]) 2263093fbe7SThomas Graf tun_info->key.tun_id = nla_get_u64(tb[IP_TUN_ID]); 2273093fbe7SThomas Graf 2283093fbe7SThomas Graf if (tb[IP_TUN_DST]) 2293093fbe7SThomas Graf tun_info->key.ipv4_dst = nla_get_be32(tb[IP_TUN_DST]); 2303093fbe7SThomas Graf 2313093fbe7SThomas Graf if (tb[IP_TUN_SRC]) 2323093fbe7SThomas Graf tun_info->key.ipv4_src = nla_get_be32(tb[IP_TUN_SRC]); 2333093fbe7SThomas Graf 2343093fbe7SThomas Graf if (tb[IP_TUN_TTL]) 2353093fbe7SThomas Graf tun_info->key.ipv4_ttl = nla_get_u8(tb[IP_TUN_TTL]); 2363093fbe7SThomas Graf 2373093fbe7SThomas Graf if (tb[IP_TUN_TOS]) 2383093fbe7SThomas Graf tun_info->key.ipv4_tos = nla_get_u8(tb[IP_TUN_TOS]); 2393093fbe7SThomas Graf 2403093fbe7SThomas Graf if (tb[IP_TUN_SPORT]) 2413093fbe7SThomas Graf tun_info->key.tp_src = nla_get_be16(tb[IP_TUN_SPORT]); 2423093fbe7SThomas Graf 2433093fbe7SThomas Graf if (tb[IP_TUN_DPORT]) 2443093fbe7SThomas Graf tun_info->key.tp_dst = nla_get_be16(tb[IP_TUN_DPORT]); 2453093fbe7SThomas Graf 2463093fbe7SThomas Graf if (tb[IP_TUN_FLAGS]) 2473093fbe7SThomas Graf tun_info->key.tun_flags = nla_get_u16(tb[IP_TUN_FLAGS]); 2483093fbe7SThomas Graf 2493093fbe7SThomas Graf tun_info->mode = IP_TUNNEL_INFO_TX; 2503093fbe7SThomas Graf tun_info->options = NULL; 2513093fbe7SThomas Graf tun_info->options_len = 0; 2523093fbe7SThomas Graf 2533093fbe7SThomas Graf *ts = new_state; 2543093fbe7SThomas Graf 2553093fbe7SThomas Graf return 0; 2563093fbe7SThomas Graf } 2573093fbe7SThomas Graf 2583093fbe7SThomas Graf static int ip_tun_fill_encap_info(struct sk_buff *skb, 2593093fbe7SThomas Graf struct lwtunnel_state *lwtstate) 2603093fbe7SThomas Graf { 2613093fbe7SThomas Graf struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); 2623093fbe7SThomas Graf 2633093fbe7SThomas Graf if (nla_put_u64(skb, IP_TUN_ID, tun_info->key.tun_id) || 2643093fbe7SThomas Graf nla_put_be32(skb, IP_TUN_DST, tun_info->key.ipv4_dst) || 2653093fbe7SThomas Graf nla_put_be32(skb, IP_TUN_SRC, tun_info->key.ipv4_src) || 2663093fbe7SThomas Graf nla_put_u8(skb, IP_TUN_TOS, tun_info->key.ipv4_tos) || 2673093fbe7SThomas Graf nla_put_u8(skb, IP_TUN_TTL, tun_info->key.ipv4_ttl) || 2683093fbe7SThomas Graf nla_put_u16(skb, IP_TUN_SPORT, tun_info->key.tp_src) || 2693093fbe7SThomas Graf nla_put_u16(skb, IP_TUN_DPORT, tun_info->key.tp_dst) || 2703093fbe7SThomas Graf nla_put_u16(skb, IP_TUN_FLAGS, tun_info->key.tun_flags)) 2713093fbe7SThomas Graf return -ENOMEM; 2723093fbe7SThomas Graf 2733093fbe7SThomas Graf return 0; 2743093fbe7SThomas Graf } 2753093fbe7SThomas Graf 2763093fbe7SThomas Graf static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate) 2773093fbe7SThomas Graf { 2783093fbe7SThomas Graf return nla_total_size(8) /* IP_TUN_ID */ 2793093fbe7SThomas Graf + nla_total_size(4) /* IP_TUN_DST */ 2803093fbe7SThomas Graf + nla_total_size(4) /* IP_TUN_SRC */ 2813093fbe7SThomas Graf + nla_total_size(1) /* IP_TUN_TOS */ 2823093fbe7SThomas Graf + nla_total_size(1) /* IP_TUN_TTL */ 2833093fbe7SThomas Graf + nla_total_size(2) /* IP_TUN_SPORT */ 2843093fbe7SThomas Graf + nla_total_size(2) /* IP_TUN_DPORT */ 2853093fbe7SThomas Graf + nla_total_size(2); /* IP_TUN_FLAGS */ 2863093fbe7SThomas Graf } 2873093fbe7SThomas Graf 2883093fbe7SThomas Graf static const struct lwtunnel_encap_ops ip_tun_lwt_ops = { 2893093fbe7SThomas Graf .build_state = ip_tun_build_state, 2903093fbe7SThomas Graf .fill_encap = ip_tun_fill_encap_info, 2913093fbe7SThomas Graf .get_encap_size = ip_tun_encap_nlsize, 2923093fbe7SThomas Graf }; 2933093fbe7SThomas Graf 2943093fbe7SThomas Graf static int __init ip_tunnel_core_init(void) 2953093fbe7SThomas Graf { 2963093fbe7SThomas Graf lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); 2973093fbe7SThomas Graf 2983093fbe7SThomas Graf return 0; 2993093fbe7SThomas Graf } 3003093fbe7SThomas Graf module_init(ip_tunnel_core_init); 3013093fbe7SThomas Graf 3023093fbe7SThomas Graf static void __exit ip_tunnel_core_exit(void) 3033093fbe7SThomas Graf { 3043093fbe7SThomas Graf lwtunnel_encap_del_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); 3053093fbe7SThomas Graf } 3063093fbe7SThomas Graf module_exit(ip_tunnel_core_exit); 307