1 /* 2 * Linux NET3: IP/IP protocol decoder modified to support 3 * virtual tunnel interface 4 * 5 * Authors: 6 * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 * 13 */ 14 15 /* 16 This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c 17 18 For comments look at net/ipv4/ip_gre.c --ANK 19 */ 20 21 22 #include <linux/capability.h> 23 #include <linux/module.h> 24 #include <linux/types.h> 25 #include <linux/kernel.h> 26 #include <linux/uaccess.h> 27 #include <linux/skbuff.h> 28 #include <linux/netdevice.h> 29 #include <linux/in.h> 30 #include <linux/tcp.h> 31 #include <linux/udp.h> 32 #include <linux/if_arp.h> 33 #include <linux/mroute.h> 34 #include <linux/init.h> 35 #include <linux/netfilter_ipv4.h> 36 #include <linux/if_ether.h> 37 38 #include <net/sock.h> 39 #include <net/ip.h> 40 #include <net/icmp.h> 41 #include <net/ip_tunnels.h> 42 #include <net/inet_ecn.h> 43 #include <net/xfrm.h> 44 #include <net/net_namespace.h> 45 #include <net/netns/generic.h> 46 47 static struct rtnl_link_ops vti_link_ops __read_mostly; 48 49 static int vti_net_id __read_mostly; 50 static int vti_tunnel_init(struct net_device *dev); 51 52 static int vti_err(struct sk_buff *skb, u32 info) 53 { 54 55 /* All the routers (except for Linux) return only 56 * 8 bytes of packet payload. It means, that precise relaying of 57 * ICMP in the real Internet is absolutely infeasible. 58 */ 59 struct net *net = dev_net(skb->dev); 60 struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 61 struct iphdr *iph = (struct iphdr *)skb->data; 62 const int type = icmp_hdr(skb)->type; 63 const int code = icmp_hdr(skb)->code; 64 struct ip_tunnel *t; 65 int err; 66 67 switch (type) { 68 default: 69 case ICMP_PARAMETERPROB: 70 return 0; 71 72 case ICMP_DEST_UNREACH: 73 switch (code) { 74 case ICMP_SR_FAILED: 75 case ICMP_PORT_UNREACH: 76 /* Impossible event. */ 77 return 0; 78 default: 79 /* All others are translated to HOST_UNREACH. */ 80 break; 81 } 82 break; 83 case ICMP_TIME_EXCEEDED: 84 if (code != ICMP_EXC_TTL) 85 return 0; 86 break; 87 } 88 89 err = -ENOENT; 90 91 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 92 iph->daddr, iph->saddr, 0); 93 if (t == NULL) 94 goto out; 95 96 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 97 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 98 t->parms.link, 0, IPPROTO_IPIP, 0); 99 err = 0; 100 goto out; 101 } 102 103 err = 0; 104 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 105 goto out; 106 107 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 108 t->err_count++; 109 else 110 t->err_count = 1; 111 t->err_time = jiffies; 112 out: 113 return err; 114 } 115 116 /* We dont digest the packet therefore let the packet pass */ 117 static int vti_rcv(struct sk_buff *skb) 118 { 119 struct ip_tunnel *tunnel; 120 const struct iphdr *iph = ip_hdr(skb); 121 struct net *net = dev_net(skb->dev); 122 struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 123 124 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 125 iph->saddr, iph->daddr, 0); 126 if (tunnel != NULL) { 127 struct pcpu_tstats *tstats; 128 u32 oldmark = skb->mark; 129 int ret; 130 131 132 /* temporarily mark the skb with the tunnel o_key, to 133 * only match policies with this mark. 134 */ 135 skb->mark = be32_to_cpu(tunnel->parms.o_key); 136 ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); 137 skb->mark = oldmark; 138 if (!ret) 139 return -1; 140 141 tstats = this_cpu_ptr(tunnel->dev->tstats); 142 u64_stats_update_begin(&tstats->syncp); 143 tstats->rx_packets++; 144 tstats->rx_bytes += skb->len; 145 u64_stats_update_end(&tstats->syncp); 146 147 secpath_reset(skb); 148 skb->dev = tunnel->dev; 149 return 1; 150 } 151 152 return -1; 153 } 154 155 /* This function assumes it is being called from dev_queue_xmit() 156 * and that skb is filled properly by that function. 157 */ 158 159 static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 160 { 161 struct ip_tunnel *tunnel = netdev_priv(dev); 162 struct iphdr *tiph = &tunnel->parms.iph; 163 u8 tos; 164 struct rtable *rt; /* Route to the other host */ 165 struct net_device *tdev; /* Device to other host */ 166 struct iphdr *old_iph = ip_hdr(skb); 167 __be32 dst = tiph->daddr; 168 struct flowi4 fl4; 169 int err; 170 171 if (skb->protocol != htons(ETH_P_IP)) 172 goto tx_error; 173 174 tos = old_iph->tos; 175 176 memset(&fl4, 0, sizeof(fl4)); 177 flowi4_init_output(&fl4, tunnel->parms.link, 178 be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), 179 RT_SCOPE_UNIVERSE, 180 IPPROTO_IPIP, 0, 181 dst, tiph->saddr, 0, 0); 182 rt = ip_route_output_key(dev_net(dev), &fl4); 183 if (IS_ERR(rt)) { 184 dev->stats.tx_carrier_errors++; 185 goto tx_error_icmp; 186 } 187 /* if there is no transform then this tunnel is not functional. 188 * Or if the xfrm is not mode tunnel. 189 */ 190 if (!rt->dst.xfrm || 191 rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { 192 dev->stats.tx_carrier_errors++; 193 goto tx_error_icmp; 194 } 195 tdev = rt->dst.dev; 196 197 if (tdev == dev) { 198 ip_rt_put(rt); 199 dev->stats.collisions++; 200 goto tx_error; 201 } 202 203 if (tunnel->err_count > 0) { 204 if (time_before(jiffies, 205 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 206 tunnel->err_count--; 207 dst_link_failure(skb); 208 } else 209 tunnel->err_count = 0; 210 } 211 212 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 213 skb_dst_drop(skb); 214 skb_dst_set(skb, &rt->dst); 215 nf_reset(skb); 216 skb->dev = skb_dst(skb)->dev; 217 218 err = dst_output(skb); 219 if (net_xmit_eval(err) == 0) 220 err = skb->len; 221 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 222 return NETDEV_TX_OK; 223 224 tx_error_icmp: 225 dst_link_failure(skb); 226 tx_error: 227 dev->stats.tx_errors++; 228 dev_kfree_skb(skb); 229 return NETDEV_TX_OK; 230 } 231 232 static int 233 vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 234 { 235 int err = 0; 236 struct ip_tunnel_parm p; 237 238 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 239 return -EFAULT; 240 241 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 242 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 243 p.iph.ihl != 5) 244 return -EINVAL; 245 } 246 247 err = ip_tunnel_ioctl(dev, &p, cmd); 248 if (err) 249 return err; 250 251 if (cmd != SIOCDELTUNNEL) { 252 p.i_flags |= GRE_KEY | VTI_ISVTI; 253 p.o_flags |= GRE_KEY; 254 } 255 256 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 257 return -EFAULT; 258 return 0; 259 } 260 261 static const struct net_device_ops vti_netdev_ops = { 262 .ndo_init = vti_tunnel_init, 263 .ndo_uninit = ip_tunnel_uninit, 264 .ndo_start_xmit = vti_tunnel_xmit, 265 .ndo_do_ioctl = vti_tunnel_ioctl, 266 .ndo_change_mtu = ip_tunnel_change_mtu, 267 .ndo_get_stats64 = ip_tunnel_get_stats64, 268 }; 269 270 static void vti_tunnel_setup(struct net_device *dev) 271 { 272 dev->netdev_ops = &vti_netdev_ops; 273 ip_tunnel_setup(dev, vti_net_id); 274 } 275 276 static int vti_tunnel_init(struct net_device *dev) 277 { 278 struct ip_tunnel *tunnel = netdev_priv(dev); 279 struct iphdr *iph = &tunnel->parms.iph; 280 281 memcpy(dev->dev_addr, &iph->saddr, 4); 282 memcpy(dev->broadcast, &iph->daddr, 4); 283 284 dev->type = ARPHRD_TUNNEL; 285 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 286 dev->mtu = ETH_DATA_LEN; 287 dev->flags = IFF_NOARP; 288 dev->iflink = 0; 289 dev->addr_len = 4; 290 dev->features |= NETIF_F_NETNS_LOCAL; 291 dev->features |= NETIF_F_LLTX; 292 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 293 294 return ip_tunnel_init(dev); 295 } 296 297 static void __net_init vti_fb_tunnel_init(struct net_device *dev) 298 { 299 struct ip_tunnel *tunnel = netdev_priv(dev); 300 struct iphdr *iph = &tunnel->parms.iph; 301 302 iph->version = 4; 303 iph->protocol = IPPROTO_IPIP; 304 iph->ihl = 5; 305 } 306 307 static struct xfrm_tunnel vti_handler __read_mostly = { 308 .handler = vti_rcv, 309 .err_handler = vti_err, 310 .priority = 1, 311 }; 312 313 static int __net_init vti_init_net(struct net *net) 314 { 315 int err; 316 struct ip_tunnel_net *itn; 317 318 err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0"); 319 if (err) 320 return err; 321 itn = net_generic(net, vti_net_id); 322 vti_fb_tunnel_init(itn->fb_tunnel_dev); 323 return 0; 324 } 325 326 static void __net_exit vti_exit_net(struct net *net) 327 { 328 struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 329 ip_tunnel_delete_net(itn, &vti_link_ops); 330 } 331 332 static struct pernet_operations vti_net_ops = { 333 .init = vti_init_net, 334 .exit = vti_exit_net, 335 .id = &vti_net_id, 336 .size = sizeof(struct ip_tunnel_net), 337 }; 338 339 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 340 { 341 return 0; 342 } 343 344 static void vti_netlink_parms(struct nlattr *data[], 345 struct ip_tunnel_parm *parms) 346 { 347 memset(parms, 0, sizeof(*parms)); 348 349 parms->iph.protocol = IPPROTO_IPIP; 350 351 if (!data) 352 return; 353 354 if (data[IFLA_VTI_LINK]) 355 parms->link = nla_get_u32(data[IFLA_VTI_LINK]); 356 357 if (data[IFLA_VTI_IKEY]) 358 parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); 359 360 if (data[IFLA_VTI_OKEY]) 361 parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); 362 363 if (data[IFLA_VTI_LOCAL]) 364 parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]); 365 366 if (data[IFLA_VTI_REMOTE]) 367 parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]); 368 369 } 370 371 static int vti_newlink(struct net *src_net, struct net_device *dev, 372 struct nlattr *tb[], struct nlattr *data[]) 373 { 374 struct ip_tunnel_parm parms; 375 376 vti_netlink_parms(data, &parms); 377 return ip_tunnel_newlink(dev, tb, &parms); 378 } 379 380 static int vti_changelink(struct net_device *dev, struct nlattr *tb[], 381 struct nlattr *data[]) 382 { 383 struct ip_tunnel_parm p; 384 385 vti_netlink_parms(data, &p); 386 return ip_tunnel_changelink(dev, tb, &p); 387 } 388 389 static size_t vti_get_size(const struct net_device *dev) 390 { 391 return 392 /* IFLA_VTI_LINK */ 393 nla_total_size(4) + 394 /* IFLA_VTI_IKEY */ 395 nla_total_size(4) + 396 /* IFLA_VTI_OKEY */ 397 nla_total_size(4) + 398 /* IFLA_VTI_LOCAL */ 399 nla_total_size(4) + 400 /* IFLA_VTI_REMOTE */ 401 nla_total_size(4) + 402 0; 403 } 404 405 static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) 406 { 407 struct ip_tunnel *t = netdev_priv(dev); 408 struct ip_tunnel_parm *p = &t->parms; 409 410 nla_put_u32(skb, IFLA_VTI_LINK, p->link); 411 nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); 412 nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); 413 nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr); 414 nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr); 415 416 return 0; 417 } 418 419 static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = { 420 [IFLA_VTI_LINK] = { .type = NLA_U32 }, 421 [IFLA_VTI_IKEY] = { .type = NLA_U32 }, 422 [IFLA_VTI_OKEY] = { .type = NLA_U32 }, 423 [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 424 [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 425 }; 426 427 static struct rtnl_link_ops vti_link_ops __read_mostly = { 428 .kind = "vti", 429 .maxtype = IFLA_VTI_MAX, 430 .policy = vti_policy, 431 .priv_size = sizeof(struct ip_tunnel), 432 .setup = vti_tunnel_setup, 433 .validate = vti_tunnel_validate, 434 .newlink = vti_newlink, 435 .changelink = vti_changelink, 436 .get_size = vti_get_size, 437 .fill_info = vti_fill_info, 438 }; 439 440 static int __init vti_init(void) 441 { 442 int err; 443 444 pr_info("IPv4 over IPSec tunneling driver\n"); 445 446 err = register_pernet_device(&vti_net_ops); 447 if (err < 0) 448 return err; 449 err = xfrm4_mode_tunnel_input_register(&vti_handler); 450 if (err < 0) { 451 unregister_pernet_device(&vti_net_ops); 452 pr_info("vti init: can't register tunnel\n"); 453 } 454 455 err = rtnl_link_register(&vti_link_ops); 456 if (err < 0) 457 goto rtnl_link_failed; 458 459 return err; 460 461 rtnl_link_failed: 462 xfrm4_mode_tunnel_input_deregister(&vti_handler); 463 unregister_pernet_device(&vti_net_ops); 464 return err; 465 } 466 467 static void __exit vti_fini(void) 468 { 469 rtnl_link_unregister(&vti_link_ops); 470 if (xfrm4_mode_tunnel_input_deregister(&vti_handler)) 471 pr_info("vti close: can't deregister tunnel\n"); 472 473 unregister_pernet_device(&vti_net_ops); 474 } 475 476 module_init(vti_init); 477 module_exit(vti_fini); 478 MODULE_LICENSE("GPL"); 479 MODULE_ALIAS_RTNL_LINK("vti"); 480 MODULE_ALIAS_NETDEV("ip_vti0"); 481