xref: /openbmc/linux/net/ipv4/ip_vti.c (revision 3ca9760fdfa411f7e5db54b3437fbb858d2ec825)
1  /*
2   *	Linux NET3: IP/IP protocol decoder modified to support
3   *		    virtual tunnel interface
4   *
5   *	Authors:
6   *		Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
7   *
8   *	This program is free software; you can redistribute it and/or
9   *	modify it under the terms of the GNU General Public License
10   *	as published by the Free Software Foundation; either version
11   *	2 of the License, or (at your option) any later version.
12   *
13   */
14  
15  /*
16     This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c
17  
18     For comments look at net/ipv4/ip_gre.c --ANK
19   */
20  
21  
22  #include <linux/capability.h>
23  #include <linux/module.h>
24  #include <linux/types.h>
25  #include <linux/kernel.h>
26  #include <linux/uaccess.h>
27  #include <linux/skbuff.h>
28  #include <linux/netdevice.h>
29  #include <linux/in.h>
30  #include <linux/tcp.h>
31  #include <linux/udp.h>
32  #include <linux/if_arp.h>
33  #include <linux/init.h>
34  #include <linux/netfilter_ipv4.h>
35  #include <linux/if_ether.h>
36  #include <linux/icmpv6.h>
37  
38  #include <net/sock.h>
39  #include <net/ip.h>
40  #include <net/icmp.h>
41  #include <net/ip_tunnels.h>
42  #include <net/inet_ecn.h>
43  #include <net/xfrm.h>
44  #include <net/net_namespace.h>
45  #include <net/netns/generic.h>
46  
47  static struct rtnl_link_ops vti_link_ops __read_mostly;
48  
49  static int vti_net_id __read_mostly;
50  static int vti_tunnel_init(struct net_device *dev);
51  
52  static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
53  		     int encap_type)
54  {
55  	struct ip_tunnel *tunnel;
56  	const struct iphdr *iph = ip_hdr(skb);
57  	struct net *net = dev_net(skb->dev);
58  	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
59  
60  	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
61  				  iph->saddr, iph->daddr, 0);
62  	if (tunnel) {
63  		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
64  			goto drop;
65  
66  		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
67  
68  		return xfrm_input(skb, nexthdr, spi, encap_type);
69  	}
70  
71  	return -EINVAL;
72  drop:
73  	kfree_skb(skb);
74  	return 0;
75  }
76  
77  static int vti_rcv(struct sk_buff *skb)
78  {
79  	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
80  	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
81  
82  	return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
83  }
84  
85  static int vti_rcv_cb(struct sk_buff *skb, int err)
86  {
87  	unsigned short family;
88  	struct net_device *dev;
89  	struct pcpu_sw_netstats *tstats;
90  	struct xfrm_state *x;
91  	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
92  	u32 orig_mark = skb->mark;
93  	int ret;
94  
95  	if (!tunnel)
96  		return 1;
97  
98  	dev = tunnel->dev;
99  
100  	if (err) {
101  		dev->stats.rx_errors++;
102  		dev->stats.rx_dropped++;
103  
104  		return 0;
105  	}
106  
107  	x = xfrm_input_state(skb);
108  	family = x->inner_mode->afinfo->family;
109  
110  	skb->mark = be32_to_cpu(tunnel->parms.i_key);
111  	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
112  	skb->mark = orig_mark;
113  
114  	if (!ret)
115  		return -EPERM;
116  
117  	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
118  	skb->dev = dev;
119  
120  	tstats = this_cpu_ptr(dev->tstats);
121  
122  	u64_stats_update_begin(&tstats->syncp);
123  	tstats->rx_packets++;
124  	tstats->rx_bytes += skb->len;
125  	u64_stats_update_end(&tstats->syncp);
126  
127  	return 0;
128  }
129  
130  static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
131  {
132  	xfrm_address_t *daddr = (xfrm_address_t *)&dst;
133  	xfrm_address_t *saddr = (xfrm_address_t *)&src;
134  
135  	/* if there is no transform then this tunnel is not functional.
136  	 * Or if the xfrm is not mode tunnel.
137  	 */
138  	if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
139  	    x->props.family != AF_INET)
140  		return false;
141  
142  	if (!dst)
143  		return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
144  
145  	if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
146  		return false;
147  
148  	return true;
149  }
150  
151  static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
152  			    struct flowi *fl)
153  {
154  	struct ip_tunnel *tunnel = netdev_priv(dev);
155  	struct ip_tunnel_parm *parms = &tunnel->parms;
156  	struct dst_entry *dst = skb_dst(skb);
157  	struct net_device *tdev;	/* Device to other host */
158  	int err;
159  	int mtu;
160  
161  	if (!dst) {
162  		dev->stats.tx_carrier_errors++;
163  		goto tx_error_icmp;
164  	}
165  
166  	dst_hold(dst);
167  	dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
168  	if (IS_ERR(dst)) {
169  		dev->stats.tx_carrier_errors++;
170  		goto tx_error_icmp;
171  	}
172  
173  	if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
174  		dev->stats.tx_carrier_errors++;
175  		dst_release(dst);
176  		goto tx_error_icmp;
177  	}
178  
179  	tdev = dst->dev;
180  
181  	if (tdev == dev) {
182  		dst_release(dst);
183  		dev->stats.collisions++;
184  		goto tx_error;
185  	}
186  
187  	if (tunnel->err_count > 0) {
188  		if (time_before(jiffies,
189  				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
190  			tunnel->err_count--;
191  			dst_link_failure(skb);
192  		} else
193  			tunnel->err_count = 0;
194  	}
195  
196  	mtu = dst_mtu(dst);
197  	if (skb->len > mtu) {
198  		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
199  		if (skb->protocol == htons(ETH_P_IP)) {
200  			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
201  				  htonl(mtu));
202  		} else {
203  			if (mtu < IPV6_MIN_MTU)
204  				mtu = IPV6_MIN_MTU;
205  
206  			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
207  		}
208  
209  		dst_release(dst);
210  		goto tx_error;
211  	}
212  
213  	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
214  	skb_dst_set(skb, dst);
215  	skb->dev = skb_dst(skb)->dev;
216  
217  	err = dst_output(tunnel->net, skb->sk, skb);
218  	if (net_xmit_eval(err) == 0)
219  		err = skb->len;
220  	iptunnel_xmit_stats(dev, err);
221  	return NETDEV_TX_OK;
222  
223  tx_error_icmp:
224  	dst_link_failure(skb);
225  tx_error:
226  	dev->stats.tx_errors++;
227  	kfree_skb(skb);
228  	return NETDEV_TX_OK;
229  }
230  
231  /* This function assumes it is being called from dev_queue_xmit()
232   * and that skb is filled properly by that function.
233   */
234  static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
235  {
236  	struct ip_tunnel *tunnel = netdev_priv(dev);
237  	struct flowi fl;
238  
239  	memset(&fl, 0, sizeof(fl));
240  
241  	switch (skb->protocol) {
242  	case htons(ETH_P_IP):
243  		xfrm_decode_session(skb, &fl, AF_INET);
244  		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
245  		break;
246  	case htons(ETH_P_IPV6):
247  		xfrm_decode_session(skb, &fl, AF_INET6);
248  		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
249  		break;
250  	default:
251  		dev->stats.tx_errors++;
252  		dev_kfree_skb(skb);
253  		return NETDEV_TX_OK;
254  	}
255  
256  	/* override mark with tunnel output key */
257  	fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
258  
259  	return vti_xmit(skb, dev, &fl);
260  }
261  
262  static int vti4_err(struct sk_buff *skb, u32 info)
263  {
264  	__be32 spi;
265  	__u32 mark;
266  	struct xfrm_state *x;
267  	struct ip_tunnel *tunnel;
268  	struct ip_esp_hdr *esph;
269  	struct ip_auth_hdr *ah ;
270  	struct ip_comp_hdr *ipch;
271  	struct net *net = dev_net(skb->dev);
272  	const struct iphdr *iph = (const struct iphdr *)skb->data;
273  	int protocol = iph->protocol;
274  	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
275  
276  	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
277  				  iph->daddr, iph->saddr, 0);
278  	if (!tunnel)
279  		return -1;
280  
281  	mark = be32_to_cpu(tunnel->parms.o_key);
282  
283  	switch (protocol) {
284  	case IPPROTO_ESP:
285  		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
286  		spi = esph->spi;
287  		break;
288  	case IPPROTO_AH:
289  		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
290  		spi = ah->spi;
291  		break;
292  	case IPPROTO_COMP:
293  		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
294  		spi = htonl(ntohs(ipch->cpi));
295  		break;
296  	default:
297  		return 0;
298  	}
299  
300  	switch (icmp_hdr(skb)->type) {
301  	case ICMP_DEST_UNREACH:
302  		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
303  			return 0;
304  	case ICMP_REDIRECT:
305  		break;
306  	default:
307  		return 0;
308  	}
309  
310  	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
311  			      spi, protocol, AF_INET);
312  	if (!x)
313  		return 0;
314  
315  	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
316  		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
317  	else
318  		ipv4_redirect(skb, net, 0, 0, protocol, 0);
319  	xfrm_state_put(x);
320  
321  	return 0;
322  }
323  
324  static int
325  vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
326  {
327  	int err = 0;
328  	struct ip_tunnel_parm p;
329  
330  	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
331  		return -EFAULT;
332  
333  	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
334  		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
335  		    p.iph.ihl != 5)
336  			return -EINVAL;
337  	}
338  
339  	if (!(p.i_flags & GRE_KEY))
340  		p.i_key = 0;
341  	if (!(p.o_flags & GRE_KEY))
342  		p.o_key = 0;
343  
344  	p.i_flags = VTI_ISVTI;
345  
346  	err = ip_tunnel_ioctl(dev, &p, cmd);
347  	if (err)
348  		return err;
349  
350  	if (cmd != SIOCDELTUNNEL) {
351  		p.i_flags |= GRE_KEY;
352  		p.o_flags |= GRE_KEY;
353  	}
354  
355  	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
356  		return -EFAULT;
357  	return 0;
358  }
359  
360  static const struct net_device_ops vti_netdev_ops = {
361  	.ndo_init	= vti_tunnel_init,
362  	.ndo_uninit	= ip_tunnel_uninit,
363  	.ndo_start_xmit	= vti_tunnel_xmit,
364  	.ndo_do_ioctl	= vti_tunnel_ioctl,
365  	.ndo_change_mtu	= ip_tunnel_change_mtu,
366  	.ndo_get_stats64 = ip_tunnel_get_stats64,
367  	.ndo_get_iflink = ip_tunnel_get_iflink,
368  };
369  
370  static void vti_tunnel_setup(struct net_device *dev)
371  {
372  	dev->netdev_ops		= &vti_netdev_ops;
373  	dev->type		= ARPHRD_TUNNEL;
374  	ip_tunnel_setup(dev, vti_net_id);
375  }
376  
377  static int vti_tunnel_init(struct net_device *dev)
378  {
379  	struct ip_tunnel *tunnel = netdev_priv(dev);
380  	struct iphdr *iph = &tunnel->parms.iph;
381  
382  	memcpy(dev->dev_addr, &iph->saddr, 4);
383  	memcpy(dev->broadcast, &iph->daddr, 4);
384  
385  	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
386  	dev->mtu		= ETH_DATA_LEN;
387  	dev->flags		= IFF_NOARP;
388  	dev->addr_len		= 4;
389  	dev->features		|= NETIF_F_LLTX;
390  	netif_keep_dst(dev);
391  
392  	return ip_tunnel_init(dev);
393  }
394  
395  static void __net_init vti_fb_tunnel_init(struct net_device *dev)
396  {
397  	struct ip_tunnel *tunnel = netdev_priv(dev);
398  	struct iphdr *iph = &tunnel->parms.iph;
399  
400  	iph->version		= 4;
401  	iph->protocol		= IPPROTO_IPIP;
402  	iph->ihl		= 5;
403  }
404  
405  static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
406  	.handler	=	vti_rcv,
407  	.input_handler	=	vti_input,
408  	.cb_handler	=	vti_rcv_cb,
409  	.err_handler	=	vti4_err,
410  	.priority	=	100,
411  };
412  
413  static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
414  	.handler	=	vti_rcv,
415  	.input_handler	=	vti_input,
416  	.cb_handler	=	vti_rcv_cb,
417  	.err_handler	=	vti4_err,
418  	.priority	=	100,
419  };
420  
421  static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
422  	.handler	=	vti_rcv,
423  	.input_handler	=	vti_input,
424  	.cb_handler	=	vti_rcv_cb,
425  	.err_handler	=	vti4_err,
426  	.priority	=	100,
427  };
428  
429  static int __net_init vti_init_net(struct net *net)
430  {
431  	int err;
432  	struct ip_tunnel_net *itn;
433  
434  	err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
435  	if (err)
436  		return err;
437  	itn = net_generic(net, vti_net_id);
438  	vti_fb_tunnel_init(itn->fb_tunnel_dev);
439  	return 0;
440  }
441  
442  static void __net_exit vti_exit_net(struct net *net)
443  {
444  	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
445  	ip_tunnel_delete_net(itn, &vti_link_ops);
446  }
447  
448  static struct pernet_operations vti_net_ops = {
449  	.init = vti_init_net,
450  	.exit = vti_exit_net,
451  	.id   = &vti_net_id,
452  	.size = sizeof(struct ip_tunnel_net),
453  };
454  
/*
 * rtnl_link_ops.validate callback.  No VTI attribute is checked here;
 * every request is accepted.
 */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}
459  
460  static void vti_netlink_parms(struct nlattr *data[],
461  			      struct ip_tunnel_parm *parms)
462  {
463  	memset(parms, 0, sizeof(*parms));
464  
465  	parms->iph.protocol = IPPROTO_IPIP;
466  
467  	if (!data)
468  		return;
469  
470  	parms->i_flags = VTI_ISVTI;
471  
472  	if (data[IFLA_VTI_LINK])
473  		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
474  
475  	if (data[IFLA_VTI_IKEY])
476  		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
477  
478  	if (data[IFLA_VTI_OKEY])
479  		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
480  
481  	if (data[IFLA_VTI_LOCAL])
482  		parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]);
483  
484  	if (data[IFLA_VTI_REMOTE])
485  		parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);
486  
487  }
488  
489  static int vti_newlink(struct net *src_net, struct net_device *dev,
490  		       struct nlattr *tb[], struct nlattr *data[])
491  {
492  	struct ip_tunnel_parm parms;
493  
494  	vti_netlink_parms(data, &parms);
495  	return ip_tunnel_newlink(dev, tb, &parms);
496  }
497  
498  static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
499  			  struct nlattr *data[])
500  {
501  	struct ip_tunnel_parm p;
502  
503  	vti_netlink_parms(data, &p);
504  	return ip_tunnel_changelink(dev, tb, &p);
505  }
506  
/*
 * rtnl_link_ops.get_size callback: space needed for the five 4-byte
 * attributes that vti_fill_info() emits.
 */
static size_t vti_get_size(const struct net_device *dev)
{
	size_t total = 0;

	total += nla_total_size(4);	/* IFLA_VTI_LINK */
	total += nla_total_size(4);	/* IFLA_VTI_IKEY */
	total += nla_total_size(4);	/* IFLA_VTI_OKEY */
	total += nla_total_size(4);	/* IFLA_VTI_LOCAL */
	total += nla_total_size(4);	/* IFLA_VTI_REMOTE */

	return total;
}
522  
523  static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
524  {
525  	struct ip_tunnel *t = netdev_priv(dev);
526  	struct ip_tunnel_parm *p = &t->parms;
527  
528  	nla_put_u32(skb, IFLA_VTI_LINK, p->link);
529  	nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
530  	nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
531  	nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
532  	nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
533  
534  	return 0;
535  }
536  
537  static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
538  	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
539  	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
540  	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
541  	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
542  	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
543  };
544  
545  static struct rtnl_link_ops vti_link_ops __read_mostly = {
546  	.kind		= "vti",
547  	.maxtype	= IFLA_VTI_MAX,
548  	.policy		= vti_policy,
549  	.priv_size	= sizeof(struct ip_tunnel),
550  	.setup		= vti_tunnel_setup,
551  	.validate	= vti_tunnel_validate,
552  	.newlink	= vti_newlink,
553  	.changelink	= vti_changelink,
554  	.dellink        = ip_tunnel_dellink,
555  	.get_size	= vti_get_size,
556  	.fill_info	= vti_fill_info,
557  	.get_link_net	= ip_tunnel_get_link_net,
558  };
559  
560  static int __init vti_init(void)
561  {
562  	const char *msg;
563  	int err;
564  
565  	pr_info("IPv4 over IPsec tunneling driver\n");
566  
567  	msg = "tunnel device";
568  	err = register_pernet_device(&vti_net_ops);
569  	if (err < 0)
570  		goto pernet_dev_failed;
571  
572  	msg = "tunnel protocols";
573  	err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
574  	if (err < 0)
575  		goto xfrm_proto_esp_failed;
576  	err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
577  	if (err < 0)
578  		goto xfrm_proto_ah_failed;
579  	err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
580  	if (err < 0)
581  		goto xfrm_proto_comp_failed;
582  
583  	msg = "netlink interface";
584  	err = rtnl_link_register(&vti_link_ops);
585  	if (err < 0)
586  		goto rtnl_link_failed;
587  
588  	return err;
589  
590  rtnl_link_failed:
591  	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
592  xfrm_proto_comp_failed:
593  	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
594  xfrm_proto_ah_failed:
595  	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
596  xfrm_proto_esp_failed:
597  	unregister_pernet_device(&vti_net_ops);
598  pernet_dev_failed:
599  	pr_err("vti init: failed to register %s\n", msg);
600  	return err;
601  }
602  
603  static void __exit vti_fini(void)
604  {
605  	rtnl_link_unregister(&vti_link_ops);
606  	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
607  	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
608  	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
609  	unregister_pernet_device(&vti_net_ops);
610  }
611  
612  module_init(vti_init);
613  module_exit(vti_fini);
614  MODULE_LICENSE("GPL");
615  MODULE_ALIAS_RTNL_LINK("vti");
616  MODULE_ALIAS_NETDEV("ip_vti0");
617