xref: /openbmc/linux/net/ipv4/ip_gre.c (revision 5c73cc4b6c83e88863a5de869cc5df3b913aef4a)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 
51 #if IS_ENABLED(CONFIG_IPV6)
52 #include <net/ipv6.h>
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
55 #endif
56 
57 /*
58    Problems & solutions
59    --------------------
60 
61    1. The most important issue is detecting local dead loops.
62    They would cause complete host lockup in transmit, which
63    would be "resolved" by stack overflow or, if queueing is enabled,
64    with infinite looping in net_bh.
65 
66    We cannot track such dead loops during route installation,
67    it is infeasible task. The most general solutions would be
68    to keep skb->encapsulation counter (sort of local ttl),
69    and silently drop packet when it expires. It is a good
70    solution, but it supposes maintaining new variable in ALL
71    skb, even if no tunneling is used.
72 
73    Current solution: xmit_recursion breaks dead loops. This is a percpu
74    counter, since when we enter the first ndo_xmit(), cpu migration is
75    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76 
77    2. Networking dead loops would not kill routers, but would really
78    kill network. IP hop limit plays role of "t->recursion" in this case,
79    if we copy it from packet being encapsulated to upper header.
80    It is very good solution, but it introduces two problems:
81 
82    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83      do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.
90 
91    Hence, if we want that OSPF worked or traceroute said something reasonable,
92    we should search for another solution.
93 
   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
97 
98    Current solution: The solution was UNEXPECTEDLY SIMPLE.
99    We force DF flag on tunnels with preconfigured hop limit,
100    that is ALL. :-) Well, it does not remove the problem completely,
101    but exponential growth of network traffic is changed to linear
102    (branches, that exceed pmtu are pruned) and tunnel mtu
103    rapidly degrades to value <68, where looping stops.
104    Yes, it is not good if there exists a router in the loop,
105    which does not force DF, even when encapsulating packets have DF set.
106    But it is not our problem! Nobody could accuse us, we made
107    all that we could make. Even if it is your gated who injected
108    fatal route to network, even if it were you who configured
109    fatal static route: you are innocent. :-)
110 
111    Alexey Kuznetsov.
112  */
113 
/* Module parameter (mode 0644); handed to ip_tunnel_rcv() by ipgre_rcv(). */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Forward declarations; both are defined further down in this file. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

/* Ids passed to net_generic() to fetch the per-namespace ip_tunnel_net:
 * ipgre_net_id for plain GRE, gre_tap_net_id for ETH_P_TEB (gretap).
 */
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
123 
124 static int ipgre_err(struct sk_buff *skb, u32 info,
125 		     const struct tnl_ptk_info *tpi)
126 {
127 
128 	/* All the routers (except for Linux) return only
129 	   8 bytes of packet payload. It means, that precise relaying of
130 	   ICMP in the real Internet is absolutely infeasible.
131 
132 	   Moreover, Cisco "wise men" put GRE key to the third word
133 	   in GRE header. It makes impossible maintaining even soft
134 	   state for keyed GRE tunnels with enabled checksum. Tell
135 	   them "thank you".
136 
137 	   Well, I wonder, rfc1812 was written by Cisco employee,
138 	   what the hell these idiots break standards established
139 	   by themselves???
140 	   */
141 	struct net *net = dev_net(skb->dev);
142 	struct ip_tunnel_net *itn;
143 	const struct iphdr *iph;
144 	const int type = icmp_hdr(skb)->type;
145 	const int code = icmp_hdr(skb)->code;
146 	struct ip_tunnel *t;
147 
148 	switch (type) {
149 	default:
150 	case ICMP_PARAMETERPROB:
151 		return PACKET_RCVD;
152 
153 	case ICMP_DEST_UNREACH:
154 		switch (code) {
155 		case ICMP_SR_FAILED:
156 		case ICMP_PORT_UNREACH:
157 			/* Impossible event. */
158 			return PACKET_RCVD;
159 		default:
160 			/* All others are translated to HOST_UNREACH.
161 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
162 			   I believe they are just ether pollution. --ANK
163 			 */
164 			break;
165 		}
166 		break;
167 	case ICMP_TIME_EXCEEDED:
168 		if (code != ICMP_EXC_TTL)
169 			return PACKET_RCVD;
170 		break;
171 
172 	case ICMP_REDIRECT:
173 		break;
174 	}
175 
176 	if (tpi->proto == htons(ETH_P_TEB))
177 		itn = net_generic(net, gre_tap_net_id);
178 	else
179 		itn = net_generic(net, ipgre_net_id);
180 
181 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
182 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
183 			     iph->daddr, iph->saddr, tpi->key);
184 
185 	if (!t)
186 		return PACKET_REJECT;
187 
188 	if (t->parms.iph.daddr == 0 ||
189 	    ipv4_is_multicast(t->parms.iph.daddr))
190 		return PACKET_RCVD;
191 
192 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
193 		return PACKET_RCVD;
194 
195 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
196 		t->err_count++;
197 	else
198 		t->err_count = 1;
199 	t->err_time = jiffies;
200 	return PACKET_RCVD;
201 }
202 
203 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
204 {
205 	struct net *net = dev_net(skb->dev);
206 	struct ip_tunnel_net *itn;
207 	const struct iphdr *iph;
208 	struct ip_tunnel *tunnel;
209 
210 	if (tpi->proto == htons(ETH_P_TEB))
211 		itn = net_generic(net, gre_tap_net_id);
212 	else
213 		itn = net_generic(net, ipgre_net_id);
214 
215 	iph = ip_hdr(skb);
216 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
217 				  iph->saddr, iph->daddr, tpi->key);
218 
219 	if (tunnel) {
220 		skb_pop_mac_header(skb);
221 		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
222 		return PACKET_RCVD;
223 	}
224 	return PACKET_REJECT;
225 }
226 
227 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
228 		       const struct iphdr *tnl_params,
229 		       __be16 proto)
230 {
231 	struct ip_tunnel *tunnel = netdev_priv(dev);
232 	struct tnl_ptk_info tpi;
233 
234 	tpi.flags = tunnel->parms.o_flags;
235 	tpi.proto = proto;
236 	tpi.key = tunnel->parms.o_key;
237 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
238 		tunnel->o_seqno++;
239 	tpi.seq = htonl(tunnel->o_seqno);
240 
241 	/* Push GRE header. */
242 	gre_build_header(skb, &tpi, tunnel->tun_hlen);
243 
244 	skb_set_inner_protocol(skb, tpi.proto);
245 
246 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
247 }
248 
249 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
250 			      struct net_device *dev)
251 {
252 	struct ip_tunnel *tunnel = netdev_priv(dev);
253 	const struct iphdr *tnl_params;
254 
255 	if (dev->header_ops) {
256 		/* Need space for new headers */
257 		if (skb_cow_head(skb, dev->needed_headroom -
258 				      (tunnel->hlen + sizeof(struct iphdr))))
259 			goto free_skb;
260 
261 		tnl_params = (const struct iphdr *)skb->data;
262 
263 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
264 		 * to gre header.
265 		 */
266 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
267 		skb_reset_mac_header(skb);
268 	} else {
269 		if (skb_cow_head(skb, dev->needed_headroom))
270 			goto free_skb;
271 
272 		tnl_params = &tunnel->parms.iph;
273 	}
274 
275 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
276 	if (IS_ERR(skb))
277 		goto out;
278 
279 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
280 
281 	return NETDEV_TX_OK;
282 
283 free_skb:
284 	kfree_skb(skb);
285 out:
286 	dev->stats.tx_dropped++;
287 	return NETDEV_TX_OK;
288 }
289 
290 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
291 				struct net_device *dev)
292 {
293 	struct ip_tunnel *tunnel = netdev_priv(dev);
294 
295 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
296 	if (IS_ERR(skb))
297 		goto out;
298 
299 	if (skb_cow_head(skb, dev->needed_headroom))
300 		goto free_skb;
301 
302 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
303 
304 	return NETDEV_TX_OK;
305 
306 free_skb:
307 	kfree_skb(skb);
308 out:
309 	dev->stats.tx_dropped++;
310 	return NETDEV_TX_OK;
311 }
312 
313 static int ipgre_tunnel_ioctl(struct net_device *dev,
314 			      struct ifreq *ifr, int cmd)
315 {
316 	int err;
317 	struct ip_tunnel_parm p;
318 
319 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
320 		return -EFAULT;
321 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
322 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
323 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
324 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
325 			return -EINVAL;
326 	}
327 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
328 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
329 
330 	err = ip_tunnel_ioctl(dev, &p, cmd);
331 	if (err)
332 		return err;
333 
334 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
335 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
336 
337 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
338 		return -EFAULT;
339 	return 0;
340 }
341 
342 /* Nice toy. Unfortunately, useless in real life :-)
343    It allows to construct virtual multiprotocol broadcast "LAN"
344    over the Internet, provided multicast routing is tuned.
345 
346 
347    I have no idea was this bicycle invented before me,
348    so that I had to set ARPHRD_IPGRE to a random value.
349    I have an impression, that Cisco could make something similar,
350    but this feature is apparently missing in IOS<=11.2(8).
351 
352    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
353    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
354 
355    ping -t 255 224.66.66.66
356 
357    If nobody answers, mbone does not work.
358 
359    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
360    ip addr add 10.66.66.<somewhat>/24 dev Universe
361    ifconfig Universe up
362    ifconfig Universe add fe80::<Your_real_addr>/10
363    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
364    ftp 10.66.66.66
365    ...
366    ftp fec0:6666:6666::193.233.7.65
367    ...
368  */
369 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
370 			unsigned short type,
371 			const void *daddr, const void *saddr, unsigned int len)
372 {
373 	struct ip_tunnel *t = netdev_priv(dev);
374 	struct iphdr *iph;
375 	struct gre_base_hdr *greh;
376 
377 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
378 	greh = (struct gre_base_hdr *)(iph+1);
379 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
380 	greh->protocol = htons(type);
381 
382 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
383 
384 	/* Set the source hardware address. */
385 	if (saddr)
386 		memcpy(&iph->saddr, saddr, 4);
387 	if (daddr)
388 		memcpy(&iph->daddr, daddr, 4);
389 	if (iph->daddr)
390 		return t->hlen + sizeof(*iph);
391 
392 	return -(t->hlen + sizeof(*iph));
393 }
394 
395 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
396 {
397 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
398 	memcpy(haddr, &iph->saddr, 4);
399 	return 4;
400 }
401 
/* header_ops installed by ipgre_tunnel_init() on tunnels without a remote
 * address and, with CONFIG_NET_IPGRE_BROADCAST, on multicast tunnels.
 */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
406 
407 #ifdef CONFIG_NET_IPGRE_BROADCAST
408 static int ipgre_open(struct net_device *dev)
409 {
410 	struct ip_tunnel *t = netdev_priv(dev);
411 
412 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
413 		struct flowi4 fl4;
414 		struct rtable *rt;
415 
416 		rt = ip_route_output_gre(t->net, &fl4,
417 					 t->parms.iph.daddr,
418 					 t->parms.iph.saddr,
419 					 t->parms.o_key,
420 					 RT_TOS(t->parms.iph.tos),
421 					 t->parms.link);
422 		if (IS_ERR(rt))
423 			return -EADDRNOTAVAIL;
424 		dev = rt->dst.dev;
425 		ip_rt_put(rt);
426 		if (!__in_dev_get_rtnl(dev))
427 			return -EADDRNOTAVAIL;
428 		t->mlink = dev->ifindex;
429 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
430 	}
431 	return 0;
432 }
433 
434 static int ipgre_close(struct net_device *dev)
435 {
436 	struct ip_tunnel *t = netdev_priv(dev);
437 
438 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
439 		struct in_device *in_dev;
440 		in_dev = inetdev_by_index(t->net, t->mlink);
441 		if (in_dev)
442 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
443 	}
444 	return 0;
445 }
446 #endif
447 
/* net_device_ops for plain GRE devices, installed by ipgre_tunnel_setup().
 * The open/stop hooks exist only with CONFIG_NET_IPGRE_BROADCAST.
 */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};
461 
/* Feature bits that __gre_tunnel_init() ORs into both dev->features and
 * dev->hw_features of every GRE device.
 */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
466 
467 static void ipgre_tunnel_setup(struct net_device *dev)
468 {
469 	dev->netdev_ops		= &ipgre_netdev_ops;
470 	dev->type		= ARPHRD_IPGRE;
471 	ip_tunnel_setup(dev, ipgre_net_id);
472 }
473 
474 static void __gre_tunnel_init(struct net_device *dev)
475 {
476 	struct ip_tunnel *tunnel;
477 	int t_hlen;
478 
479 	tunnel = netdev_priv(dev);
480 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
481 	tunnel->parms.iph.protocol = IPPROTO_GRE;
482 
483 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
484 
485 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
486 
487 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
488 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
489 
490 	dev->features		|= GRE_FEATURES;
491 	dev->hw_features	|= GRE_FEATURES;
492 
493 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
494 		/* TCP offload with GRE SEQ is not supported. */
495 		dev->features    |= NETIF_F_GSO_SOFTWARE;
496 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
497 		/* Can use a lockless transmit, unless we generate
498 		 * output sequences
499 		 */
500 		dev->features |= NETIF_F_LLTX;
501 	}
502 }
503 
504 static int ipgre_tunnel_init(struct net_device *dev)
505 {
506 	struct ip_tunnel *tunnel = netdev_priv(dev);
507 	struct iphdr *iph = &tunnel->parms.iph;
508 
509 	__gre_tunnel_init(dev);
510 
511 	memcpy(dev->dev_addr, &iph->saddr, 4);
512 	memcpy(dev->broadcast, &iph->daddr, 4);
513 
514 	dev->flags		= IFF_NOARP;
515 	netif_keep_dst(dev);
516 	dev->addr_len		= 4;
517 
518 	if (iph->daddr) {
519 #ifdef CONFIG_NET_IPGRE_BROADCAST
520 		if (ipv4_is_multicast(iph->daddr)) {
521 			if (!iph->saddr)
522 				return -EINVAL;
523 			dev->flags = IFF_BROADCAST;
524 			dev->header_ops = &ipgre_header_ops;
525 		}
526 #endif
527 	} else
528 		dev->header_ops = &ipgre_header_ops;
529 
530 	return ip_tunnel_init(dev);
531 }
532 
/* Receive and ICMP-error hooks; registered with gre_cisco_register() in
 * ipgre_init() and removed again in ipgre_fini().
 */
static struct gre_cisco_protocol ipgre_protocol = {
	.handler        = ipgre_rcv,
	.err_handler    = ipgre_err,
	.priority       = 0,
};
538 
/* pernet init for "gre": delegates to ip_tunnel_init_net() with
 * ipgre_net_id and ipgre_link_ops; the last argument is passed as NULL.
 */
static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
543 
544 static void __net_exit ipgre_exit_net(struct net *net)
545 {
546 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
547 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
548 }
549 
/* Per-namespace operations for plain GRE; .id points at ipgre_net_id and
 * .size is the size of struct ip_tunnel_net.
 */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
556 
557 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
558 {
559 	__be16 flags;
560 
561 	if (!data)
562 		return 0;
563 
564 	flags = 0;
565 	if (data[IFLA_GRE_IFLAGS])
566 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
567 	if (data[IFLA_GRE_OFLAGS])
568 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
569 	if (flags & (GRE_VERSION|GRE_ROUTING))
570 		return -EINVAL;
571 
572 	return 0;
573 }
574 
575 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
576 {
577 	__be32 daddr;
578 
579 	if (tb[IFLA_ADDRESS]) {
580 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
581 			return -EINVAL;
582 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
583 			return -EADDRNOTAVAIL;
584 	}
585 
586 	if (!data)
587 		goto out;
588 
589 	if (data[IFLA_GRE_REMOTE]) {
590 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
591 		if (!daddr)
592 			return -EINVAL;
593 	}
594 
595 out:
596 	return ipgre_tunnel_validate(tb, data);
597 }
598 
/*
 * Translate IFLA_GRE_* netlink attributes into a struct ip_tunnel_parm.
 *
 * @parms is zeroed first and its IP protocol set to IPPROTO_GRE, so every
 * attribute that is absent leaves its field at zero. @tb is not used.
 */
static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
			       struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	/* Flags arrive in GRE wire format and are stored as tunnel flags. */
	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* DF is set unless IFLA_GRE_PMTUDISC is present with value 0. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
639 
640 /* This function returns true when ENCAP attributes are present in the nl msg */
641 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
642 				      struct ip_tunnel_encap *ipencap)
643 {
644 	bool ret = false;
645 
646 	memset(ipencap, 0, sizeof(*ipencap));
647 
648 	if (!data)
649 		return ret;
650 
651 	if (data[IFLA_GRE_ENCAP_TYPE]) {
652 		ret = true;
653 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
654 	}
655 
656 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
657 		ret = true;
658 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
659 	}
660 
661 	if (data[IFLA_GRE_ENCAP_SPORT]) {
662 		ret = true;
663 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
664 	}
665 
666 	if (data[IFLA_GRE_ENCAP_DPORT]) {
667 		ret = true;
668 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
669 	}
670 
671 	return ret;
672 }
673 
674 static int gre_tap_init(struct net_device *dev)
675 {
676 	__gre_tunnel_init(dev);
677 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
678 
679 	return ip_tunnel_init(dev);
680 }
681 
/* net_device_ops for gretap devices, installed by ipgre_tap_setup(). MAC
 * address handling uses the generic Ethernet helpers.
 */
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address 	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};
692 
693 static void ipgre_tap_setup(struct net_device *dev)
694 {
695 	ether_setup(dev);
696 	dev->netdev_ops		= &gre_tap_netdev_ops;
697 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
698 	ip_tunnel_setup(dev, gre_tap_net_id);
699 }
700 
701 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
702 			 struct nlattr *tb[], struct nlattr *data[])
703 {
704 	struct ip_tunnel_parm p;
705 	struct ip_tunnel_encap ipencap;
706 
707 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
708 		struct ip_tunnel *t = netdev_priv(dev);
709 		int err = ip_tunnel_encap_setup(t, &ipencap);
710 
711 		if (err < 0)
712 			return err;
713 	}
714 
715 	ipgre_netlink_parms(data, tb, &p);
716 	return ip_tunnel_newlink(dev, tb, &p);
717 }
718 
719 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
720 			    struct nlattr *data[])
721 {
722 	struct ip_tunnel_parm p;
723 	struct ip_tunnel_encap ipencap;
724 
725 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
726 		struct ip_tunnel *t = netdev_priv(dev);
727 		int err = ip_tunnel_encap_setup(t, &ipencap);
728 
729 		if (err < 0)
730 			return err;
731 	}
732 
733 	ipgre_netlink_parms(data, tb, &p);
734 	return ip_tunnel_changelink(dev, tb, &p);
735 }
736 
737 static size_t ipgre_get_size(const struct net_device *dev)
738 {
739 	return
740 		/* IFLA_GRE_LINK */
741 		nla_total_size(4) +
742 		/* IFLA_GRE_IFLAGS */
743 		nla_total_size(2) +
744 		/* IFLA_GRE_OFLAGS */
745 		nla_total_size(2) +
746 		/* IFLA_GRE_IKEY */
747 		nla_total_size(4) +
748 		/* IFLA_GRE_OKEY */
749 		nla_total_size(4) +
750 		/* IFLA_GRE_LOCAL */
751 		nla_total_size(4) +
752 		/* IFLA_GRE_REMOTE */
753 		nla_total_size(4) +
754 		/* IFLA_GRE_TTL */
755 		nla_total_size(1) +
756 		/* IFLA_GRE_TOS */
757 		nla_total_size(1) +
758 		/* IFLA_GRE_PMTUDISC */
759 		nla_total_size(1) +
760 		/* IFLA_GRE_ENCAP_TYPE */
761 		nla_total_size(2) +
762 		/* IFLA_GRE_ENCAP_FLAGS */
763 		nla_total_size(2) +
764 		/* IFLA_GRE_ENCAP_SPORT */
765 		nla_total_size(2) +
766 		/* IFLA_GRE_ENCAP_DPORT */
767 		nla_total_size(2) +
768 		0;
769 }
770 
771 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
772 {
773 	struct ip_tunnel *t = netdev_priv(dev);
774 	struct ip_tunnel_parm *p = &t->parms;
775 
776 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
777 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
778 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
779 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
780 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
781 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
782 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
783 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
784 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
785 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
786 		       !!(p->iph.frag_off & htons(IP_DF))))
787 		goto nla_put_failure;
788 
789 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
790 			t->encap.type) ||
791 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
792 			 t->encap.sport) ||
793 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
794 			 t->encap.dport) ||
795 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
796 			t->encap.flags))
797 		goto nla_put_failure;
798 
799 	return 0;
800 
801 nla_put_failure:
802 	return -EMSGSIZE;
803 }
804 
/* Netlink attribute policy shared by the "gre" and "gretap" link kinds.
 * LOCAL and REMOTE are described by length (size of the IPv4 address
 * fields) instead of by type.
 */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
};
821 
/* rtnetlink link operations for kind "gre" (layer-3 GRE tunnel devices). */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
836 
/* rtnetlink link operations for kind "gretap". Differs from ipgre_link_ops
 * only in .kind, .setup and .validate.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
851 
/* pernet init for "gretap": delegates to ip_tunnel_init_net() with
 * gre_tap_net_id and ipgre_tap_ops; the last argument is passed as NULL.
 */
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
}
856 
857 static void __net_exit ipgre_tap_exit_net(struct net *net)
858 {
859 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
860 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
861 }
862 
/* Per-namespace operations for gretap; .id points at gre_tap_net_id and
 * .size is the size of struct ip_tunnel_net.
 */
static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
869 
870 static int __init ipgre_init(void)
871 {
872 	int err;
873 
874 	pr_info("GRE over IPv4 tunneling driver\n");
875 
876 	err = register_pernet_device(&ipgre_net_ops);
877 	if (err < 0)
878 		return err;
879 
880 	err = register_pernet_device(&ipgre_tap_net_ops);
881 	if (err < 0)
882 		goto pnet_tap_faied;
883 
884 	err = gre_cisco_register(&ipgre_protocol);
885 	if (err < 0) {
886 		pr_info("%s: can't add protocol\n", __func__);
887 		goto add_proto_failed;
888 	}
889 
890 	err = rtnl_link_register(&ipgre_link_ops);
891 	if (err < 0)
892 		goto rtnl_link_failed;
893 
894 	err = rtnl_link_register(&ipgre_tap_ops);
895 	if (err < 0)
896 		goto tap_ops_failed;
897 
898 	return 0;
899 
900 tap_ops_failed:
901 	rtnl_link_unregister(&ipgre_link_ops);
902 rtnl_link_failed:
903 	gre_cisco_unregister(&ipgre_protocol);
904 add_proto_failed:
905 	unregister_pernet_device(&ipgre_tap_net_ops);
906 pnet_tap_faied:
907 	unregister_pernet_device(&ipgre_net_ops);
908 	return err;
909 }
910 
/* Module exit: undo the registrations made by ipgre_init(), in reverse
 * order of registration.
 */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_cisco_unregister(&ipgre_protocol);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}
919 
/* Module entry/exit points, licence, and aliases for the "gre"/"gretap"
 * link kinds and the "gre0"/"gretap0" device names.
 */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
927