xref: /openbmc/linux/net/ipv4/ip_gre.c (revision 94c7b6fc)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 
51 #if IS_ENABLED(CONFIG_IPV6)
52 #include <net/ipv6.h>
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
55 #endif
56 
57 /*
58    Problems & solutions
59    --------------------
60 
61    1. The most important issue is detecting local dead loops.
62    They would cause complete host lockup in transmit, which
63    would be "resolved" by stack overflow or, if queueing is enabled,
64    with infinite looping in net_bh.
65 
66    We cannot track such dead loops during route installation,
67    it is infeasible task. The most general solutions would be
68    to keep skb->encapsulation counter (sort of local ttl),
69    and silently drop packet when it expires. It is a good
70    solution, but it supposes maintaining new variable in ALL
71    skb, even if no tunneling is used.
72 
73    Current solution: xmit_recursion breaks dead loops. This is a percpu
74    counter, since when we enter the first ndo_xmit(), cpu migration is
75    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76 
77    2. Networking dead loops would not kill routers, but would really
78    kill network. IP hop limit plays role of "t->recursion" in this case,
79    if we copy it from packet being encapsulated to upper header.
80    It is very good solution, but it introduces two problems:
81 
82    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83      do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea turned out to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all other routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.
90 
91    Hence, if we want that OSPF worked or traceroute said something reasonable,
92    we should search for another solution.
93 
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution at all.
97 
98    Current solution: The solution was UNEXPECTEDLY SIMPLE.
99    We force DF flag on tunnels with preconfigured hop limit,
100    that is ALL. :-) Well, it does not remove the problem completely,
101    but exponential growth of network traffic is changed to linear
102    (branches, that exceed pmtu are pruned) and tunnel mtu
103    rapidly degrades to value <68, where looping stops.
104    Yes, it is not good if there exists a router in the loop,
105    which does not force DF, even when encapsulating packets have DF set.
106    But it is not our problem! Nobody could accuse us, we made
107    all that we could make. Even if it is your gated who injected
108    fatal route to network, even if it were you who configured
109    fatal static route: you are innocent. :-)
110 
111    Alexey Kuznetsov.
112  */
113 
/* When true, packets received with a corrupted ECN marking are logged.
 * The flag is passed to ip_tunnel_rcv() by ipgre_rcv() and is exposed as a
 * module parameter with mode 0644.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117 
118 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119 static int ipgre_tunnel_init(struct net_device *dev);
120 
121 static int ipgre_net_id __read_mostly;
122 static int gre_tap_net_id __read_mostly;
123 
124 static int ipgre_err(struct sk_buff *skb, u32 info,
125 		     const struct tnl_ptk_info *tpi)
126 {
127 
128 	/* All the routers (except for Linux) return only
129 	   8 bytes of packet payload. It means, that precise relaying of
130 	   ICMP in the real Internet is absolutely infeasible.
131 
132 	   Moreover, Cisco "wise men" put GRE key to the third word
133 	   in GRE header. It makes impossible maintaining even soft
134 	   state for keyed GRE tunnels with enabled checksum. Tell
135 	   them "thank you".
136 
137 	   Well, I wonder, rfc1812 was written by Cisco employee,
138 	   what the hell these idiots break standards established
139 	   by themselves???
140 	   */
141 	struct net *net = dev_net(skb->dev);
142 	struct ip_tunnel_net *itn;
143 	const struct iphdr *iph;
144 	const int type = icmp_hdr(skb)->type;
145 	const int code = icmp_hdr(skb)->code;
146 	struct ip_tunnel *t;
147 
148 	switch (type) {
149 	default:
150 	case ICMP_PARAMETERPROB:
151 		return PACKET_RCVD;
152 
153 	case ICMP_DEST_UNREACH:
154 		switch (code) {
155 		case ICMP_SR_FAILED:
156 		case ICMP_PORT_UNREACH:
157 			/* Impossible event. */
158 			return PACKET_RCVD;
159 		default:
160 			/* All others are translated to HOST_UNREACH.
161 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
162 			   I believe they are just ether pollution. --ANK
163 			 */
164 			break;
165 		}
166 		break;
167 	case ICMP_TIME_EXCEEDED:
168 		if (code != ICMP_EXC_TTL)
169 			return PACKET_RCVD;
170 		break;
171 
172 	case ICMP_REDIRECT:
173 		break;
174 	}
175 
176 	if (tpi->proto == htons(ETH_P_TEB))
177 		itn = net_generic(net, gre_tap_net_id);
178 	else
179 		itn = net_generic(net, ipgre_net_id);
180 
181 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
182 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
183 			     iph->daddr, iph->saddr, tpi->key);
184 
185 	if (t == NULL)
186 		return PACKET_REJECT;
187 
188 	if (t->parms.iph.daddr == 0 ||
189 	    ipv4_is_multicast(t->parms.iph.daddr))
190 		return PACKET_RCVD;
191 
192 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
193 		return PACKET_RCVD;
194 
195 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
196 		t->err_count++;
197 	else
198 		t->err_count = 1;
199 	t->err_time = jiffies;
200 	return PACKET_RCVD;
201 }
202 
203 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
204 {
205 	struct net *net = dev_net(skb->dev);
206 	struct ip_tunnel_net *itn;
207 	const struct iphdr *iph;
208 	struct ip_tunnel *tunnel;
209 
210 	if (tpi->proto == htons(ETH_P_TEB))
211 		itn = net_generic(net, gre_tap_net_id);
212 	else
213 		itn = net_generic(net, ipgre_net_id);
214 
215 	iph = ip_hdr(skb);
216 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
217 				  iph->saddr, iph->daddr, tpi->key);
218 
219 	if (tunnel) {
220 		skb_pop_mac_header(skb);
221 		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
222 		return PACKET_RCVD;
223 	}
224 	return PACKET_REJECT;
225 }
226 
227 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
228 		       const struct iphdr *tnl_params,
229 		       __be16 proto)
230 {
231 	struct ip_tunnel *tunnel = netdev_priv(dev);
232 	struct tnl_ptk_info tpi;
233 
234 	tpi.flags = tunnel->parms.o_flags;
235 	tpi.proto = proto;
236 	tpi.key = tunnel->parms.o_key;
237 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
238 		tunnel->o_seqno++;
239 	tpi.seq = htonl(tunnel->o_seqno);
240 
241 	/* Push GRE header. */
242 	gre_build_header(skb, &tpi, tunnel->hlen);
243 
244 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
245 }
246 
247 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
248 			      struct net_device *dev)
249 {
250 	struct ip_tunnel *tunnel = netdev_priv(dev);
251 	const struct iphdr *tnl_params;
252 
253 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
254 	if (IS_ERR(skb))
255 		goto out;
256 
257 	if (dev->header_ops) {
258 		/* Need space for new headers */
259 		if (skb_cow_head(skb, dev->needed_headroom -
260 				      (tunnel->hlen + sizeof(struct iphdr))))
261 			goto free_skb;
262 
263 		tnl_params = (const struct iphdr *)skb->data;
264 
265 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
266 		 * to gre header.
267 		 */
268 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
269 	} else {
270 		if (skb_cow_head(skb, dev->needed_headroom))
271 			goto free_skb;
272 
273 		tnl_params = &tunnel->parms.iph;
274 	}
275 
276 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
277 
278 	return NETDEV_TX_OK;
279 
280 free_skb:
281 	kfree_skb(skb);
282 out:
283 	dev->stats.tx_dropped++;
284 	return NETDEV_TX_OK;
285 }
286 
287 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
288 				struct net_device *dev)
289 {
290 	struct ip_tunnel *tunnel = netdev_priv(dev);
291 
292 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
293 	if (IS_ERR(skb))
294 		goto out;
295 
296 	if (skb_cow_head(skb, dev->needed_headroom))
297 		goto free_skb;
298 
299 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
300 
301 	return NETDEV_TX_OK;
302 
303 free_skb:
304 	kfree_skb(skb);
305 out:
306 	dev->stats.tx_dropped++;
307 	return NETDEV_TX_OK;
308 }
309 
310 static int ipgre_tunnel_ioctl(struct net_device *dev,
311 			      struct ifreq *ifr, int cmd)
312 {
313 	int err = 0;
314 	struct ip_tunnel_parm p;
315 
316 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
317 		return -EFAULT;
318 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
319 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
320 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
321 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
322 			return -EINVAL;
323 	}
324 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
325 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
326 
327 	err = ip_tunnel_ioctl(dev, &p, cmd);
328 	if (err)
329 		return err;
330 
331 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
332 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
333 
334 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
335 		return -EFAULT;
336 	return 0;
337 }
338 
339 /* Nice toy. Unfortunately, useless in real life :-)
340    It allows to construct virtual multiprotocol broadcast "LAN"
341    over the Internet, provided multicast routing is tuned.
342 
343 
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
348 
349    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
350    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
351 
352    ping -t 255 224.66.66.66
353 
354    If nobody answers, mbone does not work.
355 
356    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
357    ip addr add 10.66.66.<somewhat>/24 dev Universe
358    ifconfig Universe up
359    ifconfig Universe add fe80::<Your_real_addr>/10
360    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
361    ftp 10.66.66.66
362    ...
363    ftp fec0:6666:6666::193.233.7.65
364    ...
365  */
366 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
367 			unsigned short type,
368 			const void *daddr, const void *saddr, unsigned int len)
369 {
370 	struct ip_tunnel *t = netdev_priv(dev);
371 	struct iphdr *iph;
372 	struct gre_base_hdr *greh;
373 
374 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
375 	greh = (struct gre_base_hdr *)(iph+1);
376 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
377 	greh->protocol = htons(type);
378 
379 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
380 
381 	/* Set the source hardware address. */
382 	if (saddr)
383 		memcpy(&iph->saddr, saddr, 4);
384 	if (daddr)
385 		memcpy(&iph->daddr, daddr, 4);
386 	if (iph->daddr)
387 		return t->hlen + sizeof(*iph);
388 
389 	return -(t->hlen + sizeof(*iph));
390 }
391 
392 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
393 {
394 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
395 	memcpy(haddr, &iph->saddr, 4);
396 	return 4;
397 }
398 
399 static const struct header_ops ipgre_header_ops = {
400 	.create	= ipgre_header,
401 	.parse	= ipgre_header_parse,
402 };
403 
404 #ifdef CONFIG_NET_IPGRE_BROADCAST
405 static int ipgre_open(struct net_device *dev)
406 {
407 	struct ip_tunnel *t = netdev_priv(dev);
408 
409 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
410 		struct flowi4 fl4;
411 		struct rtable *rt;
412 
413 		rt = ip_route_output_gre(t->net, &fl4,
414 					 t->parms.iph.daddr,
415 					 t->parms.iph.saddr,
416 					 t->parms.o_key,
417 					 RT_TOS(t->parms.iph.tos),
418 					 t->parms.link);
419 		if (IS_ERR(rt))
420 			return -EADDRNOTAVAIL;
421 		dev = rt->dst.dev;
422 		ip_rt_put(rt);
423 		if (__in_dev_get_rtnl(dev) == NULL)
424 			return -EADDRNOTAVAIL;
425 		t->mlink = dev->ifindex;
426 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
427 	}
428 	return 0;
429 }
430 
431 static int ipgre_close(struct net_device *dev)
432 {
433 	struct ip_tunnel *t = netdev_priv(dev);
434 
435 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
436 		struct in_device *in_dev;
437 		in_dev = inetdev_by_index(t->net, t->mlink);
438 		if (in_dev)
439 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
440 	}
441 	return 0;
442 }
443 #endif
444 
445 static const struct net_device_ops ipgre_netdev_ops = {
446 	.ndo_init		= ipgre_tunnel_init,
447 	.ndo_uninit		= ip_tunnel_uninit,
448 #ifdef CONFIG_NET_IPGRE_BROADCAST
449 	.ndo_open		= ipgre_open,
450 	.ndo_stop		= ipgre_close,
451 #endif
452 	.ndo_start_xmit		= ipgre_xmit,
453 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
454 	.ndo_change_mtu		= ip_tunnel_change_mtu,
455 	.ndo_get_stats64	= ip_tunnel_get_stats64,
456 };
457 
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
462 
463 static void ipgre_tunnel_setup(struct net_device *dev)
464 {
465 	dev->netdev_ops		= &ipgre_netdev_ops;
466 	dev->type		= ARPHRD_IPGRE;
467 	ip_tunnel_setup(dev, ipgre_net_id);
468 }
469 
470 static void __gre_tunnel_init(struct net_device *dev)
471 {
472 	struct ip_tunnel *tunnel;
473 
474 	tunnel = netdev_priv(dev);
475 	tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
476 	tunnel->parms.iph.protocol = IPPROTO_GRE;
477 
478 	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
479 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
480 
481 	dev->features		|= GRE_FEATURES;
482 	dev->hw_features	|= GRE_FEATURES;
483 
484 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
485 		/* TCP offload with GRE SEQ is not supported. */
486 		dev->features    |= NETIF_F_GSO_SOFTWARE;
487 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
488 		/* Can use a lockless transmit, unless we generate
489 		 * output sequences
490 		 */
491 		dev->features |= NETIF_F_LLTX;
492 	}
493 }
494 
495 static int ipgre_tunnel_init(struct net_device *dev)
496 {
497 	struct ip_tunnel *tunnel = netdev_priv(dev);
498 	struct iphdr *iph = &tunnel->parms.iph;
499 
500 	__gre_tunnel_init(dev);
501 
502 	memcpy(dev->dev_addr, &iph->saddr, 4);
503 	memcpy(dev->broadcast, &iph->daddr, 4);
504 
505 	dev->flags		= IFF_NOARP;
506 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
507 	dev->addr_len		= 4;
508 
509 	if (iph->daddr) {
510 #ifdef CONFIG_NET_IPGRE_BROADCAST
511 		if (ipv4_is_multicast(iph->daddr)) {
512 			if (!iph->saddr)
513 				return -EINVAL;
514 			dev->flags = IFF_BROADCAST;
515 			dev->header_ops = &ipgre_header_ops;
516 		}
517 #endif
518 	} else
519 		dev->header_ops = &ipgre_header_ops;
520 
521 	return ip_tunnel_init(dev);
522 }
523 
524 static struct gre_cisco_protocol ipgre_protocol = {
525 	.handler        = ipgre_rcv,
526 	.err_handler    = ipgre_err,
527 	.priority       = 0,
528 };
529 
530 static int __net_init ipgre_init_net(struct net *net)
531 {
532 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
533 }
534 
535 static void __net_exit ipgre_exit_net(struct net *net)
536 {
537 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
538 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
539 }
540 
541 static struct pernet_operations ipgre_net_ops = {
542 	.init = ipgre_init_net,
543 	.exit = ipgre_exit_net,
544 	.id   = &ipgre_net_id,
545 	.size = sizeof(struct ip_tunnel_net),
546 };
547 
548 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
549 {
550 	__be16 flags;
551 
552 	if (!data)
553 		return 0;
554 
555 	flags = 0;
556 	if (data[IFLA_GRE_IFLAGS])
557 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
558 	if (data[IFLA_GRE_OFLAGS])
559 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
560 	if (flags & (GRE_VERSION|GRE_ROUTING))
561 		return -EINVAL;
562 
563 	return 0;
564 }
565 
566 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
567 {
568 	__be32 daddr;
569 
570 	if (tb[IFLA_ADDRESS]) {
571 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
572 			return -EINVAL;
573 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
574 			return -EADDRNOTAVAIL;
575 	}
576 
577 	if (!data)
578 		goto out;
579 
580 	if (data[IFLA_GRE_REMOTE]) {
581 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
582 		if (!daddr)
583 			return -EINVAL;
584 	}
585 
586 out:
587 	return ipgre_tunnel_validate(tb, data);
588 }
589 
590 static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
591 			       struct ip_tunnel_parm *parms)
592 {
593 	memset(parms, 0, sizeof(*parms));
594 
595 	parms->iph.protocol = IPPROTO_GRE;
596 
597 	if (!data)
598 		return;
599 
600 	if (data[IFLA_GRE_LINK])
601 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
602 
603 	if (data[IFLA_GRE_IFLAGS])
604 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
605 
606 	if (data[IFLA_GRE_OFLAGS])
607 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
608 
609 	if (data[IFLA_GRE_IKEY])
610 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
611 
612 	if (data[IFLA_GRE_OKEY])
613 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
614 
615 	if (data[IFLA_GRE_LOCAL])
616 		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
617 
618 	if (data[IFLA_GRE_REMOTE])
619 		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
620 
621 	if (data[IFLA_GRE_TTL])
622 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
623 
624 	if (data[IFLA_GRE_TOS])
625 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
626 
627 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
628 		parms->iph.frag_off = htons(IP_DF);
629 }
630 
/* ndo_init for "gretap" devices: common GRE setup, then ip_tunnel_init(),
 * whose result is returned.
 */
static int gre_tap_init(struct net_device *dev)
{
	int err;

	__gre_tunnel_init(dev);
	err = ip_tunnel_init(dev);

	return err;
}
637 
638 static const struct net_device_ops gre_tap_netdev_ops = {
639 	.ndo_init		= gre_tap_init,
640 	.ndo_uninit		= ip_tunnel_uninit,
641 	.ndo_start_xmit		= gre_tap_xmit,
642 	.ndo_set_mac_address 	= eth_mac_addr,
643 	.ndo_validate_addr	= eth_validate_addr,
644 	.ndo_change_mtu		= ip_tunnel_change_mtu,
645 	.ndo_get_stats64	= ip_tunnel_get_stats64,
646 };
647 
648 static void ipgre_tap_setup(struct net_device *dev)
649 {
650 	ether_setup(dev);
651 	dev->netdev_ops		= &gre_tap_netdev_ops;
652 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
653 	ip_tunnel_setup(dev, gre_tap_net_id);
654 }
655 
656 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
657 			 struct nlattr *tb[], struct nlattr *data[])
658 {
659 	struct ip_tunnel_parm p;
660 
661 	ipgre_netlink_parms(data, tb, &p);
662 	return ip_tunnel_newlink(dev, tb, &p);
663 }
664 
665 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
666 			    struct nlattr *data[])
667 {
668 	struct ip_tunnel_parm p;
669 
670 	ipgre_netlink_parms(data, tb, &p);
671 	return ip_tunnel_changelink(dev, tb, &p);
672 }
673 
674 static size_t ipgre_get_size(const struct net_device *dev)
675 {
676 	return
677 		/* IFLA_GRE_LINK */
678 		nla_total_size(4) +
679 		/* IFLA_GRE_IFLAGS */
680 		nla_total_size(2) +
681 		/* IFLA_GRE_OFLAGS */
682 		nla_total_size(2) +
683 		/* IFLA_GRE_IKEY */
684 		nla_total_size(4) +
685 		/* IFLA_GRE_OKEY */
686 		nla_total_size(4) +
687 		/* IFLA_GRE_LOCAL */
688 		nla_total_size(4) +
689 		/* IFLA_GRE_REMOTE */
690 		nla_total_size(4) +
691 		/* IFLA_GRE_TTL */
692 		nla_total_size(1) +
693 		/* IFLA_GRE_TOS */
694 		nla_total_size(1) +
695 		/* IFLA_GRE_PMTUDISC */
696 		nla_total_size(1) +
697 		0;
698 }
699 
700 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
701 {
702 	struct ip_tunnel *t = netdev_priv(dev);
703 	struct ip_tunnel_parm *p = &t->parms;
704 
705 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
706 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
707 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
708 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
709 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
710 	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
711 	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
712 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
713 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
714 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
715 		       !!(p->iph.frag_off & htons(IP_DF))))
716 		goto nla_put_failure;
717 	return 0;
718 
719 nla_put_failure:
720 	return -EMSGSIZE;
721 }
722 
723 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
724 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
725 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
726 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
727 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
728 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
729 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
730 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
731 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
732 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
733 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
734 };
735 
736 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
737 	.kind		= "gre",
738 	.maxtype	= IFLA_GRE_MAX,
739 	.policy		= ipgre_policy,
740 	.priv_size	= sizeof(struct ip_tunnel),
741 	.setup		= ipgre_tunnel_setup,
742 	.validate	= ipgre_tunnel_validate,
743 	.newlink	= ipgre_newlink,
744 	.changelink	= ipgre_changelink,
745 	.dellink	= ip_tunnel_dellink,
746 	.get_size	= ipgre_get_size,
747 	.fill_info	= ipgre_fill_info,
748 };
749 
750 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
751 	.kind		= "gretap",
752 	.maxtype	= IFLA_GRE_MAX,
753 	.policy		= ipgre_policy,
754 	.priv_size	= sizeof(struct ip_tunnel),
755 	.setup		= ipgre_tap_setup,
756 	.validate	= ipgre_tap_validate,
757 	.newlink	= ipgre_newlink,
758 	.changelink	= ipgre_changelink,
759 	.dellink	= ip_tunnel_dellink,
760 	.get_size	= ipgre_get_size,
761 	.fill_info	= ipgre_fill_info,
762 };
763 
764 static int __net_init ipgre_tap_init_net(struct net *net)
765 {
766 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
767 }
768 
769 static void __net_exit ipgre_tap_exit_net(struct net *net)
770 {
771 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
772 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
773 }
774 
775 static struct pernet_operations ipgre_tap_net_ops = {
776 	.init = ipgre_tap_init_net,
777 	.exit = ipgre_tap_exit_net,
778 	.id   = &gre_tap_net_id,
779 	.size = sizeof(struct ip_tunnel_net),
780 };
781 
782 static int __init ipgre_init(void)
783 {
784 	int err;
785 
786 	pr_info("GRE over IPv4 tunneling driver\n");
787 
788 	err = register_pernet_device(&ipgre_net_ops);
789 	if (err < 0)
790 		return err;
791 
792 	err = register_pernet_device(&ipgre_tap_net_ops);
793 	if (err < 0)
794 		goto pnet_tap_faied;
795 
796 	err = gre_cisco_register(&ipgre_protocol);
797 	if (err < 0) {
798 		pr_info("%s: can't add protocol\n", __func__);
799 		goto add_proto_failed;
800 	}
801 
802 	err = rtnl_link_register(&ipgre_link_ops);
803 	if (err < 0)
804 		goto rtnl_link_failed;
805 
806 	err = rtnl_link_register(&ipgre_tap_ops);
807 	if (err < 0)
808 		goto tap_ops_failed;
809 
810 	return 0;
811 
812 tap_ops_failed:
813 	rtnl_link_unregister(&ipgre_link_ops);
814 rtnl_link_failed:
815 	gre_cisco_unregister(&ipgre_protocol);
816 add_proto_failed:
817 	unregister_pernet_device(&ipgre_tap_net_ops);
818 pnet_tap_faied:
819 	unregister_pernet_device(&ipgre_net_ops);
820 	return err;
821 }
822 
/*
 * ipgre_fini - module exit point.
 *
 * Undoes ipgre_init() in exactly the reverse order of registration: link
 * types first, then the protocol handlers, then the per-namespace
 * operations.
 */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_cisco_unregister(&ipgre_protocol);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}
831 
/* Module entry and exit points. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Aliases for the two rtnetlink link kinds defined in this file. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
/* Aliases for the device names "gre0" and "gretap0". */
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
839