/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task.  The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires.  It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops.  This is a percpu
   counter; since CPU migration is forbidden once we enter the first
   ndo_start_xmit(), it is safe to use.  We force an exit if this
   counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network.  The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header.  It is a very good solution, but it introduces
   two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work.  I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative.  This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload.  That is the end of it.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node.  It is difficult or even impossible,
   especially taking fragmentation into account.  In short, ttl is not
   a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   and that is ALL. :-)  Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set.  But it is not our problem!  Nobody could accuse us;
   we did all that we could.  Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
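
/*
 * For illustration only: a compiled-out sketch of the percpu recursion
 * guard described above.  The real counter (xmit_recursion, capped at
 * RECURSION_LIMIT) lives in the core dev transmit path, not in this
 * file, and every name below is invented for the sketch.
 */
#if 0
static DEFINE_PER_CPU(unsigned int, sketch_xmit_recursion);
#define SKETCH_RECURSION_LIMIT	10

static int sketch_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	int ret;

	/* Plain percpu accessors are safe here: we only run inside
	 * the transmit path, where CPU migration is disabled.
	 */
	if (__this_cpu_read(sketch_xmit_recursion) >= SKETCH_RECURSION_LIMIT) {
		kfree_skb(skb);		/* break the local dead loop */
		return NET_XMIT_DROP;
	}

	__this_cpu_inc(sketch_xmit_recursion);
	ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
	__this_cpu_dec(sketch_xmit_recursion);

	return ret;
}
#endif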

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload.  It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key to the third word
	   in the GRE header.  It makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled.
	   Tell them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so why the hell do they break standards established
	   by themselves?
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return PACKET_RCVD;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return PACKET_RCVD;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return PACKET_RCVD;
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (t == NULL)
		return PACKET_REJECT;

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return PACKET_RCVD;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return PACKET_RCVD;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		skb_pop_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = proto;
	tpi.key = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq = htonl(tunnel->o_seqno);

	/* Push GRE header. */
	gre_build_header(skb, &tpi, tunnel->tun_hlen);

	skb_set_inner_protocol(skb, tpi.proto);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
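
/*
 * For reference, the header gre_build_header() pushes above follows the
 * RFC 2784/2890 layout; the optional words are present only when the
 * corresponding C/K/S flag bit is set:
 *
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |C| |K|S|    Reserved0    | Ver |         Protocol Type         |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |      Checksum (optional)      |      Reserved1 (optional)     |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                         Key (optional)                        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                   Sequence Number (optional)                  |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */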

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	__gre_xmit(skb, dev, tnl_params, skb->protocol);

	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));

	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}
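
/*
 * The ioctl above backs the legacy "ip tunnel" userspace interface,
 * e.g. (addresses are illustrative):
 *
 *   ip tunnel add gre1 mode gre remote 192.0.2.2 local 192.0.2.1 ttl 64
 *   ip tunnel change gre1 ttl 32
 *   ip tunnel del gre1
 *
 * Newer userspace drives the rtnetlink ops further below instead.
 */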

/* Nice toy.  Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66.  If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
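/* header_ops->create hook: pre-builds the outer IPv4 + GRE header in
 * front of the payload.  Following the dev_hard_header() convention it
 * returns the header length when the header is complete, or minus that
 * length when the destination is still unknown and must be filled in
 * later.
 */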
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported. */
		dev->features    |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}
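
/*
 * Worked example for the sizing above: with both TUNNEL_KEY and
 * TUNNEL_CSUM set, ip_gre_calc_hlen() yields tun_hlen = 4 (base
 * header) + 4 (checksum + reserved) + 4 (key) = 12.  With no UDP
 * encapsulation, t_hlen = 12 + 20 (IPv4 header) = 32, so
 * dev->mtu = 1500 - 32 - 4 = 1464.  The extra 4 bytes appear to be
 * historical slack that ipgre has always reserved.
 */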

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static struct gre_cisco_protocol ipgre_protocol = {
	.handler        = ipgre_rcv,
	.err_handler    = ipgre_err,
	.priority       = 0,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
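
/*
 * Illustrative mapping of the attributes parsed above onto the
 * iproute2 front end (addresses made up):
 *
 *   ip link add gre1 type gre remote 192.0.2.2 local 192.0.2.1 \
 *		ttl 64 key 42
 *
 * "key 42" fills both IFLA_GRE_IKEY and IFLA_GRE_OKEY, with iproute2
 * also setting the matching GRE_KEY bits in IFLA_GRE_IFLAGS and
 * IFLA_GRE_OFLAGS.
 */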

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}
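
/*
 * The encap attributes above select UDP encapsulation of the GRE
 * tunnel (e.g. foo-over-udp).  An illustrative iproute2 invocation,
 * assuming a matching FOU receive port has been configured separately:
 *
 *   ip link add gre1 type gre remote 192.0.2.2 local 192.0.2.1 \
 *		encap fou encap-sport auto encap-dport 5555
 */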

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops		= &gre_tap_netdev_ops;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
			t->encap.sport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
			t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = gre_cisco_register(&ipgre_protocol);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	return 0;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_cisco_unregister(&ipgre_protocol);
add_proto_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_cisco_unregister(&ipgre_protocol);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");