/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), we force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least in my neighbourhood)
     return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. This is difficult or even impossible, especially
   taking fragmentation into account. In short, ttl is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   and that is ALL. :-) Well, it does not remove the problem completely,
   but the exponential growth of network traffic is changed to linear
   growth (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
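
/* A worked example of the mtu collapse described above (an
 * illustrative sketch, not a guarantee): for an unkeyed tunnel each
 * extra level of encapsulation costs 24 bytes (a 20-byte outer IPv4
 * header plus the 4-byte GRE base header), so once DF is forced, PMTU
 * feedback shrinks a looping tunnel's mtu from 1500 to below the
 * 68-byte IPv4 minimum after roughly (1500 - 68) / 24, i.e. about 60
 * rounds, at which point the loop starves itself.
 */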

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

static __sum16 check_checksum(struct sk_buff *skb)
{
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		csum = csum_fold(skb->csum);

		if (!csum)
			break;
		/* Fall through. */

	case CHECKSUM_NONE:
		skb->csum = 0;
		csum = __skb_checksum_complete(skb);
		skb->ip_summed = CHECKSUM_COMPLETE;
		break;
	}

	return csum;
}

static int ip_gre_calc_hlen(__be16 o_flags)
{
	int addend = 4;

	if (o_flags&TUNNEL_CSUM)
		addend += 4;
	if (o_flags&TUNNEL_KEY)
		addend += 4;
	if (o_flags&TUNNEL_SEQ)
		addend += 4;
	return addend;
}

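/* GRE header layout, per RFC 2784/2890, as assumed by
 * ip_gre_calc_hlen() above and parse_gre_header() below. The optional
 * words are present only when the corresponding flag bit is set:
 *
 *    +---------------+-----------------+
 *    | flags/version | protocol type   |  4 bytes, always present
 *    +---------------+-----------------+
 *    | checksum      | reserved1       |  +4 if TUNNEL_CSUM
 *    +---------------+-----------------+
 *    |              key                |  +4 if TUNNEL_KEY
 *    +---------------------------------+
 *    |        sequence number          |  +4 if TUNNEL_SEQ
 *    +---------------------------------+
 *
 * e.g. o_flags = TUNNEL_CSUM | TUNNEL_KEY gives 4 + 4 + 4 = 12 bytes.
 */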
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
			    bool *csum_err, int *hdr_len)
{
	unsigned int ip_hlen = ip_hdrlen(skb);
	const struct gre_base_hdr *greh;
	__be32 *options;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
		return -EINVAL;

	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
		return -EINVAL;

	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
	*hdr_len = ip_gre_calc_hlen(tpi->flags);

	if (!pskb_may_pull(skb, *hdr_len))
		return -EINVAL;

	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);

	tpi->proto = greh->protocol;

	options = (__be32 *)(greh + 1);
	if (greh->flags & GRE_CSUM) {
		if (check_checksum(skb)) {
			*csum_err = true;
			return -EINVAL;
		}
		options++;
	}

	if (greh->flags & GRE_KEY) {
		tpi->key = *options;
		options++;
	} else
		tpi->key = 0;

	if (unlikely(greh->flags & GRE_SEQ)) {
		tpi->seq = *options;
		options++;
	} else
		tpi->seq = 0;

	/* WCCP version 1 and 2 protocol decoding.
	 * - Change the protocol to IP.
	 * - For WCCPv2, skip the extra 4 bytes in the GRE header.
	 */
	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
		tpi->proto = htons(ETH_P_IP);
		if ((*(u8 *)options & 0xF0) != 0x40) {
			*hdr_len += 4;
			if (!pskb_may_pull(skb, *hdr_len))
				return -EINVAL;
		}
	}

	return 0;
}
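
/* A worked example of the parse above, with illustrative byte values:
 * a keyed GRE packet carrying IPv4 begins, right after the outer IP
 * header, with the bytes 20 00 08 00 followed by the 4-byte key
 * (GRE_KEY set in the flags word, ETH_P_IP as the protocol type).
 * For such a packet parse_gre_header() sets *hdr_len = 8, tpi->proto
 * = htons(ETH_P_IP) and tpi->key to the key word, with tpi->seq = 0
 * since GRE_SEQ is not set.
 */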

static void ipgre_err(struct sk_buff *skb, u32 info)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so why the hell do these idiots break the standards they
	   established themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	struct tnl_ptk_info tpi;
	int hdr_len;
	bool csum_err = false;

	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
		if (!csum_err)	/* csum errors alone are tolerated here */
			return;
	}

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi.proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
			     iph->daddr, iph->saddr, tpi.key);

	if (t == NULL)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		return;
	}
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static int ipgre_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;
	struct tnl_ptk_info tpi;
	int hdr_len;
	bool csum_err = false;

	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
		goto drop;

	if (tpi.proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
				  iph->saddr, iph->daddr, tpi.key);

	if (tunnel) {
		ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
{
	int err;

	if (skb_is_gso(skb)) {
		err = skb_unclone(skb, GFP_ATOMIC);
		if (unlikely(err))
			goto error;
		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
		return skb;
	} else if (skb->ip_summed == CHECKSUM_PARTIAL &&
		   tunnel->parms.o_flags&TUNNEL_CSUM) {
		err = skb_checksum_help(skb);
		if (unlikely(err))
			goto error;
	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
		skb->ip_summed = CHECKSUM_NONE;

	return skb;

error:
	kfree_skb(skb);
	return ERR_PTR(err);
}

static struct sk_buff *gre_build_header(struct sk_buff *skb,
					const struct tnl_ptk_info *tpi,
					int hdr_len)
{
	struct gre_base_hdr *greh;

	skb_push(skb, hdr_len);

	greh = (struct gre_base_hdr *)skb->data;
	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
	greh->protocol = tpi->proto;

	if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

		if (tpi->flags&TUNNEL_SEQ) {
			*ptr = tpi->seq;
			ptr--;
		}
		if (tpi->flags&TUNNEL_KEY) {
			*ptr = tpi->key;
			ptr--;
		}
		if (tpi->flags&TUNNEL_CSUM &&
		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
			*(__sum16 *)ptr = 0;
			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
								 skb->len, 0));
		}
	}

	return skb;
}
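
/* Note on gre_build_header() above: ptr starts at the LAST 32-bit
 * word of the pushed header and walks backwards, so whatever subset
 * of options is enabled lands in the order the GRE RFCs require:
 * checksum word first, then key, then sequence number.
 */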

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = proto;
	tpi.key = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq = htonl(tunnel->o_seqno);

	/* Push GRE header. */
	skb = gre_build_header(skb, &tpi, tunnel->hlen);
	if (unlikely(!skb)) {
		dev->stats.tx_dropped++;
		return;
	}

	ip_tunnel_xmit(skb, dev, tnl_params);
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	skb = handle_offloads(tunnel, skb);
	if (IS_ERR(skb))
		goto out;

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	__gre_xmit(skb, dev, tnl_params, skb->protocol);

	return NETDEV_TX_OK;

free_skb:
	dev_kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	skb = handle_offloads(tunnel, skb);
	if (IS_ERR(skb))
		goto out;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));

	return NETDEV_TX_OK;

free_skb:
	dev_kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
	    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
	    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
		return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}
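
/* Userspace usage sketch for the ioctl above (illustrative only; the
 * device name and addresses are assumptions, and error handling is
 * omitted). This is roughly how a tool like iproute2 creates a
 * tunnel: fill a struct ip_tunnel_parm, hang it off a struct ifreq
 * aimed at the fallback "gre0" device, and issue SIOCADDTUNNEL on an
 * AF_INET datagram socket fd:
 *
 *	struct ip_tunnel_parm p = { 0 };
 *	struct ifreq ifr;
 *
 *	strcpy(p.name, "gre1");
 *	p.iph.version  = 4;
 *	p.iph.ihl      = 5;
 *	p.iph.protocol = IPPROTO_GRE;
 *	p.iph.saddr    = inet_addr("203.0.113.1");
 *	p.iph.daddr    = inet_addr("203.0.113.2");
 *	strcpy(ifr.ifr_name, "gre0");
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */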

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph+1);
	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen;

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;

	dev->features		|= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported. */
		dev->features    |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}
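
/* A quick check of the defaults above: with ETH_DATA_LEN = 1500 the
 * initial mtu is 1500 - 20 - 4 = 1476, i.e. room for the 20-byte
 * outer IPv4 header plus the 4-byte GRE base header. The optional
 * checksum, key and sequence words are not part of this default;
 * they show up in tunnel->hlen, computed from o_flags above.
 */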

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->type		= ARPHRD_IPGRE;
	dev->flags		= IFF_NOARP;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
	dev->addr_len		= 4;

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = ipgre_rcv,
	.err_handler = ipgre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
	ip_tunnel_delete_net(itn);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
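
/* Note the IFLA_GRE_PMTUDISC default above: unless userspace
 * explicitly sends a zero, the DF bit is forced on; that is the
 * "UNEXPECTEDLY SIMPLE" loop damping described at the top of this
 * file.
 */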

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops		= &gre_tap_netdev_ops;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel_parm p;

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel_parm p;

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
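
/* Usage sketch with iproute2 (device names and addresses are
 * illustrative):
 *
 *	ip link add gre1 type gre local 203.0.113.1 remote 203.0.113.2 ttl 64
 *	ip link add gretap1 type gretap local 203.0.113.1 remote 203.0.113.2
 *
 * "gre" devices are created through ipgre_link_ops, "gretap" devices
 * through ipgre_tap_ops.
 */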

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
	ip_tunnel_delete_net(itn);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	return 0;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
		pr_info("%s: can't remove protocol\n", __func__);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");