/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: the t->recursion lock breaks dead loops. It looks
   like the dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   becomes multithreaded, we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. That is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally modular.
   We could extract the common parts (hash table, ioctl etc.)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

#define HASH_SIZE  16

static int ipgre_net_id;
struct ipgre_net {
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched to a configured keyless tunnel,
   will match the fallback tunnel.
 */

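/*
 * The bucket index folds the two low-order nibbles of the (big-endian)
 * address or key together: a value whose low byte is 0x2A hashes to
 * (0xA ^ 0x2) = 0x8.
 */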
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

static DEFINE_RWLOCK(ipgre_lock);

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local, __be32 key)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
		if (remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = ign->tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr ||
		    (local == t->parms.iph.daddr &&
		     ipv4_is_multicast(local))) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
			return t;
	}

	if (ign->fb_tunnel_dev->flags&IFF_UP)
		return netdev_priv(ign->fb_tunnel_dev);
	return NULL;
}

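/*
 * Select one of the four tables above for a tunnel's parameters:
 * bit 0 of prio is set when a local address is configured, bit 1 when
 * a unicast remote address is configured, giving the (*,*), (*,local),
 * (remote,*) and (remote,local) tables respectively.
 */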
static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

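/*
 * Find a tunnel whose parameters match @parms exactly; when there is
 * no match and @create is set, allocate and register a new device.
 */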
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (key == t->parms.i_key)
				return t;
		}
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	dev->init = ipgre_tunnel_init;
	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}

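/*
 * ICMP error handler: find the tunnel that emitted the offending GRE
 * packet and record the error, so that subsequent transmits can report
 * a rate-limited link failure (see the err_count logic in
 * ipgre_tunnel_xmit).
 */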
static void ipgre_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key to the third word
   in the GRE header. It makes it impossible to maintain even soft
   state for keyed GRE tunnels with enabled checksum. Tell them
   "thank you".

   Well, I wonder, rfc1812 was written by a Cisco employee,
   what the hell do these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
			(flags&GRE_KEY) ?
			*(((__be32*)p) + (grehlen>>2) - 1) : 0);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
#else
	struct iphdr *iph = (struct iphdr*)dp;
	struct iphdr *eiph;
	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	__be16 flags;
	int grehlen = (iph->ihl<<2) + 4;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (p[1] != htons(ETH_P_IP))
		return;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_CSUM)
			grehlen += 4;
		if (flags&GRE_KEY)
			grehlen += 4;
		if (flags&GRE_SEQ)
			grehlen += 4;
	}
	if (len < grehlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + grehlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < (iph->ihl<<2))
			return;

		/* So... This guy found something strange INSIDE the
		   encapsulated packet. Well, he is a fool, but what
		   can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		n -= grehlen;
		rel_info = htonl(n << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < grehlen+68)
				return;
			n -= grehlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_GRE;
	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
#endif
}

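/*
 * ECN propagation: on decapsulation, copy a CE mark from the outer
 * header into the inner packet; on encapsulation, combine the tunnel
 * TOS with the inner DS field.
 */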
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

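/*
 * Receive path: validate the GRE flags, checksum, key and sequence
 * number, look up the matching tunnel, and feed the decapsulated
 * packet back to the stack on the tunnel device.
 */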
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;

	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
					iph->saddr, iph->daddr, key)) != NULL) {
		secpath_reset(skb);

		skb->protocol = *(__be16*)(h + 2);
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 &&
		    skb->protocol == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_reset_network_header(skb);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			tunnel->stat.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->stat.rx_crc_errors++;
			tunnel->stat.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->stat.rx_fifo_errors++;
				tunnel->stat.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipgre_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return 0;
}

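/*
 * Transmit path: resolve a route to the tunnel endpoint, enforce path
 * MTU (see the DF discussion at the top of this file), then push the
 * outer IP header and a GRE header with optional checksum, key and
 * sequence number.
 */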
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (dev->header_ops) {
		gre_hlen = 0;
		tiph = (struct iphdr*)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb->rtable;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr*)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos&1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;

		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
	((__be16*)(iph+1))[1] = skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

static void ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and hard_header_len */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->hard_header_len = hlen + addend;
	dev->mtu = mtu - addend;
	tunnel->hlen = addend;
}

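/*
 * Tunnels are configured from userspace through the SIOC*TUNNEL ioctls
 * handled below; requests for new tunnels are addressed to the "gre0"
 * fallback device. A minimal sketch of a client (illustrative only,
 * error handling omitted; iproute2's "ip tunnel" is the real thing):
 *
 *	struct ip_tunnel_parm p = { .iph = { .version = 4, .ihl = 5,
 *					     .protocol = IPPROTO_GRE } };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "gre1");
 *	p.iph.saddr = inet_addr("192.0.2.1");	// local endpoint
 *	p.iph.daddr = inet_addr("192.0.2.2");	// remote endpoint
 *	strcpy(ifr.ifr_name, "gre0");
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */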
static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
{
	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

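/*
 * The new mtu plus the tunnel header must fit in 0xFFF8, the maximum
 * IP datagram size rounded down to an 8-byte fragment boundary.
 */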
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}


/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

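/*
 * header_ops support: the "link-layer header" is the preconstructed
 * outer IP header plus the base GRE header, and the 4-byte hardware
 * address is the tunnel endpoint's IPv4 address.
 */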
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
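/* Multicast ("broadcast LAN") mode: join the tunnel's multicast group
 * on the underlying device while the device is up.
 */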
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

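/*
 * Net-device defaults: GRE devices appear as ARPHRD_IPGRE with no ARP,
 * a 4-byte address (the local endpoint) and an mtu reduced by the
 * fixed part of the encapsulation overhead.
 */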
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->get_stats		= ipgre_tunnel_get_stats;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipgre_tunnel_bind_dev(dev);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

static int ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
	return 0;
}

static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = ign->tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

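/*
 * Per-namespace setup: allocate the ipgre_net state and register the
 * "gre0" fallback device that receives otherwise-unmatched GRE traffic
 * (see ipgre_tunnel_lookup above).
 */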
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
	dev_net_set(ign->fb_tunnel_dev, net);

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}

static void ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign);
	rtnl_unlock();
	kfree(ign);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};

/*
 *	And now the module code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
	if (err < 0)
		inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);

	return err;
}

static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");