/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: the t->recursion lock breaks dead loops. It looks
   like the dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   becomes multithreaded, we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we made
   all that we could. Even if it is your gated that injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not obvious how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctl etc.)
   into a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

static struct net_device *ipgre_fb_tunnel_dev;

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
 */

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
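
/* HASH() folds an address into a 4-bit bucket index by XOR-ing the two
 * low nibbles of its 32-bit value; the lookup below additionally XORs
 * the remote-address hash with the key hash (h0^h1) for the
 * (remote,local) and (remote,*) tables.
 */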

static struct ip_tunnel *tunnels[4][HASH_SIZE];

#define tunnels_r_l	(tunnels[3])
#define tunnels_r	(tunnels[2])
#define tunnels_l	(tunnels[1])
#define tunnels_wc	(tunnels[0])

static DEFINE_RWLOCK(ipgre_lock);

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_r[h0^h1]; t; t = t->next) {
		if (remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr ||
		    (local == t->parms.iph.daddr &&
		     ipv4_is_multicast(local))) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
			return t;
	}

	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
		return netdev_priv(ipgre_fb_tunnel_dev);
	return NULL;
}

static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
{
	return __ipgre_bucket(&t->parms);
}

static void ipgre_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];

	for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (key == t->parms.i_key)
				return t;
		}
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i = 1; i < 100; i++) {
			sprintf(name, "gre%d", i);
			if (__dev_get_by_name(&init_net, name) == NULL)
				break;
		}
		if (i == 100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev->init = ipgre_tunnel_init;
	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	dev_hold(dev);
	ipgre_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	ipgre_tunnel_unlink(netdev_priv(dev));
	dev_put(dev);
}

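/* Handle an ICMP error on a GRE packet that we originated: look up the
 * tunnel that sent it and bump its err_count, so that the transmit path
 * can report dst_link_failure() for a while (IPTUNNEL_ERR_TIMEO).
 */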
static void ipgre_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by a Cisco employee;
   why the hell do these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr,
				(flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
#else
	/* This branch is compiled out; it predates the current
	 * (skb, info) prototype. Map the old dp/len names onto the
	 * skb so the code below still reads consistently.
	 */
	u8 *dp = skb->data;
	int len = skb->len;
	struct iphdr *iph = (struct iphdr*)dp;
	struct iphdr *eiph;
	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	__be16 flags;
	int grehlen = (iph->ihl<<2) + 4;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (p[1] != htons(ETH_P_IP))
		return;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_CSUM)
			grehlen += 4;
		if (flags&GRE_KEY)
			grehlen += 4;
		if (flags&GRE_SEQ)
			grehlen += 4;
	}
	if (len < grehlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + grehlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < (iph->ihl<<2))
			return;

		/* So... This guy found something strange INSIDE the
		   encapsulated packet. Well, he is a fool, but what can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		n -= grehlen;
		rel_info = htonl(n << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < grehlen+68)
				return;
			n -= grehlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_GRE;
	if (ip_route_output_key(&init_net, &rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
#endif
}

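/* ECN handling: on decapsulation, propagate a Congestion Experienced
 * mark from the outer IP header onto the inner IPv4/IPv6 packet; on
 * encapsulation, derive the outer TOS from the inner DS field via
 * INET_ECN_encapsulate().
 */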
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

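/* The receive path parses a GRE header laid out as in RFC 1701/2784:
 *
 *	__be16	flags;		C|R|K|S|s|Recur|flags|Ver
 *	__be16	protocol;	ETH_P_* of the payload
 *	__be32	csum_reserved;	present iff GRE_CSUM
 *	__be32	key;		present iff GRE_KEY
 *	__be32	seqno;		present iff GRE_SEQ
 *
 * hence `offset` starts at 4 and grows by 4 for each optional field.
 */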
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;

	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
		secpath_reset(skb);

		skb->protocol = *(__be16*)(h + 2);
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 &&
		    skb->protocol == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_reset_network_header(skb);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (((struct rtable*)skb->dst)->fl.iif == 0)
				goto drop;
			tunnel->stat.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->stat.rx_crc_errors++;
			tunnel->stat.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->stat.rx_fifo_errors++;
				tunnel->stat.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipgre_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return 0;
}

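/* Transmit path: guard against recursive encapsulation (t->recursion),
 * resolve the tunnel destination (fixed, or taken from the route for
 * NBMA tunnels), route the outer packet, enforce path MTU, ensure
 * enough headroom, then push the outer IP header plus the GRE
 * flags/key/csum/seq words.
 */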
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (dev->header_ops) {
		gre_hlen = 0;
		tiph = (struct iphdr*)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = (struct rtable*)skb->dst;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr*)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos&1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(&init_net, &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;

		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the outer IP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
	((__be16*)(iph+1))[1] = skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

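/* Bind the tunnel to an underlying device: route toward the remote
 * endpoint to guess the output device, then derive hard_header_len and
 * mtu from it, accounting for the outer IP header plus the 4-byte GRE
 * base header and any optional csum/key/seq words (tunnel->hlen).
 */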
static void ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and hard_header_len */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(&init_net, &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->hard_header_len = hlen + addend;
	dev->mtu = mtu - addend;
	tunnel->hlen = addend;
}

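/* Userspace reaches this handler through the SIOC{GET,ADD,CHG,DEL}TUNNEL
 * ioctls, passing a struct ip_tunnel_parm via ifr_ifru.ifru_data; this
 * is what iproute2's "ip tunnel" does. A minimal sketch, assuming an
 * AF_INET socket fd and the <linux/if_tunnel.h> definitions:
 *
 *	struct ip_tunnel_parm p = { .iph = { .version = 4, .ihl = 5,
 *					     .protocol = IPPROTO_GRE } };
 *	struct ifreq ifr;
 *	strcpy(p.name, "gre1");
 *	strcpy(ifr.ifr_name, "gre0");	// base device for SIOCADDTUNNEL
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */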
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipgre_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipgre_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ipgre_fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
{
	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

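/* 68 is the minimum IPv4 MTU (RFC 791); the upper bound keeps
 * new_mtu + tunnel->hlen representable in the 16-bit tot_len field.
 */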
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(&init_net, &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev->nd_net, t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor		= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->get_stats		= ipgre_tunnel_get_stats;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipgre_tunnel_bind_dev(dev);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}


static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
};


/*
 *	And now the module's code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ipgre_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;

	if ((err = register_netdev(ipgre_fb_tunnel_dev)))
		goto err2;
out:
	return err;
err2:
	free_netdev(ipgre_fb_tunnel_dev);
err1:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}

static void __exit ipgre_destroy_tunnels(void)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	rtnl_lock();
	ipgre_destroy_tunnels();
	rtnl_unlock();
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");