/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: the t->recursion lock breaks dead loops. It looks
   like the dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   becomes multithreaded, we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could do. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally modular.
   We could extract the common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
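
/*
   A sketch of the DF-forcing behaviour described above (addresses are
   illustrative): configuring a tunnel with an explicit ttl makes the
   SIOCADDTUNNEL handler below force IP_DF on the outer header, which
   is what prunes looping branches via pmtu:

     ip tunnel add gre1 mode gre remote 192.0.2.1 local 192.0.2.2 ttl 64

   With "ttl inherit" (ttl == 0), DF is not forced and this pruning
   does not apply.
 */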

static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

static struct net_device *ipgre_fb_tunnel_dev;

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
 */

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
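
/*
 * Worked example: HASH() folds a 32-bit value into a 4-bit bucket index
 * by xoring it with itself shifted right by 4 and keeping the low nibble.
 * For a raw value of 0xC0A80001 (illustrative; the u32 seen here depends
 * on host endianness, since addr is a __be32 force-cast to u32):
 *
 *   0xC0A80001 ^ 0x0C0A8000 = 0xCCA28001;  0xCCA28001 & 0xF = 1
 *
 * so the entry lands in bucket 1.
 */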

static struct ip_tunnel *tunnels[4][HASH_SIZE];

#define tunnels_r_l	(tunnels[3])
#define tunnels_r	(tunnels[2])
#define tunnels_l	(tunnels[1])
#define tunnels_wc	(tunnels[0])

static DEFINE_RWLOCK(ipgre_lock);

/* Given src, dst and key, find the appropriate input tunnel. */

static struct ip_tunnel *ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t;

	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_r[h0^h1]; t; t = t->next) {
		if (remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr ||
		     (local == t->parms.iph.daddr &&
		      ipv4_is_multicast(local))) {
			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
				return t;
		}
	}
	for (t = tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
			return t;
	}

	if (ipgre_fb_tunnel_dev->flags&IFF_UP)
		return netdev_priv(ipgre_fb_tunnel_dev);
	return NULL;
}

static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &tunnels[prio][h];
}
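
/*
 * Example (illustrative addresses): a tunnel with local 10.0.0.1,
 * unicast remote 10.0.0.2 and key 0 gets prio 3 (remote,local) and
 * bucket HASH(0) ^ HASH(10.0.0.2).  A multicast remote does not set
 * the "remote" bit, so such tunnels live in the (*,local) table and
 * are matched against the packet's daddr in ipgre_tunnel_lookup().
 */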

static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
{
	return __ipgre_bucket(&t->parms);
}

static void ipgre_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}

static void ipgre_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];

	for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (key == t->parms.i_key)
				return t;
		}
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	dev->init = ipgre_tunnel_init;
	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipgre_tunnel_link(nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
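
/*
 * Note: when parms->name is empty, the "gre%d" template is handed to
 * dev_alloc_name(), which picks the first free index, e.g. gre1, gre2,
 * ... (gre0 is taken by the fallback device registered in ipgre_init()).
 */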

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	ipgre_tunnel_unlink(netdev_priv(dev));
	dev_put(dev);
}


static void ipgre_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, the Cisco "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksums enabled. Tell them
   "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee; why the
   hell do these idiots break the standards established by
   themselves???
 */

	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	t = ipgre_tunnel_lookup(iph->daddr, iph->saddr,
				(flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
#else
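	/* This branch is compiled out (I_WISH_WORLD_WERE_PERFECT is never
	 * defined) and has bit-rotted: dp and len below come from an older
	 * err_handler signature and are no longer in scope, so it would
	 * not build as-is.  It is kept for reference only.
	 */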
	struct iphdr *iph = (struct iphdr*)dp;
	struct iphdr *eiph;
	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	__be16 flags;
	int grehlen = (iph->ihl<<2) + 4;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (p[1] != htons(ETH_P_IP))
		return;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_CSUM)
			grehlen += 4;
		if (flags&GRE_KEY)
			grehlen += 4;
		if (flags&GRE_SEQ)
			grehlen += 4;
	}
	if (len < grehlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + grehlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < (iph->ihl<<2))
			return;

		/* So... This guy found something strange INSIDE the
		   encapsulated packet. Well, he is a fool, but what
		   can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		n -= grehlen;
		rel_info = htonl(n << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < grehlen+68)
				return;
			n -= grehlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare a fake skb to feed to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess the incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_GRE;
	if (ip_route_output_key(&init_net, &rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
#endif
}

static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

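/*
 * On-the-wire GRE header layout assumed by ipgre_rcv() below (rfc1701
 * style, version 0):
 *
 *    2 bytes  flags      (GRE_CSUM, GRE_KEY, GRE_SEQ, ...)
 *    2 bytes  protocol   (ETH_P_* of the inner packet)
 *   [4 bytes  checksum + offset, present if GRE_CSUM]
 *   [4 bytes  key,               present if GRE_KEY]
 *   [4 bytes  sequence number,   present if GRE_SEQ]
 *
 * Hence the parser starts with offset = 4 and adds 4 for each optional
 * field announced in the flags word.
 */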
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;

	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
		secpath_reset(skb);

		skb->protocol = *(__be16*)(h + 2);
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 &&
		    skb->protocol == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_reset_network_header(skb);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (((struct rtable*)skb->dst)->fl.iif == 0)
				goto drop;
			tunnel->stat.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->stat.rx_crc_errors++;
			tunnel->stat.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->stat.rx_fifo_errors++;
				tunnel->stat.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipgre_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return 0;
}

static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (dev->header_ops) {
		gre_hlen = 0;
		tiph = (struct iphdr*)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = (struct rtable*)skb->dst;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr*)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos&1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(&init_net, &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;

		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the outer IP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
	((__be16*)(iph+1))[1] = skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
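		/* ptr starts at the last optional 32-bit word of the GRE
		 * header and walks backwards, so the fields land in rfc1701
		 * order on the wire: [checksum][key][sequence].  E.g. with
		 * all three flags set, hlen = 20 + 4 + 12 and seq is written
		 * first (at offset hlen - 4), then key, then csum.
		 */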
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

static void ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and hard_header_len */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(&init_net, &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->hard_header_len = hlen + addend;
	dev->mtu = mtu - addend;
	tunnel->hlen = addend;
}
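
/*
 * Example (illustrative): over a plain Ethernet underlay (mtu 1500), a
 * tunnel with no options gets addend = 20 + 4 = 24, i.e. the classic GRE
 * mtu of 1476; adding key and checksum raises addend to 32 and drops the
 * mtu to 1468.
 */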

static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipgre_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipgre_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ipgre_fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
{
	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
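
/*
 * Example (illustrative): for a keyed, checksummed tunnel, hlen is
 * 20 + 4 + 8 = 32, so the accepted range is 68..(0xFFF8 - 32) = 68..65496.
 * 68 is the minimum IPv4 mtu a host must cope with per rfc791.
 */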

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
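
/*
 * Note: the "hardware address" of an ipgre device is its 4-byte tunnel
 * endpoint (addr_len = 4 in ipgre_tunnel_setup()), so ipgre_header()
 * returns +t->hlen when the outer daddr is already known and -t->hlen
 * when the caller still has to supply the destination, the usual
 * convention for a partially built link-layer header.
 */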

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(&init_net, &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev->nd_net, t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor		= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->get_stats		= ipgre_tunnel_get_stats;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipgre_tunnel_bind_dev(dev);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}


static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
};


/*
 *	And now the module code and the kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ipgre_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;

	if ((err = register_netdev(ipgre_fb_tunnel_dev)))
		goto err2;
out:
	return err;
err2:
	free_netdev(ipgre_fb_tunnel_dev);
err1:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}

static void __exit ipgre_destroy_tunnels(void)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	rtnl_lock();
	ipgre_destroy_tunnels();
	rtnl_unlock();
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");