xref: /openbmc/linux/net/ipv4/ipip.c (revision 87c2ce3b)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h>
112 
113 #include <net/sock.h>
114 #include <net/ip.h>
115 #include <net/icmp.h>
116 #include <net/protocol.h>
117 #include <net/ipip.h>
118 #include <net/inet_ecn.h>
119 #include <net/xfrm.h>
120 
#define HASH_SIZE  16
/* Reduce an IPv4 address to a 4-bit bucket index (folds the low byte). */
#define HASH(addr) ((addr^(addr>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback device "tunl0", created at module init. */
static struct net_device *ipip_fb_tunnel_dev;

/* Lookup tables, indexed by which tunnel endpoints are configured:
 * tunnels[0] = wildcard, [1] = local only, [2] = remote only,
 * [3] = both remote and local. */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Protects the hash chains above. */
static DEFINE_RWLOCK(ipip_lock);
137 
138 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
139 {
140 	unsigned h0 = HASH(remote);
141 	unsigned h1 = HASH(local);
142 	struct ip_tunnel *t;
143 
144 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
145 		if (local == t->parms.iph.saddr &&
146 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
147 			return t;
148 	}
149 	for (t = tunnels_r[h0]; t; t = t->next) {
150 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 			return t;
152 	}
153 	for (t = tunnels_l[h1]; t; t = t->next) {
154 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
155 			return t;
156 	}
157 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
158 		return t;
159 	return NULL;
160 }
161 
162 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
163 {
164 	u32 remote = t->parms.iph.daddr;
165 	u32 local = t->parms.iph.saddr;
166 	unsigned h = 0;
167 	int prio = 0;
168 
169 	if (remote) {
170 		prio |= 2;
171 		h ^= HASH(remote);
172 	}
173 	if (local) {
174 		prio |= 1;
175 		h ^= HASH(local);
176 	}
177 	return &tunnels[prio][h];
178 }
179 
180 
/*
 * Remove tunnel @t from its hash chain, if present.
 *
 * NOTE(review): the chain is walked without holding ipip_lock; only
 * the single pointer update is done under the write lock.  This
 * appears to rely on all writers serializing elsewhere (RTNL) —
 * confirm before changing.
 */
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}
194 
/*
 * Insert tunnel @t at the head of its hash chain.
 *
 * t->next is set before taking the write lock; that is safe because
 * @t is not yet visible to readers at that point.
 */
static void ipip_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipip_lock);
	*tp = t;
	write_unlock_bh(&ipip_lock);
}
204 
/*
 * Find a tunnel whose endpoint addresses match @parms exactly; if
 * none exists and @create is non-zero, allocate and register a new
 * tunnel device for it.
 *
 * Returns the (possibly new) tunnel, or NULL on lookup miss
 * (!create) or on allocation/registration failure.  Caller is
 * expected to hold RTNL (register_netdevice requires it).
 */
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	/* Same bucket selection as ipip_bucket(), inlined here because
	 * we need the chain head for the exact-match walk below. */
	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* Use the requested name, or find the first free tunl%d slot. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	/* Reference dropped in ipip_tunnel_uninit(). */
	dev_hold(dev);
	ipip_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}
264 
265 static void ipip_tunnel_uninit(struct net_device *dev)
266 {
267 	if (dev == ipip_fb_tunnel_dev) {
268 		write_lock_bh(&ipip_lock);
269 		tunnels_wc[0] = NULL;
270 		write_unlock_bh(&ipip_lock);
271 	} else
272 		ipip_tunnel_unlink(netdev_priv(dev));
273 	dev_put(dev);
274 }
275 
/*
 * ICMP error handler for the IPIP protocol.
 *
 * @skb holds the received ICMP message; skb->data points at the outer
 * header of the offending packet echoed inside it.  @info carries
 * type-specific data and is unused in the compiled branch.
 *
 * Only the owning tunnel's error soft state (err_count/err_time) is
 * updated here; relaying the error to the inner sender is not
 * attempted (see the comment below about truncated ICMP payloads).
 */
static void ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	/* Filter: only record error types that are meaningful for the
	 * tunnel itself. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipip_lock);
	/* The offending packet was sent by us, so its daddr/saddr are
	 * swapped relative to the receive-side lookup in ipip_rcv(). */
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;
	/* ttl==0 means "inherit inner TTL": a TTL-exceeded is then the
	 * inner packet's problem, not the tunnel's. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors within IPTUNNEL_ERR_TIMEO as a burst; the xmit
	 * path consumes err_count to relay link failures. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return;
#else
	/* NOTE(review): this branch is stale dead code from an older
	 * function signature — it references dp, len and key, none of
	 * which exist in this scope, so it cannot compile if enabled.
	 * Kept byte-identical; consider deleting it outright. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&rt, &key)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return;
#endif
}
458 
459 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
460 {
461 	struct iphdr *inner_iph = skb->nh.iph;
462 
463 	if (INET_ECN_is_ce(outer_iph->tos))
464 		IP_ECN_set_ce(inner_iph);
465 }
466 
/*
 * Receive handler for IPPROTO_IPIP packets.
 *
 * On entry skb->data has been advanced to the inner packet while
 * skb->nh still points at the outer header (iph below is the OUTER
 * header; the mac/nh reassignments re-frame the skb around the inner
 * one).  Returns 0 when the packet was consumed, -1 to let the
 * caller fall through (no matching tunnel / bad packet).
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	/* Make sure the inner IP header is present in linear data. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;

	iph = skb->nh.iph;

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Re-frame the skb as a freshly received inner packet. */
		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop the outer route; the inner packet is re-routed. */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		/* iph is still the outer header here — copy its CE mark. */
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

out:
	return -1;
}
509 
510 /*
511  *	This function assumes it is being called from dev_queue_xmit()
512  *	and that skb is filled properly by that function.
513  */
514 
/*
 * hard_start_xmit for tunnel devices: wrap @skb in a new outer IPv4
 * header addressed to the tunnel peer and hand it to the IP layer
 * via IPTUNNEL_XMIT().  Always returns 0; failures are accounted in
 * the device stats and the skb is freed.
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	u16    df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;

	/* Guard against tunnel-into-tunnel transmit loops. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Low bit of configured TOS means "inherit from inner header". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* Routing back out of ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Effective inner MTU: outer path MTU minus encapsulation
	 * overhead when we would set DF, else the inner route's MTU. */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	/* DF set and packet too big: relay FRAG_NEEDED to the sender. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Relay recent ICMP errors recorded by ipip_err() back to the
	 * local sender as link failures, one per queued error. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
  			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
	/* Replace the inner route with the outer one. */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* Configured ttl==0 means inherit the inner packet's TTL. */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
658 
/*
 * Tunnel configuration via SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls.
 *
 * ifr->ifr_ifru.ifru_data points at a struct ip_tunnel_parm in user
 * space.  ADD/CHG/DEL require CAP_NET_ADMIN.  Operations on the
 * fallback device act on the tunnel named in the user parms instead
 * of the fallback itself.  Returns 0 or a negative errno.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipip_fb_tunnel_dev) {
			/* Query-by-parms through the fallback device. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check the user-supplied outer header template. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* Fixed TTL implies DF (pmtu discovery) on the outer hdr. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parms already belong to another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Re-keying this device: point-to-point flag
				 * must agree with presence of a remote. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				/* Unlink, update endpoints, re-hash. */
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			/* Echo the effective parms back to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			/* Delete-by-parms; the fallback itself is immortal. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
765 
766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
767 {
768 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
769 }
770 
771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
772 {
773 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
774 		return -EINVAL;
775 	dev->mtu = new_mtu;
776 	return 0;
777 }
778 
/*
 * netdev setup callback (passed to alloc_netdev): install the tunnel
 * device operations and link-level defaults for an IPIP device.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	/* Worst-case link layer plus our encapsulation header. */
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}
796 
/*
 * dev->init for regular tunnel devices: record the device name, copy
 * the endpoint addresses into dev_addr/broadcast, and size
 * hard_header_len/mtu from the underlying device that carries the
 * tunnel, when it can be resolved by routing to the remote endpoint
 * or via the configured link index.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		/* Route to the remote endpoint to find the underlay. */
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	/* Fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}
838 
/*
 * dev->init for the fallback device "tunl0": it matches any
 * (remote, local) pair by occupying the wildcard slot of the lookup
 * tables.  The dev_hold() balances the dev_put() in
 * ipip_tunnel_uninit().
 */
static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Minimal outer header template; addresses stay wildcard (0). */
	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev_hold(dev);
	tunnels_wc[0]		= tunnel;
	return 0;
}
855 
#ifdef CONFIG_INET_TUNNEL
/* When the generic INET tunnel infrastructure is built, IP protocol 4
 * must be shared through the xfrm tunnel demultiplexer rather than
 * claimed directly. */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
};

static inline int ipip_register(void)
{
	return xfrm4_tunnel_register(&ipip_handler);
}

static inline int ipip_unregister(void)
{
	return xfrm4_tunnel_deregister(&ipip_handler);
}
#else
/* Otherwise we own protocol 4 outright via the inet protocol table. */
static struct net_protocol ipip_protocol = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.no_policy	=	1,
};

static inline int ipip_register(void)
{
	return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
}

static inline int ipip_unregister(void)
{
	return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
}
#endif
888 
/* Printed once at module load. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
891 
/*
 * Module init: register the protocol handler, then create and
 * register the fallback device "tunl0".  Any failure after the
 * handler is registered unwinds it again.  Returns 0 or a negative
 * errno.
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (ipip_register() < 0) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto err2;
 out:
	return err;
 err2:
	free_netdev(ipip_fb_tunnel_dev);
 err1:
	ipip_unregister();
	goto out;
}
923 
924 static void __exit ipip_destroy_tunnels(void)
925 {
926 	int prio;
927 
928 	for (prio = 1; prio < 4; prio++) {
929 		int h;
930 		for (h = 0; h < HASH_SIZE; h++) {
931 			struct ip_tunnel *t;
932 			while ((t = tunnels[prio][h]) != NULL)
933 				unregister_netdevice(t->dev);
934 		}
935 	}
936 }
937 
/*
 * Module unload: detach the protocol handler first so no new packets
 * can arrive, then tear down every tunnel device (including the
 * fallback) under RTNL.
 */
static void __exit ipip_fini(void)
{
	if (ipip_unregister() < 0)
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	unregister_netdevice(ipip_fb_tunnel_dev);
	rtnl_unlock();
}
948 
949 module_init(ipip_init);
950 module_exit(ipip_fini);
951 MODULE_LICENSE("GPL");
952