xref: /openbmc/linux/net/ipv4/ipip.c (revision d5cb9783536a41df9f9cba5b0a1d78047ed787f7)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
94 
95 
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/protocol.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119 
/* Number of buckets in each hashed tunnel table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is parenthesised so that an expression such as
 * HASH(a ^ b) hashes the value of the whole expression.  The argument is
 * still evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((addr)) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
122 
123 static int ipip_fb_tunnel_init(struct net_device *dev);
124 static int ipip_tunnel_init(struct net_device *dev);
125 static void ipip_tunnel_setup(struct net_device *dev);
126 
127 static struct net_device *ipip_fb_tunnel_dev;
128 
129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
132 static struct ip_tunnel *tunnels_wc[1];
133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
134 
135 static DEFINE_RWLOCK(ipip_lock);
136 
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139 	unsigned h0 = HASH(remote);
140 	unsigned h1 = HASH(local);
141 	struct ip_tunnel *t;
142 
143 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144 		if (local == t->parms.iph.saddr &&
145 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146 			return t;
147 	}
148 	for (t = tunnels_r[h0]; t; t = t->next) {
149 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 			return t;
151 	}
152 	for (t = tunnels_l[h1]; t; t = t->next) {
153 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154 			return t;
155 	}
156 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157 		return t;
158 	return NULL;
159 }
160 
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163 	u32 remote = t->parms.iph.daddr;
164 	u32 local = t->parms.iph.saddr;
165 	unsigned h = 0;
166 	int prio = 0;
167 
168 	if (remote) {
169 		prio |= 2;
170 		h ^= HASH(remote);
171 	}
172 	if (local) {
173 		prio |= 1;
174 		h ^= HASH(local);
175 	}
176 	return &tunnels[prio][h];
177 }
178 
179 
180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
181 {
182 	struct ip_tunnel **tp;
183 
184 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
185 		if (t == *tp) {
186 			write_lock_bh(&ipip_lock);
187 			*tp = t->next;
188 			write_unlock_bh(&ipip_lock);
189 			break;
190 		}
191 	}
192 }
193 
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196 	struct ip_tunnel **tp = ipip_bucket(t);
197 
198 	t->next = *tp;
199 	write_lock_bh(&ipip_lock);
200 	*tp = t;
201 	write_unlock_bh(&ipip_lock);
202 }
203 
204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
205 {
206 	u32 remote = parms->iph.daddr;
207 	u32 local = parms->iph.saddr;
208 	struct ip_tunnel *t, **tp, *nt;
209 	struct net_device *dev;
210 	unsigned h = 0;
211 	int prio = 0;
212 	char name[IFNAMSIZ];
213 
214 	if (remote) {
215 		prio |= 2;
216 		h ^= HASH(remote);
217 	}
218 	if (local) {
219 		prio |= 1;
220 		h ^= HASH(local);
221 	}
222 	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
223 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
224 			return t;
225 	}
226 	if (!create)
227 		return NULL;
228 
229 	if (parms->name[0])
230 		strlcpy(name, parms->name, IFNAMSIZ);
231 	else {
232 		int i;
233 		for (i=1; i<100; i++) {
234 			sprintf(name, "tunl%d", i);
235 			if (__dev_get_by_name(name) == NULL)
236 				break;
237 		}
238 		if (i==100)
239 			goto failed;
240 	}
241 
242 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243 	if (dev == NULL)
244 		return NULL;
245 
246 	nt = dev->priv;
247 	SET_MODULE_OWNER(dev);
248 	dev->init = ipip_tunnel_init;
249 	nt->parms = *parms;
250 
251 	if (register_netdevice(dev) < 0) {
252 		free_netdev(dev);
253 		goto failed;
254 	}
255 
256 	dev_hold(dev);
257 	ipip_tunnel_link(nt);
258 	return nt;
259 
260 failed:
261 	return NULL;
262 }
263 
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266 	if (dev == ipip_fb_tunnel_dev) {
267 		write_lock_bh(&ipip_lock);
268 		tunnels_wc[0] = NULL;
269 		write_unlock_bh(&ipip_lock);
270 	} else
271 		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
272 	dev_put(dev);
273 }
274 
275 static void ipip_err(struct sk_buff *skb, u32 info)
276 {
277 #ifndef I_WISH_WORLD_WERE_PERFECT
278 
279 /* It is not :-( All the routers (except for Linux) return only
280    8 bytes of packet payload. It means, that precise relaying of
281    ICMP in the real Internet is absolutely infeasible.
282  */
283 	struct iphdr *iph = (struct iphdr*)skb->data;
284 	int type = skb->h.icmph->type;
285 	int code = skb->h.icmph->code;
286 	struct ip_tunnel *t;
287 
288 	switch (type) {
289 	default:
290 	case ICMP_PARAMETERPROB:
291 		return;
292 
293 	case ICMP_DEST_UNREACH:
294 		switch (code) {
295 		case ICMP_SR_FAILED:
296 		case ICMP_PORT_UNREACH:
297 			/* Impossible event. */
298 			return;
299 		case ICMP_FRAG_NEEDED:
300 			/* Soft state for pmtu is maintained by IP core. */
301 			return;
302 		default:
303 			/* All others are translated to HOST_UNREACH.
304 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
305 			   I believe they are just ether pollution. --ANK
306 			 */
307 			break;
308 		}
309 		break;
310 	case ICMP_TIME_EXCEEDED:
311 		if (code != ICMP_EXC_TTL)
312 			return;
313 		break;
314 	}
315 
316 	read_lock(&ipip_lock);
317 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
318 	if (t == NULL || t->parms.iph.daddr == 0)
319 		goto out;
320 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
321 		goto out;
322 
323 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
324 		t->err_count++;
325 	else
326 		t->err_count = 1;
327 	t->err_time = jiffies;
328 out:
329 	read_unlock(&ipip_lock);
330 	return;
331 #else
332 	struct iphdr *iph = (struct iphdr*)dp;
333 	int hlen = iph->ihl<<2;
334 	struct iphdr *eiph;
335 	int type = skb->h.icmph->type;
336 	int code = skb->h.icmph->code;
337 	int rel_type = 0;
338 	int rel_code = 0;
339 	int rel_info = 0;
340 	struct sk_buff *skb2;
341 	struct flowi fl;
342 	struct rtable *rt;
343 
344 	if (len < hlen + sizeof(struct iphdr))
345 		return;
346 	eiph = (struct iphdr*)(dp + hlen);
347 
348 	switch (type) {
349 	default:
350 		return;
351 	case ICMP_PARAMETERPROB:
352 		if (skb->h.icmph->un.gateway < hlen)
353 			return;
354 
355 		/* So... This guy found something strange INSIDE encapsulated
356 		   packet. Well, he is fool, but what can we do ?
357 		 */
358 		rel_type = ICMP_PARAMETERPROB;
359 		rel_info = skb->h.icmph->un.gateway - hlen;
360 		break;
361 
362 	case ICMP_DEST_UNREACH:
363 		switch (code) {
364 		case ICMP_SR_FAILED:
365 		case ICMP_PORT_UNREACH:
366 			/* Impossible event. */
367 			return;
368 		case ICMP_FRAG_NEEDED:
369 			/* And it is the only really necessary thing :-) */
370 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
371 			if (rel_info < hlen+68)
372 				return;
373 			rel_info -= hlen;
374 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
375 			if (rel_info > ntohs(eiph->tot_len))
376 				return;
377 			break;
378 		default:
379 			/* All others are translated to HOST_UNREACH.
380 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
381 			   I believe, it is just ether pollution. --ANK
382 			 */
383 			rel_type = ICMP_DEST_UNREACH;
384 			rel_code = ICMP_HOST_UNREACH;
385 			break;
386 		}
387 		break;
388 	case ICMP_TIME_EXCEEDED:
389 		if (code != ICMP_EXC_TTL)
390 			return;
391 		break;
392 	}
393 
394 	/* Prepare fake skb to feed it to icmp_send */
395 	skb2 = skb_clone(skb, GFP_ATOMIC);
396 	if (skb2 == NULL)
397 		return;
398 	dst_release(skb2->dst);
399 	skb2->dst = NULL;
400 	skb_pull(skb2, skb->data - (u8*)eiph);
401 	skb2->nh.raw = skb2->data;
402 
403 	/* Try to guess incoming interface */
404 	memset(&fl, 0, sizeof(fl));
405 	fl.fl4_daddr = eiph->saddr;
406 	fl.fl4_tos = RT_TOS(eiph->tos);
407 	fl.proto = IPPROTO_IPIP;
408 	if (ip_route_output_key(&rt, &key)) {
409 		kfree_skb(skb2);
410 		return;
411 	}
412 	skb2->dev = rt->u.dst.dev;
413 
414 	/* route "incoming" packet */
415 	if (rt->rt_flags&RTCF_LOCAL) {
416 		ip_rt_put(rt);
417 		rt = NULL;
418 		fl.fl4_daddr = eiph->daddr;
419 		fl.fl4_src = eiph->saddr;
420 		fl.fl4_tos = eiph->tos;
421 		if (ip_route_output_key(&rt, &fl) ||
422 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
423 			ip_rt_put(rt);
424 			kfree_skb(skb2);
425 			return;
426 		}
427 	} else {
428 		ip_rt_put(rt);
429 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
430 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
431 			kfree_skb(skb2);
432 			return;
433 		}
434 	}
435 
436 	/* change mtu on this route */
437 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
438 		if (rel_info > dst_mtu(skb2->dst)) {
439 			kfree_skb(skb2);
440 			return;
441 		}
442 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
443 		rel_info = htonl(rel_info);
444 	} else if (type == ICMP_TIME_EXCEEDED) {
445 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
446 		if (t->parms.iph.ttl) {
447 			rel_type = ICMP_DEST_UNREACH;
448 			rel_code = ICMP_HOST_UNREACH;
449 		}
450 	}
451 
452 	icmp_send(skb2, rel_type, rel_code, rel_info);
453 	kfree_skb(skb2);
454 	return;
455 #endif
456 }
457 
458 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
459 {
460 	struct iphdr *inner_iph = skb->nh.iph;
461 
462 	if (INET_ECN_is_ce(outer_iph->tos))
463 		IP_ECN_set_ce(inner_iph);
464 }
465 
466 static int ipip_rcv(struct sk_buff *skb)
467 {
468 	struct iphdr *iph;
469 	struct ip_tunnel *tunnel;
470 
471 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
472 		goto out;
473 
474 	iph = skb->nh.iph;
475 
476 	read_lock(&ipip_lock);
477 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
478 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
479 			read_unlock(&ipip_lock);
480 			kfree_skb(skb);
481 			return 0;
482 		}
483 
484 		secpath_reset(skb);
485 
486 		skb->mac.raw = skb->nh.raw;
487 		skb->nh.raw = skb->data;
488 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
489 		skb->protocol = htons(ETH_P_IP);
490 		skb->pkt_type = PACKET_HOST;
491 
492 		tunnel->stat.rx_packets++;
493 		tunnel->stat.rx_bytes += skb->len;
494 		skb->dev = tunnel->dev;
495 		dst_release(skb->dst);
496 		skb->dst = NULL;
497 		nf_reset(skb);
498 		ipip_ecn_decapsulate(iph, skb);
499 		netif_rx(skb);
500 		read_unlock(&ipip_lock);
501 		return 0;
502 	}
503 	read_unlock(&ipip_lock);
504 
505 out:
506 	return -1;
507 }
508 
509 /*
510  *	This function assumes it is being called from dev_queue_xmit()
511  *	and that skb is filled properly by that function.
512  */
513 
514 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
515 {
516 	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
517 	struct net_device_stats *stats = &tunnel->stat;
518 	struct iphdr  *tiph = &tunnel->parms.iph;
519 	u8     tos = tunnel->parms.iph.tos;
520 	u16    df = tiph->frag_off;
521 	struct rtable *rt;     			/* Route to the other host */
522 	struct net_device *tdev;			/* Device to other host */
523 	struct iphdr  *old_iph = skb->nh.iph;
524 	struct iphdr  *iph;			/* Our new IP header */
525 	int    max_headroom;			/* The extra header space needed */
526 	u32    dst = tiph->daddr;
527 	int    mtu;
528 
529 	if (tunnel->recursion++) {
530 		tunnel->stat.collisions++;
531 		goto tx_error;
532 	}
533 
534 	if (skb->protocol != htons(ETH_P_IP))
535 		goto tx_error;
536 
537 	if (tos&1)
538 		tos = old_iph->tos;
539 
540 	if (!dst) {
541 		/* NBMA tunnel */
542 		if ((rt = (struct rtable*)skb->dst) == NULL) {
543 			tunnel->stat.tx_fifo_errors++;
544 			goto tx_error;
545 		}
546 		if ((dst = rt->rt_gateway) == 0)
547 			goto tx_error_icmp;
548 	}
549 
550 	{
551 		struct flowi fl = { .oif = tunnel->parms.link,
552 				    .nl_u = { .ip4_u =
553 					      { .daddr = dst,
554 						.saddr = tiph->saddr,
555 						.tos = RT_TOS(tos) } },
556 				    .proto = IPPROTO_IPIP };
557 		if (ip_route_output_key(&rt, &fl)) {
558 			tunnel->stat.tx_carrier_errors++;
559 			goto tx_error_icmp;
560 		}
561 	}
562 	tdev = rt->u.dst.dev;
563 
564 	if (tdev == dev) {
565 		ip_rt_put(rt);
566 		tunnel->stat.collisions++;
567 		goto tx_error;
568 	}
569 
570 	if (tiph->frag_off)
571 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
572 	else
573 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
574 
575 	if (mtu < 68) {
576 		tunnel->stat.collisions++;
577 		ip_rt_put(rt);
578 		goto tx_error;
579 	}
580 	if (skb->dst)
581 		skb->dst->ops->update_pmtu(skb->dst, mtu);
582 
583 	df |= (old_iph->frag_off&htons(IP_DF));
584 
585 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
586 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
587 		ip_rt_put(rt);
588 		goto tx_error;
589 	}
590 
591 	if (tunnel->err_count > 0) {
592 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
593 			tunnel->err_count--;
594 			dst_link_failure(skb);
595 		} else
596 			tunnel->err_count = 0;
597 	}
598 
599 	/*
600 	 * Okay, now see if we can stuff it in the buffer as-is.
601 	 */
602 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
603 
604 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
605 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
606 		if (!new_skb) {
607 			ip_rt_put(rt);
608   			stats->tx_dropped++;
609 			dev_kfree_skb(skb);
610 			tunnel->recursion--;
611 			return 0;
612 		}
613 		if (skb->sk)
614 			skb_set_owner_w(new_skb, skb->sk);
615 		dev_kfree_skb(skb);
616 		skb = new_skb;
617 		old_iph = skb->nh.iph;
618 	}
619 
620 	skb->h.raw = skb->nh.raw;
621 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
622 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
623 	dst_release(skb->dst);
624 	skb->dst = &rt->u.dst;
625 
626 	/*
627 	 *	Push down and install the IPIP header.
628 	 */
629 
630 	iph 			=	skb->nh.iph;
631 	iph->version		=	4;
632 	iph->ihl		=	sizeof(struct iphdr)>>2;
633 	iph->frag_off		=	df;
634 	iph->protocol		=	IPPROTO_IPIP;
635 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
636 	iph->daddr		=	rt->rt_dst;
637 	iph->saddr		=	rt->rt_src;
638 
639 	if ((iph->ttl = tiph->ttl) == 0)
640 		iph->ttl	=	old_iph->ttl;
641 
642 	nf_reset(skb);
643 
644 	IPTUNNEL_XMIT();
645 	tunnel->recursion--;
646 	return 0;
647 
648 tx_error_icmp:
649 	dst_link_failure(skb);
650 tx_error:
651 	stats->tx_errors++;
652 	dev_kfree_skb(skb);
653 	tunnel->recursion--;
654 	return 0;
655 }
656 
657 static int
658 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
659 {
660 	int err = 0;
661 	struct ip_tunnel_parm p;
662 	struct ip_tunnel *t;
663 
664 	switch (cmd) {
665 	case SIOCGETTUNNEL:
666 		t = NULL;
667 		if (dev == ipip_fb_tunnel_dev) {
668 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
669 				err = -EFAULT;
670 				break;
671 			}
672 			t = ipip_tunnel_locate(&p, 0);
673 		}
674 		if (t == NULL)
675 			t = (struct ip_tunnel*)dev->priv;
676 		memcpy(&p, &t->parms, sizeof(p));
677 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
678 			err = -EFAULT;
679 		break;
680 
681 	case SIOCADDTUNNEL:
682 	case SIOCCHGTUNNEL:
683 		err = -EPERM;
684 		if (!capable(CAP_NET_ADMIN))
685 			goto done;
686 
687 		err = -EFAULT;
688 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
689 			goto done;
690 
691 		err = -EINVAL;
692 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
693 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
694 			goto done;
695 		if (p.iph.ttl)
696 			p.iph.frag_off |= htons(IP_DF);
697 
698 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
699 
700 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
701 			if (t != NULL) {
702 				if (t->dev != dev) {
703 					err = -EEXIST;
704 					break;
705 				}
706 			} else {
707 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
708 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
709 					err = -EINVAL;
710 					break;
711 				}
712 				t = (struct ip_tunnel*)dev->priv;
713 				ipip_tunnel_unlink(t);
714 				t->parms.iph.saddr = p.iph.saddr;
715 				t->parms.iph.daddr = p.iph.daddr;
716 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
717 				memcpy(dev->broadcast, &p.iph.daddr, 4);
718 				ipip_tunnel_link(t);
719 				netdev_state_change(dev);
720 			}
721 		}
722 
723 		if (t) {
724 			err = 0;
725 			if (cmd == SIOCCHGTUNNEL) {
726 				t->parms.iph.ttl = p.iph.ttl;
727 				t->parms.iph.tos = p.iph.tos;
728 				t->parms.iph.frag_off = p.iph.frag_off;
729 			}
730 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
731 				err = -EFAULT;
732 		} else
733 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
734 		break;
735 
736 	case SIOCDELTUNNEL:
737 		err = -EPERM;
738 		if (!capable(CAP_NET_ADMIN))
739 			goto done;
740 
741 		if (dev == ipip_fb_tunnel_dev) {
742 			err = -EFAULT;
743 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
744 				goto done;
745 			err = -ENOENT;
746 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
747 				goto done;
748 			err = -EPERM;
749 			if (t->dev == ipip_fb_tunnel_dev)
750 				goto done;
751 			dev = t->dev;
752 		}
753 		err = unregister_netdevice(dev);
754 		break;
755 
756 	default:
757 		err = -EINVAL;
758 	}
759 
760 done:
761 	return err;
762 }
763 
764 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
765 {
766 	return &(((struct ip_tunnel*)dev->priv)->stat);
767 }
768 
769 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
770 {
771 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
772 		return -EINVAL;
773 	dev->mtu = new_mtu;
774 	return 0;
775 }
776 
777 static void ipip_tunnel_setup(struct net_device *dev)
778 {
779 	SET_MODULE_OWNER(dev);
780 	dev->uninit		= ipip_tunnel_uninit;
781 	dev->hard_start_xmit	= ipip_tunnel_xmit;
782 	dev->get_stats		= ipip_tunnel_get_stats;
783 	dev->do_ioctl		= ipip_tunnel_ioctl;
784 	dev->change_mtu		= ipip_tunnel_change_mtu;
785 	dev->destructor		= free_netdev;
786 
787 	dev->type		= ARPHRD_TUNNEL;
788 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
789 	dev->mtu		= 1500 - sizeof(struct iphdr);
790 	dev->flags		= IFF_NOARP;
791 	dev->iflink		= 0;
792 	dev->addr_len		= 4;
793 }
794 
795 static int ipip_tunnel_init(struct net_device *dev)
796 {
797 	struct net_device *tdev = NULL;
798 	struct ip_tunnel *tunnel;
799 	struct iphdr *iph;
800 
801 	tunnel = (struct ip_tunnel*)dev->priv;
802 	iph = &tunnel->parms.iph;
803 
804 	tunnel->dev = dev;
805 	strcpy(tunnel->parms.name, dev->name);
806 
807 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
808 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
809 
810 	if (iph->daddr) {
811 		struct flowi fl = { .oif = tunnel->parms.link,
812 				    .nl_u = { .ip4_u =
813 					      { .daddr = iph->daddr,
814 						.saddr = iph->saddr,
815 						.tos = RT_TOS(iph->tos) } },
816 				    .proto = IPPROTO_IPIP };
817 		struct rtable *rt;
818 		if (!ip_route_output_key(&rt, &fl)) {
819 			tdev = rt->u.dst.dev;
820 			ip_rt_put(rt);
821 		}
822 		dev->flags |= IFF_POINTOPOINT;
823 	}
824 
825 	if (!tdev && tunnel->parms.link)
826 		tdev = __dev_get_by_index(tunnel->parms.link);
827 
828 	if (tdev) {
829 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
830 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
831 	}
832 	dev->iflink = tunnel->parms.link;
833 
834 	return 0;
835 }
836 
837 static int __init ipip_fb_tunnel_init(struct net_device *dev)
838 {
839 	struct ip_tunnel *tunnel = dev->priv;
840 	struct iphdr *iph = &tunnel->parms.iph;
841 
842 	tunnel->dev = dev;
843 	strcpy(tunnel->parms.name, dev->name);
844 
845 	iph->version		= 4;
846 	iph->protocol		= IPPROTO_IPIP;
847 	iph->ihl		= 5;
848 
849 	dev_hold(dev);
850 	tunnels_wc[0]		= tunnel;
851 	return 0;
852 }
853 
854 #ifdef CONFIG_INET_TUNNEL
855 static struct xfrm_tunnel ipip_handler = {
856 	.handler	=	ipip_rcv,
857 	.err_handler	=	ipip_err,
858 };
859 
860 static inline int ipip_register(void)
861 {
862 	return xfrm4_tunnel_register(&ipip_handler);
863 }
864 
865 static inline int ipip_unregister(void)
866 {
867 	return xfrm4_tunnel_deregister(&ipip_handler);
868 }
869 #else
870 static struct net_protocol ipip_protocol = {
871 	.handler	=	ipip_rcv,
872 	.err_handler	=	ipip_err,
873 	.no_policy	=	1,
874 };
875 
876 static inline int ipip_register(void)
877 {
878 	return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
879 }
880 
881 static inline int ipip_unregister(void)
882 {
883 	return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
884 }
885 #endif
886 
887 static char banner[] __initdata =
888 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
889 
890 static int __init ipip_init(void)
891 {
892 	int err;
893 
894 	printk(banner);
895 
896 	if (ipip_register() < 0) {
897 		printk(KERN_INFO "ipip init: can't register tunnel\n");
898 		return -EAGAIN;
899 	}
900 
901 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
902 					   "tunl0",
903 					   ipip_tunnel_setup);
904 	if (!ipip_fb_tunnel_dev) {
905 		err = -ENOMEM;
906 		goto err1;
907 	}
908 
909 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
910 
911 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
912 		goto err2;
913  out:
914 	return err;
915  err2:
916 	free_netdev(ipip_fb_tunnel_dev);
917  err1:
918 	ipip_unregister();
919 	goto out;
920 }
921 
922 static void __exit ipip_destroy_tunnels(void)
923 {
924 	int prio;
925 
926 	for (prio = 1; prio < 4; prio++) {
927 		int h;
928 		for (h = 0; h < HASH_SIZE; h++) {
929 			struct ip_tunnel *t;
930 			while ((t = tunnels[prio][h]) != NULL)
931 				unregister_netdevice(t->dev);
932 		}
933 	}
934 }
935 
936 static void __exit ipip_fini(void)
937 {
938 	if (ipip_unregister() < 0)
939 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
940 
941 	rtnl_lock();
942 	ipip_destroy_tunnels();
943 	unregister_netdevice(ipip_fb_tunnel_dev);
944 	rtnl_unlock();
945 }
946 
947 module_init(ipip_init);
948 module_exit(ipip_fini);
949 MODULE_LICENSE("GPL");
950