xref: /openbmc/linux/net/ipv4/ipip.c (revision 2c8c1e7297e19bdef3c178c3ea41d898a7716e3e)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Authors:
5  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *	Fixes:
8  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
9  *					a module taking up 2 pages).
10  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *					to keep ip_forward happy.
12  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *					I do not want to merge them together.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *	modify it under the terms of the GNU General Public License
22  *	as published by the Free Software Foundation; either version
23  *	2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29 	The purpose of this driver is to provide an IP tunnel through
30 	which you can tunnel network traffic transparently across subnets.
31 
32 	This was written by looking at Nick Holloway's dummy driver
33 	Thanks for the great code!
34 
35 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36 
37 	Minor tweaks:
38 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 		dev->hard_header/hard_header_len changed to use no headers.
40 		Comments/bracketing tweaked.
41 		Made the tunnels use dev->name not tunnel: when error reporting.
42 		Added tx_dropped stat
43 
44 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45 
46 	Reworked:
47 		Changed to tunnel to destination gateway in addition to the
48 			tunnel's pointopoint address
49 		Almost completely rewritten
50 		Note:  There is currently no firewall or ICMP handling done.
51 
52 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58 	When the tunnel_xmit() function is called, the skb contains the
59 	packet to be sent (plus a great deal of extra info), and dev
60 	contains the tunnel device that _we_ are.
61 
62 	When we are passed a packet, we are expected to fill in the
63 	source address with our source IP address.
64 
65 	What is the proper way to allocate, copy and free a buffer?
66 	After you allocate it, it is a "0 length" chunk of memory
67 	starting at zero.  If you want to add headers to the buffer
68 	later, you'll have to call "skb_reserve(skb, amount)" with
69 	the amount of memory you want reserved.  Then, you call
70 	"skb_put(skb, amount)" with the amount of space you want in
71 	the buffer.  skb_put() returns a pointer to the top (#0) of
72 	that buffer.  skb->len is set to the amount of space you have
73 	"allocated" with skb_put().  You can then write up to skb->len
74 	bytes to that buffer.  If you need more, you can call skb_put()
75 	again with the additional amount of space you need.  You can
76 	find out how much more space you can allocate by calling
77 	"skb_tailroom(skb)".
78 	Now, to add header space, call "skb_push(skb, header_len)".
79 	This creates space at the beginning of the buffer and returns
80 	a pointer to this new space.  If later you need to strip a
81 	header from a buffer, call "skb_pull(skb, header_len)".
82 	skb_headroom() will return how much space is left at the top
83 	of the buffer (before the main data).  Remember, this headroom
84 	space must be reserved before the skb_put() function is called.
85 	*/
86 
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89 
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <asm/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <linux/in.h>
102 #include <linux/tcp.h>
103 #include <linux/udp.h>
104 #include <linux/if_arp.h>
105 #include <linux/mroute.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 
110 #include <net/sock.h>
111 #include <net/ip.h>
112 #include <net/icmp.h>
113 #include <net/ipip.h>
114 #include <net/inet_ecn.h>
115 #include <net/xfrm.h>
116 #include <net/net_namespace.h>
117 #include <net/netns/generic.h>
118 
119 #define HASH_SIZE  16
120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
121 
122 static int ipip_net_id __read_mostly;
123 struct ipip_net {
124 	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
125 	struct ip_tunnel *tunnels_r[HASH_SIZE];
126 	struct ip_tunnel *tunnels_l[HASH_SIZE];
127 	struct ip_tunnel *tunnels_wc[1];
128 	struct ip_tunnel **tunnels[4];
129 
130 	struct net_device *fb_tunnel_dev;
131 };
132 
133 static void ipip_tunnel_init(struct net_device *dev);
134 static void ipip_tunnel_setup(struct net_device *dev);
135 
136 /*
137  * Locking : hash tables are protected by RCU and a spinlock
138  */
139 static DEFINE_SPINLOCK(ipip_lock);
140 
141 #define for_each_ip_tunnel_rcu(start) \
142 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
143 
144 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
145 		__be32 remote, __be32 local)
146 {
147 	unsigned h0 = HASH(remote);
148 	unsigned h1 = HASH(local);
149 	struct ip_tunnel *t;
150 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
151 
152 	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
153 		if (local == t->parms.iph.saddr &&
154 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
155 			return t;
156 
157 	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
158 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
159 			return t;
160 
161 	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
162 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
163 			return t;
164 
165 	t = rcu_dereference(ipn->tunnels_wc[0]);
166 	if (t && (t->dev->flags&IFF_UP))
167 		return t;
168 	return NULL;
169 }
170 
171 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
172 		struct ip_tunnel_parm *parms)
173 {
174 	__be32 remote = parms->iph.daddr;
175 	__be32 local = parms->iph.saddr;
176 	unsigned h = 0;
177 	int prio = 0;
178 
179 	if (remote) {
180 		prio |= 2;
181 		h ^= HASH(remote);
182 	}
183 	if (local) {
184 		prio |= 1;
185 		h ^= HASH(local);
186 	}
187 	return &ipn->tunnels[prio][h];
188 }
189 
190 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
191 		struct ip_tunnel *t)
192 {
193 	return __ipip_bucket(ipn, &t->parms);
194 }
195 
196 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
197 {
198 	struct ip_tunnel **tp;
199 
200 	for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
201 		if (t == *tp) {
202 			spin_lock_bh(&ipip_lock);
203 			*tp = t->next;
204 			spin_unlock_bh(&ipip_lock);
205 			break;
206 		}
207 	}
208 }
209 
210 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
211 {
212 	struct ip_tunnel **tp = ipip_bucket(ipn, t);
213 
214 	spin_lock_bh(&ipip_lock);
215 	t->next = *tp;
216 	rcu_assign_pointer(*tp, t);
217 	spin_unlock_bh(&ipip_lock);
218 }
219 
220 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
221 		struct ip_tunnel_parm *parms, int create)
222 {
223 	__be32 remote = parms->iph.daddr;
224 	__be32 local = parms->iph.saddr;
225 	struct ip_tunnel *t, **tp, *nt;
226 	struct net_device *dev;
227 	char name[IFNAMSIZ];
228 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
229 
230 	for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
231 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
232 			return t;
233 	}
234 	if (!create)
235 		return NULL;
236 
237 	if (parms->name[0])
238 		strlcpy(name, parms->name, IFNAMSIZ);
239 	else
240 		sprintf(name, "tunl%%d");
241 
242 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243 	if (dev == NULL)
244 		return NULL;
245 
246 	dev_net_set(dev, net);
247 
248 	if (strchr(name, '%')) {
249 		if (dev_alloc_name(dev, name) < 0)
250 			goto failed_free;
251 	}
252 
253 	nt = netdev_priv(dev);
254 	nt->parms = *parms;
255 
256 	ipip_tunnel_init(dev);
257 
258 	if (register_netdevice(dev) < 0)
259 		goto failed_free;
260 
261 	dev_hold(dev);
262 	ipip_tunnel_link(ipn, nt);
263 	return nt;
264 
265 failed_free:
266 	free_netdev(dev);
267 	return NULL;
268 }
269 
270 static void ipip_tunnel_uninit(struct net_device *dev)
271 {
272 	struct net *net = dev_net(dev);
273 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
274 
275 	if (dev == ipn->fb_tunnel_dev) {
276 		spin_lock_bh(&ipip_lock);
277 		ipn->tunnels_wc[0] = NULL;
278 		spin_unlock_bh(&ipip_lock);
279 	} else
280 		ipip_tunnel_unlink(ipn, netdev_priv(dev));
281 	dev_put(dev);
282 }
283 
284 static int ipip_err(struct sk_buff *skb, u32 info)
285 {
286 
287 /* All the routers (except for Linux) return only
288    8 bytes of packet payload. It means, that precise relaying of
289    ICMP in the real Internet is absolutely infeasible.
290  */
291 	struct iphdr *iph = (struct iphdr *)skb->data;
292 	const int type = icmp_hdr(skb)->type;
293 	const int code = icmp_hdr(skb)->code;
294 	struct ip_tunnel *t;
295 	int err;
296 
297 	switch (type) {
298 	default:
299 	case ICMP_PARAMETERPROB:
300 		return 0;
301 
302 	case ICMP_DEST_UNREACH:
303 		switch (code) {
304 		case ICMP_SR_FAILED:
305 		case ICMP_PORT_UNREACH:
306 			/* Impossible event. */
307 			return 0;
308 		case ICMP_FRAG_NEEDED:
309 			/* Soft state for pmtu is maintained by IP core. */
310 			return 0;
311 		default:
312 			/* All others are translated to HOST_UNREACH.
313 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
314 			   I believe they are just ether pollution. --ANK
315 			 */
316 			break;
317 		}
318 		break;
319 	case ICMP_TIME_EXCEEDED:
320 		if (code != ICMP_EXC_TTL)
321 			return 0;
322 		break;
323 	}
324 
325 	err = -ENOENT;
326 
327 	rcu_read_lock();
328 	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
329 	if (t == NULL || t->parms.iph.daddr == 0)
330 		goto out;
331 
332 	err = 0;
333 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
334 		goto out;
335 
336 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
337 		t->err_count++;
338 	else
339 		t->err_count = 1;
340 	t->err_time = jiffies;
341 out:
342 	rcu_read_unlock();
343 	return err;
344 }
345 
346 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
347 					struct sk_buff *skb)
348 {
349 	struct iphdr *inner_iph = ip_hdr(skb);
350 
351 	if (INET_ECN_is_ce(outer_iph->tos))
352 		IP_ECN_set_ce(inner_iph);
353 }
354 
355 static int ipip_rcv(struct sk_buff *skb)
356 {
357 	struct ip_tunnel *tunnel;
358 	const struct iphdr *iph = ip_hdr(skb);
359 
360 	rcu_read_lock();
361 	if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
362 					iph->saddr, iph->daddr)) != NULL) {
363 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
364 			rcu_read_unlock();
365 			kfree_skb(skb);
366 			return 0;
367 		}
368 
369 		secpath_reset(skb);
370 
371 		skb->mac_header = skb->network_header;
372 		skb_reset_network_header(skb);
373 		skb->protocol = htons(ETH_P_IP);
374 		skb->pkt_type = PACKET_HOST;
375 
376 		tunnel->dev->stats.rx_packets++;
377 		tunnel->dev->stats.rx_bytes += skb->len;
378 		skb->dev = tunnel->dev;
379 		skb_dst_drop(skb);
380 		nf_reset(skb);
381 		ipip_ecn_decapsulate(iph, skb);
382 		netif_rx(skb);
383 		rcu_read_unlock();
384 		return 0;
385 	}
386 	rcu_read_unlock();
387 
388 	return -1;
389 }
390 
391 /*
392  *	This function assumes it is being called from dev_queue_xmit()
393  *	and that skb is filled properly by that function.
394  */
395 
396 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
397 {
398 	struct ip_tunnel *tunnel = netdev_priv(dev);
399 	struct net_device_stats *stats = &dev->stats;
400 	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
401 	struct iphdr  *tiph = &tunnel->parms.iph;
402 	u8     tos = tunnel->parms.iph.tos;
403 	__be16 df = tiph->frag_off;
404 	struct rtable *rt;     			/* Route to the other host */
405 	struct net_device *tdev;			/* Device to other host */
406 	struct iphdr  *old_iph = ip_hdr(skb);
407 	struct iphdr  *iph;			/* Our new IP header */
408 	unsigned int max_headroom;		/* The extra header space needed */
409 	__be32 dst = tiph->daddr;
410 	int    mtu;
411 
412 	if (skb->protocol != htons(ETH_P_IP))
413 		goto tx_error;
414 
415 	if (tos&1)
416 		tos = old_iph->tos;
417 
418 	if (!dst) {
419 		/* NBMA tunnel */
420 		if ((rt = skb_rtable(skb)) == NULL) {
421 			stats->tx_fifo_errors++;
422 			goto tx_error;
423 		}
424 		if ((dst = rt->rt_gateway) == 0)
425 			goto tx_error_icmp;
426 	}
427 
428 	{
429 		struct flowi fl = { .oif = tunnel->parms.link,
430 				    .nl_u = { .ip4_u =
431 					      { .daddr = dst,
432 						.saddr = tiph->saddr,
433 						.tos = RT_TOS(tos) } },
434 				    .proto = IPPROTO_IPIP };
435 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
436 			stats->tx_carrier_errors++;
437 			goto tx_error_icmp;
438 		}
439 	}
440 	tdev = rt->u.dst.dev;
441 
442 	if (tdev == dev) {
443 		ip_rt_put(rt);
444 		stats->collisions++;
445 		goto tx_error;
446 	}
447 
448 	df |= old_iph->frag_off & htons(IP_DF);
449 
450 	if (df) {
451 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
452 
453 		if (mtu < 68) {
454 			stats->collisions++;
455 			ip_rt_put(rt);
456 			goto tx_error;
457 		}
458 
459 		if (skb_dst(skb))
460 			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
461 
462 		if ((old_iph->frag_off & htons(IP_DF)) &&
463 		    mtu < ntohs(old_iph->tot_len)) {
464 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
465 				  htonl(mtu));
466 			ip_rt_put(rt);
467 			goto tx_error;
468 		}
469 	}
470 
471 	if (tunnel->err_count > 0) {
472 		if (time_before(jiffies,
473 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
474 			tunnel->err_count--;
475 			dst_link_failure(skb);
476 		} else
477 			tunnel->err_count = 0;
478 	}
479 
480 	/*
481 	 * Okay, now see if we can stuff it in the buffer as-is.
482 	 */
483 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
484 
485 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
486 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
487 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
488 		if (!new_skb) {
489 			ip_rt_put(rt);
490 			txq->tx_dropped++;
491 			dev_kfree_skb(skb);
492 			return NETDEV_TX_OK;
493 		}
494 		if (skb->sk)
495 			skb_set_owner_w(new_skb, skb->sk);
496 		dev_kfree_skb(skb);
497 		skb = new_skb;
498 		old_iph = ip_hdr(skb);
499 	}
500 
501 	skb->transport_header = skb->network_header;
502 	skb_push(skb, sizeof(struct iphdr));
503 	skb_reset_network_header(skb);
504 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
505 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
506 			      IPSKB_REROUTED);
507 	skb_dst_drop(skb);
508 	skb_dst_set(skb, &rt->u.dst);
509 
510 	/*
511 	 *	Push down and install the IPIP header.
512 	 */
513 
514 	iph 			=	ip_hdr(skb);
515 	iph->version		=	4;
516 	iph->ihl		=	sizeof(struct iphdr)>>2;
517 	iph->frag_off		=	df;
518 	iph->protocol		=	IPPROTO_IPIP;
519 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
520 	iph->daddr		=	rt->rt_dst;
521 	iph->saddr		=	rt->rt_src;
522 
523 	if ((iph->ttl = tiph->ttl) == 0)
524 		iph->ttl	=	old_iph->ttl;
525 
526 	nf_reset(skb);
527 
528 	IPTUNNEL_XMIT();
529 	return NETDEV_TX_OK;
530 
531 tx_error_icmp:
532 	dst_link_failure(skb);
533 tx_error:
534 	stats->tx_errors++;
535 	dev_kfree_skb(skb);
536 	return NETDEV_TX_OK;
537 }
538 
539 static void ipip_tunnel_bind_dev(struct net_device *dev)
540 {
541 	struct net_device *tdev = NULL;
542 	struct ip_tunnel *tunnel;
543 	struct iphdr *iph;
544 
545 	tunnel = netdev_priv(dev);
546 	iph = &tunnel->parms.iph;
547 
548 	if (iph->daddr) {
549 		struct flowi fl = { .oif = tunnel->parms.link,
550 				    .nl_u = { .ip4_u =
551 					      { .daddr = iph->daddr,
552 						.saddr = iph->saddr,
553 						.tos = RT_TOS(iph->tos) } },
554 				    .proto = IPPROTO_IPIP };
555 		struct rtable *rt;
556 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
557 			tdev = rt->u.dst.dev;
558 			ip_rt_put(rt);
559 		}
560 		dev->flags |= IFF_POINTOPOINT;
561 	}
562 
563 	if (!tdev && tunnel->parms.link)
564 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
565 
566 	if (tdev) {
567 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
568 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
569 	}
570 	dev->iflink = tunnel->parms.link;
571 }
572 
573 static int
574 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
575 {
576 	int err = 0;
577 	struct ip_tunnel_parm p;
578 	struct ip_tunnel *t;
579 	struct net *net = dev_net(dev);
580 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
581 
582 	switch (cmd) {
583 	case SIOCGETTUNNEL:
584 		t = NULL;
585 		if (dev == ipn->fb_tunnel_dev) {
586 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
587 				err = -EFAULT;
588 				break;
589 			}
590 			t = ipip_tunnel_locate(net, &p, 0);
591 		}
592 		if (t == NULL)
593 			t = netdev_priv(dev);
594 		memcpy(&p, &t->parms, sizeof(p));
595 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
596 			err = -EFAULT;
597 		break;
598 
599 	case SIOCADDTUNNEL:
600 	case SIOCCHGTUNNEL:
601 		err = -EPERM;
602 		if (!capable(CAP_NET_ADMIN))
603 			goto done;
604 
605 		err = -EFAULT;
606 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
607 			goto done;
608 
609 		err = -EINVAL;
610 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
611 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
612 			goto done;
613 		if (p.iph.ttl)
614 			p.iph.frag_off |= htons(IP_DF);
615 
616 		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
617 
618 		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
619 			if (t != NULL) {
620 				if (t->dev != dev) {
621 					err = -EEXIST;
622 					break;
623 				}
624 			} else {
625 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
626 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
627 					err = -EINVAL;
628 					break;
629 				}
630 				t = netdev_priv(dev);
631 				ipip_tunnel_unlink(ipn, t);
632 				t->parms.iph.saddr = p.iph.saddr;
633 				t->parms.iph.daddr = p.iph.daddr;
634 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
635 				memcpy(dev->broadcast, &p.iph.daddr, 4);
636 				ipip_tunnel_link(ipn, t);
637 				netdev_state_change(dev);
638 			}
639 		}
640 
641 		if (t) {
642 			err = 0;
643 			if (cmd == SIOCCHGTUNNEL) {
644 				t->parms.iph.ttl = p.iph.ttl;
645 				t->parms.iph.tos = p.iph.tos;
646 				t->parms.iph.frag_off = p.iph.frag_off;
647 				if (t->parms.link != p.link) {
648 					t->parms.link = p.link;
649 					ipip_tunnel_bind_dev(dev);
650 					netdev_state_change(dev);
651 				}
652 			}
653 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
654 				err = -EFAULT;
655 		} else
656 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
657 		break;
658 
659 	case SIOCDELTUNNEL:
660 		err = -EPERM;
661 		if (!capable(CAP_NET_ADMIN))
662 			goto done;
663 
664 		if (dev == ipn->fb_tunnel_dev) {
665 			err = -EFAULT;
666 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
667 				goto done;
668 			err = -ENOENT;
669 			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
670 				goto done;
671 			err = -EPERM;
672 			if (t->dev == ipn->fb_tunnel_dev)
673 				goto done;
674 			dev = t->dev;
675 		}
676 		unregister_netdevice(dev);
677 		err = 0;
678 		break;
679 
680 	default:
681 		err = -EINVAL;
682 	}
683 
684 done:
685 	return err;
686 }
687 
688 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
689 {
690 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
691 		return -EINVAL;
692 	dev->mtu = new_mtu;
693 	return 0;
694 }
695 
696 static const struct net_device_ops ipip_netdev_ops = {
697 	.ndo_uninit	= ipip_tunnel_uninit,
698 	.ndo_start_xmit	= ipip_tunnel_xmit,
699 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
700 	.ndo_change_mtu	= ipip_tunnel_change_mtu,
701 
702 };
703 
704 static void ipip_tunnel_setup(struct net_device *dev)
705 {
706 	dev->netdev_ops		= &ipip_netdev_ops;
707 	dev->destructor		= free_netdev;
708 
709 	dev->type		= ARPHRD_TUNNEL;
710 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
711 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
712 	dev->flags		= IFF_NOARP;
713 	dev->iflink		= 0;
714 	dev->addr_len		= 4;
715 	dev->features		|= NETIF_F_NETNS_LOCAL;
716 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
717 }
718 
719 static void ipip_tunnel_init(struct net_device *dev)
720 {
721 	struct ip_tunnel *tunnel = netdev_priv(dev);
722 
723 	tunnel->dev = dev;
724 	strcpy(tunnel->parms.name, dev->name);
725 
726 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
727 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
728 
729 	ipip_tunnel_bind_dev(dev);
730 }
731 
732 static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
733 {
734 	struct ip_tunnel *tunnel = netdev_priv(dev);
735 	struct iphdr *iph = &tunnel->parms.iph;
736 	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
737 
738 	tunnel->dev = dev;
739 	strcpy(tunnel->parms.name, dev->name);
740 
741 	iph->version		= 4;
742 	iph->protocol		= IPPROTO_IPIP;
743 	iph->ihl		= 5;
744 
745 	dev_hold(dev);
746 	ipn->tunnels_wc[0]	= tunnel;
747 }
748 
749 static struct xfrm_tunnel ipip_handler = {
750 	.handler	=	ipip_rcv,
751 	.err_handler	=	ipip_err,
752 	.priority	=	1,
753 };
754 
755 static const char banner[] __initconst =
756 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
757 
758 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
759 {
760 	int prio;
761 
762 	for (prio = 1; prio < 4; prio++) {
763 		int h;
764 		for (h = 0; h < HASH_SIZE; h++) {
765 			struct ip_tunnel *t = ipn->tunnels[prio][h];
766 
767 			while (t != NULL) {
768 				unregister_netdevice_queue(t->dev, head);
769 				t = t->next;
770 			}
771 		}
772 	}
773 }
774 
775 static int __net_init ipip_init_net(struct net *net)
776 {
777 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
778 	int err;
779 
780 	ipn->tunnels[0] = ipn->tunnels_wc;
781 	ipn->tunnels[1] = ipn->tunnels_l;
782 	ipn->tunnels[2] = ipn->tunnels_r;
783 	ipn->tunnels[3] = ipn->tunnels_r_l;
784 
785 	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
786 					   "tunl0",
787 					   ipip_tunnel_setup);
788 	if (!ipn->fb_tunnel_dev) {
789 		err = -ENOMEM;
790 		goto err_alloc_dev;
791 	}
792 	dev_net_set(ipn->fb_tunnel_dev, net);
793 
794 	ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
795 
796 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
797 		goto err_reg_dev;
798 
799 	return 0;
800 
801 err_reg_dev:
802 	free_netdev(ipn->fb_tunnel_dev);
803 err_alloc_dev:
804 	/* nothing */
805 	return err;
806 }
807 
808 static void __net_exit ipip_exit_net(struct net *net)
809 {
810 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
811 	LIST_HEAD(list);
812 
813 	rtnl_lock();
814 	ipip_destroy_tunnels(ipn, &list);
815 	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
816 	unregister_netdevice_many(&list);
817 	rtnl_unlock();
818 }
819 
820 static struct pernet_operations ipip_net_ops = {
821 	.init = ipip_init_net,
822 	.exit = ipip_exit_net,
823 	.id   = &ipip_net_id,
824 	.size = sizeof(struct ipip_net),
825 };
826 
827 static int __init ipip_init(void)
828 {
829 	int err;
830 
831 	printk(banner);
832 
833 	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
834 		printk(KERN_INFO "ipip init: can't register tunnel\n");
835 		return -EAGAIN;
836 	}
837 
838 	err = register_pernet_device(&ipip_net_ops);
839 	if (err)
840 		xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
841 
842 	return err;
843 }
844 
845 static void __exit ipip_fini(void)
846 {
847 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
848 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
849 
850 	unregister_pernet_device(&ipip_net_ops);
851 }
852 
853 module_init(ipip_init);
854 module_exit(ipip_fini);
855 MODULE_LICENSE("GPL");
856