xref: /openbmc/linux/net/ipv4/ipip.c (revision 9d56dd3b083a3bec56e9da35ce07baca81030b03)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Authors:
5  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *	Fixes:
8  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
9  *					a module taking up 2 pages).
10  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *					to keep ip_forward happy.
12  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *					I do not want to merge them together.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *	modify it under the terms of the GNU General Public License
22  *	as published by the Free Software Foundation; either version
23  *	2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29 	The purpose of this driver is to provide an IP tunnel through
30 	which you can tunnel network traffic transparently across subnets.
31 
32 	This was written by looking at Nick Holloway's dummy driver
33 	Thanks for the great code!
34 
35 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36 
37 	Minor tweaks:
38 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 		dev->hard_header/hard_header_len changed to use no headers.
40 		Comments/bracketing tweaked.
41 		Made the tunnels use dev->name not tunnel: when error reporting.
42 		Added tx_dropped stat
43 
44 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45 
46 	Reworked:
47 		Changed to tunnel to destination gateway in addition to the
48 			tunnel's pointopoint address
49 		Almost completely rewritten
50 		Note:  There is currently no firewall or ICMP handling done.
51 
52 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58 	When the tunnel_xmit() function is called, the skb contains the
59 	packet to be sent (plus a great deal of extra info), and dev
60 	contains the tunnel device that _we_ are.
61 
62 	When we are passed a packet, we are expected to fill in the
63 	source address with our source IP address.
64 
65 	What is the proper way to allocate, copy and free a buffer?
66 	After you allocate it, it is a "0 length" chunk of memory
67 	starting at zero.  If you want to add headers to the buffer
68 	later, you'll have to call "skb_reserve(skb, amount)" with
69 	the amount of memory you want reserved.  Then, you call
70 	"skb_put(skb, amount)" with the amount of space you want in
71 	the buffer.  skb_put() returns a pointer to the top (#0) of
72 	that buffer.  skb->len is set to the amount of space you have
73 	"allocated" with skb_put().  You can then write up to skb->len
74 	bytes to that buffer.  If you need more, you can call skb_put()
75 	again with the additional amount of space you need.  You can
76 	find out how much more space you can allocate by calling
77 	"skb_tailroom(skb)".
78 	Now, to add header space, call "skb_push(skb, header_len)".
79 	This creates space at the beginning of the buffer and returns
80 	a pointer to this new space.  If later you need to strip a
81 	header from a buffer, call "skb_pull(skb, header_len)".
82 	skb_headroom() will return how much space is left at the top
83 	of the buffer (before the main data).  Remember, this headroom
84 	space must be reserved before the skb_put() function is called.
85 	*/
86 
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89 
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <asm/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <linux/in.h>
102 #include <linux/tcp.h>
103 #include <linux/udp.h>
104 #include <linux/if_arp.h>
105 #include <linux/mroute.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 
110 #include <net/sock.h>
111 #include <net/ip.h>
112 #include <net/icmp.h>
113 #include <net/ipip.h>
114 #include <net/inet_ecn.h>
115 #include <net/xfrm.h>
116 #include <net/net_namespace.h>
117 #include <net/netns/generic.h>
118 
/* Number of buckets in each of the address-keyed tunnel hash tables. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..15: the value is
 * XOR-ed with itself shifted right by four bits and the low nibble is kept.
 * The argument is parenthesised so that an expression passed as "addr" is
 * cast and shifted as a whole rather than only its first operand.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & 0xF)
121 
122 static int ipip_net_id __read_mostly;
123 struct ipip_net {
124 	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
125 	struct ip_tunnel *tunnels_r[HASH_SIZE];
126 	struct ip_tunnel *tunnels_l[HASH_SIZE];
127 	struct ip_tunnel *tunnels_wc[1];
128 	struct ip_tunnel **tunnels[4];
129 
130 	struct net_device *fb_tunnel_dev;
131 };
132 
133 static void ipip_fb_tunnel_init(struct net_device *dev);
134 static void ipip_tunnel_init(struct net_device *dev);
135 static void ipip_tunnel_setup(struct net_device *dev);
136 
137 /*
138  * Locking : hash tables are protected by RCU and a spinlock
139  */
140 static DEFINE_SPINLOCK(ipip_lock);
141 
142 #define for_each_ip_tunnel_rcu(start) \
143 	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144 
145 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 		__be32 remote, __be32 local)
147 {
148 	unsigned h0 = HASH(remote);
149 	unsigned h1 = HASH(local);
150 	struct ip_tunnel *t;
151 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 
153 	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
154 		if (local == t->parms.iph.saddr &&
155 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
156 			return t;
157 
158 	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
159 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
160 			return t;
161 
162 	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
163 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
164 			return t;
165 
166 	t = rcu_dereference(ipn->tunnels_wc[0]);
167 	if (t && (t->dev->flags&IFF_UP))
168 		return t;
169 	return NULL;
170 }
171 
172 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
173 		struct ip_tunnel_parm *parms)
174 {
175 	__be32 remote = parms->iph.daddr;
176 	__be32 local = parms->iph.saddr;
177 	unsigned h = 0;
178 	int prio = 0;
179 
180 	if (remote) {
181 		prio |= 2;
182 		h ^= HASH(remote);
183 	}
184 	if (local) {
185 		prio |= 1;
186 		h ^= HASH(local);
187 	}
188 	return &ipn->tunnels[prio][h];
189 }
190 
191 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
192 		struct ip_tunnel *t)
193 {
194 	return __ipip_bucket(ipn, &t->parms);
195 }
196 
197 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198 {
199 	struct ip_tunnel **tp;
200 
201 	for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
202 		if (t == *tp) {
203 			spin_lock_bh(&ipip_lock);
204 			*tp = t->next;
205 			spin_unlock_bh(&ipip_lock);
206 			break;
207 		}
208 	}
209 }
210 
211 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212 {
213 	struct ip_tunnel **tp = ipip_bucket(ipn, t);
214 
215 	spin_lock_bh(&ipip_lock);
216 	t->next = *tp;
217 	rcu_assign_pointer(*tp, t);
218 	spin_unlock_bh(&ipip_lock);
219 }
220 
221 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
222 		struct ip_tunnel_parm *parms, int create)
223 {
224 	__be32 remote = parms->iph.daddr;
225 	__be32 local = parms->iph.saddr;
226 	struct ip_tunnel *t, **tp, *nt;
227 	struct net_device *dev;
228 	char name[IFNAMSIZ];
229 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 
231 	for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
232 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 			return t;
234 	}
235 	if (!create)
236 		return NULL;
237 
238 	if (parms->name[0])
239 		strlcpy(name, parms->name, IFNAMSIZ);
240 	else
241 		sprintf(name, "tunl%%d");
242 
243 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 	if (dev == NULL)
245 		return NULL;
246 
247 	dev_net_set(dev, net);
248 
249 	if (strchr(name, '%')) {
250 		if (dev_alloc_name(dev, name) < 0)
251 			goto failed_free;
252 	}
253 
254 	nt = netdev_priv(dev);
255 	nt->parms = *parms;
256 
257 	ipip_tunnel_init(dev);
258 
259 	if (register_netdevice(dev) < 0)
260 		goto failed_free;
261 
262 	dev_hold(dev);
263 	ipip_tunnel_link(ipn, nt);
264 	return nt;
265 
266 failed_free:
267 	free_netdev(dev);
268 	return NULL;
269 }
270 
271 static void ipip_tunnel_uninit(struct net_device *dev)
272 {
273 	struct net *net = dev_net(dev);
274 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 
276 	if (dev == ipn->fb_tunnel_dev) {
277 		spin_lock_bh(&ipip_lock);
278 		ipn->tunnels_wc[0] = NULL;
279 		spin_unlock_bh(&ipip_lock);
280 	} else
281 		ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 	dev_put(dev);
283 }
284 
285 static int ipip_err(struct sk_buff *skb, u32 info)
286 {
287 
288 /* All the routers (except for Linux) return only
289    8 bytes of packet payload. It means, that precise relaying of
290    ICMP in the real Internet is absolutely infeasible.
291  */
292 	struct iphdr *iph = (struct iphdr *)skb->data;
293 	const int type = icmp_hdr(skb)->type;
294 	const int code = icmp_hdr(skb)->code;
295 	struct ip_tunnel *t;
296 	int err;
297 
298 	switch (type) {
299 	default:
300 	case ICMP_PARAMETERPROB:
301 		return 0;
302 
303 	case ICMP_DEST_UNREACH:
304 		switch (code) {
305 		case ICMP_SR_FAILED:
306 		case ICMP_PORT_UNREACH:
307 			/* Impossible event. */
308 			return 0;
309 		case ICMP_FRAG_NEEDED:
310 			/* Soft state for pmtu is maintained by IP core. */
311 			return 0;
312 		default:
313 			/* All others are translated to HOST_UNREACH.
314 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
315 			   I believe they are just ether pollution. --ANK
316 			 */
317 			break;
318 		}
319 		break;
320 	case ICMP_TIME_EXCEEDED:
321 		if (code != ICMP_EXC_TTL)
322 			return 0;
323 		break;
324 	}
325 
326 	err = -ENOENT;
327 
328 	rcu_read_lock();
329 	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
330 	if (t == NULL || t->parms.iph.daddr == 0)
331 		goto out;
332 
333 	err = 0;
334 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
335 		goto out;
336 
337 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
338 		t->err_count++;
339 	else
340 		t->err_count = 1;
341 	t->err_time = jiffies;
342 out:
343 	rcu_read_unlock();
344 	return err;
345 }
346 
347 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
348 					struct sk_buff *skb)
349 {
350 	struct iphdr *inner_iph = ip_hdr(skb);
351 
352 	if (INET_ECN_is_ce(outer_iph->tos))
353 		IP_ECN_set_ce(inner_iph);
354 }
355 
356 static int ipip_rcv(struct sk_buff *skb)
357 {
358 	struct ip_tunnel *tunnel;
359 	const struct iphdr *iph = ip_hdr(skb);
360 
361 	rcu_read_lock();
362 	if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
363 					iph->saddr, iph->daddr)) != NULL) {
364 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
365 			rcu_read_unlock();
366 			kfree_skb(skb);
367 			return 0;
368 		}
369 
370 		secpath_reset(skb);
371 
372 		skb->mac_header = skb->network_header;
373 		skb_reset_network_header(skb);
374 		skb->protocol = htons(ETH_P_IP);
375 		skb->pkt_type = PACKET_HOST;
376 
377 		tunnel->dev->stats.rx_packets++;
378 		tunnel->dev->stats.rx_bytes += skb->len;
379 		skb->dev = tunnel->dev;
380 		skb_dst_drop(skb);
381 		nf_reset(skb);
382 		ipip_ecn_decapsulate(iph, skb);
383 		netif_rx(skb);
384 		rcu_read_unlock();
385 		return 0;
386 	}
387 	rcu_read_unlock();
388 
389 	return -1;
390 }
391 
392 /*
393  *	This function assumes it is being called from dev_queue_xmit()
394  *	and that skb is filled properly by that function.
395  */
396 
397 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
398 {
399 	struct ip_tunnel *tunnel = netdev_priv(dev);
400 	struct net_device_stats *stats = &dev->stats;
401 	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
402 	struct iphdr  *tiph = &tunnel->parms.iph;
403 	u8     tos = tunnel->parms.iph.tos;
404 	__be16 df = tiph->frag_off;
405 	struct rtable *rt;     			/* Route to the other host */
406 	struct net_device *tdev;			/* Device to other host */
407 	struct iphdr  *old_iph = ip_hdr(skb);
408 	struct iphdr  *iph;			/* Our new IP header */
409 	unsigned int max_headroom;		/* The extra header space needed */
410 	__be32 dst = tiph->daddr;
411 	int    mtu;
412 
413 	if (skb->protocol != htons(ETH_P_IP))
414 		goto tx_error;
415 
416 	if (tos&1)
417 		tos = old_iph->tos;
418 
419 	if (!dst) {
420 		/* NBMA tunnel */
421 		if ((rt = skb_rtable(skb)) == NULL) {
422 			stats->tx_fifo_errors++;
423 			goto tx_error;
424 		}
425 		if ((dst = rt->rt_gateway) == 0)
426 			goto tx_error_icmp;
427 	}
428 
429 	{
430 		struct flowi fl = { .oif = tunnel->parms.link,
431 				    .nl_u = { .ip4_u =
432 					      { .daddr = dst,
433 						.saddr = tiph->saddr,
434 						.tos = RT_TOS(tos) } },
435 				    .proto = IPPROTO_IPIP };
436 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
437 			stats->tx_carrier_errors++;
438 			goto tx_error_icmp;
439 		}
440 	}
441 	tdev = rt->u.dst.dev;
442 
443 	if (tdev == dev) {
444 		ip_rt_put(rt);
445 		stats->collisions++;
446 		goto tx_error;
447 	}
448 
449 	df |= old_iph->frag_off & htons(IP_DF);
450 
451 	if (df) {
452 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
453 
454 		if (mtu < 68) {
455 			stats->collisions++;
456 			ip_rt_put(rt);
457 			goto tx_error;
458 		}
459 
460 		if (skb_dst(skb))
461 			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
462 
463 		if ((old_iph->frag_off & htons(IP_DF)) &&
464 		    mtu < ntohs(old_iph->tot_len)) {
465 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
466 				  htonl(mtu));
467 			ip_rt_put(rt);
468 			goto tx_error;
469 		}
470 	}
471 
472 	if (tunnel->err_count > 0) {
473 		if (time_before(jiffies,
474 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
475 			tunnel->err_count--;
476 			dst_link_failure(skb);
477 		} else
478 			tunnel->err_count = 0;
479 	}
480 
481 	/*
482 	 * Okay, now see if we can stuff it in the buffer as-is.
483 	 */
484 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
485 
486 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
487 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
488 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
489 		if (!new_skb) {
490 			ip_rt_put(rt);
491 			txq->tx_dropped++;
492 			dev_kfree_skb(skb);
493 			return NETDEV_TX_OK;
494 		}
495 		if (skb->sk)
496 			skb_set_owner_w(new_skb, skb->sk);
497 		dev_kfree_skb(skb);
498 		skb = new_skb;
499 		old_iph = ip_hdr(skb);
500 	}
501 
502 	skb->transport_header = skb->network_header;
503 	skb_push(skb, sizeof(struct iphdr));
504 	skb_reset_network_header(skb);
505 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
506 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
507 			      IPSKB_REROUTED);
508 	skb_dst_drop(skb);
509 	skb_dst_set(skb, &rt->u.dst);
510 
511 	/*
512 	 *	Push down and install the IPIP header.
513 	 */
514 
515 	iph 			=	ip_hdr(skb);
516 	iph->version		=	4;
517 	iph->ihl		=	sizeof(struct iphdr)>>2;
518 	iph->frag_off		=	df;
519 	iph->protocol		=	IPPROTO_IPIP;
520 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
521 	iph->daddr		=	rt->rt_dst;
522 	iph->saddr		=	rt->rt_src;
523 
524 	if ((iph->ttl = tiph->ttl) == 0)
525 		iph->ttl	=	old_iph->ttl;
526 
527 	nf_reset(skb);
528 
529 	IPTUNNEL_XMIT();
530 	return NETDEV_TX_OK;
531 
532 tx_error_icmp:
533 	dst_link_failure(skb);
534 tx_error:
535 	stats->tx_errors++;
536 	dev_kfree_skb(skb);
537 	return NETDEV_TX_OK;
538 }
539 
540 static void ipip_tunnel_bind_dev(struct net_device *dev)
541 {
542 	struct net_device *tdev = NULL;
543 	struct ip_tunnel *tunnel;
544 	struct iphdr *iph;
545 
546 	tunnel = netdev_priv(dev);
547 	iph = &tunnel->parms.iph;
548 
549 	if (iph->daddr) {
550 		struct flowi fl = { .oif = tunnel->parms.link,
551 				    .nl_u = { .ip4_u =
552 					      { .daddr = iph->daddr,
553 						.saddr = iph->saddr,
554 						.tos = RT_TOS(iph->tos) } },
555 				    .proto = IPPROTO_IPIP };
556 		struct rtable *rt;
557 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
558 			tdev = rt->u.dst.dev;
559 			ip_rt_put(rt);
560 		}
561 		dev->flags |= IFF_POINTOPOINT;
562 	}
563 
564 	if (!tdev && tunnel->parms.link)
565 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
566 
567 	if (tdev) {
568 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
569 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
570 	}
571 	dev->iflink = tunnel->parms.link;
572 }
573 
574 static int
575 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
576 {
577 	int err = 0;
578 	struct ip_tunnel_parm p;
579 	struct ip_tunnel *t;
580 	struct net *net = dev_net(dev);
581 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
582 
583 	switch (cmd) {
584 	case SIOCGETTUNNEL:
585 		t = NULL;
586 		if (dev == ipn->fb_tunnel_dev) {
587 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
588 				err = -EFAULT;
589 				break;
590 			}
591 			t = ipip_tunnel_locate(net, &p, 0);
592 		}
593 		if (t == NULL)
594 			t = netdev_priv(dev);
595 		memcpy(&p, &t->parms, sizeof(p));
596 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
597 			err = -EFAULT;
598 		break;
599 
600 	case SIOCADDTUNNEL:
601 	case SIOCCHGTUNNEL:
602 		err = -EPERM;
603 		if (!capable(CAP_NET_ADMIN))
604 			goto done;
605 
606 		err = -EFAULT;
607 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
608 			goto done;
609 
610 		err = -EINVAL;
611 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
612 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
613 			goto done;
614 		if (p.iph.ttl)
615 			p.iph.frag_off |= htons(IP_DF);
616 
617 		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
618 
619 		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
620 			if (t != NULL) {
621 				if (t->dev != dev) {
622 					err = -EEXIST;
623 					break;
624 				}
625 			} else {
626 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
627 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
628 					err = -EINVAL;
629 					break;
630 				}
631 				t = netdev_priv(dev);
632 				ipip_tunnel_unlink(ipn, t);
633 				t->parms.iph.saddr = p.iph.saddr;
634 				t->parms.iph.daddr = p.iph.daddr;
635 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
636 				memcpy(dev->broadcast, &p.iph.daddr, 4);
637 				ipip_tunnel_link(ipn, t);
638 				netdev_state_change(dev);
639 			}
640 		}
641 
642 		if (t) {
643 			err = 0;
644 			if (cmd == SIOCCHGTUNNEL) {
645 				t->parms.iph.ttl = p.iph.ttl;
646 				t->parms.iph.tos = p.iph.tos;
647 				t->parms.iph.frag_off = p.iph.frag_off;
648 				if (t->parms.link != p.link) {
649 					t->parms.link = p.link;
650 					ipip_tunnel_bind_dev(dev);
651 					netdev_state_change(dev);
652 				}
653 			}
654 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
655 				err = -EFAULT;
656 		} else
657 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
658 		break;
659 
660 	case SIOCDELTUNNEL:
661 		err = -EPERM;
662 		if (!capable(CAP_NET_ADMIN))
663 			goto done;
664 
665 		if (dev == ipn->fb_tunnel_dev) {
666 			err = -EFAULT;
667 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
668 				goto done;
669 			err = -ENOENT;
670 			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
671 				goto done;
672 			err = -EPERM;
673 			if (t->dev == ipn->fb_tunnel_dev)
674 				goto done;
675 			dev = t->dev;
676 		}
677 		unregister_netdevice(dev);
678 		err = 0;
679 		break;
680 
681 	default:
682 		err = -EINVAL;
683 	}
684 
685 done:
686 	return err;
687 }
688 
689 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
690 {
691 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
692 		return -EINVAL;
693 	dev->mtu = new_mtu;
694 	return 0;
695 }
696 
697 static const struct net_device_ops ipip_netdev_ops = {
698 	.ndo_uninit	= ipip_tunnel_uninit,
699 	.ndo_start_xmit	= ipip_tunnel_xmit,
700 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
701 	.ndo_change_mtu	= ipip_tunnel_change_mtu,
702 
703 };
704 
705 static void ipip_tunnel_setup(struct net_device *dev)
706 {
707 	dev->netdev_ops		= &ipip_netdev_ops;
708 	dev->destructor		= free_netdev;
709 
710 	dev->type		= ARPHRD_TUNNEL;
711 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
712 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
713 	dev->flags		= IFF_NOARP;
714 	dev->iflink		= 0;
715 	dev->addr_len		= 4;
716 	dev->features		|= NETIF_F_NETNS_LOCAL;
717 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
718 }
719 
720 static void ipip_tunnel_init(struct net_device *dev)
721 {
722 	struct ip_tunnel *tunnel = netdev_priv(dev);
723 
724 	tunnel->dev = dev;
725 	strcpy(tunnel->parms.name, dev->name);
726 
727 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
728 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
729 
730 	ipip_tunnel_bind_dev(dev);
731 }
732 
733 static void ipip_fb_tunnel_init(struct net_device *dev)
734 {
735 	struct ip_tunnel *tunnel = netdev_priv(dev);
736 	struct iphdr *iph = &tunnel->parms.iph;
737 	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
738 
739 	tunnel->dev = dev;
740 	strcpy(tunnel->parms.name, dev->name);
741 
742 	iph->version		= 4;
743 	iph->protocol		= IPPROTO_IPIP;
744 	iph->ihl		= 5;
745 
746 	dev_hold(dev);
747 	ipn->tunnels_wc[0]	= tunnel;
748 }
749 
750 static struct xfrm_tunnel ipip_handler = {
751 	.handler	=	ipip_rcv,
752 	.err_handler	=	ipip_err,
753 	.priority	=	1,
754 };
755 
756 static const char banner[] __initconst =
757 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
758 
759 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 {
761 	int prio;
762 
763 	for (prio = 1; prio < 4; prio++) {
764 		int h;
765 		for (h = 0; h < HASH_SIZE; h++) {
766 			struct ip_tunnel *t = ipn->tunnels[prio][h];
767 
768 			while (t != NULL) {
769 				unregister_netdevice_queue(t->dev, head);
770 				t = t->next;
771 			}
772 		}
773 	}
774 }
775 
776 static int ipip_init_net(struct net *net)
777 {
778 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
779 	int err;
780 
781 	ipn->tunnels[0] = ipn->tunnels_wc;
782 	ipn->tunnels[1] = ipn->tunnels_l;
783 	ipn->tunnels[2] = ipn->tunnels_r;
784 	ipn->tunnels[3] = ipn->tunnels_r_l;
785 
786 	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
787 					   "tunl0",
788 					   ipip_tunnel_setup);
789 	if (!ipn->fb_tunnel_dev) {
790 		err = -ENOMEM;
791 		goto err_alloc_dev;
792 	}
793 	dev_net_set(ipn->fb_tunnel_dev, net);
794 
795 	ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
796 
797 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
798 		goto err_reg_dev;
799 
800 	return 0;
801 
802 err_reg_dev:
803 	free_netdev(ipn->fb_tunnel_dev);
804 err_alloc_dev:
805 	/* nothing */
806 	return err;
807 }
808 
809 static void ipip_exit_net(struct net *net)
810 {
811 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
812 	LIST_HEAD(list);
813 
814 	rtnl_lock();
815 	ipip_destroy_tunnels(ipn, &list);
816 	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
817 	unregister_netdevice_many(&list);
818 	rtnl_unlock();
819 }
820 
821 static struct pernet_operations ipip_net_ops = {
822 	.init = ipip_init_net,
823 	.exit = ipip_exit_net,
824 	.id   = &ipip_net_id,
825 	.size = sizeof(struct ipip_net),
826 };
827 
828 static int __init ipip_init(void)
829 {
830 	int err;
831 
832 	printk(banner);
833 
834 	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
835 		printk(KERN_INFO "ipip init: can't register tunnel\n");
836 		return -EAGAIN;
837 	}
838 
839 	err = register_pernet_device(&ipip_net_ops);
840 	if (err)
841 		xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
842 
843 	return err;
844 }
845 
846 static void __exit ipip_fini(void)
847 {
848 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
849 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
850 
851 	unregister_pernet_device(&ipip_net_ops);
852 }
853 
854 module_init(ipip_init);
855 module_exit(ipip_fini);
856 MODULE_LICENSE("GPL");
857