/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	:	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *		David Woodhouse	:	Perform some basic ICMP handling.
 *					IPIP Routing without decapsulation.
 *		Carlos Picoto	:	GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name, not "tunnel:", when reporting errors.
		Added the tx_dropped stat.

		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note: There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/
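/*
 * A minimal sketch of the sequence described above (illustrative only,
 * not part of this driver; the function name and sizes are made up):
 *
 *	static struct sk_buff *example_alloc(unsigned int hdr_len,
 *					     unsigned int data_len)
 *	{
 *		struct sk_buff *skb = alloc_skb(hdr_len + data_len, GFP_ATOMIC);
 *
 *		if (!skb)
 *			return NULL;
 *		skb_reserve(skb, hdr_len);	-- reserve headroom first
 *		memset(skb_put(skb, data_len), 0, data_len);	-- payload space
 *		skb_push(skb, hdr_len);		-- prepend header into the headroom
 *		return skb;
 *	}
 */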

/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

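/*
 * Endpoint addresses are folded into one of HASH_SIZE buckets below;
 * the 0xF mask in HASH() matches HASH_SIZE == 16.
 */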
#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static int ipip_net_id __read_mostly;
struct ipip_net {
	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel *tunnels_r[HASH_SIZE];
	struct ip_tunnel *tunnels_l[HASH_SIZE];
	struct ip_tunnel *tunnels_wc[1];
	struct ip_tunnel **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static void ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/*
 * Locking: hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipip_lock);

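/*
 * Readers walk the chains under rcu_read_lock() via the iterator
 * below (note that it expects a local variable t to be in scope);
 * writers serialize list updates with ipip_lock and publish new
 * entries with rcu_assign_pointer().
 */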
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}

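/*
 * Bucket selection: bit 1 of prio is set when the tunnel has a remote
 * address, bit 0 when it has a local address, picking one of the four
 * tables above (wc, l, r, r_l); h XORs the per-address hashes.
 */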
static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			spin_lock_bh(&ipip_lock);
			*tp = t->next;
			spin_unlock_bh(&ipip_lock);
			break;
		}
	}
}

static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(ipn, t);

	spin_lock_bh(&ipip_lock);
	t->next = *tp;
	rcu_assign_pointer(*tp, t);
	spin_unlock_bh(&ipip_lock);
}

static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "tunl%%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;

	ipip_tunnel_init(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev) {
		spin_lock_bh(&ipip_lock);
		ipn->tunnels_wc[0] = NULL;
		spin_unlock_bh(&ipip_lock);
	} else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	rcu_read_lock();
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return err;
}

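/*
 * On decapsulation, propagate a Congestion Experienced mark from the
 * outer header to the inner one, as the ECN rules require.
 */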
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
{
	struct iphdr *inner_iph = ip_hdr(skb);

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

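/*
 * Receive side: match the outer addresses against the tunnel tables;
 * a nonzero return hands the packet on to any other handler
 * registered for this protocol.
 */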
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	rcu_read_lock();
	if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
					iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		skb_tunnel_rx(skb, tunnel->dev);

		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			stats->collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

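/*
 * A minimal userspace sketch (illustrative, not part of this file) of
 * creating a tunnel through the ioctl above, assuming the usual
 * struct ip_tunnel_parm from <linux/if_tunnel.h>; error handling is
 * omitted and the addresses are made up:
 *
 *	struct ip_tunnel_parm p = { 0 };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "tunl1");
 *	p.iph.version = 4;		-- the handler insists on these
 *	p.iph.ihl = 5;
 *	p.iph.protocol = IPPROTO_IPIP;
 *	p.iph.saddr = inet_addr("192.0.2.1");
 *	p.iph.daddr = inet_addr("192.0.2.2");
 *	strcpy(ifr.ifr_name, "tunl0");	-- fallback device takes ADDTUNNEL
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */
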
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	/* 68 is the minimum IPv4 MTU; 0xFFF8 is the largest 16-bit
	 * packet size rounded down to a multiple of 8, less our
	 * outer header.
	 */
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
};

static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static void ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);
}

static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev_hold(dev);
	ipn->tunnels_wc[0]	= tunnel;
}

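/*
 * Hooked into the IPPROTO_IPIP receive path via xfrm4_tunnel_register()
 * in ipip_init() below; err_handler sees ICMP errors for tunneled
 * packets.
 */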
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

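/*
 * Queue every tunnel device for unregistration; prio 0 (the wc table)
 * only ever holds the fallback device, which the caller unregisters
 * itself.
 */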
static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t = ipn->tunnels[prio][h];

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = t->next;
			}
		}
	}
}

static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	ipip_fb_tunnel_init(ipn->fb_tunnel_dev);

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}

static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
		printk(KERN_INFO "ipip init: can't register tunnel\n");
	}
	return err;
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");