/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name instead of "tunnel:" when reporting errors.
		Added tx_dropped stat.

		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note:  There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/
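
/*
 * A minimal sketch of the sequence described above (illustrative only,
 * not part of this driver; "hlen" and "dlen" are hypothetical sizes):
 *
 *	skb = alloc_skb(hlen + dlen, GFP_ATOMIC);
 *	if (!skb)
 *		return NULL;
 *	skb_reserve(skb, hlen);			<- reserve headroom first
 *	memcpy(skb_put(skb, dlen), data, dlen);	<- extend the tail by dlen
 *	skb_push(skb, hlen);			<- later, prepend a header
 *	skb_pull(skb, hlen);			<- or strip it again
 */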

/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

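/*
 * Hash a tunnel endpoint address into one of HASH_SIZE buckets: the
 * macro below folds the two low-order nibbles of the 32-bit (network
 * byte order) address word into a 4-bit index, so HASH() always yields
 * a value in 0..HASH_SIZE-1.
 */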
#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static int ipip_net_id __read_mostly;
struct ipip_net {
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
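
/* Note: the iterator above expects a "struct ip_tunnel *t" to be declared
 * in the calling scope; see ipip_tunnel_lookup() below for typical usage.
 */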

/* Frequently modified stats are per-CPU; others are shared (netdev->stats). */
struct pcpu_tstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
};

static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
	struct pcpu_tstats sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

		sum.rx_packets += tstats->rx_packets;
		sum.rx_bytes   += tstats->rx_bytes;
		sum.tx_packets += tstats->tx_packets;
		sum.tx_bytes   += tstats->tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}

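/*
 * Bucket selection: prio encodes which endpoints are set (bit 1 = remote,
 * bit 0 = local), matching the ipn->tunnels[] table set up in
 * ipip_init_net(): [0] = wildcard, [1] = local only, [2] = remote only,
 * [3] = remote + local.
 */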
static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned int h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms);
		 (t = rtnl_dereference(*tp)) != NULL;
		 tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}

/* called with RTNL */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev)
		rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
	else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	rcu_read_lock();
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return err;
}

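/*
 * Propagate congestion marking: if the outer header arrived with ECN CE
 * set, mark the decapsulated inner packet CE as well.
 */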
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
{
	struct iphdr *inner_iph = ip_hdr(skb);

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	rcu_read_lock();
	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		__skb_tunnel_rx(skb, tunnel->dev);

		ipip_ecn_decapsulate(iph, skb);

		netif_rx(skb);

		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	const struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
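	/*
	 *	The resulting frame is [ outer IPv4 header | inner IPv4
	 *	header | payload ], with the outer protocol field set to
	 *	IPPROTO_IPIP (4) so the peer's decapsulator picks it up.
	 */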

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
					   iph->daddr, iph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos),
					   tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

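/*
 * 68 is the minimum MTU an IPv4 host must accept (RFC 791); the upper
 * bound of 0xFFF8 - sizeof(struct iphdr) keeps the packet's total length
 * within the 16-bit tot_len field once the outer header is added (0xFFF8
 * is 65535 rounded down to the 8-byte fragment unit).
 */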
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats  = ipip_get_stats,
};

static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= ipip_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}

static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

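/*
 * Walk every bucket except prio 0: that single wildcard slot holds the
 * fallback device, which ipip_exit_net() unregisters separately.
 */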
static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ipn->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}

static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
		printk(KERN_INFO "ipip init: can't register tunnel\n");
	}
	return err;
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");