xref: /openbmc/linux/net/ipv4/ipip.c (revision b627b4ed)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Authors:
5  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *	Fixes:
8  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
9  *					a module taking up 2 pages).
10  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *					to keep ip_forward happy.
12  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *					I do not want to merge them together.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *	modify it under the terms of the GNU General Public License
22  *	as published by the Free Software Foundation; either version
23  *	2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29 	The purpose of this driver is to provide an IP tunnel through
30 	which you can tunnel network traffic transparently across subnets.
31 
32 	This was written by looking at Nick Holloway's dummy driver
33 	Thanks for the great code!
34 
35 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36 
37 	Minor tweaks:
38 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 		dev->hard_header/hard_header_len changed to use no headers.
40 		Comments/bracketing tweaked.
41 		Made the tunnels use dev->name not tunnel: when error reporting.
42 		Added tx_dropped stat
43 
44 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45 
46 	Reworked:
47 		Changed to tunnel to destination gateway in addition to the
48 			tunnel's pointopoint address
49 		Almost completely rewritten
50 		Note:  There is currently no firewall or ICMP handling done.
51 
52 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58 	When the tunnel_xmit() function is called, the skb contains the
59 	packet to be sent (plus a great deal of extra info), and dev
60 	contains the tunnel device that _we_ are.
61 
62 	When we are passed a packet, we are expected to fill in the
63 	source address with our source IP address.
64 
65 	What is the proper way to allocate, copy and free a buffer?
66 	After you allocate it, it is a "0 length" chunk of memory
67 	starting at zero.  If you want to add headers to the buffer
68 	later, you'll have to call "skb_reserve(skb, amount)" with
69 	the amount of memory you want reserved.  Then, you call
70 	"skb_put(skb, amount)" with the amount of space you want in
71 	the buffer.  skb_put() returns a pointer to the top (#0) of
72 	that buffer.  skb->len is set to the amount of space you have
73 	"allocated" with skb_put().  You can then write up to skb->len
74 	bytes to that buffer.  If you need more, you can call skb_put()
75 	again with the additional amount of space you need.  You can
76 	find out how much more space you can allocate by calling
77 	"skb_tailroom(skb)".
78 	Now, to add header space, call "skb_push(skb, header_len)".
79 	This creates space at the beginning of the buffer and returns
80 	a pointer to this new space.  If later you need to strip a
81 	header from a buffer, call "skb_pull(skb, header_len)".
82 	skb_headroom() will return how much space is left at the top
83 	of the buffer (before the main data).  Remember, this headroom
84 	space must be reserved before the skb_put() function is called.
85 	*/
86 
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <asm/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <linux/in.h>
102 #include <linux/tcp.h>
103 #include <linux/udp.h>
104 #include <linux/if_arp.h>
105 #include <linux/mroute.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 
110 #include <net/sock.h>
111 #include <net/ip.h>
112 #include <net/icmp.h>
113 #include <net/ipip.h>
114 #include <net/inet_ecn.h>
115 #include <net/xfrm.h>
116 #include <net/net_namespace.h>
117 #include <net/netns/generic.h>
118 
/* Number of buckets in each hashed tunnel table. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in the range 0..15: XOR the
 * address with itself shifted right by four bits and keep the low nibble.
 * The argument is fully parenthesised so that an expression argument
 * (e.g. "a | b") expands with the intended precedence.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
121 
122 static int ipip_net_id;
123 struct ipip_net {
124 	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
125 	struct ip_tunnel *tunnels_r[HASH_SIZE];
126 	struct ip_tunnel *tunnels_l[HASH_SIZE];
127 	struct ip_tunnel *tunnels_wc[1];
128 	struct ip_tunnel **tunnels[4];
129 
130 	struct net_device *fb_tunnel_dev;
131 };
132 
/* Forward declarations for the device setup/init helpers defined below. */
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_tunnel_init(struct net_device *dev);
static void ipip_fb_tunnel_init(struct net_device *dev);

/*
 * Guards the tunnel hash chains: held for reading around lookups in
 * ipip_rcv()/ipip_err(), and for writing (with bottom halves disabled)
 * while a chain pointer is being changed.
 */
static DEFINE_RWLOCK(ipip_lock);
138 
139 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
140 		__be32 remote, __be32 local)
141 {
142 	unsigned h0 = HASH(remote);
143 	unsigned h1 = HASH(local);
144 	struct ip_tunnel *t;
145 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
146 
147 	for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
148 		if (local == t->parms.iph.saddr &&
149 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 			return t;
151 	}
152 	for (t = ipn->tunnels_r[h0]; t; t = t->next) {
153 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 			return t;
155 	}
156 	for (t = ipn->tunnels_l[h1]; t; t = t->next) {
157 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 			return t;
159 	}
160 	if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161 		return t;
162 	return NULL;
163 }
164 
165 static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
166 		struct ip_tunnel_parm *parms)
167 {
168 	__be32 remote = parms->iph.daddr;
169 	__be32 local = parms->iph.saddr;
170 	unsigned h = 0;
171 	int prio = 0;
172 
173 	if (remote) {
174 		prio |= 2;
175 		h ^= HASH(remote);
176 	}
177 	if (local) {
178 		prio |= 1;
179 		h ^= HASH(local);
180 	}
181 	return &ipn->tunnels[prio][h];
182 }
183 
184 static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
185 		struct ip_tunnel *t)
186 {
187 	return __ipip_bucket(ipn, &t->parms);
188 }
189 
190 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
191 {
192 	struct ip_tunnel **tp;
193 
194 	for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
195 		if (t == *tp) {
196 			write_lock_bh(&ipip_lock);
197 			*tp = t->next;
198 			write_unlock_bh(&ipip_lock);
199 			break;
200 		}
201 	}
202 }
203 
204 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
205 {
206 	struct ip_tunnel **tp = ipip_bucket(ipn, t);
207 
208 	t->next = *tp;
209 	write_lock_bh(&ipip_lock);
210 	*tp = t;
211 	write_unlock_bh(&ipip_lock);
212 }
213 
214 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
215 		struct ip_tunnel_parm *parms, int create)
216 {
217 	__be32 remote = parms->iph.daddr;
218 	__be32 local = parms->iph.saddr;
219 	struct ip_tunnel *t, **tp, *nt;
220 	struct net_device *dev;
221 	char name[IFNAMSIZ];
222 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
223 
224 	for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
225 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
226 			return t;
227 	}
228 	if (!create)
229 		return NULL;
230 
231 	if (parms->name[0])
232 		strlcpy(name, parms->name, IFNAMSIZ);
233 	else
234 		sprintf(name, "tunl%%d");
235 
236 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
237 	if (dev == NULL)
238 		return NULL;
239 
240 	dev_net_set(dev, net);
241 
242 	if (strchr(name, '%')) {
243 		if (dev_alloc_name(dev, name) < 0)
244 			goto failed_free;
245 	}
246 
247 	nt = netdev_priv(dev);
248 	nt->parms = *parms;
249 
250 	ipip_tunnel_init(dev);
251 
252 	if (register_netdevice(dev) < 0)
253 		goto failed_free;
254 
255 	dev_hold(dev);
256 	ipip_tunnel_link(ipn, nt);
257 	return nt;
258 
259 failed_free:
260 	free_netdev(dev);
261 	return NULL;
262 }
263 
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266 	struct net *net = dev_net(dev);
267 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
268 
269 	if (dev == ipn->fb_tunnel_dev) {
270 		write_lock_bh(&ipip_lock);
271 		ipn->tunnels_wc[0] = NULL;
272 		write_unlock_bh(&ipip_lock);
273 	} else
274 		ipip_tunnel_unlink(ipn, netdev_priv(dev));
275 	dev_put(dev);
276 }
277 
278 static int ipip_err(struct sk_buff *skb, u32 info)
279 {
280 
281 /* All the routers (except for Linux) return only
282    8 bytes of packet payload. It means, that precise relaying of
283    ICMP in the real Internet is absolutely infeasible.
284  */
285 	struct iphdr *iph = (struct iphdr *)skb->data;
286 	const int type = icmp_hdr(skb)->type;
287 	const int code = icmp_hdr(skb)->code;
288 	struct ip_tunnel *t;
289 	int err;
290 
291 	switch (type) {
292 	default:
293 	case ICMP_PARAMETERPROB:
294 		return 0;
295 
296 	case ICMP_DEST_UNREACH:
297 		switch (code) {
298 		case ICMP_SR_FAILED:
299 		case ICMP_PORT_UNREACH:
300 			/* Impossible event. */
301 			return 0;
302 		case ICMP_FRAG_NEEDED:
303 			/* Soft state for pmtu is maintained by IP core. */
304 			return 0;
305 		default:
306 			/* All others are translated to HOST_UNREACH.
307 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
308 			   I believe they are just ether pollution. --ANK
309 			 */
310 			break;
311 		}
312 		break;
313 	case ICMP_TIME_EXCEEDED:
314 		if (code != ICMP_EXC_TTL)
315 			return 0;
316 		break;
317 	}
318 
319 	err = -ENOENT;
320 
321 	read_lock(&ipip_lock);
322 	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
323 	if (t == NULL || t->parms.iph.daddr == 0)
324 		goto out;
325 
326 	err = 0;
327 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
328 		goto out;
329 
330 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
331 		t->err_count++;
332 	else
333 		t->err_count = 1;
334 	t->err_time = jiffies;
335 out:
336 	read_unlock(&ipip_lock);
337 	return err;
338 }
339 
340 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
341 					struct sk_buff *skb)
342 {
343 	struct iphdr *inner_iph = ip_hdr(skb);
344 
345 	if (INET_ECN_is_ce(outer_iph->tos))
346 		IP_ECN_set_ce(inner_iph);
347 }
348 
349 static int ipip_rcv(struct sk_buff *skb)
350 {
351 	struct ip_tunnel *tunnel;
352 	const struct iphdr *iph = ip_hdr(skb);
353 
354 	read_lock(&ipip_lock);
355 	if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
356 					iph->saddr, iph->daddr)) != NULL) {
357 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
358 			read_unlock(&ipip_lock);
359 			kfree_skb(skb);
360 			return 0;
361 		}
362 
363 		secpath_reset(skb);
364 
365 		skb->mac_header = skb->network_header;
366 		skb_reset_network_header(skb);
367 		skb->protocol = htons(ETH_P_IP);
368 		skb->pkt_type = PACKET_HOST;
369 
370 		tunnel->dev->stats.rx_packets++;
371 		tunnel->dev->stats.rx_bytes += skb->len;
372 		skb->dev = tunnel->dev;
373 		dst_release(skb->dst);
374 		skb->dst = NULL;
375 		nf_reset(skb);
376 		ipip_ecn_decapsulate(iph, skb);
377 		netif_rx(skb);
378 		read_unlock(&ipip_lock);
379 		return 0;
380 	}
381 	read_unlock(&ipip_lock);
382 
383 	return -1;
384 }
385 
386 /*
387  *	This function assumes it is being called from dev_queue_xmit()
388  *	and that skb is filled properly by that function.
389  */
390 
391 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
392 {
393 	struct ip_tunnel *tunnel = netdev_priv(dev);
394 	struct net_device_stats *stats = &tunnel->dev->stats;
395 	struct iphdr  *tiph = &tunnel->parms.iph;
396 	u8     tos = tunnel->parms.iph.tos;
397 	__be16 df = tiph->frag_off;
398 	struct rtable *rt;     			/* Route to the other host */
399 	struct net_device *tdev;			/* Device to other host */
400 	struct iphdr  *old_iph = ip_hdr(skb);
401 	struct iphdr  *iph;			/* Our new IP header */
402 	unsigned int max_headroom;		/* The extra header space needed */
403 	__be32 dst = tiph->daddr;
404 	int    mtu;
405 
406 	if (tunnel->recursion++) {
407 		stats->collisions++;
408 		goto tx_error;
409 	}
410 
411 	if (skb->protocol != htons(ETH_P_IP))
412 		goto tx_error;
413 
414 	if (tos&1)
415 		tos = old_iph->tos;
416 
417 	if (!dst) {
418 		/* NBMA tunnel */
419 		if ((rt = skb->rtable) == NULL) {
420 			stats->tx_fifo_errors++;
421 			goto tx_error;
422 		}
423 		if ((dst = rt->rt_gateway) == 0)
424 			goto tx_error_icmp;
425 	}
426 
427 	{
428 		struct flowi fl = { .oif = tunnel->parms.link,
429 				    .nl_u = { .ip4_u =
430 					      { .daddr = dst,
431 						.saddr = tiph->saddr,
432 						.tos = RT_TOS(tos) } },
433 				    .proto = IPPROTO_IPIP };
434 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
435 			stats->tx_carrier_errors++;
436 			goto tx_error_icmp;
437 		}
438 	}
439 	tdev = rt->u.dst.dev;
440 
441 	if (tdev == dev) {
442 		ip_rt_put(rt);
443 		stats->collisions++;
444 		goto tx_error;
445 	}
446 
447 	if (tiph->frag_off)
448 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
449 	else
450 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
451 
452 	if (mtu < 68) {
453 		stats->collisions++;
454 		ip_rt_put(rt);
455 		goto tx_error;
456 	}
457 	if (skb->dst)
458 		skb->dst->ops->update_pmtu(skb->dst, mtu);
459 
460 	df |= (old_iph->frag_off&htons(IP_DF));
461 
462 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
463 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
464 		ip_rt_put(rt);
465 		goto tx_error;
466 	}
467 
468 	if (tunnel->err_count > 0) {
469 		if (time_before(jiffies,
470 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
471 			tunnel->err_count--;
472 			dst_link_failure(skb);
473 		} else
474 			tunnel->err_count = 0;
475 	}
476 
477 	/*
478 	 * Okay, now see if we can stuff it in the buffer as-is.
479 	 */
480 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
481 
482 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
483 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
484 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
485 		if (!new_skb) {
486 			ip_rt_put(rt);
487 			stats->tx_dropped++;
488 			dev_kfree_skb(skb);
489 			tunnel->recursion--;
490 			return 0;
491 		}
492 		if (skb->sk)
493 			skb_set_owner_w(new_skb, skb->sk);
494 		dev_kfree_skb(skb);
495 		skb = new_skb;
496 		old_iph = ip_hdr(skb);
497 	}
498 
499 	skb->transport_header = skb->network_header;
500 	skb_push(skb, sizeof(struct iphdr));
501 	skb_reset_network_header(skb);
502 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
503 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
504 			      IPSKB_REROUTED);
505 	dst_release(skb->dst);
506 	skb->dst = &rt->u.dst;
507 
508 	/*
509 	 *	Push down and install the IPIP header.
510 	 */
511 
512 	iph 			=	ip_hdr(skb);
513 	iph->version		=	4;
514 	iph->ihl		=	sizeof(struct iphdr)>>2;
515 	iph->frag_off		=	df;
516 	iph->protocol		=	IPPROTO_IPIP;
517 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
518 	iph->daddr		=	rt->rt_dst;
519 	iph->saddr		=	rt->rt_src;
520 
521 	if ((iph->ttl = tiph->ttl) == 0)
522 		iph->ttl	=	old_iph->ttl;
523 
524 	nf_reset(skb);
525 
526 	IPTUNNEL_XMIT();
527 	tunnel->recursion--;
528 	return 0;
529 
530 tx_error_icmp:
531 	dst_link_failure(skb);
532 tx_error:
533 	stats->tx_errors++;
534 	dev_kfree_skb(skb);
535 	tunnel->recursion--;
536 	return 0;
537 }
538 
539 static void ipip_tunnel_bind_dev(struct net_device *dev)
540 {
541 	struct net_device *tdev = NULL;
542 	struct ip_tunnel *tunnel;
543 	struct iphdr *iph;
544 
545 	tunnel = netdev_priv(dev);
546 	iph = &tunnel->parms.iph;
547 
548 	if (iph->daddr) {
549 		struct flowi fl = { .oif = tunnel->parms.link,
550 				    .nl_u = { .ip4_u =
551 					      { .daddr = iph->daddr,
552 						.saddr = iph->saddr,
553 						.tos = RT_TOS(iph->tos) } },
554 				    .proto = IPPROTO_IPIP };
555 		struct rtable *rt;
556 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
557 			tdev = rt->u.dst.dev;
558 			ip_rt_put(rt);
559 		}
560 		dev->flags |= IFF_POINTOPOINT;
561 	}
562 
563 	if (!tdev && tunnel->parms.link)
564 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
565 
566 	if (tdev) {
567 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
568 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
569 	}
570 	dev->iflink = tunnel->parms.link;
571 }
572 
573 static int
574 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
575 {
576 	int err = 0;
577 	struct ip_tunnel_parm p;
578 	struct ip_tunnel *t;
579 	struct net *net = dev_net(dev);
580 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
581 
582 	switch (cmd) {
583 	case SIOCGETTUNNEL:
584 		t = NULL;
585 		if (dev == ipn->fb_tunnel_dev) {
586 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
587 				err = -EFAULT;
588 				break;
589 			}
590 			t = ipip_tunnel_locate(net, &p, 0);
591 		}
592 		if (t == NULL)
593 			t = netdev_priv(dev);
594 		memcpy(&p, &t->parms, sizeof(p));
595 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
596 			err = -EFAULT;
597 		break;
598 
599 	case SIOCADDTUNNEL:
600 	case SIOCCHGTUNNEL:
601 		err = -EPERM;
602 		if (!capable(CAP_NET_ADMIN))
603 			goto done;
604 
605 		err = -EFAULT;
606 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
607 			goto done;
608 
609 		err = -EINVAL;
610 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
611 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
612 			goto done;
613 		if (p.iph.ttl)
614 			p.iph.frag_off |= htons(IP_DF);
615 
616 		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
617 
618 		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
619 			if (t != NULL) {
620 				if (t->dev != dev) {
621 					err = -EEXIST;
622 					break;
623 				}
624 			} else {
625 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
626 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
627 					err = -EINVAL;
628 					break;
629 				}
630 				t = netdev_priv(dev);
631 				ipip_tunnel_unlink(ipn, t);
632 				t->parms.iph.saddr = p.iph.saddr;
633 				t->parms.iph.daddr = p.iph.daddr;
634 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
635 				memcpy(dev->broadcast, &p.iph.daddr, 4);
636 				ipip_tunnel_link(ipn, t);
637 				netdev_state_change(dev);
638 			}
639 		}
640 
641 		if (t) {
642 			err = 0;
643 			if (cmd == SIOCCHGTUNNEL) {
644 				t->parms.iph.ttl = p.iph.ttl;
645 				t->parms.iph.tos = p.iph.tos;
646 				t->parms.iph.frag_off = p.iph.frag_off;
647 				if (t->parms.link != p.link) {
648 					t->parms.link = p.link;
649 					ipip_tunnel_bind_dev(dev);
650 					netdev_state_change(dev);
651 				}
652 			}
653 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
654 				err = -EFAULT;
655 		} else
656 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
657 		break;
658 
659 	case SIOCDELTUNNEL:
660 		err = -EPERM;
661 		if (!capable(CAP_NET_ADMIN))
662 			goto done;
663 
664 		if (dev == ipn->fb_tunnel_dev) {
665 			err = -EFAULT;
666 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
667 				goto done;
668 			err = -ENOENT;
669 			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
670 				goto done;
671 			err = -EPERM;
672 			if (t->dev == ipn->fb_tunnel_dev)
673 				goto done;
674 			dev = t->dev;
675 		}
676 		unregister_netdevice(dev);
677 		err = 0;
678 		break;
679 
680 	default:
681 		err = -EINVAL;
682 	}
683 
684 done:
685 	return err;
686 }
687 
688 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
689 {
690 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
691 		return -EINVAL;
692 	dev->mtu = new_mtu;
693 	return 0;
694 }
695 
696 static const struct net_device_ops ipip_netdev_ops = {
697 	.ndo_uninit	= ipip_tunnel_uninit,
698 	.ndo_start_xmit	= ipip_tunnel_xmit,
699 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
700 	.ndo_change_mtu	= ipip_tunnel_change_mtu,
701 
702 };
703 
704 static void ipip_tunnel_setup(struct net_device *dev)
705 {
706 	dev->netdev_ops		= &ipip_netdev_ops;
707 	dev->destructor		= free_netdev;
708 
709 	dev->type		= ARPHRD_TUNNEL;
710 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
711 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
712 	dev->flags		= IFF_NOARP;
713 	dev->iflink		= 0;
714 	dev->addr_len		= 4;
715 	dev->features		|= NETIF_F_NETNS_LOCAL;
716 }
717 
718 static void ipip_tunnel_init(struct net_device *dev)
719 {
720 	struct ip_tunnel *tunnel = netdev_priv(dev);
721 
722 	tunnel->dev = dev;
723 	strcpy(tunnel->parms.name, dev->name);
724 
725 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
726 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
727 
728 	ipip_tunnel_bind_dev(dev);
729 }
730 
731 static void ipip_fb_tunnel_init(struct net_device *dev)
732 {
733 	struct ip_tunnel *tunnel = netdev_priv(dev);
734 	struct iphdr *iph = &tunnel->parms.iph;
735 	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
736 
737 	tunnel->dev = dev;
738 	strcpy(tunnel->parms.name, dev->name);
739 
740 	iph->version		= 4;
741 	iph->protocol		= IPPROTO_IPIP;
742 	iph->ihl		= 5;
743 
744 	dev_hold(dev);
745 	ipn->tunnels_wc[0]	= tunnel;
746 }
747 
748 static struct xfrm_tunnel ipip_handler = {
749 	.handler	=	ipip_rcv,
750 	.err_handler	=	ipip_err,
751 	.priority	=	1,
752 };
753 
754 static const char banner[] __initconst =
755 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
756 
757 static void ipip_destroy_tunnels(struct ipip_net *ipn)
758 {
759 	int prio;
760 
761 	for (prio = 1; prio < 4; prio++) {
762 		int h;
763 		for (h = 0; h < HASH_SIZE; h++) {
764 			struct ip_tunnel *t;
765 			while ((t = ipn->tunnels[prio][h]) != NULL)
766 				unregister_netdevice(t->dev);
767 		}
768 	}
769 }
770 
771 static int ipip_init_net(struct net *net)
772 {
773 	int err;
774 	struct ipip_net *ipn;
775 
776 	err = -ENOMEM;
777 	ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
778 	if (ipn == NULL)
779 		goto err_alloc;
780 
781 	err = net_assign_generic(net, ipip_net_id, ipn);
782 	if (err < 0)
783 		goto err_assign;
784 
785 	ipn->tunnels[0] = ipn->tunnels_wc;
786 	ipn->tunnels[1] = ipn->tunnels_l;
787 	ipn->tunnels[2] = ipn->tunnels_r;
788 	ipn->tunnels[3] = ipn->tunnels_r_l;
789 
790 	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
791 					   "tunl0",
792 					   ipip_tunnel_setup);
793 	if (!ipn->fb_tunnel_dev) {
794 		err = -ENOMEM;
795 		goto err_alloc_dev;
796 	}
797 	dev_net_set(ipn->fb_tunnel_dev, net);
798 
799 	ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
800 
801 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
802 		goto err_reg_dev;
803 
804 	return 0;
805 
806 err_reg_dev:
807 	free_netdev(ipn->fb_tunnel_dev);
808 err_alloc_dev:
809 	/* nothing */
810 err_assign:
811 	kfree(ipn);
812 err_alloc:
813 	return err;
814 }
815 
816 static void ipip_exit_net(struct net *net)
817 {
818 	struct ipip_net *ipn;
819 
820 	ipn = net_generic(net, ipip_net_id);
821 	rtnl_lock();
822 	ipip_destroy_tunnels(ipn);
823 	unregister_netdevice(ipn->fb_tunnel_dev);
824 	rtnl_unlock();
825 	kfree(ipn);
826 }
827 
828 static struct pernet_operations ipip_net_ops = {
829 	.init = ipip_init_net,
830 	.exit = ipip_exit_net,
831 };
832 
833 static int __init ipip_init(void)
834 {
835 	int err;
836 
837 	printk(banner);
838 
839 	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
840 		printk(KERN_INFO "ipip init: can't register tunnel\n");
841 		return -EAGAIN;
842 	}
843 
844 	err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
845 	if (err)
846 		xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
847 
848 	return err;
849 }
850 
851 static void __exit ipip_fini(void)
852 {
853 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
854 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
855 
856 	unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
857 }
858 
859 module_init(ipip_init);
860 module_exit(ipip_fini);
861 MODULE_LICENSE("GPL");
862