/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non-modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *		David Woodhouse	:	Perform some basic ICMP handling.
 *					IPIP Routing without decapsulation.
 *		Carlos Picoto	:	GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name, not "tunnel:", when error reporting.
		Added tx_dropped stat.

		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note: There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/
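
/* For illustration only -- a minimal sketch of the allocation pattern the
 * note above describes; header_len, data_len and payload are hypothetical
 * names, not part of this driver:
 *
 *	struct sk_buff *skb = alloc_skb(header_len + data_len, GFP_ATOMIC);
 *	if (!skb)
 *		return;				// allocation can fail
 *	skb_reserve(skb, header_len);		// leave room for headers
 *	memcpy(skb_put(skb, data_len),		// append the payload
 *	       payload, data_len);
 *	skb_push(skb, header_len);		// later: prepend a header
 *						// into the reserved headroom
 */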

/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
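/* Worked example: for the 32-bit value 0xc0000301,
 * 0xc0000301 ^ (0xc0000301 >> 4) = 0xc0000301 ^ 0x0c000030 = 0xcc000331,
 * and & 0xF leaves bucket 1; only the two lowest nibbles of the
 * byte-order-preserved address ever influence the bucket.
 */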

static int ipip_net_id __read_mostly;
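/* Per-namespace state.  Tunnels are hashed by how fully they are keyed:
 * both endpoints (tunnels_r_l), remote only (tunnels_r), local only
 * (tunnels_l), plus a single wildcard slot (tunnels_wc) used by the
 * fallback device.  tunnels[] indexes these four arrays by the "prio"
 * computed in __ipip_bucket(); see ipip_init_net() for the wiring.
 */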
struct ipip_net {
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
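/* Note: the iterator expects a local "struct ip_tunnel *t" in the calling
 * scope and must run under rcu_read_lock(), as in ipip_tunnel_lookup().
 */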

/* often modified stats are per cpu, others are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	struct u64_stats_sync	syncp;
};

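/* Fold the per-cpu counters into @tot.  The u64_stats fetch/retry loop
 * re-reads a cpu's counters until it sees a snapshot no writer touched
 * mid-update, which is what makes 64-bit counters safe on 32-bit hosts.
 */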
static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
						  struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;
	tot->collisions = dev->stats.collisions;

	return tot;
}

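/* Most specific match wins: (remote, local) first, then remote only,
 * then local only, and finally the wildcard fallback tunnel.
 */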
static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}

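/* prio encodes which endpoints are keyed (bit 1 = remote, bit 0 = local)
 * and h xors the endpoint hashes, mirroring the lookup order above.
 */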
static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned int h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

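/* Publish the tunnel at the head of its chain: t->next is set before *tp
 * so a concurrent RCU reader can never see a half-initialized entry.
 */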
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms);
		 (t = rtnl_dereference(*tp)) != NULL;
		 tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	strcpy(nt->parms.name, dev->name);

	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}

/* called with RTNL */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev)
		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
	else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	case ICMP_REDIRECT:
		break;
	}

	err = -ENOENT;

	rcu_read_lock();
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
			      IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

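	/* Remember the error; ipip_tunnel_xmit() consumes err_count and
	 * err_time to report link failure on subsequent transmits.
	 */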
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return err;
}

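/* If the outer header arrived marked Congestion Experienced, propagate
 * the CE mark to the inner header so the tunneled flow's endpoints see it.
 */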
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
{
	struct iphdr *inner_iph = ip_hdr(skb);

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	rcu_read_lock();
	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		__skb_tunnel_rx(skb, tunnel->dev);

		ipip_ecn_decapsulate(iph, skb);

		netif_rx(skb);

		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

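	/* Not one of our tunnels; a non-zero return lets the tunnel4
	 * dispatcher offer the packet to the next registered handler.
	 */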
	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	const struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		dst = rt_nexthop(rt, old_iph->daddr);
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

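		/* 68 is the smallest MTU an IPv4 link is required to
		 * support (RFC 791).
		 */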
		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

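/* Find the device that encapsulated packets will actually leave through
 * and derive our own header room and MTU from it.
 */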
static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
					   iph->daddr, iph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos),
					   tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}

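/* Userspace configuration entry point: SIOCGETTUNNEL reads parameters,
 * SIOCADDTUNNEL/SIOCCHGTUNNEL create or modify a tunnel (CAP_NET_ADMIN
 * required), and SIOCDELTUNNEL destroys one.
 */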
static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
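
/* For illustration only -- the shape of a userspace caller creating a
 * tunnel through the fallback device.  This mirrors what iproute2 does,
 * but it is a hedged sketch (error handling elided, local_addr and
 * remote_addr are hypothetical __be32 values), not code lifted from it:
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "tunl1");
 *	p.iph.version  = 4;
 *	p.iph.ihl      = 5;		// the ioctl above insists on these
 *	p.iph.protocol = IPPROTO_IPIP;
 *	p.iph.saddr    = local_addr;
 *	p.iph.daddr    = remote_addr;
 *	strcpy(ifr.ifr_name, "tunl0");	// talk to the fallback device
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */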
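/* Keep the MTU between the IPv4 minimum and the most a 16-bit tot_len can
 * carry once our outer header is added (0xFFF8 is 64K rounded down to an
 * 8-byte fragment boundary).
 */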
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats64 = ipip_get_stats64,
};

static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

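/* tunl devices carry no link-layer header and do no ARP.  NETIF_F_LLTX
 * skips the TX lock (the xmit path keeps only per-cpu stats), and clearing
 * IFF_XMIT_DST_RELEASE preserves skb_dst() across dev_queue_xmit() for the
 * NBMA and PMTU handling in ipip_tunnel_xmit().
 */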
static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= ipip_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}

static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

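/* prio 0 (the wildcard slot) holds only the fallback device, which the
 * caller unregisters itself, so start at prio 1.
 */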
static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ipn->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	struct ip_tunnel *t;
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	t = netdev_priv(ipn->fb_tunnel_dev);

	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
	return 0;

err_reg_dev:
	ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}

static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
		pr_info("%s: can't register tunnel\n", __func__);
	}
	return err;
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		pr_info("%s: can't deregister tunnel\n", __func__);

	unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");