xref: /openbmc/linux/net/ipv4/ipip.c (revision 9d749629)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Authors:
5  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *	Fixes:
8  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
9  *					a module taking up 2 pages).
10  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *					to keep ip_forward happy.
12  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *					I do not want to merge them together.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *	modify it under the terms of the GNU General Public License
22  *	as published by the Free Software Foundation; either version
23  *	2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29 	The purpose of this driver is to provide an IP tunnel through
30 	which you can tunnel network traffic transparently across subnets.
31 
32 	This was written by looking at Nick Holloway's dummy driver
33 	Thanks for the great code!
34 
35 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36 
37 	Minor tweaks:
38 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 		dev->hard_header/hard_header_len changed to use no headers.
40 		Comments/bracketing tweaked.
41 		Made the tunnels use dev->name not tunnel: when error reporting.
42 		Added tx_dropped stat
43 
44 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45 
46 	Reworked:
47 		Changed to tunnel to destination gateway in addition to the
48 			tunnel's pointopoint address
49 		Almost completely rewritten
50 		Note:  There is currently no firewall or ICMP handling done.
51 
52 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58 	When the tunnel_xmit() function is called, the skb contains the
59 	packet to be sent (plus a great deal of extra info), and dev
60 	contains the tunnel device that _we_ are.
61 
62 	When we are passed a packet, we are expected to fill in the
63 	source address with our source IP address.
64 
65 	What is the proper way to allocate, copy and free a buffer?
66 	After you allocate it, it is a "0 length" chunk of memory
67 	starting at zero.  If you want to add headers to the buffer
68 	later, you'll have to call "skb_reserve(skb, amount)" with
69 	the amount of memory you want reserved.  Then, you call
70 	"skb_put(skb, amount)" with the amount of space you want in
71 	the buffer.  skb_put() returns a pointer to the top (#0) of
72 	that buffer.  skb->len is set to the amount of space you have
73 	"allocated" with skb_put().  You can then write up to skb->len
74 	bytes to that buffer.  If you need more, you can call skb_put()
75 	again with the additional amount of space you need.  You can
76 	find out how much more space you can allocate by calling
77 	"skb_tailroom(skb)".
78 	Now, to add header space, call "skb_push(skb, header_len)".
79 	This creates space at the beginning of the buffer and returns
80 	a pointer to this new space.  If later you need to strip a
81 	header from a buffer, call "skb_pull(skb, header_len)".
82 	skb_headroom() will return how much space is left at the top
83 	of the buffer (before the main data).  Remember, this headroom
84 	space must be reserved before the skb_put() function is called.
85 	*/
86 
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110 
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119 
/* Number of buckets in each hashed tunnel table; must stay a power of two. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that expression arguments
 * (e.g. "a | b") are hashed as a whole, and the mask is derived from
 * HASH_SIZE instead of being hard-coded.
 */
#define HASH(addr) \
	(((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & (HASH_SIZE - 1))
122 
/*
 * When true, ipip_rcv() emits a rate-limited log line for packets whose
 * ECN decapsulation reports an error.  Exposed as a module parameter
 * (mode 0644).
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 
/* Key used with net_generic() to find this module's per-namespace state. */
static int ipip_net_id __read_mostly;

/*
 * Per network namespace IPIP state: the tunnel hash tables and the
 * fallback device.
 */
struct ipip_net {
	/* Tunnels with both remote and local set, indexed by HASH(remote) ^ HASH(local). */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	/* Tunnels with only a remote address, indexed by HASH(remote). */
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	/* Tunnels with only a local address, indexed by HASH(local). */
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	/* Wildcard slot (neither address set); holds the fallback tunnel. */
	struct ip_tunnel __rcu *tunnels_wc[1];
	/*
	 * The four tables above indexed by priority, filled in by
	 * ipip_init_net(): 0 = wc, 1 = l, 2 = r, 3 = r_l.
	 */
	struct ip_tunnel __rcu **tunnels[4];

	/* Fallback device ("tunl0") created in ipip_init_net(). */
	struct net_device *fb_tunnel_dev;
};
137 
/* Forward declarations for helpers and ops that are used before their definitions. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
142 
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 						  struct rtnl_link_stats64 *tot)
145 {
146 	int i;
147 
148 	for_each_possible_cpu(i) {
149 		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150 		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 		unsigned int start;
152 
153 		do {
154 			start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 			rx_packets = tstats->rx_packets;
156 			tx_packets = tstats->tx_packets;
157 			rx_bytes = tstats->rx_bytes;
158 			tx_bytes = tstats->tx_bytes;
159 		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160 
161 		tot->rx_packets += rx_packets;
162 		tot->tx_packets += tx_packets;
163 		tot->rx_bytes   += rx_bytes;
164 		tot->tx_bytes   += tx_bytes;
165 	}
166 
167 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 	tot->tx_dropped = dev->stats.tx_dropped;
170 	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 	tot->tx_errors = dev->stats.tx_errors;
172 	tot->collisions = dev->stats.collisions;
173 
174 	return tot;
175 }
176 
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178 		__be32 remote, __be32 local)
179 {
180 	unsigned int h0 = HASH(remote);
181 	unsigned int h1 = HASH(local);
182 	struct ip_tunnel *t;
183 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
184 
185 	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186 		if (local == t->parms.iph.saddr &&
187 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 			return t;
189 
190 	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 			return t;
193 
194 	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 			return t;
197 
198 	t = rcu_dereference(ipn->tunnels_wc[0]);
199 	if (t && (t->dev->flags&IFF_UP))
200 		return t;
201 	return NULL;
202 }
203 
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205 		struct ip_tunnel_parm *parms)
206 {
207 	__be32 remote = parms->iph.daddr;
208 	__be32 local = parms->iph.saddr;
209 	unsigned int h = 0;
210 	int prio = 0;
211 
212 	if (remote) {
213 		prio |= 2;
214 		h ^= HASH(remote);
215 	}
216 	if (local) {
217 		prio |= 1;
218 		h ^= HASH(local);
219 	}
220 	return &ipn->tunnels[prio][h];
221 }
222 
223 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224 		struct ip_tunnel *t)
225 {
226 	return __ipip_bucket(ipn, &t->parms);
227 }
228 
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231 	struct ip_tunnel __rcu **tp;
232 	struct ip_tunnel *iter;
233 
234 	for (tp = ipip_bucket(ipn, t);
235 	     (iter = rtnl_dereference(*tp)) != NULL;
236 	     tp = &iter->next) {
237 		if (t == iter) {
238 			rcu_assign_pointer(*tp, t->next);
239 			break;
240 		}
241 	}
242 }
243 
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246 	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247 
248 	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 	rcu_assign_pointer(*tp, t);
250 }
251 
252 static int ipip_tunnel_create(struct net_device *dev)
253 {
254 	struct ip_tunnel *t = netdev_priv(dev);
255 	struct net *net = dev_net(dev);
256 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 	int err;
258 
259 	err = ipip_tunnel_init(dev);
260 	if (err < 0)
261 		goto out;
262 
263 	err = register_netdevice(dev);
264 	if (err < 0)
265 		goto out;
266 
267 	strcpy(t->parms.name, dev->name);
268 	dev->rtnl_link_ops = &ipip_link_ops;
269 
270 	dev_hold(dev);
271 	ipip_tunnel_link(ipn, t);
272 	return 0;
273 
274 out:
275 	return err;
276 }
277 
278 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279 		struct ip_tunnel_parm *parms, int create)
280 {
281 	__be32 remote = parms->iph.daddr;
282 	__be32 local = parms->iph.saddr;
283 	struct ip_tunnel *t, *nt;
284 	struct ip_tunnel __rcu **tp;
285 	struct net_device *dev;
286 	char name[IFNAMSIZ];
287 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
288 
289 	for (tp = __ipip_bucket(ipn, parms);
290 		 (t = rtnl_dereference(*tp)) != NULL;
291 		 tp = &t->next) {
292 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293 			return t;
294 	}
295 	if (!create)
296 		return NULL;
297 
298 	if (parms->name[0])
299 		strlcpy(name, parms->name, IFNAMSIZ);
300 	else
301 		strcpy(name, "tunl%d");
302 
303 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304 	if (dev == NULL)
305 		return NULL;
306 
307 	dev_net_set(dev, net);
308 
309 	nt = netdev_priv(dev);
310 	nt->parms = *parms;
311 
312 	if (ipip_tunnel_create(dev) < 0)
313 		goto failed_free;
314 
315 	return nt;
316 
317 failed_free:
318 	ipip_dev_free(dev);
319 	return NULL;
320 }
321 
322 /* called with RTNL */
323 static void ipip_tunnel_uninit(struct net_device *dev)
324 {
325 	struct net *net = dev_net(dev);
326 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
327 
328 	if (dev == ipn->fb_tunnel_dev)
329 		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330 	else
331 		ipip_tunnel_unlink(ipn, netdev_priv(dev));
332 	dev_put(dev);
333 }
334 
/*
 * ICMP error handler for IPIP (installed as ipip_handler.err_handler).
 *
 * @skb:  its data is read as an IPv4 header whose daddr/saddr are used as
 *        the tunnel's remote/local lookup key (presumably the outer header
 *        quoted in the ICMP error -- confirm against the caller).
 * @info: passed through to ipv4_update_pmtu() for "fragmentation needed".
 *
 * Returns 0 when the error was ignored or handled, and -ENOENT when no
 * tunnel matches or the matching tunnel has no fixed remote address.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Filter out ICMP types/codes this handler does not act on. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		/* Only TTL-exceeded is of interest. */
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	case ICMP_REDIRECT:
		break;
	}

	err = -ENOENT;
	/* Note the argument order: header daddr is the remote, saddr the local. */
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
			      IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	/* Tunnel without a fixed remote: leave with err still -ENOENT. */
	if (t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* With ttl == 0 configured, TTL-exceeded errors are not counted. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/*
	 * Count errors that arrive within IPTUNNEL_ERR_TIMEO of the previous
	 * one; otherwise restart the count.  ipip_tunnel_xmit() consumes
	 * err_count/err_time.
	 */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:

	return err;
}
410 
/*
 * Receive handler for IPIP packets (installed as ipip_handler.handler).
 *
 * Looks up the tunnel from the packet's IP source/destination, rewrites
 * the skb header offsets, updates the per-cpu rx counters and hands the
 * skb to netif_rx().
 *
 * Returns 0 when the skb was passed on or dropped (freed) here, and -1
 * when no tunnel matches; in that case this function does not free the skb.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;

		secpath_reset(skb);

		/* Record the old network header offset as the mac header, then reset. */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		__skb_tunnel_rx(skb, tunnel->dev);

		/* iph still points at the header obtained on entry. */
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			/* Results above 1 are counted as frame errors and dropped. */
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);
		return 0;
	}

	return -1;

drop:
	kfree_skb(skb);
	return 0;
}
461 
/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 *
 *	ndo_start_xmit handler: routes towards the tunnel remote, prepends an
 *	outer IPv4 header (protocol IPPROTO_IPIP) and passes the result to
 *	iptunnel_xmit().  Always returns NETDEV_TX_OK.  On every error path
 *	the skb is freed here and a counter in dev->stats is bumped.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	const struct iphdr  *old_iph;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	/* Only IPv4 payloads can be encapsulated. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Resolve a pending partial checksum before encapsulating. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_checksum_help(skb))
		goto tx_error;

	old_iph = ip_hdr(skb);

	/* Low bit set in the configured TOS: use the inner packet's TOS. */
	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		/* No fixed remote: take the next hop of the route attached to the skb. */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		dst = rt_nexthop(rt, old_iph->daddr);
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	/* The route leads back to this tunnel device: refuse to loop. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	/* Outer DF = configured DF bit OR the inner packet's DF bit. */
	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		/* Space left for the inner packet once the outer header is added. */
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

		/* Inner packet has DF set and does not fit: report via ICMP and drop. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	/*
	 * ipip_err() recorded recent ICMP errors: signal one link failure per
	 * recorded error while still inside the IPTUNNEL_ERR_TIMEO window,
	 * otherwise clear the count.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		/* Not enough headroom or not exclusively writable: get a new copy. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* The inner header now lives in the new buffer. */
		old_iph = ip_hdr(skb);
	}

	/* The inner IP header becomes the transport header of the outer packet. */
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	/* Replace the skb's dst with the route to the tunnel endpoint. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	/* A configured TTL of 0 means: copy the inner packet's TTL. */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	iptunnel_xmit(skb, dev);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
608 
609 static void ipip_tunnel_bind_dev(struct net_device *dev)
610 {
611 	struct net_device *tdev = NULL;
612 	struct ip_tunnel *tunnel;
613 	const struct iphdr *iph;
614 
615 	tunnel = netdev_priv(dev);
616 	iph = &tunnel->parms.iph;
617 
618 	if (iph->daddr) {
619 		struct rtable *rt;
620 		struct flowi4 fl4;
621 
622 		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
623 					   iph->daddr, iph->saddr,
624 					   0, 0,
625 					   IPPROTO_IPIP,
626 					   RT_TOS(iph->tos),
627 					   tunnel->parms.link);
628 		if (!IS_ERR(rt)) {
629 			tdev = rt->dst.dev;
630 			ip_rt_put(rt);
631 		}
632 		dev->flags |= IFF_POINTOPOINT;
633 	}
634 
635 	if (!tdev && tunnel->parms.link)
636 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
637 
638 	if (tdev) {
639 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
640 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
641 	}
642 	dev->iflink = tunnel->parms.link;
643 }
644 
/*
 * Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel's hash bucket depends on its addresses, so it is unlinked
 * first and relinked after the addresses change.  synchronize_net() is
 * called between the unlink and the address change.  The device is
 * re-bound only when the link index changed.
 */
static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
{
	struct net *net = dev_net(t->dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	ipip_tunnel_unlink(ipn, t);
	synchronize_net();
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	/* Mirror the endpoints into the device address fields (addr_len is 4). */
	memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
	memcpy(t->dev->broadcast, &p->iph.daddr, 4);
	ipip_tunnel_link(ipn, t);
	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;
	if (t->parms.link != p->link) {
		t->parms.link = p->link;
		ipip_tunnel_bind_dev(t->dev);
	}
	netdev_state_change(t->dev);
}
666 
/*
 * ndo_do_ioctl handler implementing the tunnel ioctls.
 *
 * SIOCGETTUNNEL: copy a tunnel's parameters to user space.  On the
 *	fallback device the user-supplied parameters select which tunnel
 *	to report; otherwise (or if none matches) @dev itself is reported.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: create a tunnel, or change @dev's
 *	parameters.  Requires CAP_NET_ADMIN in the namespace's user_ns.
 * SIOCDELTUNNEL: unregister a tunnel.  Requires CAP_NET_ADMIN.  On the
 *	fallback device the user-supplied parameters select the victim,
 *	and the fallback device itself cannot be deleted.
 *
 * Returns 0 on success or a negative errno (-EINVAL for unknown commands).
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		/* No specific tunnel selected or found: report this device. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Must be a plain IPv4/IPIP header template; only DF may be set in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL forces DF on the outer header. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Only SIOCADDTUNNEL may create a new device here. */
		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The requested addresses already belong to another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* A change may not add or remove the remote address. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
			}

			ipip_tunnel_update(t, &p);
		}

		if (t) {
			err = 0;
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback device itself cannot be deleted. */
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
766 
767 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
768 {
769 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
770 		return -EINVAL;
771 	dev->mtu = new_mtu;
772 	return 0;
773 }
774 
/* Device operations shared by every IPIP tunnel device, including the fallback. */
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats64 = ipip_get_stats64,
};
782 
/*
 * Release a tunnel device: free its per-cpu statistics, then the
 * net_device itself.  Installed as dev->destructor by ipip_tunnel_setup()
 * and also called directly on setup error paths.
 */
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
788 
/* Feature flags set in both dev->features and dev->hw_features by ipip_tunnel_setup(). */
#define IPIP_FEATURES (NETIF_F_SG |		\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_HW_CSUM)
793 
794 static void ipip_tunnel_setup(struct net_device *dev)
795 {
796 	dev->netdev_ops		= &ipip_netdev_ops;
797 	dev->destructor		= ipip_dev_free;
798 
799 	dev->type		= ARPHRD_TUNNEL;
800 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
801 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
802 	dev->flags		= IFF_NOARP;
803 	dev->iflink		= 0;
804 	dev->addr_len		= 4;
805 	dev->features		|= NETIF_F_NETNS_LOCAL;
806 	dev->features		|= NETIF_F_LLTX;
807 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
808 
809 	dev->features		|= IPIP_FEATURES;
810 	dev->hw_features	|= IPIP_FEATURES;
811 }
812 
813 static int ipip_tunnel_init(struct net_device *dev)
814 {
815 	struct ip_tunnel *tunnel = netdev_priv(dev);
816 
817 	tunnel->dev = dev;
818 
819 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
820 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
821 
822 	ipip_tunnel_bind_dev(dev);
823 
824 	dev->tstats = alloc_percpu(struct pcpu_tstats);
825 	if (!dev->tstats)
826 		return -ENOMEM;
827 
828 	return 0;
829 }
830 
831 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
832 {
833 	struct ip_tunnel *tunnel = netdev_priv(dev);
834 	struct iphdr *iph = &tunnel->parms.iph;
835 	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
836 
837 	tunnel->dev = dev;
838 	strcpy(tunnel->parms.name, dev->name);
839 
840 	iph->version		= 4;
841 	iph->protocol		= IPPROTO_IPIP;
842 	iph->ihl		= 5;
843 
844 	dev->tstats = alloc_percpu(struct pcpu_tstats);
845 	if (!dev->tstats)
846 		return -ENOMEM;
847 
848 	dev_hold(dev);
849 	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
850 	return 0;
851 }
852 
853 static void ipip_netlink_parms(struct nlattr *data[],
854 			       struct ip_tunnel_parm *parms)
855 {
856 	memset(parms, 0, sizeof(*parms));
857 
858 	parms->iph.version = 4;
859 	parms->iph.protocol = IPPROTO_IPIP;
860 	parms->iph.ihl = 5;
861 
862 	if (!data)
863 		return;
864 
865 	if (data[IFLA_IPTUN_LINK])
866 		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
867 
868 	if (data[IFLA_IPTUN_LOCAL])
869 		parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
870 
871 	if (data[IFLA_IPTUN_REMOTE])
872 		parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
873 
874 	if (data[IFLA_IPTUN_TTL]) {
875 		parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
876 		if (parms->iph.ttl)
877 			parms->iph.frag_off = htons(IP_DF);
878 	}
879 
880 	if (data[IFLA_IPTUN_TOS])
881 		parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
882 
883 	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
884 		parms->iph.frag_off = htons(IP_DF);
885 }
886 
887 static int ipip_newlink(struct net *src_net, struct net_device *dev,
888 			struct nlattr *tb[], struct nlattr *data[])
889 {
890 	struct net *net = dev_net(dev);
891 	struct ip_tunnel *nt;
892 
893 	nt = netdev_priv(dev);
894 	ipip_netlink_parms(data, &nt->parms);
895 
896 	if (ipip_tunnel_locate(net, &nt->parms, 0))
897 		return -EEXIST;
898 
899 	return ipip_tunnel_create(dev);
900 }
901 
902 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
903 			   struct nlattr *data[])
904 {
905 	struct ip_tunnel *t;
906 	struct ip_tunnel_parm p;
907 	struct net *net = dev_net(dev);
908 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
909 
910 	if (dev == ipn->fb_tunnel_dev)
911 		return -EINVAL;
912 
913 	ipip_netlink_parms(data, &p);
914 
915 	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
916 	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
917 		return -EINVAL;
918 
919 	t = ipip_tunnel_locate(net, &p, 0);
920 
921 	if (t) {
922 		if (t->dev != dev)
923 			return -EEXIST;
924 	} else
925 		t = netdev_priv(dev);
926 
927 	ipip_tunnel_update(t, &p);
928 	return 0;
929 }
930 
931 static size_t ipip_get_size(const struct net_device *dev)
932 {
933 	return
934 		/* IFLA_IPTUN_LINK */
935 		nla_total_size(4) +
936 		/* IFLA_IPTUN_LOCAL */
937 		nla_total_size(4) +
938 		/* IFLA_IPTUN_REMOTE */
939 		nla_total_size(4) +
940 		/* IFLA_IPTUN_TTL */
941 		nla_total_size(1) +
942 		/* IFLA_IPTUN_TOS */
943 		nla_total_size(1) +
944 		/* IFLA_IPTUN_PMTUDISC */
945 		nla_total_size(1) +
946 		0;
947 }
948 
949 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
950 {
951 	struct ip_tunnel *tunnel = netdev_priv(dev);
952 	struct ip_tunnel_parm *parm = &tunnel->parms;
953 
954 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
955 	    nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
956 	    nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
957 	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
958 	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
959 	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
960 		       !!(parm->iph.frag_off & htons(IP_DF))))
961 		goto nla_put_failure;
962 	return 0;
963 
964 nla_put_failure:
965 	return -EMSGSIZE;
966 }
967 
/*
 * Netlink attribute policy for IFLA_IPTUN_*: link and the two addresses
 * are 32-bit values, the rest are single bytes.
 */
static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .type = NLA_U32 },
	[IFLA_IPTUN_REMOTE]		= { .type = NLA_U32 },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_TOS]		= { .type = NLA_U8 },
	[IFLA_IPTUN_PMTUDISC]		= { .type = NLA_U8 },
};
976 
/* rtnetlink link operations for devices of kind "ipip". */
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
	.kind		= "ipip",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ipip_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipip_tunnel_setup,
	.newlink	= ipip_newlink,
	.changelink	= ipip_changelink,
	.get_size	= ipip_get_size,
	.fill_info	= ipip_fill_info,
};
988 
/*
 * Receive and ICMP-error callbacks, registered for AF_INET through
 * xfrm4_tunnel_register() in ipip_init().
 */
static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};
994 
/* Message printed once by ipip_init(); it carries its own KERN_INFO level prefix. */
static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
997 
998 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
999 {
1000 	int prio;
1001 
1002 	for (prio = 1; prio < 4; prio++) {
1003 		int h;
1004 		for (h = 0; h < HASH_SIZE; h++) {
1005 			struct ip_tunnel *t;
1006 
1007 			t = rtnl_dereference(ipn->tunnels[prio][h]);
1008 			while (t != NULL) {
1009 				unregister_netdevice_queue(t->dev, head);
1010 				t = rtnl_dereference(t->next);
1011 			}
1012 		}
1013 	}
1014 }
1015 
1016 static int __net_init ipip_init_net(struct net *net)
1017 {
1018 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
1019 	struct ip_tunnel *t;
1020 	int err;
1021 
1022 	ipn->tunnels[0] = ipn->tunnels_wc;
1023 	ipn->tunnels[1] = ipn->tunnels_l;
1024 	ipn->tunnels[2] = ipn->tunnels_r;
1025 	ipn->tunnels[3] = ipn->tunnels_r_l;
1026 
1027 	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1028 					   "tunl0",
1029 					   ipip_tunnel_setup);
1030 	if (!ipn->fb_tunnel_dev) {
1031 		err = -ENOMEM;
1032 		goto err_alloc_dev;
1033 	}
1034 	dev_net_set(ipn->fb_tunnel_dev, net);
1035 
1036 	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1037 	if (err)
1038 		goto err_reg_dev;
1039 
1040 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
1041 		goto err_reg_dev;
1042 
1043 	t = netdev_priv(ipn->fb_tunnel_dev);
1044 
1045 	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1046 	return 0;
1047 
1048 err_reg_dev:
1049 	ipip_dev_free(ipn->fb_tunnel_dev);
1050 err_alloc_dev:
1051 	/* nothing */
1052 	return err;
1053 }
1054 
/*
 * Per-namespace teardown: under RTNL, queue all hashed tunnels and then
 * the fallback device on a local list and unregister them in one batch.
 */
static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
1066 
/*
 * Per-namespace hooks; .id and .size describe the struct ipip_net storage
 * that is looked up with net_generic(net, ipip_net_id).
 */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};
1073 
1074 static int __init ipip_init(void)
1075 {
1076 	int err;
1077 
1078 	printk(banner);
1079 
1080 	err = register_pernet_device(&ipip_net_ops);
1081 	if (err < 0)
1082 		return err;
1083 	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1084 	if (err < 0) {
1085 		pr_info("%s: can't register tunnel\n", __func__);
1086 		goto xfrm_tunnel_failed;
1087 	}
1088 	err = rtnl_link_register(&ipip_link_ops);
1089 	if (err < 0)
1090 		goto rtnl_link_failed;
1091 
1092 out:
1093 	return err;
1094 
1095 rtnl_link_failed:
1096 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1097 xfrm_tunnel_failed:
1098 	unregister_pernet_device(&ipip_net_ops);
1099 	goto out;
1100 }
1101 
/*
 * Module exit: undo the registrations made by ipip_init() in reverse
 * order.  A failure to deregister the receive handler is only logged.
 */
static void __exit ipip_fini(void)
{
	rtnl_link_unregister(&ipip_link_ops);
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		pr_info("%s: can't deregister tunnel\n", __func__);

	unregister_pernet_device(&ipip_net_ops);
}
1110 
/* Module entry/exit points and metadata; "tunl0" is registered as a netdev alias. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");
1115