xref: /openbmc/linux/net/ipv4/ip_tunnel.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
/* Fold a (key, remote address) pair into a bucket index that is
 * IP_TNL_HASH_BITS wide.
 */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	u32 mixed = (__force u32)key ^ (__force u32)remote;

	return hash_32(mixed, IP_TNL_HASH_BITS);
}
58 
ip_tunnel_key_match(const struct ip_tunnel_parm * p,__be16 flags,__be32 key)59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
72 /* Fallback tunnel: no source, no destination, no key, no options
73 
74    Tunnel hash table:
75    We require exact key match i.e. if a key is present in packet
76    it will match only tunnel with the same key; if it is not present,
77    it will match only keyless tunnel.
78 
79    All keyless packets, if they do not match a configured keyless tunnel,
80    will match the fallback tunnel.
81    Given src, dst and key, find the appropriate tunnel for input.
82 */
/* Find the tunnel that should receive a packet described by @link, @flags,
 * @remote, @local and @key.
 *
 * Four passes over the hash table go from the most to the least specific
 * address match.  In every pass a tunnel whose parms.link equals @link is
 * returned immediately; any other matching tunnel is only remembered in
 * "cand".  If no pass returns, the result is cand, else the collect_md
 * tunnel, else the fallback device (the last two only when IFF_UP), else
 * NULL.
 */
ip_tunnel_lookup(struct ip_tunnel_net * itn,int link,__be16 flags,__be32 remote,__be32 local,__be32 key)83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 				   int link, __be16 flags,
85 				   __be32 remote, __be32 local,
86 				   __be32 key)
87 {
88 	struct ip_tunnel *t, *cand = NULL;
89 	struct hlist_head *head;
90 	struct net_device *ndev;
91 	unsigned int hash;
92 
93 	hash = ip_tunnel_hash(key, remote);
94 	head = &itn->tunnels[hash];
95 
	/* Pass 1: both addresses match exactly.  Note that cand is
	 * overwritten by every non-link match here, unlike in later passes.
	 */
96 	hlist_for_each_entry_rcu(t, head, hash_node) {
97 		if (local != t->parms.iph.saddr ||
98 		    remote != t->parms.iph.daddr ||
99 		    !(t->dev->flags & IFF_UP))
100 			continue;
101 
102 		if (!ip_tunnel_key_match(&t->parms, flags, key))
103 			continue;
104 
105 		if (t->parms.link == link)
106 			return t;
107 		else
108 			cand = t;
109 	}
110 
	/* Pass 2: remote matches and the tunnel has no source address. */
111 	hlist_for_each_entry_rcu(t, head, hash_node) {
112 		if (remote != t->parms.iph.daddr ||
113 		    t->parms.iph.saddr != 0 ||
114 		    !(t->dev->flags & IFF_UP))
115 			continue;
116 
117 		if (!ip_tunnel_key_match(&t->parms, flags, key))
118 			continue;
119 
120 		if (t->parms.link == link)
121 			return t;
122 		else if (!cand)
123 			cand = t;
124 	}
125 
	/* The remaining passes use the bucket hashed with a zero remote. */
126 	hash = ip_tunnel_hash(key, 0);
127 	head = &itn->tunnels[hash];
128 
	/* Pass 3: tunnel has @local as source and no destination, or
	 * @local is multicast and equals the tunnel's destination.
	 */
129 	hlist_for_each_entry_rcu(t, head, hash_node) {
130 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
131 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
132 			continue;
133 
134 		if (!(t->dev->flags & IFF_UP))
135 			continue;
136 
137 		if (!ip_tunnel_key_match(&t->parms, flags, key))
138 			continue;
139 
140 		if (t->parms.link == link)
141 			return t;
142 		else if (!cand)
143 			cand = t;
144 	}
145 
	/* Pass 4: tunnel has neither source nor destination address.  The
	 * i_key comparison is skipped when @flags carries TUNNEL_NO_KEY.
	 */
146 	hlist_for_each_entry_rcu(t, head, hash_node) {
147 		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
148 		    t->parms.iph.saddr != 0 ||
149 		    t->parms.iph.daddr != 0 ||
150 		    !(t->dev->flags & IFF_UP))
151 			continue;
152 
153 		if (t->parms.link == link)
154 			return t;
155 		else if (!cand)
156 			cand = t;
157 	}
158 
	/* No tunnel on the requested link: take the remembered match. */
159 	if (cand)
160 		return cand;
161 
162 	t = rcu_dereference(itn->collect_md_tun);
163 	if (t && t->dev->flags & IFF_UP)
164 		return t;
165 
166 	ndev = READ_ONCE(itn->fb_tunnel_dev);
167 	if (ndev && ndev->flags & IFF_UP)
168 		return netdev_priv(ndev);
169 
170 	return NULL;
171 }
172 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173 
ip_bucket(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175 				    struct ip_tunnel_parm *parms)
176 {
177 	unsigned int h;
178 	__be32 remote;
179 	__be32 i_key = parms->i_key;
180 
181 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182 		remote = parms->iph.daddr;
183 	else
184 		remote = 0;
185 
186 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187 		i_key = 0;
188 
189 	h = ip_tunnel_hash(i_key, remote);
190 	return &itn->tunnels[h];
191 }
192 
ip_tunnel_add(struct ip_tunnel_net * itn,struct ip_tunnel * t)193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195 	struct hlist_head *head = ip_bucket(itn, &t->parms);
196 
197 	if (t->collect_md)
198 		rcu_assign_pointer(itn->collect_md_tun, t);
199 	hlist_add_head_rcu(&t->hash_node, head);
200 }
201 
ip_tunnel_del(struct ip_tunnel_net * itn,struct ip_tunnel * t)202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204 	if (t->collect_md)
205 		rcu_assign_pointer(itn->collect_md_tun, NULL);
206 	hlist_del_init_rcu(&t->hash_node);
207 }
208 
ip_tunnel_find(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms,int type)209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210 					struct ip_tunnel_parm *parms,
211 					int type)
212 {
213 	__be32 remote = parms->iph.daddr;
214 	__be32 local = parms->iph.saddr;
215 	__be32 key = parms->i_key;
216 	__be16 flags = parms->i_flags;
217 	int link = parms->link;
218 	struct ip_tunnel *t = NULL;
219 	struct hlist_head *head = ip_bucket(itn, parms);
220 
221 	hlist_for_each_entry_rcu(t, head, hash_node) {
222 		if (local == t->parms.iph.saddr &&
223 		    remote == t->parms.iph.daddr &&
224 		    link == t->parms.link &&
225 		    type == t->dev->type &&
226 		    ip_tunnel_key_match(&t->parms, flags, key))
227 			break;
228 	}
229 	return t;
230 }
231 
__ip_tunnel_create(struct net * net,const struct rtnl_link_ops * ops,struct ip_tunnel_parm * parms)232 static struct net_device *__ip_tunnel_create(struct net *net,
233 					     const struct rtnl_link_ops *ops,
234 					     struct ip_tunnel_parm *parms)
235 {
236 	int err;
237 	struct ip_tunnel *tunnel;
238 	struct net_device *dev;
239 	char name[IFNAMSIZ];
240 
241 	err = -E2BIG;
242 	if (parms->name[0]) {
243 		if (!dev_valid_name(parms->name))
244 			goto failed;
245 		strscpy(name, parms->name, IFNAMSIZ);
246 	} else {
247 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
248 			goto failed;
249 		strcpy(name, ops->kind);
250 		strcat(name, "%d");
251 	}
252 
253 	ASSERT_RTNL();
254 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255 	if (!dev) {
256 		err = -ENOMEM;
257 		goto failed;
258 	}
259 	dev_net_set(dev, net);
260 
261 	dev->rtnl_link_ops = ops;
262 
263 	tunnel = netdev_priv(dev);
264 	tunnel->parms = *parms;
265 	tunnel->net = net;
266 
267 	err = register_netdevice(dev);
268 	if (err)
269 		goto failed_free;
270 
271 	return dev;
272 
273 failed_free:
274 	free_netdev(dev);
275 failed:
276 	return ERR_PTR(err);
277 }
278 
ip_tunnel_bind_dev(struct net_device * dev)279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281 	struct net_device *tdev = NULL;
282 	struct ip_tunnel *tunnel = netdev_priv(dev);
283 	const struct iphdr *iph;
284 	int hlen = LL_MAX_HEADER;
285 	int mtu = ETH_DATA_LEN;
286 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287 
288 	iph = &tunnel->parms.iph;
289 
290 	/* Guess output device to choose reasonable mtu and needed_headroom */
291 	if (iph->daddr) {
292 		struct flowi4 fl4;
293 		struct rtable *rt;
294 
295 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296 				    iph->saddr, tunnel->parms.o_key,
297 				    RT_TOS(iph->tos), dev_net(dev),
298 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
299 		rt = ip_route_output_key(tunnel->net, &fl4);
300 
301 		if (!IS_ERR(rt)) {
302 			tdev = rt->dst.dev;
303 			ip_rt_put(rt);
304 		}
305 		if (dev->type != ARPHRD_ETHER)
306 			dev->flags |= IFF_POINTOPOINT;
307 
308 		dst_cache_reset(&tunnel->dst_cache);
309 	}
310 
311 	if (!tdev && tunnel->parms.link)
312 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313 
314 	if (tdev) {
315 		hlen = tdev->hard_header_len + tdev->needed_headroom;
316 		mtu = min(tdev->mtu, IP_MAX_MTU);
317 	}
318 
319 	dev->needed_headroom = t_hlen + hlen;
320 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321 
322 	if (mtu < IPV4_MIN_MTU)
323 		mtu = IPV4_MIN_MTU;
324 
325 	return mtu;
326 }
327 
ip_tunnel_create(struct net * net,struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329 					  struct ip_tunnel_net *itn,
330 					  struct ip_tunnel_parm *parms)
331 {
332 	struct ip_tunnel *nt;
333 	struct net_device *dev;
334 	int t_hlen;
335 	int mtu;
336 	int err;
337 
338 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339 	if (IS_ERR(dev))
340 		return ERR_CAST(dev);
341 
342 	mtu = ip_tunnel_bind_dev(dev);
343 	err = dev_set_mtu(dev, mtu);
344 	if (err)
345 		goto err_dev_set_mtu;
346 
347 	nt = netdev_priv(dev);
348 	t_hlen = nt->hlen + sizeof(struct iphdr);
349 	dev->min_mtu = ETH_MIN_MTU;
350 	dev->max_mtu = IP_MAX_MTU - t_hlen;
351 	if (dev->type == ARPHRD_ETHER)
352 		dev->max_mtu -= dev->hard_header_len;
353 
354 	ip_tunnel_add(itn, nt);
355 	return nt;
356 
357 err_dev_set_mtu:
358 	unregister_netdevice(dev);
359 	return ERR_PTR(err);
360 }
361 
ip_tunnel_md_udp_encap(struct sk_buff * skb,struct ip_tunnel_info * info)362 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
363 {
364 	const struct iphdr *iph = ip_hdr(skb);
365 	const struct udphdr *udph;
366 
367 	if (iph->protocol != IPPROTO_UDP)
368 		return;
369 
370 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
371 	info->encap.sport = udph->source;
372 	info->encap.dport = udph->dest;
373 }
374 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
375 
/* Receive a packet on @tunnel.
 *
 * Checks the checksum and sequence flags in @tpi against the tunnel's
 * i_flags, moves the network header to the inner packet, applies ECN
 * decapsulation, updates the rx statistics and passes the skb to the
 * tunnel's gro_cells.  @tun_dst, when given, becomes the skb's dst.
 *
 * Always returns 0.  On the drop path the reference on @tun_dst is
 * released and the skb is freed.
 */
ip_tunnel_rcv(struct ip_tunnel * tunnel,struct sk_buff * skb,const struct tnl_ptk_info * tpi,struct metadata_dst * tun_dst,bool log_ecn_error)376 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
377 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
378 		  bool log_ecn_error)
379 {
380 	const struct iphdr *iph = ip_hdr(skb);
381 	int nh, err;
382 
383 #ifdef CONFIG_NET_IPGRE_BROADCAST
384 	if (ipv4_is_multicast(iph->daddr)) {
385 		DEV_STATS_INC(tunnel->dev, multicast);
386 		skb->pkt_type = PACKET_BROADCAST;
387 	}
388 #endif
389 
	/* TUNNEL_CSUM has to be set on both the packet and the tunnel,
	 * or on neither of them.
	 */
390 	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
391 	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
392 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
393 		DEV_STATS_INC(tunnel->dev, rx_errors);
394 		goto drop;
395 	}
396 
	/* A sequencing tunnel drops packets without a sequence number and,
	 * once i_seqno is non-zero, packets whose number lies behind it
	 * (signed 32-bit difference).
	 */
397 	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
398 		if (!(tpi->flags&TUNNEL_SEQ) ||
399 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
400 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
401 			DEV_STATS_INC(tunnel->dev, rx_errors);
402 			goto drop;
403 		}
404 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
405 	}
406 
407 	/* Save offset of outer header relative to skb->head,
408 	 * because we are going to reset the network header to the inner header
409 	 * and might change skb->head.
410 	 */
411 	nh = skb_network_header(skb) - skb->head;
412 
	/* For an Ethernet-type tunnel the inner network header sits behind
	 * the inner Ethernet header.
	 */
413 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
414 
415 	if (!pskb_inet_may_pull(skb)) {
416 		DEV_STATS_INC(tunnel->dev, rx_length_errors);
417 		DEV_STATS_INC(tunnel->dev, rx_errors);
418 		goto drop;
419 	}
	/* Re-derive the outer header pointer from the saved offset. */
420 	iph = (struct iphdr *)(skb->head + nh);
421 
	/* Any non-zero result may be logged; only results above 1 drop. */
422 	err = IP_ECN_decapsulate(iph, skb);
423 	if (unlikely(err)) {
424 		if (log_ecn_error)
425 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
426 					&iph->saddr, iph->tos);
427 		if (err > 1) {
428 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
429 			DEV_STATS_INC(tunnel->dev, rx_errors);
430 			goto drop;
431 		}
432 	}
433 
434 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
435 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
436 
437 	if (tunnel->dev->type == ARPHRD_ETHER) {
438 		skb->protocol = eth_type_trans(skb, tunnel->dev);
439 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
440 	} else {
441 		skb->dev = tunnel->dev;
442 	}
443 
444 	if (tun_dst)
445 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
446 
447 	gro_cells_receive(&tunnel->gro_cells, skb);
448 	return 0;
449 
450 drop:
451 	if (tun_dst)
452 		dst_release((struct dst_entry *)tun_dst);
453 	kfree_skb(skb);
454 	return 0;
455 }
456 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
457 
ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)458 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
459 			    unsigned int num)
460 {
461 	if (num >= MAX_IPTUN_ENCAP_OPS)
462 		return -ERANGE;
463 
464 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
465 			&iptun_encaps[num],
466 			NULL, ops) ? 0 : -1;
467 }
468 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
469 
ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)470 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
471 			    unsigned int num)
472 {
473 	int ret;
474 
475 	if (num >= MAX_IPTUN_ENCAP_OPS)
476 		return -ERANGE;
477 
478 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
479 		       &iptun_encaps[num],
480 		       ops, NULL) == ops) ? 0 : -1;
481 
482 	synchronize_net();
483 
484 	return ret;
485 }
486 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
487 
ip_tunnel_encap_setup(struct ip_tunnel * t,struct ip_tunnel_encap * ipencap)488 int ip_tunnel_encap_setup(struct ip_tunnel *t,
489 			  struct ip_tunnel_encap *ipencap)
490 {
491 	int hlen;
492 
493 	memset(&t->encap, 0, sizeof(t->encap));
494 
495 	hlen = ip_encap_hlen(ipencap);
496 	if (hlen < 0)
497 		return hlen;
498 
499 	t->encap.type = ipencap->type;
500 	t->encap.sport = ipencap->sport;
501 	t->encap.dport = ipencap->dport;
502 	t->encap.flags = ipencap->flags;
503 
504 	t->encap_hlen = hlen;
505 	t->hlen = t->encap_hlen + t->tun_hlen;
506 
507 	return 0;
508 }
509 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
510 
/* Work out the MTU available to the inner packet of @skb when it is sent
 * over route @rt, update the path MTU of the skb's own dst when that dst
 * is valid, and signal "too big" to the sender if the packet does not fit.
 *
 * @df:          outer DF setting; non-zero selects the route-based MTU
 * @inner_iph:   inner IPv4 header (consulted for IP_DF on IPv4 payloads)
 * @tunnel_hlen: tunnel header length, honoured only when @md is true
 * @dst:         outer destination, used only when @md is true
 * @md:          true when called from the metadata-based transmit path
 *
 * Returns 0, or -E2BIG after an ICMP / ICMPv6 error has been sent.
 */
tnl_update_pmtu(struct net_device * dev,struct sk_buff * skb,struct rtable * rt,__be16 df,const struct iphdr * inner_iph,int tunnel_hlen,__be32 dst,bool md)511 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
512 			    struct rtable *rt, __be16 df,
513 			    const struct iphdr *inner_iph,
514 			    int tunnel_hlen, __be32 dst, bool md)
515 {
516 	struct ip_tunnel *tunnel = netdev_priv(dev);
517 	int pkt_size;
518 	int mtu;
519 
	/* Outside metadata mode the tunnel's own header length is used. */
520 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
521 	pkt_size = skb->len - tunnel_hlen;
522 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
523 
	/* With DF the MTU is the route MTU minus the outer IP header, the
	 * tunnel header and, for Ethernet-type tunnels, the link-layer
	 * header.  Without DF it is the MTU of the skb's dst or of @dev.
	 */
524 	if (df) {
525 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
526 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
527 	} else {
528 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
529 	}
530 
531 	if (skb_valid_dst(skb))
532 		skb_dst_update_pmtu_no_confirm(skb, mtu);
533 
	/* IPv4 payload: only non-GSO packets with inner DF set are refused. */
534 	if (skb->protocol == htons(ETH_P_IP)) {
535 		if (!skb_is_gso(skb) &&
536 		    (inner_iph->frag_off & htons(IP_DF)) &&
537 		    mtu < pkt_size) {
538 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
539 			return -E2BIG;
540 		}
541 	}
542 #if IS_ENABLED(CONFIG_IPV6)
543 	else if (skb->protocol == htons(ETH_P_IPV6)) {
544 		struct rt6_info *rt6;
545 		__be32 daddr;
546 
547 		rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
548 					   NULL;
549 		daddr = md ? dst : tunnel->parms.iph.daddr;
550 
		/* Lower the MTU metric of the IPv6 route when the tunnel has
		 * a unicast destination or the route is a /128 one.
		 */
551 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
552 			   mtu >= IPV6_MIN_MTU) {
553 			if ((daddr && !ipv4_is_multicast(daddr)) ||
554 			    rt6->rt6i_dst.plen == 128) {
555 				rt6->rt6i_flags |= RTF_MODIFIED;
556 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
557 			}
558 		}
559 
560 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
561 					mtu < pkt_size) {
562 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
563 			return -E2BIG;
564 		}
565 	}
566 #endif
567 	return 0;
568 }
569 
ip_tunnel_adj_headroom(struct net_device * dev,unsigned int headroom)570 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
571 {
572 	/* we must cap headroom to some upperlimit, else pskb_expand_head
573 	 * will overflow header offsets in skb_headers_offset_update().
574 	 */
575 	static const unsigned int max_allowed = 512;
576 
577 	if (headroom > max_allowed)
578 		headroom = max_allowed;
579 
580 	if (headroom > READ_ONCE(dev->needed_headroom))
581 		WRITE_ONCE(dev->needed_headroom, headroom);
582 }
583 
/* Transmit @skb through @dev using the tunnel metadata attached to the
 * skb (skb_tunnel_info()) instead of per-device tunnel parameters.
 *
 * @proto:       outer IP protocol
 * @tunnel_hlen: tunnel header length; when 0 it is taken from the
 *               metadata's encap settings via ip_encap_hlen()
 *
 * On failure the skb is freed and tx_errors (or tx_dropped when headroom
 * cannot be obtained) is incremented.
 */
ip_md_tunnel_xmit(struct sk_buff * skb,struct net_device * dev,u8 proto,int tunnel_hlen)584 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
585 		       u8 proto, int tunnel_hlen)
586 {
587 	struct ip_tunnel *tunnel = netdev_priv(dev);
588 	u32 headroom = sizeof(struct iphdr);
589 	struct ip_tunnel_info *tun_info;
590 	const struct ip_tunnel_key *key;
591 	const struct iphdr *inner_iph;
592 	struct rtable *rt = NULL;
593 	struct flowi4 fl4;
594 	__be16 df = 0;
595 	u8 tos, ttl;
596 	bool use_cache;
597 
	/* Only IPv4 transmit metadata is acceptable here. */
598 	tun_info = skb_tunnel_info(skb);
599 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
600 		     ip_tunnel_info_af(tun_info) != AF_INET))
601 		goto tx_error;
602 	key = &tun_info->key;
603 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
604 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* A key tos of exactly 1 means: take the value from the inner header. */
605 	tos = key->tos;
606 	if (tos == 1) {
607 		if (skb->protocol == htons(ETH_P_IP))
608 			tos = inner_iph->tos;
609 		else if (skb->protocol == htons(ETH_P_IPV6))
610 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
611 	}
612 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
613 			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
614 			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
615 			    key->flow_flags);
616 
617 	if (!tunnel_hlen)
618 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
619 
620 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
621 		goto tx_error;
622 
	/* Try the metadata's dst cache before doing a route lookup; a
	 * freshly looked-up route is stored back when the cache is usable.
	 */
623 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
624 	if (use_cache)
625 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
626 	if (!rt) {
627 		rt = ip_route_output_key(tunnel->net, &fl4);
628 		if (IS_ERR(rt)) {
629 			DEV_STATS_INC(dev, tx_carrier_errors);
630 			goto tx_error;
631 		}
632 		if (use_cache)
633 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
634 					  fl4.saddr);
635 	}
	/* The route leads back into this very device: refuse to send. */
636 	if (rt->dst.dev == dev) {
637 		ip_rt_put(rt);
638 		DEV_STATS_INC(dev, collisions);
639 		goto tx_error;
640 	}
641 
642 	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
643 		df = htons(IP_DF);
644 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
645 			    key->u.ipv4.dst, true)) {
646 		ip_rt_put(rt);
647 		goto tx_error;
648 	}
649 
	/* A key ttl of 0 means: inherit from the inner header, or use the
	 * route's hop limit for payloads that are neither IPv4 nor IPv6.
	 */
650 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
651 	ttl = key->ttl;
652 	if (ttl == 0) {
653 		if (skb->protocol == htons(ETH_P_IP))
654 			ttl = inner_iph->ttl;
655 		else if (skb->protocol == htons(ETH_P_IPV6))
656 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
657 		else
658 			ttl = ip4_dst_hoplimit(&rt->dst);
659 	}
660 
661 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
662 	if (skb_cow_head(skb, headroom)) {
663 		ip_rt_put(rt);
664 		goto tx_dropped;
665 	}
666 
667 	ip_tunnel_adj_headroom(dev, headroom);
668 
669 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
670 		      df, !net_eq(tunnel->net, dev_net(dev)));
671 	return;
672 tx_error:
673 	DEV_STATS_INC(dev, tx_errors);
674 	goto kfree;
675 tx_dropped:
676 	DEV_STATS_INC(dev, tx_dropped);
677 kfree:
678 	kfree_skb(skb);
679 }
680 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
681 
/* Transmit @skb through tunnel device @dev.
 *
 * @tnl_params: template for the outer IPv4 header (daddr, saddr, tos,
 *              ttl and frag_off are read from it)
 * @protocol:   outer IP protocol
 *
 * When the template has no destination, one is derived per packet (see
 * the NBMA branch).  On failure the skb is freed and tx_errors, or
 * tx_dropped when headroom cannot be obtained, is incremented.
 */
ip_tunnel_xmit(struct sk_buff * skb,struct net_device * dev,const struct iphdr * tnl_params,u8 protocol)682 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
683 		    const struct iphdr *tnl_params, u8 protocol)
684 {
685 	struct ip_tunnel *tunnel = netdev_priv(dev);
686 	struct ip_tunnel_info *tun_info = NULL;
687 	const struct iphdr *inner_iph;
688 	unsigned int max_headroom;	/* The extra header space needed */
689 	struct rtable *rt = NULL;		/* Route to the other host */
690 	__be16 payload_protocol;
691 	bool use_cache = false;
692 	struct flowi4 fl4;
693 	bool md = false;
694 	bool connected;
695 	u8 tos, ttl;
696 	__be32 dst;
697 	__be16 df;
698 
699 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" gates the use of the dst caches further down. */
700 	connected = (tunnel->parms.iph.daddr != 0);
701 	payload_protocol = skb_protocol(skb, true);
702 
703 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
704 
705 	dst = tnl_params->daddr;
706 	if (dst == 0) {
707 		/* NBMA tunnel */
708 
709 		if (!skb_dst(skb)) {
710 			DEV_STATS_INC(dev, tx_fifo_errors);
711 			goto tx_error;
712 		}
713 
		/* Destination sources, in order: IPv4 tunnel metadata on the
		 * skb, the nexthop of the skb's IPv4 route, or the last 32
		 * bits of an IPv4-compatible IPv6 neighbour address.
		 */
714 		tun_info = skb_tunnel_info(skb);
715 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
716 		    ip_tunnel_info_af(tun_info) == AF_INET &&
717 		    tun_info->key.u.ipv4.dst) {
718 			dst = tun_info->key.u.ipv4.dst;
719 			md = true;
720 			connected = true;
721 		} else if (payload_protocol == htons(ETH_P_IP)) {
722 			rt = skb_rtable(skb);
723 			dst = rt_nexthop(rt, inner_iph->daddr);
724 		}
725 #if IS_ENABLED(CONFIG_IPV6)
726 		else if (payload_protocol == htons(ETH_P_IPV6)) {
727 			const struct in6_addr *addr6;
728 			struct neighbour *neigh;
729 			bool do_tx_error_icmp;
730 			int addr_type;
731 
732 			neigh = dst_neigh_lookup(skb_dst(skb),
733 						 &ipv6_hdr(skb)->daddr);
734 			if (!neigh)
735 				goto tx_error;
736 
737 			addr6 = (const struct in6_addr *)&neigh->primary_key;
738 			addr_type = ipv6_addr_type(addr6);
739 
			/* Unspecified neighbour key: use the packet's own
			 * destination address instead.
			 */
740 			if (addr_type == IPV6_ADDR_ANY) {
741 				addr6 = &ipv6_hdr(skb)->daddr;
742 				addr_type = ipv6_addr_type(addr6);
743 			}
744 
			/* The decision is latched so the neighbour reference
			 * can be released before jumping to the error path.
			 */
745 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
746 				do_tx_error_icmp = true;
747 			else {
748 				do_tx_error_icmp = false;
749 				dst = addr6->s6_addr32[3];
750 			}
751 			neigh_release(neigh);
752 			if (do_tx_error_icmp)
753 				goto tx_error_icmp;
754 		}
755 #endif
756 		else
757 			goto tx_error;
758 
759 		if (!md)
760 			connected = false;
761 	}
762 
	/* Low tos bit set in the template: take the value from the inner
	 * header.  Doing so also clears "connected".
	 */
763 	tos = tnl_params->tos;
764 	if (tos & 0x1) {
765 		tos &= ~0x1;
766 		if (payload_protocol == htons(ETH_P_IP)) {
767 			tos = inner_iph->tos;
768 			connected = false;
769 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
770 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
771 			connected = false;
772 		}
773 	}
774 
775 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
776 			    tunnel->parms.o_key, RT_TOS(tos),
777 			    dev_net(dev), tunnel->parms.link,
778 			    tunnel->fwmark, skb_get_hash(skb), 0);
779 
780 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
781 		goto tx_error;
782 
	/* Cached route: from the metadata's cache in md mode, from the
	 * tunnel's cache when connected, otherwise none.
	 */
783 	if (connected && md) {
784 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
785 		if (use_cache)
786 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
787 					       &fl4.saddr);
788 	} else {
789 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
790 						&fl4.saddr) : NULL;
791 	}
792 
793 	if (!rt) {
794 		rt = ip_route_output_key(tunnel->net, &fl4);
795 
796 		if (IS_ERR(rt)) {
797 			DEV_STATS_INC(dev, tx_carrier_errors);
798 			goto tx_error;
799 		}
800 		if (use_cache)
801 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
802 					  fl4.saddr);
803 		else if (!md && connected)
804 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
805 					  fl4.saddr);
806 	}
807 
	/* The route leads back into this very device: refuse to send. */
808 	if (rt->dst.dev == dev) {
809 		ip_rt_put(rt);
810 		DEV_STATS_INC(dev, collisions);
811 		goto tx_error;
812 	}
813 
	/* Outer DF: template value, plus the inner IPv4 DF bit unless the
	 * tunnel is set to ignore it.
	 */
814 	df = tnl_params->frag_off;
815 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
816 		df |= (inner_iph->frag_off & htons(IP_DF));
817 
818 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
819 		ip_rt_put(rt);
820 		goto tx_error;
821 	}
822 
	/* While err_count is pending and err_time is less than
	 * IPTUNNEL_ERR_TIMEO old, report a link failure for this skb and
	 * use up one count; an expired counter is reset.
	 */
823 	if (tunnel->err_count > 0) {
824 		if (time_before(jiffies,
825 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
826 			tunnel->err_count--;
827 
828 			dst_link_failure(skb);
829 		} else
830 			tunnel->err_count = 0;
831 	}
832 
	/* A template ttl of 0 means: inherit from the inner header, or use
	 * the route's hop limit for other payloads.
	 */
833 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
834 	ttl = tnl_params->ttl;
835 	if (ttl == 0) {
836 		if (payload_protocol == htons(ETH_P_IP))
837 			ttl = inner_iph->ttl;
838 #if IS_ENABLED(CONFIG_IPV6)
839 		else if (payload_protocol == htons(ETH_P_IPV6))
840 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
841 #endif
842 		else
843 			ttl = ip4_dst_hoplimit(&rt->dst);
844 	}
845 
846 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
847 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
848 
849 	if (skb_cow_head(skb, max_headroom)) {
850 		ip_rt_put(rt);
851 		DEV_STATS_INC(dev, tx_dropped);
852 		kfree_skb(skb);
853 		return;
854 	}
855 
856 	ip_tunnel_adj_headroom(dev, max_headroom);
857 
858 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
859 		      df, !net_eq(tunnel->net, dev_net(dev)));
860 	return;
861 
862 #if IS_ENABLED(CONFIG_IPV6)
863 tx_error_icmp:
864 	dst_link_failure(skb);
865 #endif
866 tx_error:
867 	DEV_STATS_INC(dev, tx_errors);
868 	kfree_skb(skb);
869 }
870 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
871 
/* Apply the parameters in @p to the existing tunnel @t / device @dev.
 *
 * The tunnel is taken out of the hash table while addresses and keys are
 * rewritten and re-added afterwards.  When the link or @fwmark changes the
 * lower device is re-evaluated and, if @set_mtu, the resulting MTU is
 * stored in dev->mtu.  Finally the dst cache is reset and a state change
 * is announced for @dev.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	const struct iphdr *new_iph = &p->iph;
	bool rebind;

	/* daddr and i_key feed the bucket hash, so unhash before editing */
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = new_iph->saddr;
	t->parms.iph.daddr = new_iph->daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = new_iph->ttl;
	t->parms.iph.tos = new_iph->tos;
	t->parms.iph.frag_off = new_iph->frag_off;

	rebind = t->parms.link != p->link || t->fwmark != fwmark;
	if (rebind) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}

	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
906 
/* Handle the tunnel ioctls SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL
 * and SIOCDELTUNNEL for @dev, with @p holding the tunnel parameters.
 * For SIOCGETTUNNEL @p is overwritten with the selected tunnel's
 * parameters.
 *
 * Returns 0 on success or a negative errno (-EPERM, -EEXIST, -EINVAL,
 * -ENOENT, or an error from tunnel creation).
 */
ip_tunnel_ctl(struct net_device * dev,struct ip_tunnel_parm * p,int cmd)907 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
908 {
909 	int err = 0;
910 	struct ip_tunnel *t = netdev_priv(dev);
911 	struct net *net = t->net;
912 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
913 
914 	switch (cmd) {
915 	case SIOCGETTUNNEL:
		/* On the fallback device, report the tunnel described by @p
		 * if one exists; otherwise report the device's own tunnel.
		 */
916 		if (dev == itn->fb_tunnel_dev) {
917 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918 			if (!t)
919 				t = netdev_priv(dev);
920 		}
921 		memcpy(p, &t->parms, sizeof(*p));
922 		break;
923 
924 	case SIOCADDTUNNEL:
925 	case SIOCCHGTUNNEL:
926 		err = -EPERM;
927 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
928 			goto done;
		/* A fixed ttl forces DF on the outer header. */
929 		if (p->iph.ttl)
930 			p->iph.frag_off |= htons(IP_DF);
		/* Except for VTI, keys not flagged with TUNNEL_KEY are
		 * zeroed.
		 */
931 		if (!(p->i_flags & VTI_ISVTI)) {
932 			if (!(p->i_flags & TUNNEL_KEY))
933 				p->i_key = 0;
934 			if (!(p->o_flags & TUNNEL_KEY))
935 				p->o_key = 0;
936 		}
937 
938 		t = ip_tunnel_find(itn, p, itn->type);
939 
940 		if (cmd == SIOCADDTUNNEL) {
941 			if (!t) {
942 				t = ip_tunnel_create(net, itn, p);
943 				err = PTR_ERR_OR_ZERO(t);
944 				break;
945 			}
946 
947 			err = -EEXIST;
948 			break;
949 		}
		/* Change request on a regular tunnel device: the new
		 * parameters must not describe a different existing tunnel,
		 * and the POINTOPOINT/BROADCAST flags implied by the new
		 * destination must equal those already set on the device.
		 */
950 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
951 			if (t) {
952 				if (t->dev != dev) {
953 					err = -EEXIST;
954 					break;
955 				}
956 			} else {
957 				unsigned int nflags = 0;
958 
959 				if (ipv4_is_multicast(p->iph.daddr))
960 					nflags = IFF_BROADCAST;
961 				else if (p->iph.daddr)
962 					nflags = IFF_POINTOPOINT;
963 
964 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
965 					err = -EINVAL;
966 					break;
967 				}
968 
969 				t = netdev_priv(dev);
970 			}
971 		}
972 
		/* t is still NULL here only for a change issued on the
		 * fallback device that matched no tunnel.
		 */
973 		if (t) {
974 			err = 0;
975 			ip_tunnel_update(itn, t, dev, p, true, 0);
976 		} else {
977 			err = -ENOENT;
978 		}
979 		break;
980 
981 	case SIOCDELTUNNEL:
982 		err = -EPERM;
983 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
984 			goto done;
985 
		/* Issued on the fallback device: delete the tunnel described
		 * by @p, but never the fallback tunnel itself.
		 */
986 		if (dev == itn->fb_tunnel_dev) {
987 			err = -ENOENT;
988 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
989 			if (!t)
990 				goto done;
991 			err = -EPERM;
992 			if (t == netdev_priv(itn->fb_tunnel_dev))
993 				goto done;
994 			dev = t->dev;
995 		}
996 		unregister_netdevice(dev);
997 		err = 0;
998 		break;
999 
1000 	default:
1001 		err = -EINVAL;
1002 	}
1003 
1004 done:
1005 	return err;
1006 }
1007 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
1008 
/* Private-ioctl entry point: copy a struct ip_tunnel_parm in from user
 * space, pass it to the device's ndo_tunnel_ctl() and, when that returns
 * 0, copy the structure back out.
 *
 * Returns the ndo_tunnel_ctl() result, or -EFAULT when either user copy
 * fails.
 */
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm parm;
	int rc;

	if (copy_from_user(&parm, data, sizeof(parm)))
		return -EFAULT;

	rc = dev->netdev_ops->ndo_tunnel_ctl(dev, &parm, cmd);
	if (rc)
		return rc;

	if (copy_to_user(data, &parm, sizeof(parm)))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1023 
/* Set the MTU of tunnel device @dev to @new_mtu.
 *
 * The upper bound is IP_MAX_MTU minus the tunnel and outer IP header and,
 * for Ethernet-type tunnels, the device's link-layer header.  A value
 * above the bound is rejected when @strict, otherwise lowered to the
 * bound.  Returns 0, or -EINVAL for a value below ETH_MIN_MTU or a strict
 * violation of the bound.
 */
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int limit = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		limit -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (strict && new_mtu > limit)
		return -EINVAL;

	dev->mtu = new_mtu > limit ? limit : new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1047 
ip_tunnel_change_mtu(struct net_device * dev,int new_mtu)1048 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1049 {
1050 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1051 }
1052 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1053 
ip_tunnel_dev_free(struct net_device * dev)1054 static void ip_tunnel_dev_free(struct net_device *dev)
1055 {
1056 	struct ip_tunnel *tunnel = netdev_priv(dev);
1057 
1058 	gro_cells_destroy(&tunnel->gro_cells);
1059 	dst_cache_destroy(&tunnel->dst_cache);
1060 	free_percpu(dev->tstats);
1061 }
1062 
ip_tunnel_dellink(struct net_device * dev,struct list_head * head)1063 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1064 {
1065 	struct ip_tunnel *tunnel = netdev_priv(dev);
1066 	struct ip_tunnel_net *itn;
1067 
1068 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1069 
1070 	if (itn->fb_tunnel_dev != dev) {
1071 		ip_tunnel_del(itn, netdev_priv(dev));
1072 		unregister_netdevice_queue(dev, head);
1073 	}
1074 }
1075 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1076 
ip_tunnel_get_link_net(const struct net_device * dev)1077 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1078 {
1079 	struct ip_tunnel *tunnel = netdev_priv(dev);
1080 
1081 	return tunnel->net;
1082 }
1083 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1084 
ip_tunnel_get_iflink(const struct net_device * dev)1085 int ip_tunnel_get_iflink(const struct net_device *dev)
1086 {
1087 	struct ip_tunnel *tunnel = netdev_priv(dev);
1088 
1089 	return tunnel->parms.link;
1090 }
1091 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1092 
ip_tunnel_init_net(struct net * net,unsigned int ip_tnl_net_id,struct rtnl_link_ops * ops,char * devname)1093 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1094 				  struct rtnl_link_ops *ops, char *devname)
1095 {
1096 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1097 	struct ip_tunnel_parm parms;
1098 	unsigned int i;
1099 
1100 	itn->rtnl_link_ops = ops;
1101 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1102 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1103 
1104 	if (!ops || !net_has_fallback_tunnels(net)) {
1105 		struct ip_tunnel_net *it_init_net;
1106 
1107 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1108 		itn->type = it_init_net->type;
1109 		itn->fb_tunnel_dev = NULL;
1110 		return 0;
1111 	}
1112 
1113 	memset(&parms, 0, sizeof(parms));
1114 	if (devname)
1115 		strscpy(parms.name, devname, IFNAMSIZ);
1116 
1117 	rtnl_lock();
1118 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1119 	/* FB netdevice is special: we have one, and only one per netns.
1120 	 * Allowing to move it to another netns is clearly unsafe.
1121 	 */
1122 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1123 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1124 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1125 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1126 		itn->type = itn->fb_tunnel_dev->type;
1127 	}
1128 	rtnl_unlock();
1129 
1130 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1131 }
1132 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1133 
ip_tunnel_destroy(struct net * net,struct ip_tunnel_net * itn,struct list_head * head,struct rtnl_link_ops * ops)1134 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1135 			      struct list_head *head,
1136 			      struct rtnl_link_ops *ops)
1137 {
1138 	struct net_device *dev, *aux;
1139 	int h;
1140 
1141 	for_each_netdev_safe(net, dev, aux)
1142 		if (dev->rtnl_link_ops == ops)
1143 			unregister_netdevice_queue(dev, head);
1144 
1145 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1146 		struct ip_tunnel *t;
1147 		struct hlist_node *n;
1148 		struct hlist_head *thead = &itn->tunnels[h];
1149 
1150 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1151 			/* If dev is in the same netns, it has already
1152 			 * been added to the list by the previous loop.
1153 			 */
1154 			if (!net_eq(dev_net(t->dev), net))
1155 				unregister_netdevice_queue(t->dev, head);
1156 	}
1157 }
1158 
ip_tunnel_delete_nets(struct list_head * net_list,unsigned int id,struct rtnl_link_ops * ops)1159 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1160 			   struct rtnl_link_ops *ops)
1161 {
1162 	struct ip_tunnel_net *itn;
1163 	struct net *net;
1164 	LIST_HEAD(list);
1165 
1166 	rtnl_lock();
1167 	list_for_each_entry(net, net_list, exit_list) {
1168 		itn = net_generic(net, id);
1169 		ip_tunnel_destroy(net, itn, &list, ops);
1170 	}
1171 	unregister_netdevice_many(&list);
1172 	rtnl_unlock();
1173 }
1174 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1175 
/* Create a new tunnel device from netlink parameters.
 *
 * Fails with -EEXIST if a conflicting tunnel is already present: for a
 * collect_md tunnel that means the netns already has a collect_md tunnel,
 * otherwise that ip_tunnel_find() matches @p for this device type.
 *
 * On success the device is registered, its MTU is set and the tunnel is
 * added to the per-netns table.  If setting the MTU fails the device is
 * unregistered again.
 *
 * Returns 0 on success or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn = net_generic(net, nt->ip_tnl_net_id);
	bool taken;
	int mtu;
	int err;

	if (nt->collect_md)
		taken = rtnl_dereference(itn->collect_md_tun) != NULL;
	else
		taken = ip_tunnel_find(itn, p, dev->type) != NULL;
	if (taken)
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;

	err = register_netdevice(dev);
	if (err)
		return err;

	/* Ethernet-type tunnels get a random address unless one was given. */
	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		/* An explicitly requested MTU overrides the bound value but
		 * is clamped to what the encapsulation allows.
		 */
		unsigned int ceiling = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			ceiling -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, ceiling);
	}

	err = dev_set_mtu(dev, mtu);
	if (err) {
		unregister_netdevice(dev);
		return err;
	}

	ip_tunnel_add(itn, nt);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1229 
/* Apply new netlink parameters to an existing tunnel device.
 *
 * Returns -EINVAL for the per-netns fallback device, -EEXIST if @p
 * matches a tunnel that belongs to a different device, and -EINVAL if,
 * for a non-Ethernet device with no matching tunnel, the new destination
 * address would require different IFF_POINTOPOINT / IFF_BROADCAST flags
 * than the device currently has.  Otherwise the tunnel is updated and 0
 * is returned.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(tunnel->net,
						tunnel->ip_tnl_net_id);
	struct ip_tunnel *target;

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	target = ip_tunnel_find(itn, p, dev->type);
	if (target) {
		if (target->dev != dev)
			return -EEXIST;
	} else if (dev->type != ARPHRD_ETHER) {
		unsigned int wanted = 0;

		if (ipv4_is_multicast(p->iph.daddr))
			wanted = IFF_BROADCAST;
		else if (p->iph.daddr)
			wanted = IFF_POINTOPOINT;

		if ((dev->flags ^ wanted) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;
	}

	/* No existing match: the update applies to this device's tunnel. */
	if (!target)
		target = tunnel;

	ip_tunnel_update(itn, target, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1267 
ip_tunnel_init(struct net_device * dev)1268 int ip_tunnel_init(struct net_device *dev)
1269 {
1270 	struct ip_tunnel *tunnel = netdev_priv(dev);
1271 	struct iphdr *iph = &tunnel->parms.iph;
1272 	int err;
1273 
1274 	dev->needs_free_netdev = true;
1275 	dev->priv_destructor = ip_tunnel_dev_free;
1276 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1277 	if (!dev->tstats)
1278 		return -ENOMEM;
1279 
1280 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1281 	if (err) {
1282 		free_percpu(dev->tstats);
1283 		return err;
1284 	}
1285 
1286 	err = gro_cells_init(&tunnel->gro_cells, dev);
1287 	if (err) {
1288 		dst_cache_destroy(&tunnel->dst_cache);
1289 		free_percpu(dev->tstats);
1290 		return err;
1291 	}
1292 
1293 	tunnel->dev = dev;
1294 	tunnel->net = dev_net(dev);
1295 	strcpy(tunnel->parms.name, dev->name);
1296 	iph->version		= 4;
1297 	iph->ihl		= 5;
1298 
1299 	if (tunnel->collect_md)
1300 		netif_keep_dst(dev);
1301 	netdev_lockdep_set_classes(dev);
1302 	return 0;
1303 }
1304 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1305 
ip_tunnel_uninit(struct net_device * dev)1306 void ip_tunnel_uninit(struct net_device *dev)
1307 {
1308 	struct ip_tunnel *tunnel = netdev_priv(dev);
1309 	struct net *net = tunnel->net;
1310 	struct ip_tunnel_net *itn;
1311 
1312 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1313 	ip_tunnel_del(itn, netdev_priv(dev));
1314 	if (itn->fb_tunnel_dev == dev)
1315 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1316 
1317 	dst_cache_reset(&tunnel->dst_cache);
1318 }
1319 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1320 
1321 /* Do least required initialization, rest of init is done in tunnel_init call */
ip_tunnel_setup(struct net_device * dev,unsigned int net_id)1322 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1323 {
1324 	struct ip_tunnel *tunnel = netdev_priv(dev);
1325 	tunnel->ip_tnl_net_id = net_id;
1326 }
1327 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1328 
1329 MODULE_LICENSE("GPL");
1330