xref: /openbmc/linux/net/ipv4/ip_gre.c (revision 4d75f5c664195b970e1cd2fd25b65b5eff257a0a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux NET3:	GRE over IP protocol decoder.
4  *
5  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47 
48 /*
49    Problems & solutions
50    --------------------
51 
52    1. The most important issue is detecting local dead loops.
53    They would cause a complete host lockup in transmit, which
54    would be "resolved" by stack overflow or, if queueing is enabled,
55    by infinite looping in net_bh.
56 
57    We cannot track such dead loops during route installation;
58    it is an infeasible task. The most general solution would be
59    to keep an skb->encapsulation counter (a sort of local ttl)
60    and silently drop the packet when it expires. It is a good
61    solution, but it requires maintaining a new variable in ALL
62    skbs, even if no tunneling is used.
63 
64    Current solution: xmit_recursion breaks dead loops. This is a percpu
65    counter, since when we enter the first ndo_xmit(), cpu migration is
66    forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
67 
68    2. Networking dead loops would not kill routers, but would really
69    kill the network. The IP hop limit plays the role of "t->recursion" here,
70    if we copy it from the packet being encapsulated to the outer header.
71    It is a very good solution, but it introduces two problems:
72 
73    - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
74      do not work over tunnels.
75    - traceroute does not work. I planned to relay ICMP from the tunnel,
76      so that this problem would be solved and traceroute output
77      would be even more informative. This idea appeared to be wrong:
78      only Linux complies with rfc1812 now (yes, guys, Linux is the only
79      true router now :-)), all routers (at least those in my neighbourhood)
80      return only 8 bytes of payload. It is the end.
81 
82    Hence, if we want OSPF to work or traceroute to say something reasonable,
83    we should search for another solution.
84 
85    One of them is to parse the packet, trying to detect inner encapsulation
86    made by our node. It is difficult or even impossible, especially
87    taking fragmentation into account. In short, ttl is not a solution at all.
88 
89    Current solution: The solution was UNEXPECTEDLY SIMPLE.
90    We force the DF flag on tunnels with a preconfigured hop limit,
91    that is ALL. :-) Well, it does not remove the problem completely,
92    but the exponential growth of network traffic is changed to linear
93    (branches that exceed the pmtu are pruned) and the tunnel mtu
94    rapidly degrades to a value <68, where looping stops.
95    Yes, it is not good if there is a router in the loop
96    which does not force DF, even when encapsulating packets have DF set.
97    But it is not our problem! Nobody can accuse us; we did
98    all that we could. Even if it was your gated that injected a
99    fatal route into the network, even if it was you who configured a
100    fatal static route: you are innocent. :-)
101 
102    Alexey Kuznetsov.
103  */
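/* A rough, illustrative sketch of the pmtu arithmetic behind the
 * "current solution" above, assuming a 1500-byte link mtu and the
 * minimal 24 bytes of overhead per nesting level (20-byte outer IP
 * header plus 4-byte base GRE header):
 *
 *	1500 -> 1476 -> 1452 -> ... -> <68	(~60 iterations)
 *
 * Once the effective tunnel mtu falls below 68, the minimum IPv4 mtu,
 * packets can no longer enter the loop and the looping stops.
 */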
104 
105 static bool log_ecn_error = true;
106 module_param(log_ecn_error, bool, 0644);
107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
108 
109 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
110 static const struct header_ops ipgre_header_ops;
111 
112 static int ipgre_tunnel_init(struct net_device *dev);
113 static void erspan_build_header(struct sk_buff *skb,
114 				u32 id, u32 index,
115 				bool truncate, bool is_ipv4);
116 
117 static unsigned int ipgre_net_id __read_mostly;
118 static unsigned int gre_tap_net_id __read_mostly;
119 static unsigned int erspan_net_id __read_mostly;
120 
121 static int ipgre_err(struct sk_buff *skb, u32 info,
122 		     const struct tnl_ptk_info *tpi)
123 {
124 
125 	/* All the routers (except for Linux) return only
126 	   8 bytes of packet payload. It means that precise relaying of
127 	   ICMP in the real Internet is absolutely infeasible.
128 
129 	   Moreover, Cisco "wise men" put the GRE key in the third word
130 	   of the GRE header. That makes it impossible to maintain even
131 	   soft state for keyed GRE tunnels with checksum enabled. Tell
132 	   them "thank you".
133 
134 	   Well, I wonder: rfc1812 was written by a Cisco employee,
135 	   so why the hell do these idiots break standards established
136 	   by themselves???
137 	   */
138 	struct net *net = dev_net(skb->dev);
139 	struct ip_tunnel_net *itn;
140 	const struct iphdr *iph;
141 	const int type = icmp_hdr(skb)->type;
142 	const int code = icmp_hdr(skb)->code;
143 	struct ip_tunnel *t;
144 
145 	if (tpi->proto == htons(ETH_P_TEB))
146 		itn = net_generic(net, gre_tap_net_id);
147 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
148 		 tpi->proto == htons(ETH_P_ERSPAN2))
149 		itn = net_generic(net, erspan_net_id);
150 	else
151 		itn = net_generic(net, ipgre_net_id);
152 
153 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
154 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
155 			     iph->daddr, iph->saddr, tpi->key);
156 
157 	if (!t)
158 		return -ENOENT;
159 
160 	switch (type) {
161 	default:
162 	case ICMP_PARAMETERPROB:
163 		return 0;
164 
165 	case ICMP_DEST_UNREACH:
166 		switch (code) {
167 		case ICMP_SR_FAILED:
168 		case ICMP_PORT_UNREACH:
169 			/* Impossible event. */
170 			return 0;
171 		default:
172 			/* All others are translated to HOST_UNREACH.
173 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
174 			   I believe they are just ether pollution. --ANK
175 			 */
176 			break;
177 		}
178 		break;
179 
180 	case ICMP_TIME_EXCEEDED:
181 		if (code != ICMP_EXC_TTL)
182 			return 0;
183 		break;
184 
185 	case ICMP_REDIRECT:
186 		break;
187 	}
188 
189 #if IS_ENABLED(CONFIG_IPV6)
190 	if (tpi->proto == htons(ETH_P_IPV6)) {
191 		unsigned int data_len = 0;
192 
193 		if (type == ICMP_TIME_EXCEEDED)
194 			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
195 
196 		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
197 						type, data_len))
198 			return 0;
199 	}
200 #endif
201 
202 	if (t->parms.iph.daddr == 0 ||
203 	    ipv4_is_multicast(t->parms.iph.daddr))
204 		return 0;
205 
206 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
207 		return 0;
208 
209 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
210 		t->err_count++;
211 	else
212 		t->err_count = 1;
213 	t->err_time = jiffies;
214 
215 	return 0;
216 }
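/* Note on the err_count/err_time bookkeeping above: ICMP errors arriving
 * within IPTUNNEL_ERR_TIMEO of the previous one (30 seconds, assuming the
 * default in include/net/ip_tunnels.h) accumulate in err_count; the
 * transmit path later consumes err_count to decide whether to signal
 * dst_link_failure() back to local senders.
 */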
217 
218 static void gre_err(struct sk_buff *skb, u32 info)
219 {
220 	/* All the routers (except for Linux) return only
221 	 * 8 bytes of packet payload. It means that precise relaying of
222 	 * ICMP in the real Internet is absolutely infeasible.
223 	 *
224 	 * Moreover, Cisco "wise men" put the GRE key in the third word
225 	 * of the GRE header. That makes it impossible to maintain even
226 	 * soft state for keyed GRE tunnels with checksum enabled.
227 	 * Tell them "thank you".
228 	 *
229 	 * Well, I wonder: rfc1812 was written by a Cisco employee,
230 	 * so why the hell do these idiots break standards established
231 	 * by themselves???
232 	 */
233 
234 	const struct iphdr *iph = (struct iphdr *)skb->data;
235 	const int type = icmp_hdr(skb)->type;
236 	const int code = icmp_hdr(skb)->code;
237 	struct tnl_ptk_info tpi;
238 
239 	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
240 			     iph->ihl * 4) < 0)
241 		return;
242 
243 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
244 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
245 				 skb->dev->ifindex, IPPROTO_GRE);
246 		return;
247 	}
248 	if (type == ICMP_REDIRECT) {
249 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
250 			      IPPROTO_GRE);
251 		return;
252 	}
253 
254 	ipgre_err(skb, info, &tpi);
255 }
256 
257 static bool is_erspan_type1(int gre_hdr_len)
258 {
259 	/* Both ERSPAN type I (version 0) and type II (version 1) use
260 	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
261 	 * while type II has an 8-byte one.
262 	 */
263 	return gre_hdr_len == 4;
264 }
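/* For reference, a sketch of the two on-the-wire GRE headers involved,
 * assuming the standard RFC 2784/2890 layout; type I uses the bare
 * 4-byte base header, while type II sets the S bit and appends a 4-byte
 * sequence number:
 *
 *	type I:  [ flags+ver (2) | proto 0x88BE (2) ]
 *	type II: [ flags+ver (2) | proto 0x88BE (2) | sequence (4) ]
 */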
265 
266 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
267 		      int gre_hdr_len)
268 {
269 	struct net *net = dev_net(skb->dev);
270 	struct metadata_dst *tun_dst = NULL;
271 	struct erspan_base_hdr *ershdr;
272 	struct ip_tunnel_net *itn;
273 	struct ip_tunnel *tunnel;
274 	const struct iphdr *iph;
275 	struct erspan_md2 *md2;
276 	int ver;
277 	int len;
278 
279 	itn = net_generic(net, erspan_net_id);
280 	iph = ip_hdr(skb);
281 	if (is_erspan_type1(gre_hdr_len)) {
282 		ver = 0;
283 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
284 					  tpi->flags | TUNNEL_NO_KEY,
285 					  iph->saddr, iph->daddr, 0);
286 	} else {
287 		if (unlikely(!pskb_may_pull(skb,
288 					    gre_hdr_len + sizeof(*ershdr))))
289 			return PACKET_REJECT;
290 
291 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
292 		ver = ershdr->ver;
293 		iph = ip_hdr(skb);
294 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
295 					  tpi->flags | TUNNEL_KEY,
296 					  iph->saddr, iph->daddr, tpi->key);
297 	}
298 
299 	if (tunnel) {
300 		if (is_erspan_type1(gre_hdr_len))
301 			len = gre_hdr_len;
302 		else
303 			len = gre_hdr_len + erspan_hdr_len(ver);
304 
305 		if (unlikely(!pskb_may_pull(skb, len)))
306 			return PACKET_REJECT;
307 
308 		if (__iptunnel_pull_header(skb,
309 					   len,
310 					   htons(ETH_P_TEB),
311 					   false, false) < 0)
312 			goto drop;
313 
314 		if (tunnel->collect_md) {
315 			struct erspan_metadata *pkt_md, *md;
316 			struct ip_tunnel_info *info;
317 			unsigned char *gh;
318 			__be64 tun_id;
319 			__be16 flags;
320 
321 			tpi->flags |= TUNNEL_KEY;
322 			flags = tpi->flags;
323 			tun_id = key32_to_tunnel_id(tpi->key);
324 
325 			tun_dst = ip_tun_rx_dst(skb, flags,
326 						tun_id, sizeof(*md));
327 			if (!tun_dst)
328 				return PACKET_REJECT;
329 
330 			/* skb can be uncloned in __iptunnel_pull_header(), so
331 			 * the old pkt_md is no longer valid and we need to
332 			 * reset it.
333 			 */
334 			gh = skb_network_header(skb) +
335 			     skb_network_header_len(skb);
336 			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
337 							    sizeof(*ershdr));
338 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
339 			md->version = ver;
340 			md2 = &md->u.md2;
341 			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
342 						       ERSPAN_V2_MDSIZE);
343 
344 			info = &tun_dst->u.tun_info;
345 			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
346 			info->options_len = sizeof(*md);
347 		}
348 
349 		skb_reset_mac_header(skb);
350 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
351 		return PACKET_RCVD;
352 	}
353 	return PACKET_REJECT;
354 
355 drop:
356 	kfree_skb(skb);
357 	return PACKET_RCVD;
358 }
359 
360 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
361 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
362 {
363 	struct metadata_dst *tun_dst = NULL;
364 	const struct iphdr *iph;
365 	struct ip_tunnel *tunnel;
366 
367 	iph = ip_hdr(skb);
368 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
369 				  iph->saddr, iph->daddr, tpi->key);
370 
371 	if (tunnel) {
372 		const struct iphdr *tnl_params;
373 
374 		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
375 					   raw_proto, false) < 0)
376 			goto drop;
377 
378 		/* Special case for ipgre_header_parse(), which expects the
379 		 * mac_header to point to the outer IP header.
380 		 */
381 		if (tunnel->dev->header_ops == &ipgre_header_ops)
382 			skb_pop_mac_header(skb);
383 		else
384 			skb_reset_mac_header(skb);
385 
386 		tnl_params = &tunnel->parms.iph;
387 		if (tunnel->collect_md || tnl_params->daddr == 0) {
388 			__be16 flags;
389 			__be64 tun_id;
390 
391 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
392 			tun_id = key32_to_tunnel_id(tpi->key);
393 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
394 			if (!tun_dst)
395 				return PACKET_REJECT;
396 		}
397 
398 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
399 		return PACKET_RCVD;
400 	}
401 	return PACKET_NEXT;
402 
403 drop:
404 	kfree_skb(skb);
405 	return PACKET_RCVD;
406 }
407 
408 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
409 		     int hdr_len)
410 {
411 	struct net *net = dev_net(skb->dev);
412 	struct ip_tunnel_net *itn;
413 	int res;
414 
415 	if (tpi->proto == htons(ETH_P_TEB))
416 		itn = net_generic(net, gre_tap_net_id);
417 	else
418 		itn = net_generic(net, ipgre_net_id);
419 
420 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
421 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
422 		/* ipgre tunnels in collect metadata mode should also
423 		 * receive ETH_P_TEB traffic.
424 		 */
425 		itn = net_generic(net, ipgre_net_id);
426 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
427 	}
428 	return res;
429 }
430 
431 static int gre_rcv(struct sk_buff *skb)
432 {
433 	struct tnl_ptk_info tpi;
434 	bool csum_err = false;
435 	int hdr_len;
436 
437 #ifdef CONFIG_NET_IPGRE_BROADCAST
438 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
439 		/* Looped back packet, drop it! */
440 		if (rt_is_output_route(skb_rtable(skb)))
441 			goto drop;
442 	}
443 #endif
444 
445 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
446 	if (hdr_len < 0)
447 		goto drop;
448 
449 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
450 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
451 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
452 			return 0;
453 		goto out;
454 	}
455 
456 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
457 		return 0;
458 
459 out:
460 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
461 drop:
462 	kfree_skb(skb);
463 	return 0;
464 }
465 
466 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
467 		       const struct iphdr *tnl_params,
468 		       __be16 proto)
469 {
470 	struct ip_tunnel *tunnel = netdev_priv(dev);
471 	__be16 flags = tunnel->parms.o_flags;
472 
473 	/* Push GRE header. */
474 	gre_build_header(skb, tunnel->tun_hlen,
475 			 flags, proto, tunnel->parms.o_key,
476 			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
477 
478 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
479 }
480 
481 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
482 {
483 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
484 }
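/* In other words, GSO packets are tagged SKB_GSO_GRE_CSUM when the tunnel
 * wants a GRE checksum and SKB_GSO_GRE otherwise, so the segmentation code
 * knows which GRE variant to replicate in each segment's headers.
 */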
485 
486 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
487 			__be16 proto)
488 {
489 	struct ip_tunnel *tunnel = netdev_priv(dev);
490 	struct ip_tunnel_info *tun_info;
491 	const struct ip_tunnel_key *key;
492 	int tunnel_hlen;
493 	__be16 flags;
494 
495 	tun_info = skb_tunnel_info(skb);
496 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
497 		     ip_tunnel_info_af(tun_info) != AF_INET))
498 		goto err_free_skb;
499 
500 	key = &tun_info->key;
501 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
502 
503 	if (skb_cow_head(skb, dev->needed_headroom))
504 		goto err_free_skb;
505 
506 	/* Push Tunnel header. */
507 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
508 		goto err_free_skb;
509 
510 	flags = tun_info->key.tun_flags &
511 		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
512 	gre_build_header(skb, tunnel_hlen, flags, proto,
513 			 tunnel_id_to_key32(tun_info->key.tun_id),
514 			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
515 
516 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
517 
518 	return;
519 
520 err_free_skb:
521 	kfree_skb(skb);
522 	DEV_STATS_INC(dev, tx_dropped);
523 }
524 
525 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
526 {
527 	struct ip_tunnel *tunnel = netdev_priv(dev);
528 	struct ip_tunnel_info *tun_info;
529 	const struct ip_tunnel_key *key;
530 	struct erspan_metadata *md;
531 	bool truncate = false;
532 	__be16 proto;
533 	int tunnel_hlen;
534 	int version;
535 	int nhoff;
536 
537 	tun_info = skb_tunnel_info(skb);
538 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
539 		     ip_tunnel_info_af(tun_info) != AF_INET))
540 		goto err_free_skb;
541 
542 	key = &tun_info->key;
543 	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
544 		goto err_free_skb;
545 	if (tun_info->options_len < sizeof(*md))
546 		goto err_free_skb;
547 	md = ip_tunnel_info_opts(tun_info);
548 
549 	/* ERSPAN has a fixed 8-byte GRE header */
550 	version = md->version;
551 	tunnel_hlen = 8 + erspan_hdr_len(version);
552 
553 	if (skb_cow_head(skb, dev->needed_headroom))
554 		goto err_free_skb;
555 
556 	if (gre_handle_offloads(skb, false))
557 		goto err_free_skb;
558 
559 	if (skb->len > dev->mtu + dev->hard_header_len) {
560 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
561 			goto err_free_skb;
562 		truncate = true;
563 	}
564 
565 	nhoff = skb_network_offset(skb);
566 	if (skb->protocol == htons(ETH_P_IP) &&
567 	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
568 		truncate = true;
569 
570 	if (skb->protocol == htons(ETH_P_IPV6)) {
571 		int thoff;
572 
573 		if (skb_transport_header_was_set(skb))
574 			thoff = skb_transport_offset(skb);
575 		else
576 			thoff = nhoff + sizeof(struct ipv6hdr);
577 		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
578 			truncate = true;
579 	}
580 
581 	if (version == 1) {
582 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
583 				    ntohl(md->u.index), truncate, true);
584 		proto = htons(ETH_P_ERSPAN);
585 	} else if (version == 2) {
586 		erspan_build_header_v2(skb,
587 				       ntohl(tunnel_id_to_key32(key->tun_id)),
588 				       md->u.md2.dir,
589 				       get_hwid(&md->u.md2),
590 				       truncate, true);
591 		proto = htons(ETH_P_ERSPAN2);
592 	} else {
593 		goto err_free_skb;
594 	}
595 
596 	gre_build_header(skb, 8, TUNNEL_SEQ,
597 			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
598 
599 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
600 
601 	return;
602 
603 err_free_skb:
604 	kfree_skb(skb);
605 	DEV_STATS_INC(dev, tx_dropped);
606 }
607 
608 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
609 {
610 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
611 	const struct ip_tunnel_key *key;
612 	struct rtable *rt;
613 	struct flowi4 fl4;
614 
615 	if (ip_tunnel_info_af(info) != AF_INET)
616 		return -EINVAL;
617 
618 	key = &info->key;
619 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
620 			    tunnel_id_to_key32(key->tun_id),
621 			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
622 			    skb->mark, skb_get_hash(skb), key->flow_flags);
623 	rt = ip_route_output_key(dev_net(dev), &fl4);
624 	if (IS_ERR(rt))
625 		return PTR_ERR(rt);
626 
627 	ip_rt_put(rt);
628 	info->key.u.ipv4.src = fl4.saddr;
629 	return 0;
630 }
631 
632 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
633 			      struct net_device *dev)
634 {
635 	struct ip_tunnel *tunnel = netdev_priv(dev);
636 	const struct iphdr *tnl_params;
637 
638 	if (!pskb_inet_may_pull(skb))
639 		goto free_skb;
640 
641 	if (tunnel->collect_md) {
642 		gre_fb_xmit(skb, dev, skb->protocol);
643 		return NETDEV_TX_OK;
644 	}
645 
646 	if (dev->header_ops) {
647 		int pull_len = tunnel->hlen + sizeof(struct iphdr);
648 
649 		if (skb_cow_head(skb, 0))
650 			goto free_skb;
651 
652 		if (!pskb_may_pull(skb, pull_len))
653 			goto free_skb;
654 
655 		tnl_params = (const struct iphdr *)skb->data;
656 
657 		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
658 		skb_pull(skb, pull_len);
659 		skb_reset_mac_header(skb);
660 
661 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
662 		    skb_checksum_start(skb) < skb->data)
663 			goto free_skb;
664 	} else {
665 		if (skb_cow_head(skb, dev->needed_headroom))
666 			goto free_skb;
667 
668 		tnl_params = &tunnel->parms.iph;
669 	}
670 
671 	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
672 		goto free_skb;
673 
674 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
675 	return NETDEV_TX_OK;
676 
677 free_skb:
678 	kfree_skb(skb);
679 	DEV_STATS_INC(dev, tx_dropped);
680 	return NETDEV_TX_OK;
681 }
682 
683 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
684 			       struct net_device *dev)
685 {
686 	struct ip_tunnel *tunnel = netdev_priv(dev);
687 	bool truncate = false;
688 	__be16 proto;
689 
690 	if (!pskb_inet_may_pull(skb))
691 		goto free_skb;
692 
693 	if (tunnel->collect_md) {
694 		erspan_fb_xmit(skb, dev);
695 		return NETDEV_TX_OK;
696 	}
697 
698 	if (gre_handle_offloads(skb, false))
699 		goto free_skb;
700 
701 	if (skb_cow_head(skb, dev->needed_headroom))
702 		goto free_skb;
703 
704 	if (skb->len > dev->mtu + dev->hard_header_len) {
705 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
706 			goto free_skb;
707 		truncate = true;
708 	}
709 
710 	/* Push ERSPAN header */
711 	if (tunnel->erspan_ver == 0) {
712 		proto = htons(ETH_P_ERSPAN);
713 		tunnel->parms.o_flags &= ~TUNNEL_SEQ;
714 	} else if (tunnel->erspan_ver == 1) {
715 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
716 				    tunnel->index,
717 				    truncate, true);
718 		proto = htons(ETH_P_ERSPAN);
719 	} else if (tunnel->erspan_ver == 2) {
720 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
721 				       tunnel->dir, tunnel->hwid,
722 				       truncate, true);
723 		proto = htons(ETH_P_ERSPAN2);
724 	} else {
725 		goto free_skb;
726 	}
727 
728 	tunnel->parms.o_flags &= ~TUNNEL_KEY;
729 	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
730 	return NETDEV_TX_OK;
731 
732 free_skb:
733 	kfree_skb(skb);
734 	DEV_STATS_INC(dev, tx_dropped);
735 	return NETDEV_TX_OK;
736 }
737 
738 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
739 				struct net_device *dev)
740 {
741 	struct ip_tunnel *tunnel = netdev_priv(dev);
742 
743 	if (!pskb_inet_may_pull(skb))
744 		goto free_skb;
745 
746 	if (tunnel->collect_md) {
747 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
748 		return NETDEV_TX_OK;
749 	}
750 
751 	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
752 		goto free_skb;
753 
754 	if (skb_cow_head(skb, dev->needed_headroom))
755 		goto free_skb;
756 
757 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
758 	return NETDEV_TX_OK;
759 
760 free_skb:
761 	kfree_skb(skb);
762 	DEV_STATS_INC(dev, tx_dropped);
763 	return NETDEV_TX_OK;
764 }
765 
766 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
767 {
768 	struct ip_tunnel *tunnel = netdev_priv(dev);
769 	__be16 flags;
770 	int len;
771 
772 	len = tunnel->tun_hlen;
773 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
774 	len = tunnel->tun_hlen - len;
775 	tunnel->hlen = tunnel->hlen + len;
776 
777 	if (dev->header_ops)
778 		dev->hard_header_len += len;
779 	else
780 		dev->needed_headroom += len;
781 
782 	if (set_mtu)
783 		dev->mtu = max_t(int, dev->mtu - len, 68);
784 
785 	flags = tunnel->parms.o_flags;
786 
787 	if (flags & TUNNEL_SEQ ||
788 	    (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
789 		dev->features &= ~NETIF_F_GSO_SOFTWARE;
790 		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
791 	} else {
792 		dev->features |= NETIF_F_GSO_SOFTWARE;
793 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
794 	}
795 }
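/* A worked example of the length update above, assuming gre_calc_hlen()
 * charges 4 bytes for the base header plus 4 for each of TUNNEL_CSUM,
 * TUNNEL_KEY and TUNNEL_SEQ: changing o_flags from TUNNEL_KEY to
 * TUNNEL_KEY | TUNNEL_SEQ grows tun_hlen from 8 to 12, so len == 4;
 * hlen and the headroom (or hard_header_len) grow by 4, and with set_mtu
 * the mtu shrinks by 4, clamped to a floor of 68.
 */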
796 
797 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
798 			    int cmd)
799 {
800 	int err;
801 
802 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
803 		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
804 		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
805 		    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
806 			return -EINVAL;
807 	}
808 
809 	p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
810 	p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
811 
812 	err = ip_tunnel_ctl(dev, p, cmd);
813 	if (err)
814 		return err;
815 
816 	if (cmd == SIOCCHGTUNNEL) {
817 		struct ip_tunnel *t = netdev_priv(dev);
818 
819 		t->parms.i_flags = p->i_flags;
820 		t->parms.o_flags = p->o_flags;
821 
822 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
823 			ipgre_link_update(dev, true);
824 	}
825 
826 	p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
827 	p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
828 	return 0;
829 }
830 
831 /* Nice toy. Unfortunately, useless in real life :-)
832    It allows constructing a virtual multiprotocol broadcast "LAN"
833    over the Internet, provided multicast routing is tuned.
834 
835 
836    I have no idea whether this bicycle was invented before me,
837    so I had to set ARPHRD_IPGRE to a random value.
838    I have an impression that Cisco could have made something similar,
839    but this feature is apparently missing in IOS<=11.2(8).
840 
841    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
842    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
843 
844    ping -t 255 224.66.66.66
845 
846    If nobody answers, mbone does not work.
847 
848    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
849    ip addr add 10.66.66.<somewhat>/24 dev Universe
850    ifconfig Universe up
851    ifconfig Universe add fe80::<Your_real_addr>/10
852    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
853    ftp 10.66.66.66
854    ...
855    ftp fec0:6666:6666::193.233.7.65
856    ...
857  */
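/* The ifconfig calls above map roughly to the following iproute2
   commands (an untested sketch; placeholders as in the example above):

   ip link set Universe up
   ip addr add fe80::<Your_real_addr>/10 dev Universe
   ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */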
858 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
859 			unsigned short type,
860 			const void *daddr, const void *saddr, unsigned int len)
861 {
862 	struct ip_tunnel *t = netdev_priv(dev);
863 	struct iphdr *iph;
864 	struct gre_base_hdr *greh;
865 
866 	iph = skb_push(skb, t->hlen + sizeof(*iph));
867 	greh = (struct gre_base_hdr *)(iph+1);
868 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
869 	greh->protocol = htons(type);
870 
871 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
872 
873 	/* Set the source hardware address. */
874 	if (saddr)
875 		memcpy(&iph->saddr, saddr, 4);
876 	if (daddr)
877 		memcpy(&iph->daddr, daddr, 4);
878 	if (iph->daddr)
879 		return t->hlen + sizeof(*iph);
880 
881 	return -(t->hlen + sizeof(*iph));
882 }
883 
884 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
885 {
886 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
887 	memcpy(haddr, &iph->saddr, 4);
888 	return 4;
889 }
890 
891 static const struct header_ops ipgre_header_ops = {
892 	.create	= ipgre_header,
893 	.parse	= ipgre_header_parse,
894 };
895 
896 #ifdef CONFIG_NET_IPGRE_BROADCAST
897 static int ipgre_open(struct net_device *dev)
898 {
899 	struct ip_tunnel *t = netdev_priv(dev);
900 
901 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
902 		struct flowi4 fl4;
903 		struct rtable *rt;
904 
905 		rt = ip_route_output_gre(t->net, &fl4,
906 					 t->parms.iph.daddr,
907 					 t->parms.iph.saddr,
908 					 t->parms.o_key,
909 					 RT_TOS(t->parms.iph.tos),
910 					 t->parms.link);
911 		if (IS_ERR(rt))
912 			return -EADDRNOTAVAIL;
913 		dev = rt->dst.dev;
914 		ip_rt_put(rt);
915 		if (!__in_dev_get_rtnl(dev))
916 			return -EADDRNOTAVAIL;
917 		t->mlink = dev->ifindex;
918 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
919 	}
920 	return 0;
921 }
922 
923 static int ipgre_close(struct net_device *dev)
924 {
925 	struct ip_tunnel *t = netdev_priv(dev);
926 
927 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
928 		struct in_device *in_dev;
929 		in_dev = inetdev_by_index(t->net, t->mlink);
930 		if (in_dev)
931 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
932 	}
933 	return 0;
934 }
935 #endif
936 
937 static const struct net_device_ops ipgre_netdev_ops = {
938 	.ndo_init		= ipgre_tunnel_init,
939 	.ndo_uninit		= ip_tunnel_uninit,
940 #ifdef CONFIG_NET_IPGRE_BROADCAST
941 	.ndo_open		= ipgre_open,
942 	.ndo_stop		= ipgre_close,
943 #endif
944 	.ndo_start_xmit		= ipgre_xmit,
945 	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
946 	.ndo_change_mtu		= ip_tunnel_change_mtu,
947 	.ndo_get_stats64	= dev_get_tstats64,
948 	.ndo_get_iflink		= ip_tunnel_get_iflink,
949 	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
950 };
951 
952 #define GRE_FEATURES (NETIF_F_SG |		\
953 		      NETIF_F_FRAGLIST |	\
954 		      NETIF_F_HIGHDMA |		\
955 		      NETIF_F_HW_CSUM)
956 
957 static void ipgre_tunnel_setup(struct net_device *dev)
958 {
959 	dev->netdev_ops		= &ipgre_netdev_ops;
960 	dev->type		= ARPHRD_IPGRE;
961 	ip_tunnel_setup(dev, ipgre_net_id);
962 }
963 
964 static void __gre_tunnel_init(struct net_device *dev)
965 {
966 	struct ip_tunnel *tunnel;
967 	__be16 flags;
968 
969 	tunnel = netdev_priv(dev);
970 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
971 	tunnel->parms.iph.protocol = IPPROTO_GRE;
972 
973 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
974 	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
975 
976 	dev->features		|= GRE_FEATURES | NETIF_F_LLTX;
977 	dev->hw_features	|= GRE_FEATURES;
978 
979 	flags = tunnel->parms.o_flags;
980 
981 	/* TCP offload with GRE SEQ is not supported, nor can we support 2
982 	 * levels of outer headers requiring an update.
983 	 */
984 	if (flags & TUNNEL_SEQ)
985 		return;
986 	if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
987 		return;
988 
989 	dev->features |= NETIF_F_GSO_SOFTWARE;
990 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
991 }
992 
993 static int ipgre_tunnel_init(struct net_device *dev)
994 {
995 	struct ip_tunnel *tunnel = netdev_priv(dev);
996 	struct iphdr *iph = &tunnel->parms.iph;
997 
998 	__gre_tunnel_init(dev);
999 
1000 	__dev_addr_set(dev, &iph->saddr, 4);
1001 	memcpy(dev->broadcast, &iph->daddr, 4);
1002 
1003 	dev->flags		= IFF_NOARP;
1004 	netif_keep_dst(dev);
1005 	dev->addr_len		= 4;
1006 
1007 	if (iph->daddr && !tunnel->collect_md) {
1008 #ifdef CONFIG_NET_IPGRE_BROADCAST
1009 		if (ipv4_is_multicast(iph->daddr)) {
1010 			if (!iph->saddr)
1011 				return -EINVAL;
1012 			dev->flags = IFF_BROADCAST;
1013 			dev->header_ops = &ipgre_header_ops;
1014 			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1015 			dev->needed_headroom = 0;
1016 		}
1017 #endif
1018 	} else if (!tunnel->collect_md) {
1019 		dev->header_ops = &ipgre_header_ops;
1020 		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1021 		dev->needed_headroom = 0;
1022 	}
1023 
1024 	return ip_tunnel_init(dev);
1025 }
1026 
1027 static const struct gre_protocol ipgre_protocol = {
1028 	.handler     = gre_rcv,
1029 	.err_handler = gre_err,
1030 };
1031 
1032 static int __net_init ipgre_init_net(struct net *net)
1033 {
1034 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1035 }
1036 
1037 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1038 {
1039 	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1040 }
1041 
1042 static struct pernet_operations ipgre_net_ops = {
1043 	.init = ipgre_init_net,
1044 	.exit_batch = ipgre_exit_batch_net,
1045 	.id   = &ipgre_net_id,
1046 	.size = sizeof(struct ip_tunnel_net),
1047 };
1048 
1049 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1050 				 struct netlink_ext_ack *extack)
1051 {
1052 	__be16 flags;
1053 
1054 	if (!data)
1055 		return 0;
1056 
1057 	flags = 0;
1058 	if (data[IFLA_GRE_IFLAGS])
1059 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1060 	if (data[IFLA_GRE_OFLAGS])
1061 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1062 	if (flags & (GRE_VERSION|GRE_ROUTING))
1063 		return -EINVAL;
1064 
1065 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1066 	    data[IFLA_GRE_ENCAP_TYPE] &&
1067 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1068 		return -EINVAL;
1069 
1070 	return 0;
1071 }
1072 
1073 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1074 			      struct netlink_ext_ack *extack)
1075 {
1076 	__be32 daddr;
1077 
1078 	if (tb[IFLA_ADDRESS]) {
1079 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1080 			return -EINVAL;
1081 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1082 			return -EADDRNOTAVAIL;
1083 	}
1084 
1085 	if (!data)
1086 		goto out;
1087 
1088 	if (data[IFLA_GRE_REMOTE]) {
1089 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1090 		if (!daddr)
1091 			return -EINVAL;
1092 	}
1093 
1094 out:
1095 	return ipgre_tunnel_validate(tb, data, extack);
1096 }
1097 
1098 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1099 			   struct netlink_ext_ack *extack)
1100 {
1101 	__be16 flags = 0;
1102 	int ret;
1103 
1104 	if (!data)
1105 		return 0;
1106 
1107 	ret = ipgre_tap_validate(tb, data, extack);
1108 	if (ret)
1109 		return ret;
1110 
1111 	if (data[IFLA_GRE_ERSPAN_VER] &&
1112 	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1113 		return 0;
1114 
1115 	/* ERSPAN type II/III should only have the GRE sequence and key flags */
1116 	if (data[IFLA_GRE_OFLAGS])
1117 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1118 	if (data[IFLA_GRE_IFLAGS])
1119 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1120 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1121 	    flags != (GRE_SEQ | GRE_KEY))
1122 		return -EINVAL;
1123 
1124 	/* The ERSPAN session ID is only 10 bits. Since we reuse the
1125 	 * 32-bit key field as the ID, check its range.
1126 	 */
1127 	if (data[IFLA_GRE_IKEY] &&
1128 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1129 		return -EINVAL;
1130 
1131 	if (data[IFLA_GRE_OKEY] &&
1132 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1133 		return -EINVAL;
1134 
1135 	return 0;
1136 }
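/* For illustration, assuming ID_MASK covers the low 10 bits as in
 * include/net/erspan.h: session IDs passed via IFLA_GRE_IKEY/IFLA_GRE_OKEY
 * may range over 0..1023, so e.g. key 100 passes the checks above while
 * key 1024 (bit 10 set) is rejected with -EINVAL.
 */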
1137 
1138 static int ipgre_netlink_parms(struct net_device *dev,
1139 				struct nlattr *data[],
1140 				struct nlattr *tb[],
1141 				struct ip_tunnel_parm *parms,
1142 				__u32 *fwmark)
1143 {
1144 	struct ip_tunnel *t = netdev_priv(dev);
1145 
1146 	memset(parms, 0, sizeof(*parms));
1147 
1148 	parms->iph.protocol = IPPROTO_GRE;
1149 
1150 	if (!data)
1151 		return 0;
1152 
1153 	if (data[IFLA_GRE_LINK])
1154 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1155 
1156 	if (data[IFLA_GRE_IFLAGS])
1157 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1158 
1159 	if (data[IFLA_GRE_OFLAGS])
1160 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1161 
1162 	if (data[IFLA_GRE_IKEY])
1163 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1164 
1165 	if (data[IFLA_GRE_OKEY])
1166 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1167 
1168 	if (data[IFLA_GRE_LOCAL])
1169 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1170 
1171 	if (data[IFLA_GRE_REMOTE])
1172 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1173 
1174 	if (data[IFLA_GRE_TTL])
1175 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1176 
1177 	if (data[IFLA_GRE_TOS])
1178 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1179 
1180 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1181 		if (t->ignore_df)
1182 			return -EINVAL;
1183 		parms->iph.frag_off = htons(IP_DF);
1184 	}
1185 
1186 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1187 		t->collect_md = true;
1188 		if (dev->type == ARPHRD_IPGRE)
1189 			dev->type = ARPHRD_NONE;
1190 	}
1191 
1192 	if (data[IFLA_GRE_IGNORE_DF]) {
1193 		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1194 		  && (parms->iph.frag_off & htons(IP_DF)))
1195 			return -EINVAL;
1196 		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1197 	}
1198 
1199 	if (data[IFLA_GRE_FWMARK])
1200 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1201 
1202 	return 0;
1203 }
1204 
1205 static int erspan_netlink_parms(struct net_device *dev,
1206 				struct nlattr *data[],
1207 				struct nlattr *tb[],
1208 				struct ip_tunnel_parm *parms,
1209 				__u32 *fwmark)
1210 {
1211 	struct ip_tunnel *t = netdev_priv(dev);
1212 	int err;
1213 
1214 	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1215 	if (err)
1216 		return err;
1217 	if (!data)
1218 		return 0;
1219 
1220 	if (data[IFLA_GRE_ERSPAN_VER]) {
1221 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1222 
1223 		if (t->erspan_ver > 2)
1224 			return -EINVAL;
1225 	}
1226 
1227 	if (t->erspan_ver == 1) {
1228 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1229 			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1230 			if (t->index & ~INDEX_MASK)
1231 				return -EINVAL;
1232 		}
1233 	} else if (t->erspan_ver == 2) {
1234 		if (data[IFLA_GRE_ERSPAN_DIR]) {
1235 			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1236 			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1237 				return -EINVAL;
1238 		}
1239 		if (data[IFLA_GRE_ERSPAN_HWID]) {
1240 			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1241 			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1242 				return -EINVAL;
1243 		}
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 /* This function returns true when ENCAP attributes are present in the nl msg */
1250 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1251 				      struct ip_tunnel_encap *ipencap)
1252 {
1253 	bool ret = false;
1254 
1255 	memset(ipencap, 0, sizeof(*ipencap));
1256 
1257 	if (!data)
1258 		return ret;
1259 
1260 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1261 		ret = true;
1262 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1263 	}
1264 
1265 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1266 		ret = true;
1267 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1268 	}
1269 
1270 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1271 		ret = true;
1272 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1273 	}
1274 
1275 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1276 		ret = true;
1277 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1278 	}
1279 
1280 	return ret;
1281 }
1282 
1283 static int gre_tap_init(struct net_device *dev)
1284 {
1285 	__gre_tunnel_init(dev);
1286 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1287 	netif_keep_dst(dev);
1288 
1289 	return ip_tunnel_init(dev);
1290 }
1291 
1292 static const struct net_device_ops gre_tap_netdev_ops = {
1293 	.ndo_init		= gre_tap_init,
1294 	.ndo_uninit		= ip_tunnel_uninit,
1295 	.ndo_start_xmit		= gre_tap_xmit,
1296 	.ndo_set_mac_address 	= eth_mac_addr,
1297 	.ndo_validate_addr	= eth_validate_addr,
1298 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1299 	.ndo_get_stats64	= dev_get_tstats64,
1300 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1301 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1302 };
1303 
1304 static int erspan_tunnel_init(struct net_device *dev)
1305 {
1306 	struct ip_tunnel *tunnel = netdev_priv(dev);
1307 
1308 	if (tunnel->erspan_ver == 0)
1309 		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1310 	else
1311 		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1312 
1313 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1314 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1315 		       erspan_hdr_len(tunnel->erspan_ver);
1316 
1317 	dev->features		|= GRE_FEATURES;
1318 	dev->hw_features	|= GRE_FEATURES;
1319 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1320 	netif_keep_dst(dev);
1321 
1322 	return ip_tunnel_init(dev);
1323 }
1324 
1325 static const struct net_device_ops erspan_netdev_ops = {
1326 	.ndo_init		= erspan_tunnel_init,
1327 	.ndo_uninit		= ip_tunnel_uninit,
1328 	.ndo_start_xmit		= erspan_xmit,
1329 	.ndo_set_mac_address	= eth_mac_addr,
1330 	.ndo_validate_addr	= eth_validate_addr,
1331 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1332 	.ndo_get_stats64	= dev_get_tstats64,
1333 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1334 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1335 };
1336 
1337 static void ipgre_tap_setup(struct net_device *dev)
1338 {
1339 	ether_setup(dev);
1340 	dev->max_mtu = 0;
1341 	dev->netdev_ops	= &gre_tap_netdev_ops;
1342 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1343 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1344 	ip_tunnel_setup(dev, gre_tap_net_id);
1345 }
1346 
1347 static int
1348 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1349 {
1350 	struct ip_tunnel_encap ipencap;
1351 
1352 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1353 		struct ip_tunnel *t = netdev_priv(dev);
1354 		int err = ip_tunnel_encap_setup(t, &ipencap);
1355 
1356 		if (err < 0)
1357 			return err;
1358 	}
1359 
1360 	return 0;
1361 }
1362 
1363 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1364 			 struct nlattr *tb[], struct nlattr *data[],
1365 			 struct netlink_ext_ack *extack)
1366 {
1367 	struct ip_tunnel_parm p;
1368 	__u32 fwmark = 0;
1369 	int err;
1370 
1371 	err = ipgre_newlink_encap_setup(dev, data);
1372 	if (err)
1373 		return err;
1374 
1375 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1376 	if (err < 0)
1377 		return err;
1378 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1379 }
1380 
1381 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1382 			  struct nlattr *tb[], struct nlattr *data[],
1383 			  struct netlink_ext_ack *extack)
1384 {
1385 	struct ip_tunnel_parm p;
1386 	__u32 fwmark = 0;
1387 	int err;
1388 
1389 	err = ipgre_newlink_encap_setup(dev, data);
1390 	if (err)
1391 		return err;
1392 
1393 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1394 	if (err)
1395 		return err;
1396 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1397 }
1398 
1399 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1400 			    struct nlattr *data[],
1401 			    struct netlink_ext_ack *extack)
1402 {
1403 	struct ip_tunnel *t = netdev_priv(dev);
1404 	__u32 fwmark = t->fwmark;
1405 	struct ip_tunnel_parm p;
1406 	int err;
1407 
1408 	err = ipgre_newlink_encap_setup(dev, data);
1409 	if (err)
1410 		return err;
1411 
1412 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1413 	if (err < 0)
1414 		return err;
1415 
1416 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1417 	if (err < 0)
1418 		return err;
1419 
1420 	t->parms.i_flags = p.i_flags;
1421 	t->parms.o_flags = p.o_flags;
1422 
1423 	ipgre_link_update(dev, !tb[IFLA_MTU]);
1424 
1425 	return 0;
1426 }
1427 
1428 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1429 			     struct nlattr *data[],
1430 			     struct netlink_ext_ack *extack)
1431 {
1432 	struct ip_tunnel *t = netdev_priv(dev);
1433 	__u32 fwmark = t->fwmark;
1434 	struct ip_tunnel_parm p;
1435 	int err;
1436 
1437 	err = ipgre_newlink_encap_setup(dev, data);
1438 	if (err)
1439 		return err;
1440 
1441 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1442 	if (err < 0)
1443 		return err;
1444 
1445 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1446 	if (err < 0)
1447 		return err;
1448 
1449 	t->parms.i_flags = p.i_flags;
1450 	t->parms.o_flags = p.o_flags;
1451 
1452 	return 0;
1453 }
1454 
1455 static size_t ipgre_get_size(const struct net_device *dev)
1456 {
1457 	return
1458 		/* IFLA_GRE_LINK */
1459 		nla_total_size(4) +
1460 		/* IFLA_GRE_IFLAGS */
1461 		nla_total_size(2) +
1462 		/* IFLA_GRE_OFLAGS */
1463 		nla_total_size(2) +
1464 		/* IFLA_GRE_IKEY */
1465 		nla_total_size(4) +
1466 		/* IFLA_GRE_OKEY */
1467 		nla_total_size(4) +
1468 		/* IFLA_GRE_LOCAL */
1469 		nla_total_size(4) +
1470 		/* IFLA_GRE_REMOTE */
1471 		nla_total_size(4) +
1472 		/* IFLA_GRE_TTL */
1473 		nla_total_size(1) +
1474 		/* IFLA_GRE_TOS */
1475 		nla_total_size(1) +
1476 		/* IFLA_GRE_PMTUDISC */
1477 		nla_total_size(1) +
1478 		/* IFLA_GRE_ENCAP_TYPE */
1479 		nla_total_size(2) +
1480 		/* IFLA_GRE_ENCAP_FLAGS */
1481 		nla_total_size(2) +
1482 		/* IFLA_GRE_ENCAP_SPORT */
1483 		nla_total_size(2) +
1484 		/* IFLA_GRE_ENCAP_DPORT */
1485 		nla_total_size(2) +
1486 		/* IFLA_GRE_COLLECT_METADATA */
1487 		nla_total_size(0) +
1488 		/* IFLA_GRE_IGNORE_DF */
1489 		nla_total_size(1) +
1490 		/* IFLA_GRE_FWMARK */
1491 		nla_total_size(4) +
1492 		/* IFLA_GRE_ERSPAN_INDEX */
1493 		nla_total_size(4) +
1494 		/* IFLA_GRE_ERSPAN_VER */
1495 		nla_total_size(1) +
1496 		/* IFLA_GRE_ERSPAN_DIR */
1497 		nla_total_size(1) +
1498 		/* IFLA_GRE_ERSPAN_HWID */
1499 		nla_total_size(2) +
1500 		0;
1501 }
1502 
1503 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1504 {
1505 	struct ip_tunnel *t = netdev_priv(dev);
1506 	struct ip_tunnel_parm *p = &t->parms;
1507 	__be16 o_flags = p->o_flags;
1508 
1509 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1510 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1511 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1512 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1513 			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1514 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1515 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1516 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1517 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1518 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1519 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1520 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1521 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1522 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1523 		goto nla_put_failure;
1524 
1525 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1526 			t->encap.type) ||
1527 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1528 			 t->encap.sport) ||
1529 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1530 			 t->encap.dport) ||
1531 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1532 			t->encap.flags))
1533 		goto nla_put_failure;
1534 
1535 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1536 		goto nla_put_failure;
1537 
1538 	if (t->collect_md) {
1539 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1540 			goto nla_put_failure;
1541 	}
1542 
1543 	return 0;
1544 
1545 nla_put_failure:
1546 	return -EMSGSIZE;
1547 }
1548 
1549 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1550 {
1551 	struct ip_tunnel *t = netdev_priv(dev);
1552 
1553 	if (t->erspan_ver <= 2) {
1554 		if (t->erspan_ver != 0 && !t->collect_md)
1555 			t->parms.o_flags |= TUNNEL_KEY;
1556 
1557 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1558 			goto nla_put_failure;
1559 
1560 		if (t->erspan_ver == 1) {
1561 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1562 				goto nla_put_failure;
1563 		} else if (t->erspan_ver == 2) {
1564 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1565 				goto nla_put_failure;
1566 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1567 				goto nla_put_failure;
1568 		}
1569 	}
1570 
1571 	return ipgre_fill_info(skb, dev);
1572 
1573 nla_put_failure:
1574 	return -EMSGSIZE;
1575 }
1576 
1577 static void erspan_setup(struct net_device *dev)
1578 {
1579 	struct ip_tunnel *t = netdev_priv(dev);
1580 
1581 	ether_setup(dev);
1582 	dev->max_mtu = 0;
1583 	dev->netdev_ops = &erspan_netdev_ops;
1584 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1585 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1586 	ip_tunnel_setup(dev, erspan_net_id);
1587 	t->erspan_ver = 1;
1588 }
1589 
1590 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1591 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1592 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1593 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1594 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1595 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1596 	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1597 	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1598 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1599 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1600 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1601 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1602 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1603 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1604 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1605 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1606 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1607 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1608 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1609 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1610 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1611 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1612 };
1613 
1614 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1615 	.kind		= "gre",
1616 	.maxtype	= IFLA_GRE_MAX,
1617 	.policy		= ipgre_policy,
1618 	.priv_size	= sizeof(struct ip_tunnel),
1619 	.setup		= ipgre_tunnel_setup,
1620 	.validate	= ipgre_tunnel_validate,
1621 	.newlink	= ipgre_newlink,
1622 	.changelink	= ipgre_changelink,
1623 	.dellink	= ip_tunnel_dellink,
1624 	.get_size	= ipgre_get_size,
1625 	.fill_info	= ipgre_fill_info,
1626 	.get_link_net	= ip_tunnel_get_link_net,
1627 };
1628 
1629 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1630 	.kind		= "gretap",
1631 	.maxtype	= IFLA_GRE_MAX,
1632 	.policy		= ipgre_policy,
1633 	.priv_size	= sizeof(struct ip_tunnel),
1634 	.setup		= ipgre_tap_setup,
1635 	.validate	= ipgre_tap_validate,
1636 	.newlink	= ipgre_newlink,
1637 	.changelink	= ipgre_changelink,
1638 	.dellink	= ip_tunnel_dellink,
1639 	.get_size	= ipgre_get_size,
1640 	.fill_info	= ipgre_fill_info,
1641 	.get_link_net	= ip_tunnel_get_link_net,
1642 };
1643 
1644 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1645 	.kind		= "erspan",
1646 	.maxtype	= IFLA_GRE_MAX,
1647 	.policy		= ipgre_policy,
1648 	.priv_size	= sizeof(struct ip_tunnel),
1649 	.setup		= erspan_setup,
1650 	.validate	= erspan_validate,
1651 	.newlink	= erspan_newlink,
1652 	.changelink	= erspan_changelink,
1653 	.dellink	= ip_tunnel_dellink,
1654 	.get_size	= ipgre_get_size,
1655 	.fill_info	= erspan_fill_info,
1656 	.get_link_net	= ip_tunnel_get_link_net,
1657 };
1658 
1659 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1660 					u8 name_assign_type)
1661 {
1662 	struct nlattr *tb[IFLA_MAX + 1];
1663 	struct net_device *dev;
1664 	LIST_HEAD(list_kill);
1665 	struct ip_tunnel *t;
1666 	int err;
1667 
1668 	memset(&tb, 0, sizeof(tb));
1669 
1670 	dev = rtnl_create_link(net, name, name_assign_type,
1671 			       &ipgre_tap_ops, tb, NULL);
1672 	if (IS_ERR(dev))
1673 		return dev;
1674 
1675 	/* Configure flow-based GRE device. */
1676 	t = netdev_priv(dev);
1677 	t->collect_md = true;
1678 
1679 	err = ipgre_newlink(net, dev, tb, NULL, NULL);
1680 	if (err < 0) {
1681 		free_netdev(dev);
1682 		return ERR_PTR(err);
1683 	}
1684 
1685 	/* openvswitch users expect packet sizes to be unrestricted,
1686 	 * so set the largest MTU we can.
1687 	 */
1688 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1689 	if (err)
1690 		goto out;
1691 
1692 	err = rtnl_configure_link(dev, NULL, 0, NULL);
1693 	if (err < 0)
1694 		goto out;
1695 
1696 	return dev;
1697 out:
1698 	ip_tunnel_dellink(dev, &list_kill);
1699 	unregister_netdevice_many(&list_kill);
1700 	return ERR_PTR(err);
1701 }
1702 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
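/* A rough userspace analogue of the device created above (a hedged sketch,
 * not what callers such as openvswitch literally run):
 *
 *	ip link add gretap1 type gretap external
 *
 * i.e. an L2 GRE device in collect-metadata ("external") mode, here with
 * its MTU additionally raised to IP_MAX_MTU.
 */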
1703 
1704 static int __net_init ipgre_tap_init_net(struct net *net)
1705 {
1706 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1707 }
1708 
1709 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1710 {
1711 	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1712 }
1713 
1714 static struct pernet_operations ipgre_tap_net_ops = {
1715 	.init = ipgre_tap_init_net,
1716 	.exit_batch = ipgre_tap_exit_batch_net,
1717 	.id   = &gre_tap_net_id,
1718 	.size = sizeof(struct ip_tunnel_net),
1719 };
1720 
1721 static int __net_init erspan_init_net(struct net *net)
1722 {
1723 	return ip_tunnel_init_net(net, erspan_net_id,
1724 				  &erspan_link_ops, "erspan0");
1725 }
1726 
1727 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1728 {
1729 	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1730 }
1731 
1732 static struct pernet_operations erspan_net_ops = {
1733 	.init = erspan_init_net,
1734 	.exit_batch = erspan_exit_batch_net,
1735 	.id   = &erspan_net_id,
1736 	.size = sizeof(struct ip_tunnel_net),
1737 };
1738 
1739 static int __init ipgre_init(void)
1740 {
1741 	int err;
1742 
1743 	pr_info("GRE over IPv4 tunneling driver\n");
1744 
1745 	err = register_pernet_device(&ipgre_net_ops);
1746 	if (err < 0)
1747 		return err;
1748 
1749 	err = register_pernet_device(&ipgre_tap_net_ops);
1750 	if (err < 0)
1751 		goto pnet_tap_failed;
1752 
1753 	err = register_pernet_device(&erspan_net_ops);
1754 	if (err < 0)
1755 		goto pnet_erspan_failed;
1756 
1757 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1758 	if (err < 0) {
1759 		pr_info("%s: can't add protocol\n", __func__);
1760 		goto add_proto_failed;
1761 	}
1762 
1763 	err = rtnl_link_register(&ipgre_link_ops);
1764 	if (err < 0)
1765 		goto rtnl_link_failed;
1766 
1767 	err = rtnl_link_register(&ipgre_tap_ops);
1768 	if (err < 0)
1769 		goto tap_ops_failed;
1770 
1771 	err = rtnl_link_register(&erspan_link_ops);
1772 	if (err < 0)
1773 		goto erspan_link_failed;
1774 
1775 	return 0;
1776 
1777 erspan_link_failed:
1778 	rtnl_link_unregister(&ipgre_tap_ops);
1779 tap_ops_failed:
1780 	rtnl_link_unregister(&ipgre_link_ops);
1781 rtnl_link_failed:
1782 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1783 add_proto_failed:
1784 	unregister_pernet_device(&erspan_net_ops);
1785 pnet_erspan_failed:
1786 	unregister_pernet_device(&ipgre_tap_net_ops);
1787 pnet_tap_failed:
1788 	unregister_pernet_device(&ipgre_net_ops);
1789 	return err;
1790 }
1791 
1792 static void __exit ipgre_fini(void)
1793 {
1794 	rtnl_link_unregister(&ipgre_tap_ops);
1795 	rtnl_link_unregister(&ipgre_link_ops);
1796 	rtnl_link_unregister(&erspan_link_ops);
1797 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1798 	unregister_pernet_device(&ipgre_tap_net_ops);
1799 	unregister_pernet_device(&ipgre_net_ops);
1800 	unregister_pernet_device(&erspan_net_ops);
1801 }
1802 
1803 module_init(ipgre_init);
1804 module_exit(ipgre_fini);
1805 MODULE_LICENSE("GPL");
1806 MODULE_ALIAS_RTNL_LINK("gre");
1807 MODULE_ALIAS_RTNL_LINK("gretap");
1808 MODULE_ALIAS_RTNL_LINK("erspan");
1809 MODULE_ALIAS_NETDEV("gre0");
1810 MODULE_ALIAS_NETDEV("gretap0");
1811 MODULE_ALIAS_NETDEV("erspan0");
1812