/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; it suffices because cpu migration is forbidden once we enter
   the first ndo_xmit(). We force an exit if this counter reaches
   RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two
   problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution
   at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
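
/*
 * A minimal sketch of the xmit_recursion idea described above (purely
 * illustrative and kept out of the build: the real counter lives in the
 * core networking stack, not in this file, and real_xmit() below is a
 * hypothetical stand-in for the actual transmit path):
 */
#if 0
static DEFINE_PER_CPU(unsigned int, example_xmit_recursion);
#define EXAMPLE_RECURSION_LIMIT	8

static int example_xmit(struct sk_buff *skb)
{
	int ret;

	if (__this_cpu_read(example_xmit_recursion) > EXAMPLE_RECURSION_LIMIT) {
		/* Dead loop detected: silently drop the packet. */
		kfree_skb(skb);
		return -ELOOP;
	}
	__this_cpu_inc(example_xmit_recursion);
	ret = real_xmit(skb);	/* may re-enter this path via a tunnel */
	__this_cpu_dec(example_xmit_recursion);
	return ret;
}
#endif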

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. This makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
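	/* Unknown ICMP types and ICMP_PARAMETERPROB fall through to the
	 * return below and are ignored.
	 */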
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* See the comment at the top of ipgre_err() for why precise
	 * relaying of ICMP errors is infeasible and why Cisco's GRE key
	 * placement makes keyed, checksummed tunnels even harder.
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
			     iph->ihl * 4) < 0) {
		/* Parsing failed for a reason other than a bad checksum:
		 * give up. Checksum errors are tolerated here so the ICMP
		 * error can still be propagated to the tunnel.
		 */
		if (!csum_err)
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;
	__be32 index;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	iph = ip_hdr(skb);
	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field,
	 * so use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md)
				return PACKET_REJECT;

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
drop:
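	/* The skb is consumed here, so report PACKET_RCVD to stop any
	 * further GRE protocol processing of it.
	 */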
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

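	/* Make sure there is room for the outer IP and GRE headers plus
	 * the link layer header; reallocate the head if it is too small
	 * or shared.
	 */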
	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

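/* Resolve the local source address the stack would pick for this flow so
 * that collect_md users (e.g. openvswitch) can pre-fill the tunnel key.
 */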
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

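/* Map the IP TOS byte to an 802.1Q-style CoS value: the DSCP is the top
 * six bits of TOS, and the CoS is the top three bits of the DSCP. For
 * example, TOS 0xb8 (DSCP 46, EF) maps to CoS 5.
 */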
static inline u8 tos_to_cos(u8 tos)
{
	u8 dscp, cos;

	dscp = tos >> 2;
	cos = dscp >> 3;
	return cos;
}

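/* Build the ERSPAN type II header in front of the mirrored frame. The
 * first 16-bit word carries the 4-bit version and 12-bit vlan tci; the
 * second packs the 3-bit CoS, 2-bit encap type, truncation bit and
 * 10-bit session ID; the metadata word carries the 20-bit index.
 */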
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)
{
	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;
	struct qtag_prefix {
		__be16 eth_type;
		__be16 tci;
	} *qp;
	u16 vlan_tci = 0;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;
	}

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
			   (enc_type << EN_OFFSET & EN_MASK) |
			   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
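	/* The key was just consumed as the ERSPAN session ID, so clear
	 * TUNNEL_KEY to keep __gre_xmit() from emitting a GRE key field
	 * as well.
	 */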
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

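	/* Recompute the GRE header length after an o_flags change and
	 * propagate the delta to the total header length, the needed
	 * headroom and (optionally) the MTU.
	 */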
	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play
   with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

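	/* No destination yet: follow the header_ops convention of
	 * returning the pushed length negated to signal a partially
	 * built header.
	 */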
	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should carry only the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
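
/* For illustration, a configuration that passes the checks above
 * (assuming a recent iproute2; both seq and key must be set, and the
 * key doubles as the 10-bit session ID):
 *
 *   ip link add dev erspan1 type erspan seq key 100 \
 *          local 10.0.0.1 remote 10.0.0.2 erspan 123
 */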

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) &&
		    (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (t->index)
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure the flow-based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");