xref: /openbmc/linux/net/ipv4/ip_gre.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/if_vlan.h>
29 #include <linux/init.h>
30 #include <linux/in6.h>
31 #include <linux/inetdevice.h>
32 #include <linux/igmp.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_ether.h>
36 
37 #include <net/sock.h>
38 #include <net/ip.h>
39 #include <net/icmp.h>
40 #include <net/protocol.h>
41 #include <net/ip_tunnels.h>
42 #include <net/arp.h>
43 #include <net/checksum.h>
44 #include <net/dsfield.h>
45 #include <net/inet_ecn.h>
46 #include <net/xfrm.h>
47 #include <net/net_namespace.h>
48 #include <net/netns/generic.h>
49 #include <net/rtnetlink.h>
50 #include <net/gre.h>
51 #include <net/dst_metadata.h>
52 
53 #if IS_ENABLED(CONFIG_IPV6)
54 #include <net/ipv6.h>
55 #include <net/ip6_fib.h>
56 #include <net/ip6_route.h>
57 #endif
58 
59 /*
60    Problems & solutions
61    --------------------
62 
63    1. The most important issue is detecting local dead loops.
64    They would cause complete host lockup in transmit, which
65    would be "resolved" by stack overflow or, if queueing is enabled,
66    with infinite looping in net_bh.
67 
68    We cannot track such dead loops during route installation,
69    it is infeasible task. The most general solutions would be
70    to keep skb->encapsulation counter (sort of local ttl),
71    and silently drop packet when it expires. It is a good
72    solution, but it supposes maintaining new variable in ALL
73    skb, even if no tunneling is used.
74 
75    Current solution: xmit_recursion breaks dead loops. This is a percpu
76    counter, since when we enter the first ndo_xmit(), cpu migration is
77    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
78 
79    2. Networking dead loops would not kill routers, but would really
80    kill network. IP hop limit plays role of "t->recursion" in this case,
81    if we copy it from packet being encapsulated to upper header.
82    It is very good solution, but it introduces two problems:
83 
84    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
85      do not work over tunnels.
86    - traceroute does not work. I planned to relay ICMP from tunnel,
87      so that this problem would be solved and traceroute output
88      would be even more informative. This idea appeared to be wrong:
89      only Linux complies to rfc1812 now (yes, guys, Linux is the only
90      true router now :-)), all routers (at least, in neighbourhood of mine)
91      return only 8 bytes of payload. It is the end.
92 
93    Hence, if we want that OSPF worked or traceroute said something reasonable,
94    we should search for another solution.
95 
96    One of them is to parse packet trying to detect inner encapsulation
97    made by our node. It is difficult or even impossible, especially,
98    taking into account fragmentation. To be short, ttl is not solution at all.
99 
100    Current solution: The solution was UNEXPECTEDLY SIMPLE.
101    We force DF flag on tunnels with preconfigured hop limit,
102    that is ALL. :-) Well, it does not remove the problem completely,
103    but exponential growth of network traffic is changed to linear
104    (branches, that exceed pmtu are pruned) and tunnel mtu
105    rapidly degrades to value <68, where looping stops.
106    Yes, it is not good if there exists a router in the loop,
107    which does not force DF, even when encapsulating packets have DF set.
108    But it is not our problem! Nobody could accuse us, we made
109    all that we could make. Even if it is your gated who injected
110    fatal route to network, even if it were you who configured
111    fatal static route: you are innocent. :-)
112 
113    Alexey Kuznetsov.
114  */
115 
/* Module parameter (mode 0644, default true). Its description string says it
 * controls logging of packets received with corrupted ECN; the value is handed
 * to ip_tunnel_rcv() on the receive path.
 */
116 static bool log_ecn_error = true;
117 module_param(log_ecn_error, bool, 0644);
118 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
119 
/* Forward declarations: both names are referenced earlier in the file than
 * the point where they are defined.
 */
120 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
121 static int ipgre_tunnel_init(struct net_device *dev);
122 
/* Ids passed to net_generic() to fetch the per-namespace struct ip_tunnel_net:
 * gre_tap_net_id is used when the GRE protocol is ETH_P_TEB, ipgre_net_id
 * otherwise.
 */
123 static int ipgre_net_id __read_mostly;
124 static int gre_tap_net_id __read_mostly;
125 
126 static int ip_gre_calc_hlen(__be16 o_flags)
127 {
128 	int addend = 4;
129 
130 	if (o_flags & TUNNEL_CSUM)
131 		addend += 4;
132 	if (o_flags & TUNNEL_KEY)
133 		addend += 4;
134 	if (o_flags & TUNNEL_SEQ)
135 		addend += 4;
136 	return addend;
137 }
138 
139 static __be16 gre_flags_to_tnl_flags(__be16 flags)
140 {
141 	__be16 tflags = 0;
142 
143 	if (flags & GRE_CSUM)
144 		tflags |= TUNNEL_CSUM;
145 	if (flags & GRE_ROUTING)
146 		tflags |= TUNNEL_ROUTING;
147 	if (flags & GRE_KEY)
148 		tflags |= TUNNEL_KEY;
149 	if (flags & GRE_SEQ)
150 		tflags |= TUNNEL_SEQ;
151 	if (flags & GRE_STRICT)
152 		tflags |= TUNNEL_STRICT;
153 	if (flags & GRE_REC)
154 		tflags |= TUNNEL_REC;
155 	if (flags & GRE_VERSION)
156 		tflags |= TUNNEL_VERSION;
157 
158 	return tflags;
159 }
160 
161 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
162 {
163 	__be16 flags = 0;
164 
165 	if (tflags & TUNNEL_CSUM)
166 		flags |= GRE_CSUM;
167 	if (tflags & TUNNEL_ROUTING)
168 		flags |= GRE_ROUTING;
169 	if (tflags & TUNNEL_KEY)
170 		flags |= GRE_KEY;
171 	if (tflags & TUNNEL_SEQ)
172 		flags |= GRE_SEQ;
173 	if (tflags & TUNNEL_STRICT)
174 		flags |= GRE_STRICT;
175 	if (tflags & TUNNEL_REC)
176 		flags |= GRE_REC;
177 	if (tflags & TUNNEL_VERSION)
178 		flags |= GRE_VERSION;
179 
180 	return flags;
181 }
182 
183 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
184 			    bool *csum_err)
185 {
186 	const struct gre_base_hdr *greh;
187 	__be32 *options;
188 	int hdr_len;
189 
190 	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
191 		return -EINVAL;
192 
193 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
194 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
195 		return -EINVAL;
196 
197 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
198 	hdr_len = ip_gre_calc_hlen(tpi->flags);
199 
200 	if (!pskb_may_pull(skb, hdr_len))
201 		return -EINVAL;
202 
203 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
204 	tpi->proto = greh->protocol;
205 
206 	options = (__be32 *)(greh + 1);
207 	if (greh->flags & GRE_CSUM) {
208 		if (skb_checksum_simple_validate(skb)) {
209 			*csum_err = true;
210 			return -EINVAL;
211 		}
212 
213 		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
214 					 null_compute_pseudo);
215 		options++;
216 	}
217 
218 	if (greh->flags & GRE_KEY) {
219 		tpi->key = *options;
220 		options++;
221 	} else {
222 		tpi->key = 0;
223 	}
224 	if (unlikely(greh->flags & GRE_SEQ)) {
225 		tpi->seq = *options;
226 		options++;
227 	} else {
228 		tpi->seq = 0;
229 	}
230 	/* WCCP version 1 and 2 protocol decoding.
231 	 * - Change protocol to IP
232 	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
233 	 */
234 	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
235 		tpi->proto = htons(ETH_P_IP);
236 		if ((*(u8 *)options & 0xF0) != 0x40) {
237 			hdr_len += 4;
238 			if (!pskb_may_pull(skb, hdr_len))
239 				return -EINVAL;
240 		}
241 	}
242 	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
243 }
244 
245 static void ipgre_err(struct sk_buff *skb, u32 info,
246 		      const struct tnl_ptk_info *tpi)
247 {
248 
249 	/* All the routers (except for Linux) return only
250 	   8 bytes of packet payload. It means, that precise relaying of
251 	   ICMP in the real Internet is absolutely infeasible.
252 
253 	   Moreover, Cisco "wise men" put GRE key to the third word
254 	   in GRE header. It makes impossible maintaining even soft
255 	   state for keyed GRE tunnels with enabled checksum. Tell
256 	   them "thank you".
257 
258 	   Well, I wonder, rfc1812 was written by Cisco employee,
259 	   what the hell these idiots break standards established
260 	   by themselves???
261 	   */
262 	struct net *net = dev_net(skb->dev);
263 	struct ip_tunnel_net *itn;
264 	const struct iphdr *iph;
265 	const int type = icmp_hdr(skb)->type;
266 	const int code = icmp_hdr(skb)->code;
267 	struct ip_tunnel *t;
268 
269 	switch (type) {
270 	default:
271 	case ICMP_PARAMETERPROB:
272 		return;
273 
274 	case ICMP_DEST_UNREACH:
275 		switch (code) {
276 		case ICMP_SR_FAILED:
277 		case ICMP_PORT_UNREACH:
278 			/* Impossible event. */
279 			return;
280 		default:
281 			/* All others are translated to HOST_UNREACH.
282 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
283 			   I believe they are just ether pollution. --ANK
284 			 */
285 			break;
286 		}
287 		break;
288 
289 	case ICMP_TIME_EXCEEDED:
290 		if (code != ICMP_EXC_TTL)
291 			return;
292 		break;
293 
294 	case ICMP_REDIRECT:
295 		break;
296 	}
297 
298 	if (tpi->proto == htons(ETH_P_TEB))
299 		itn = net_generic(net, gre_tap_net_id);
300 	else
301 		itn = net_generic(net, ipgre_net_id);
302 
303 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
304 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
305 			     iph->daddr, iph->saddr, tpi->key);
306 
307 	if (!t)
308 		return;
309 
310 	if (t->parms.iph.daddr == 0 ||
311 	    ipv4_is_multicast(t->parms.iph.daddr))
312 		return;
313 
314 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
315 		return;
316 
317 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
318 		t->err_count++;
319 	else
320 		t->err_count = 1;
321 	t->err_time = jiffies;
322 }
323 
324 static void gre_err(struct sk_buff *skb, u32 info)
325 {
326 	/* All the routers (except for Linux) return only
327 	 * 8 bytes of packet payload. It means, that precise relaying of
328 	 * ICMP in the real Internet is absolutely infeasible.
329 	 *
330 	 * Moreover, Cisco "wise men" put GRE key to the third word
331 	 * in GRE header. It makes impossible maintaining even soft
332 	 * state for keyed
333 	 * GRE tunnels with enabled checksum. Tell them "thank you".
334 	 *
335 	 * Well, I wonder, rfc1812 was written by Cisco employee,
336 	 * what the hell these idiots break standards established
337 	 * by themselves???
338 	 */
339 
340 	const int type = icmp_hdr(skb)->type;
341 	const int code = icmp_hdr(skb)->code;
342 	struct tnl_ptk_info tpi;
343 	bool csum_err = false;
344 
345 	if (parse_gre_header(skb, &tpi, &csum_err)) {
346 		if (!csum_err)		/* ignore csum errors. */
347 			return;
348 	}
349 
350 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
351 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
352 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
353 		return;
354 	}
355 	if (type == ICMP_REDIRECT) {
356 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
357 			      IPPROTO_GRE, 0);
358 		return;
359 	}
360 
361 	ipgre_err(skb, info, &tpi);
362 }
363 
364 static __be64 key_to_tunnel_id(__be32 key)
365 {
366 #ifdef __BIG_ENDIAN
367 	return (__force __be64)((__force u32)key);
368 #else
369 	return (__force __be64)((__force u64)key << 32);
370 #endif
371 }
372 
373 /* Returns the least-significant 32 bits of a __be64. */
374 static __be32 tunnel_id_to_key(__be64 x)
375 {
376 #ifdef __BIG_ENDIAN
377 	return (__force __be32)x;
378 #else
379 	return (__force __be32)((__force u64)x >> 32);
380 #endif
381 }
382 
383 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
384 {
385 	struct net *net = dev_net(skb->dev);
386 	struct metadata_dst *tun_dst = NULL;
387 	struct ip_tunnel_net *itn;
388 	const struct iphdr *iph;
389 	struct ip_tunnel *tunnel;
390 
391 	if (tpi->proto == htons(ETH_P_TEB))
392 		itn = net_generic(net, gre_tap_net_id);
393 	else
394 		itn = net_generic(net, ipgre_net_id);
395 
396 	iph = ip_hdr(skb);
397 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
398 				  iph->saddr, iph->daddr, tpi->key);
399 
400 	if (tunnel) {
401 		skb_pop_mac_header(skb);
402 		if (tunnel->collect_md) {
403 			__be16 flags;
404 			__be64 tun_id;
405 
406 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
407 			tun_id = key_to_tunnel_id(tpi->key);
408 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
409 			if (!tun_dst)
410 				return PACKET_REJECT;
411 		}
412 
413 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
414 		return PACKET_RCVD;
415 	}
416 	return PACKET_REJECT;
417 }
418 
419 static int gre_rcv(struct sk_buff *skb)
420 {
421 	struct tnl_ptk_info tpi;
422 	bool csum_err = false;
423 
424 #ifdef CONFIG_NET_IPGRE_BROADCAST
425 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
426 		/* Looped back packet, drop it! */
427 		if (rt_is_output_route(skb_rtable(skb)))
428 			goto drop;
429 	}
430 #endif
431 
432 	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
433 		goto drop;
434 
435 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
436 		return 0;
437 
438 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
439 drop:
440 	kfree_skb(skb);
441 	return 0;
442 }
443 
444 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
445 			 __be16 proto, __be32 key, __be32 seq)
446 {
447 	struct gre_base_hdr *greh;
448 
449 	skb_push(skb, hdr_len);
450 
451 	skb_reset_transport_header(skb);
452 	greh = (struct gre_base_hdr *)skb->data;
453 	greh->flags = tnl_flags_to_gre_flags(flags);
454 	greh->protocol = proto;
455 
456 	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
457 		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
458 
459 		if (flags & TUNNEL_SEQ) {
460 			*ptr = seq;
461 			ptr--;
462 		}
463 		if (flags & TUNNEL_KEY) {
464 			*ptr = key;
465 			ptr--;
466 		}
467 		if (flags & TUNNEL_CSUM &&
468 		    !(skb_shinfo(skb)->gso_type &
469 		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
470 			*ptr = 0;
471 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
472 								 skb->len, 0));
473 		}
474 	}
475 }
476 
477 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
478 		       const struct iphdr *tnl_params,
479 		       __be16 proto)
480 {
481 	struct ip_tunnel *tunnel = netdev_priv(dev);
482 
483 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
484 		tunnel->o_seqno++;
485 
486 	/* Push GRE header. */
487 	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
488 		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
489 
490 	skb_set_inner_protocol(skb, proto);
491 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
492 }
493 
494 static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
495 					   bool csum)
496 {
497 	return iptunnel_handle_offloads(skb, csum,
498 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
499 }
500 
501 static struct rtable *gre_get_rt(struct sk_buff *skb,
502 				 struct net_device *dev,
503 				 struct flowi4 *fl,
504 				 const struct ip_tunnel_key *key)
505 {
506 	struct net *net = dev_net(dev);
507 
508 	memset(fl, 0, sizeof(*fl));
509 	fl->daddr = key->u.ipv4.dst;
510 	fl->saddr = key->u.ipv4.src;
511 	fl->flowi4_tos = RT_TOS(key->tos);
512 	fl->flowi4_mark = skb->mark;
513 	fl->flowi4_proto = IPPROTO_GRE;
514 
515 	return ip_route_output_key(net, fl);
516 }
517 
518 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
519 {
520 	struct ip_tunnel_info *tun_info;
521 	const struct ip_tunnel_key *key;
522 	struct flowi4 fl;
523 	struct rtable *rt;
524 	int min_headroom;
525 	int tunnel_hlen;
526 	__be16 df, flags;
527 	int err;
528 
529 	tun_info = skb_tunnel_info(skb);
530 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
531 		     ip_tunnel_info_af(tun_info) != AF_INET))
532 		goto err_free_skb;
533 
534 	key = &tun_info->key;
535 	rt = gre_get_rt(skb, dev, &fl, key);
536 	if (IS_ERR(rt))
537 		goto err_free_skb;
538 
539 	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
540 
541 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
542 			+ tunnel_hlen + sizeof(struct iphdr);
543 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
544 		int head_delta = SKB_DATA_ALIGN(min_headroom -
545 						skb_headroom(skb) +
546 						16);
547 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
548 				       0, GFP_ATOMIC);
549 		if (unlikely(err))
550 			goto err_free_rt;
551 	}
552 
553 	/* Push Tunnel header. */
554 	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
555 	if (IS_ERR(skb)) {
556 		skb = NULL;
557 		goto err_free_rt;
558 	}
559 
560 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
561 	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
562 		     tunnel_id_to_key(tun_info->key.tun_id), 0);
563 
564 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
565 	err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
566 			    key->u.ipv4.dst, IPPROTO_GRE,
567 			    key->tos, key->ttl, df, false);
568 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
569 	return;
570 
571 err_free_rt:
572 	ip_rt_put(rt);
573 err_free_skb:
574 	kfree_skb(skb);
575 	dev->stats.tx_dropped++;
576 }
577 
578 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
579 {
580 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
581 	struct rtable *rt;
582 	struct flowi4 fl4;
583 
584 	if (ip_tunnel_info_af(info) != AF_INET)
585 		return -EINVAL;
586 
587 	rt = gre_get_rt(skb, dev, &fl4, &info->key);
588 	if (IS_ERR(rt))
589 		return PTR_ERR(rt);
590 
591 	ip_rt_put(rt);
592 	info->key.u.ipv4.src = fl4.saddr;
593 	return 0;
594 }
595 
596 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
597 			      struct net_device *dev)
598 {
599 	struct ip_tunnel *tunnel = netdev_priv(dev);
600 	const struct iphdr *tnl_params;
601 
602 	if (tunnel->collect_md) {
603 		gre_fb_xmit(skb, dev);
604 		return NETDEV_TX_OK;
605 	}
606 
607 	if (dev->header_ops) {
608 		/* Need space for new headers */
609 		if (skb_cow_head(skb, dev->needed_headroom -
610 				      (tunnel->hlen + sizeof(struct iphdr))))
611 			goto free_skb;
612 
613 		tnl_params = (const struct iphdr *)skb->data;
614 
615 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
616 		 * to gre header.
617 		 */
618 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
619 		skb_reset_mac_header(skb);
620 	} else {
621 		if (skb_cow_head(skb, dev->needed_headroom))
622 			goto free_skb;
623 
624 		tnl_params = &tunnel->parms.iph;
625 	}
626 
627 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
628 	if (IS_ERR(skb))
629 		goto out;
630 
631 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
632 	return NETDEV_TX_OK;
633 
634 free_skb:
635 	kfree_skb(skb);
636 out:
637 	dev->stats.tx_dropped++;
638 	return NETDEV_TX_OK;
639 }
640 
641 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
642 				struct net_device *dev)
643 {
644 	struct ip_tunnel *tunnel = netdev_priv(dev);
645 
646 	if (tunnel->collect_md) {
647 		gre_fb_xmit(skb, dev);
648 		return NETDEV_TX_OK;
649 	}
650 
651 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
652 	if (IS_ERR(skb))
653 		goto out;
654 
655 	if (skb_cow_head(skb, dev->needed_headroom))
656 		goto free_skb;
657 
658 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
659 	return NETDEV_TX_OK;
660 
661 free_skb:
662 	kfree_skb(skb);
663 out:
664 	dev->stats.tx_dropped++;
665 	return NETDEV_TX_OK;
666 }
667 
668 static int ipgre_tunnel_ioctl(struct net_device *dev,
669 			      struct ifreq *ifr, int cmd)
670 {
671 	int err;
672 	struct ip_tunnel_parm p;
673 
674 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
675 		return -EFAULT;
676 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
677 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
678 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
679 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
680 			return -EINVAL;
681 	}
682 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
683 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
684 
685 	err = ip_tunnel_ioctl(dev, &p, cmd);
686 	if (err)
687 		return err;
688 
689 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
690 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
691 
692 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
693 		return -EFAULT;
694 	return 0;
695 }
696 
697 /* Nice toy. Unfortunately, useless in real life :-)
698    It allows to construct virtual multiprotocol broadcast "LAN"
699    over the Internet, provided multicast routing is tuned.
700 
701 
702    I have no idea whether this bicycle was invented before me,
703    so that I had to set ARPHRD_IPGRE to a random value.
704    I have an impression, that Cisco could make something similar,
705    but this feature is apparently missing in IOS<=11.2(8).
706 
707    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
708    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
709 
710    ping -t 255 224.66.66.66
711 
712    If nobody answers, mbone does not work.
713 
714    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
715    ip addr add 10.66.66.<somewhat>/24 dev Universe
716    ifconfig Universe up
717    ifconfig Universe add fe80::<Your_real_addr>/10
718    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
719    ftp 10.66.66.66
720    ...
721    ftp fec0:6666:6666::193.233.7.65
722    ...
723  */
724 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
725 			unsigned short type,
726 			const void *daddr, const void *saddr, unsigned int len)
727 {
728 	struct ip_tunnel *t = netdev_priv(dev);
729 	struct iphdr *iph;
730 	struct gre_base_hdr *greh;
731 
732 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
733 	greh = (struct gre_base_hdr *)(iph+1);
734 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
735 	greh->protocol = htons(type);
736 
737 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
738 
739 	/* Set the source hardware address. */
740 	if (saddr)
741 		memcpy(&iph->saddr, saddr, 4);
742 	if (daddr)
743 		memcpy(&iph->daddr, daddr, 4);
744 	if (iph->daddr)
745 		return t->hlen + sizeof(*iph);
746 
747 	return -(t->hlen + sizeof(*iph));
748 }
749 
750 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
751 {
752 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
753 	memcpy(haddr, &iph->saddr, 4);
754 	return 4;
755 }
756 
/* Link-layer header operations; installed by ipgre_tunnel_init() on tunnels
 * without a remote address and, with CONFIG_NET_IPGRE_BROADCAST, on
 * multicast tunnels.
 */
757 static const struct header_ops ipgre_header_ops = {
758 	.create	= ipgre_header,
759 	.parse	= ipgre_header_parse,
760 };
761 
762 #ifdef CONFIG_NET_IPGRE_BROADCAST
763 static int ipgre_open(struct net_device *dev)
764 {
765 	struct ip_tunnel *t = netdev_priv(dev);
766 
767 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
768 		struct flowi4 fl4;
769 		struct rtable *rt;
770 
771 		rt = ip_route_output_gre(t->net, &fl4,
772 					 t->parms.iph.daddr,
773 					 t->parms.iph.saddr,
774 					 t->parms.o_key,
775 					 RT_TOS(t->parms.iph.tos),
776 					 t->parms.link);
777 		if (IS_ERR(rt))
778 			return -EADDRNOTAVAIL;
779 		dev = rt->dst.dev;
780 		ip_rt_put(rt);
781 		if (!__in_dev_get_rtnl(dev))
782 			return -EADDRNOTAVAIL;
783 		t->mlink = dev->ifindex;
784 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
785 	}
786 	return 0;
787 }
788 
789 static int ipgre_close(struct net_device *dev)
790 {
791 	struct ip_tunnel *t = netdev_priv(dev);
792 
793 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
794 		struct in_device *in_dev;
795 		in_dev = inetdev_by_index(t->net, t->mlink);
796 		if (in_dev)
797 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
798 	}
799 	return 0;
800 }
801 #endif
802 
/* Device operations for plain GRE devices (installed by
 * ipgre_tunnel_setup()).  The open/stop hooks exist only when
 * CONFIG_NET_IPGRE_BROADCAST is enabled.
 */
803 static const struct net_device_ops ipgre_netdev_ops = {
804 	.ndo_init		= ipgre_tunnel_init,
805 	.ndo_uninit		= ip_tunnel_uninit,
806 #ifdef CONFIG_NET_IPGRE_BROADCAST
807 	.ndo_open		= ipgre_open,
808 	.ndo_stop		= ipgre_close,
809 #endif
810 	.ndo_start_xmit		= ipgre_xmit,
811 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
812 	.ndo_change_mtu		= ip_tunnel_change_mtu,
813 	.ndo_get_stats64	= ip_tunnel_get_stats64,
814 	.ndo_get_iflink		= ip_tunnel_get_iflink,
815 };
816 
/* Feature bits that __gre_tunnel_init() ORs into both dev->features and
 * dev->hw_features of every GRE device.
 */
817 #define GRE_FEATURES (NETIF_F_SG |		\
818 		      NETIF_F_FRAGLIST |	\
819 		      NETIF_F_HIGHDMA |		\
820 		      NETIF_F_HW_CSUM)
821 
822 static void ipgre_tunnel_setup(struct net_device *dev)
823 {
824 	dev->netdev_ops		= &ipgre_netdev_ops;
825 	dev->type		= ARPHRD_IPGRE;
826 	ip_tunnel_setup(dev, ipgre_net_id);
827 }
828 
829 static void __gre_tunnel_init(struct net_device *dev)
830 {
831 	struct ip_tunnel *tunnel;
832 	int t_hlen;
833 
834 	tunnel = netdev_priv(dev);
835 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
836 	tunnel->parms.iph.protocol = IPPROTO_GRE;
837 
838 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
839 
840 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
841 
842 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
843 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
844 
845 	dev->features		|= GRE_FEATURES;
846 	dev->hw_features	|= GRE_FEATURES;
847 
848 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
849 		/* TCP offload with GRE SEQ is not supported. */
850 		dev->features    |= NETIF_F_GSO_SOFTWARE;
851 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
852 		/* Can use a lockless transmit, unless we generate
853 		 * output sequences
854 		 */
855 		dev->features |= NETIF_F_LLTX;
856 	}
857 }
858 
859 static int ipgre_tunnel_init(struct net_device *dev)
860 {
861 	struct ip_tunnel *tunnel = netdev_priv(dev);
862 	struct iphdr *iph = &tunnel->parms.iph;
863 
864 	__gre_tunnel_init(dev);
865 
866 	memcpy(dev->dev_addr, &iph->saddr, 4);
867 	memcpy(dev->broadcast, &iph->daddr, 4);
868 
869 	dev->flags		= IFF_NOARP;
870 	netif_keep_dst(dev);
871 	dev->addr_len		= 4;
872 
873 	if (iph->daddr) {
874 #ifdef CONFIG_NET_IPGRE_BROADCAST
875 		if (ipv4_is_multicast(iph->daddr)) {
876 			if (!iph->saddr)
877 				return -EINVAL;
878 			dev->flags = IFF_BROADCAST;
879 			dev->header_ops = &ipgre_header_ops;
880 		}
881 #endif
882 	} else
883 		dev->header_ops = &ipgre_header_ops;
884 
885 	return ip_tunnel_init(dev);
886 }
887 
/* GRE protocol hooks: gre_rcv() handles received packets, gre_err() handles
 * ICMP errors.
 */
888 static const struct gre_protocol ipgre_protocol = {
889 	.handler     = gre_rcv,
890 	.err_handler = gre_err,
891 };
892 
/* Per-namespace init hook (see ipgre_net_ops): delegates to
 * ip_tunnel_init_net() with the plain-GRE net id and link ops, passing NULL
 * as the last argument, and returns its result.
 */
893 static int __net_init ipgre_init_net(struct net *net)
894 {
895 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
896 }
897 
898 static void __net_exit ipgre_exit_net(struct net *net)
899 {
900 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
901 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
902 }
903 
/* Per-network-namespace operations for plain GRE: init/exit hooks plus the
 * id and size of the per-namespace struct ip_tunnel_net.
 */
904 static struct pernet_operations ipgre_net_ops = {
905 	.init = ipgre_init_net,
906 	.exit = ipgre_exit_net,
907 	.id   = &ipgre_net_id,
908 	.size = sizeof(struct ip_tunnel_net),
909 };
910 
911 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
912 {
913 	__be16 flags;
914 
915 	if (!data)
916 		return 0;
917 
918 	flags = 0;
919 	if (data[IFLA_GRE_IFLAGS])
920 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
921 	if (data[IFLA_GRE_OFLAGS])
922 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
923 	if (flags & (GRE_VERSION|GRE_ROUTING))
924 		return -EINVAL;
925 
926 	return 0;
927 }
928 
929 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
930 {
931 	__be32 daddr;
932 
933 	if (tb[IFLA_ADDRESS]) {
934 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
935 			return -EINVAL;
936 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
937 			return -EADDRNOTAVAIL;
938 	}
939 
940 	if (!data)
941 		goto out;
942 
943 	if (data[IFLA_GRE_REMOTE]) {
944 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
945 		if (!daddr)
946 			return -EINVAL;
947 	}
948 
949 out:
950 	return ipgre_tunnel_validate(tb, data);
951 }
952 
953 static void ipgre_netlink_parms(struct net_device *dev,
954 				struct nlattr *data[],
955 				struct nlattr *tb[],
956 				struct ip_tunnel_parm *parms)
957 {
958 	memset(parms, 0, sizeof(*parms));
959 
960 	parms->iph.protocol = IPPROTO_GRE;
961 
962 	if (!data)
963 		return;
964 
965 	if (data[IFLA_GRE_LINK])
966 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
967 
968 	if (data[IFLA_GRE_IFLAGS])
969 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
970 
971 	if (data[IFLA_GRE_OFLAGS])
972 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
973 
974 	if (data[IFLA_GRE_IKEY])
975 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
976 
977 	if (data[IFLA_GRE_OKEY])
978 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
979 
980 	if (data[IFLA_GRE_LOCAL])
981 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
982 
983 	if (data[IFLA_GRE_REMOTE])
984 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
985 
986 	if (data[IFLA_GRE_TTL])
987 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
988 
989 	if (data[IFLA_GRE_TOS])
990 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
991 
992 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
993 		parms->iph.frag_off = htons(IP_DF);
994 
995 	if (data[IFLA_GRE_COLLECT_METADATA]) {
996 		struct ip_tunnel *t = netdev_priv(dev);
997 
998 		t->collect_md = true;
999 	}
1000 }
1001 
1002 /* This function returns true when ENCAP attributes are present in the nl msg */
1003 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1004 				      struct ip_tunnel_encap *ipencap)
1005 {
1006 	bool ret = false;
1007 
1008 	memset(ipencap, 0, sizeof(*ipencap));
1009 
1010 	if (!data)
1011 		return ret;
1012 
1013 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1014 		ret = true;
1015 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1016 	}
1017 
1018 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1019 		ret = true;
1020 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1021 	}
1022 
1023 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1024 		ret = true;
1025 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1026 	}
1027 
1028 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1029 		ret = true;
1030 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1031 	}
1032 
1033 	return ret;
1034 }
1035 
1036 static int gre_tap_init(struct net_device *dev)
1037 {
1038 	__gre_tunnel_init(dev);
1039 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1040 
1041 	return ip_tunnel_init(dev);
1042 }
1043 
/* Device operations for Ethernet-over-GRE devices (installed by
 * ipgre_tap_setup()).  Unlike ipgre_netdev_ops this table uses the generic
 * Ethernet MAC address helpers and provides ndo_fill_metadata_dst.
 */
1044 static const struct net_device_ops gre_tap_netdev_ops = {
1045 	.ndo_init		= gre_tap_init,
1046 	.ndo_uninit		= ip_tunnel_uninit,
1047 	.ndo_start_xmit		= gre_tap_xmit,
1048 	.ndo_set_mac_address 	= eth_mac_addr,
1049 	.ndo_validate_addr	= eth_validate_addr,
1050 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1051 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1052 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1053 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1054 };
1055 
/* Device setup callback for Ethernet-over-GRE devices: apply
 * ether_setup(), then install the gretap netdev ops, set
 * IFF_LIVE_ADDR_CHANGE and run the generic tunnel setup with the gretap
 * net id.
 */
1056 static void ipgre_tap_setup(struct net_device *dev)
1057 {
1058 	ether_setup(dev);
1059 	dev->netdev_ops		= &gre_tap_netdev_ops;
1060 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
1061 	ip_tunnel_setup(dev, gre_tap_net_id);
1062 }
1063 
1064 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1065 			 struct nlattr *tb[], struct nlattr *data[])
1066 {
1067 	struct ip_tunnel_parm p;
1068 	struct ip_tunnel_encap ipencap;
1069 
1070 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1071 		struct ip_tunnel *t = netdev_priv(dev);
1072 		int err = ip_tunnel_encap_setup(t, &ipencap);
1073 
1074 		if (err < 0)
1075 			return err;
1076 	}
1077 
1078 	ipgre_netlink_parms(dev, data, tb, &p);
1079 	return ip_tunnel_newlink(dev, tb, &p);
1080 }
1081 
1082 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1083 			    struct nlattr *data[])
1084 {
1085 	struct ip_tunnel_parm p;
1086 	struct ip_tunnel_encap ipencap;
1087 
1088 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1089 		struct ip_tunnel *t = netdev_priv(dev);
1090 		int err = ip_tunnel_encap_setup(t, &ipencap);
1091 
1092 		if (err < 0)
1093 			return err;
1094 	}
1095 
1096 	ipgre_netlink_parms(dev, data, tb, &p);
1097 	return ip_tunnel_changelink(dev, tb, &p);
1098 }
1099 
1100 static size_t ipgre_get_size(const struct net_device *dev)
1101 {
1102 	return
1103 		/* IFLA_GRE_LINK */
1104 		nla_total_size(4) +
1105 		/* IFLA_GRE_IFLAGS */
1106 		nla_total_size(2) +
1107 		/* IFLA_GRE_OFLAGS */
1108 		nla_total_size(2) +
1109 		/* IFLA_GRE_IKEY */
1110 		nla_total_size(4) +
1111 		/* IFLA_GRE_OKEY */
1112 		nla_total_size(4) +
1113 		/* IFLA_GRE_LOCAL */
1114 		nla_total_size(4) +
1115 		/* IFLA_GRE_REMOTE */
1116 		nla_total_size(4) +
1117 		/* IFLA_GRE_TTL */
1118 		nla_total_size(1) +
1119 		/* IFLA_GRE_TOS */
1120 		nla_total_size(1) +
1121 		/* IFLA_GRE_PMTUDISC */
1122 		nla_total_size(1) +
1123 		/* IFLA_GRE_ENCAP_TYPE */
1124 		nla_total_size(2) +
1125 		/* IFLA_GRE_ENCAP_FLAGS */
1126 		nla_total_size(2) +
1127 		/* IFLA_GRE_ENCAP_SPORT */
1128 		nla_total_size(2) +
1129 		/* IFLA_GRE_ENCAP_DPORT */
1130 		nla_total_size(2) +
1131 		/* IFLA_GRE_COLLECT_METADATA */
1132 		nla_total_size(0) +
1133 		0;
1134 }
1135 
1136 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1137 {
1138 	struct ip_tunnel *t = netdev_priv(dev);
1139 	struct ip_tunnel_parm *p = &t->parms;
1140 
1141 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1142 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1143 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1144 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1145 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1146 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1147 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1148 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1149 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1150 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1151 		       !!(p->iph.frag_off & htons(IP_DF))))
1152 		goto nla_put_failure;
1153 
1154 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1155 			t->encap.type) ||
1156 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1157 			 t->encap.sport) ||
1158 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1159 			 t->encap.dport) ||
1160 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1161 			t->encap.flags))
1162 		goto nla_put_failure;
1163 
1164 	if (t->collect_md) {
1165 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1166 			goto nla_put_failure;
1167 	}
1168 
1169 	return 0;
1170 
1171 nla_put_failure:
1172 	return -EMSGSIZE;
1173 }
1174 
1175 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1176 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1177 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1178 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1179 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1180 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1181 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1182 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1183 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1184 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1185 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1186 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1187 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1188 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1189 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1190 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1191 };
1192 
1193 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1194 	.kind		= "gre",
1195 	.maxtype	= IFLA_GRE_MAX,
1196 	.policy		= ipgre_policy,
1197 	.priv_size	= sizeof(struct ip_tunnel),
1198 	.setup		= ipgre_tunnel_setup,
1199 	.validate	= ipgre_tunnel_validate,
1200 	.newlink	= ipgre_newlink,
1201 	.changelink	= ipgre_changelink,
1202 	.dellink	= ip_tunnel_dellink,
1203 	.get_size	= ipgre_get_size,
1204 	.fill_info	= ipgre_fill_info,
1205 	.get_link_net	= ip_tunnel_get_link_net,
1206 };
1207 
1208 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1209 	.kind		= "gretap",
1210 	.maxtype	= IFLA_GRE_MAX,
1211 	.policy		= ipgre_policy,
1212 	.priv_size	= sizeof(struct ip_tunnel),
1213 	.setup		= ipgre_tap_setup,
1214 	.validate	= ipgre_tap_validate,
1215 	.newlink	= ipgre_newlink,
1216 	.changelink	= ipgre_changelink,
1217 	.dellink	= ip_tunnel_dellink,
1218 	.get_size	= ipgre_get_size,
1219 	.fill_info	= ipgre_fill_info,
1220 	.get_link_net	= ip_tunnel_get_link_net,
1221 };
1222 
1223 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1224 					u8 name_assign_type)
1225 {
1226 	struct nlattr *tb[IFLA_MAX + 1];
1227 	struct net_device *dev;
1228 	struct ip_tunnel *t;
1229 	int err;
1230 
1231 	memset(&tb, 0, sizeof(tb));
1232 
1233 	dev = rtnl_create_link(net, name, name_assign_type,
1234 			       &ipgre_tap_ops, tb);
1235 	if (IS_ERR(dev))
1236 		return dev;
1237 
1238 	/* Configure flow based GRE device. */
1239 	t = netdev_priv(dev);
1240 	t->collect_md = true;
1241 
1242 	err = ipgre_newlink(net, dev, tb, NULL);
1243 	if (err < 0)
1244 		goto out;
1245 	return dev;
1246 out:
1247 	free_netdev(dev);
1248 	return ERR_PTR(err);
1249 }
1250 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1251 
1252 static int __net_init ipgre_tap_init_net(struct net *net)
1253 {
1254 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1255 }
1256 
1257 static void __net_exit ipgre_tap_exit_net(struct net *net)
1258 {
1259 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1260 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1261 }
1262 
1263 static struct pernet_operations ipgre_tap_net_ops = {
1264 	.init = ipgre_tap_init_net,
1265 	.exit = ipgre_tap_exit_net,
1266 	.id   = &gre_tap_net_id,
1267 	.size = sizeof(struct ip_tunnel_net),
1268 };
1269 
1270 static int __init ipgre_init(void)
1271 {
1272 	int err;
1273 
1274 	pr_info("GRE over IPv4 tunneling driver\n");
1275 
1276 	err = register_pernet_device(&ipgre_net_ops);
1277 	if (err < 0)
1278 		return err;
1279 
1280 	err = register_pernet_device(&ipgre_tap_net_ops);
1281 	if (err < 0)
1282 		goto pnet_tap_faied;
1283 
1284 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1285 	if (err < 0) {
1286 		pr_info("%s: can't add protocol\n", __func__);
1287 		goto add_proto_failed;
1288 	}
1289 
1290 	err = rtnl_link_register(&ipgre_link_ops);
1291 	if (err < 0)
1292 		goto rtnl_link_failed;
1293 
1294 	err = rtnl_link_register(&ipgre_tap_ops);
1295 	if (err < 0)
1296 		goto tap_ops_failed;
1297 
1298 	return 0;
1299 
1300 tap_ops_failed:
1301 	rtnl_link_unregister(&ipgre_link_ops);
1302 rtnl_link_failed:
1303 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1304 add_proto_failed:
1305 	unregister_pernet_device(&ipgre_tap_net_ops);
1306 pnet_tap_faied:
1307 	unregister_pernet_device(&ipgre_net_ops);
1308 	return err;
1309 }
1310 
/*
 * Module exit: undo everything ipgre_init() registered, in the reverse
 * of the order it was registered there.
 */
static void __exit ipgre_fini(void)
{
	/* Link ops first: "gretap", then "gre". */
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	/* Then the GRE protocol handler. */
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	/* Finally the per-namespace operations: gretap, then gre. */
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}
1319 
/* Module entry and exit points. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Aliases for the two rtnl link kinds declared in this file's link ops. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
/* Aliases by network device name. */
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
1327