xref: /openbmc/linux/net/ipv4/ip_gre.c (revision b802fb99ae964681d1754428f67970911e0476e9)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
51 
52 #if IS_ENABLED(CONFIG_IPV6)
53 #include <net/ipv6.h>
54 #include <net/ip6_fib.h>
55 #include <net/ip6_route.h>
56 #endif
57 
58 /*
59    Problems & solutions
60    --------------------
61 
62    1. The most important issue is detecting local dead loops.
63    They would cause complete host lockup in transmit, which
64    would be "resolved" by stack overflow or, if queueing is enabled,
65    with infinite looping in net_bh.
66 
67    We cannot track such dead loops during route installation,
68    it is infeasible task. The most general solutions would be
69    to keep skb->encapsulation counter (sort of local ttl),
70    and silently drop packet when it expires. It is a good
71    solution, but it supposes maintaining new variable in ALL
72    skb, even if no tunneling is used.
73 
74    Current solution: xmit_recursion breaks dead loops. This is a percpu
75    counter, since when we enter the first ndo_xmit(), cpu migration is
76    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
77 
78    2. Networking dead loops would not kill routers, but would really
79    kill network. IP hop limit plays role of "t->recursion" in this case,
80    if we copy it from packet being encapsulated to upper header.
81    It is very good solution, but it introduces two problems:
82 
83    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
84      do not work over tunnels.
85    - traceroute does not work. I planned to relay ICMP from tunnel,
86      so that this problem would be solved and traceroute output
87      would be even more informative. This idea appeared to be wrong:
88      only Linux complies to rfc1812 now (yes, guys, Linux is the only
89      true router now :-)), all routers (at least, in neighbourhood of mine)
90      return only 8 bytes of payload. It is the end.
91 
92    Hence, if we want that OSPF worked or traceroute said something reasonable,
93    we should search for another solution.
94 
95    One of them is to parse packet trying to detect inner encapsulation
96    made by our node. It is difficult or even impossible, especially,
97    taking into account fragmentation. To be short, ttl is not a solution at all.
98 
99    Current solution: The solution was UNEXPECTEDLY SIMPLE.
100    We force DF flag on tunnels with preconfigured hop limit,
101    that is ALL. :-) Well, it does not remove the problem completely,
102    but exponential growth of network traffic is changed to linear
103    (branches, that exceed pmtu are pruned) and tunnel mtu
104    rapidly degrades to value <68, where looping stops.
105    Yes, it is not good if there exists a router in the loop,
106    which does not force DF, even when encapsulating packets have DF set.
107    But it is not our problem! Nobody could accuse us, we made
108    all that we could make. Even if it is your gated who injected
109    fatal route to network, even if it were you who configured
110    fatal static route: you are innocent. :-)
111 
112    Alexey Kuznetsov.
113  */
114 
115 static bool log_ecn_error = true;
116 module_param(log_ecn_error, bool, 0644);
117 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
118 
119 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
120 static int ipgre_tunnel_init(struct net_device *dev);
121 
122 static int ipgre_net_id __read_mostly;
123 static int gre_tap_net_id __read_mostly;
124 
125 static int ip_gre_calc_hlen(__be16 o_flags)
126 {
127 	int addend = 4;
128 
129 	if (o_flags & TUNNEL_CSUM)
130 		addend += 4;
131 	if (o_flags & TUNNEL_KEY)
132 		addend += 4;
133 	if (o_flags & TUNNEL_SEQ)
134 		addend += 4;
135 	return addend;
136 }
137 
138 static __be16 gre_flags_to_tnl_flags(__be16 flags)
139 {
140 	__be16 tflags = 0;
141 
142 	if (flags & GRE_CSUM)
143 		tflags |= TUNNEL_CSUM;
144 	if (flags & GRE_ROUTING)
145 		tflags |= TUNNEL_ROUTING;
146 	if (flags & GRE_KEY)
147 		tflags |= TUNNEL_KEY;
148 	if (flags & GRE_SEQ)
149 		tflags |= TUNNEL_SEQ;
150 	if (flags & GRE_STRICT)
151 		tflags |= TUNNEL_STRICT;
152 	if (flags & GRE_REC)
153 		tflags |= TUNNEL_REC;
154 	if (flags & GRE_VERSION)
155 		tflags |= TUNNEL_VERSION;
156 
157 	return tflags;
158 }
159 
160 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
161 {
162 	__be16 flags = 0;
163 
164 	if (tflags & TUNNEL_CSUM)
165 		flags |= GRE_CSUM;
166 	if (tflags & TUNNEL_ROUTING)
167 		flags |= GRE_ROUTING;
168 	if (tflags & TUNNEL_KEY)
169 		flags |= GRE_KEY;
170 	if (tflags & TUNNEL_SEQ)
171 		flags |= GRE_SEQ;
172 	if (tflags & TUNNEL_STRICT)
173 		flags |= GRE_STRICT;
174 	if (tflags & TUNNEL_REC)
175 		flags |= GRE_REC;
176 	if (tflags & TUNNEL_VERSION)
177 		flags |= GRE_VERSION;
178 
179 	return flags;
180 }
181 
182 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
183 			    bool *csum_err)
184 {
185 	const struct gre_base_hdr *greh;
186 	__be32 *options;
187 	int hdr_len;
188 
189 	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
190 		return -EINVAL;
191 
192 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
193 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
194 		return -EINVAL;
195 
196 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
197 	hdr_len = ip_gre_calc_hlen(tpi->flags);
198 
199 	if (!pskb_may_pull(skb, hdr_len))
200 		return -EINVAL;
201 
202 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
203 	tpi->proto = greh->protocol;
204 
205 	options = (__be32 *)(greh + 1);
206 	if (greh->flags & GRE_CSUM) {
207 		if (skb_checksum_simple_validate(skb)) {
208 			*csum_err = true;
209 			return -EINVAL;
210 		}
211 
212 		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
213 					 null_compute_pseudo);
214 		options++;
215 	}
216 
217 	if (greh->flags & GRE_KEY) {
218 		tpi->key = *options;
219 		options++;
220 	} else {
221 		tpi->key = 0;
222 	}
223 	if (unlikely(greh->flags & GRE_SEQ)) {
224 		tpi->seq = *options;
225 		options++;
226 	} else {
227 		tpi->seq = 0;
228 	}
229 	/* WCCP version 1 and 2 protocol decoding.
230 	 * - Change protocol to IP
231 	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
232 	 */
233 	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
234 		tpi->proto = htons(ETH_P_IP);
235 		if ((*(u8 *)options & 0xF0) != 0x40) {
236 			hdr_len += 4;
237 			if (!pskb_may_pull(skb, hdr_len))
238 				return -EINVAL;
239 		}
240 	}
241 	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
242 }
243 
244 static void ipgre_err(struct sk_buff *skb, u32 info,
245 		      const struct tnl_ptk_info *tpi)
246 {
247 
248 	/* All the routers (except for Linux) return only
249 	   8 bytes of packet payload. It means, that precise relaying of
250 	   ICMP in the real Internet is absolutely infeasible.
251 
252 	   Moreover, Cisco "wise men" put GRE key to the third word
253 	   in GRE header. It makes impossible maintaining even soft
254 	   state for keyed GRE tunnels with enabled checksum. Tell
255 	   them "thank you".
256 
257 	   Well, I wonder, rfc1812 was written by Cisco employee,
258 	   what the hell these idiots break standards established
259 	   by themselves???
260 	   */
261 	struct net *net = dev_net(skb->dev);
262 	struct ip_tunnel_net *itn;
263 	const struct iphdr *iph;
264 	const int type = icmp_hdr(skb)->type;
265 	const int code = icmp_hdr(skb)->code;
266 	struct ip_tunnel *t;
267 
268 	switch (type) {
269 	default:
270 	case ICMP_PARAMETERPROB:
271 		return;
272 
273 	case ICMP_DEST_UNREACH:
274 		switch (code) {
275 		case ICMP_SR_FAILED:
276 		case ICMP_PORT_UNREACH:
277 			/* Impossible event. */
278 			return;
279 		default:
280 			/* All others are translated to HOST_UNREACH.
281 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
282 			   I believe they are just ether pollution. --ANK
283 			 */
284 			break;
285 		}
286 		break;
287 
288 	case ICMP_TIME_EXCEEDED:
289 		if (code != ICMP_EXC_TTL)
290 			return;
291 		break;
292 
293 	case ICMP_REDIRECT:
294 		break;
295 	}
296 
297 	if (tpi->proto == htons(ETH_P_TEB))
298 		itn = net_generic(net, gre_tap_net_id);
299 	else
300 		itn = net_generic(net, ipgre_net_id);
301 
302 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
303 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
304 			     iph->daddr, iph->saddr, tpi->key);
305 
306 	if (!t)
307 		return;
308 
309 	if (t->parms.iph.daddr == 0 ||
310 	    ipv4_is_multicast(t->parms.iph.daddr))
311 		return;
312 
313 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
314 		return;
315 
316 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
317 		t->err_count++;
318 	else
319 		t->err_count = 1;
320 	t->err_time = jiffies;
321 }
322 
323 static void gre_err(struct sk_buff *skb, u32 info)
324 {
325 	/* All the routers (except for Linux) return only
326 	 * 8 bytes of packet payload. It means, that precise relaying of
327 	 * ICMP in the real Internet is absolutely infeasible.
328 	 *
329 	 * Moreover, Cisco "wise men" put GRE key to the third word
330 	 * in GRE header. It makes impossible maintaining even soft
331 	 * state for keyed
332 	 * GRE tunnels with enabled checksum. Tell them "thank you".
333 	 *
334 	 * Well, I wonder, rfc1812 was written by Cisco employee,
335 	 * what the hell these idiots break standards established
336 	 * by themselves???
337 	 */
338 
339 	const int type = icmp_hdr(skb)->type;
340 	const int code = icmp_hdr(skb)->code;
341 	struct tnl_ptk_info tpi;
342 	bool csum_err = false;
343 
344 	if (parse_gre_header(skb, &tpi, &csum_err)) {
345 		if (!csum_err)		/* ignore csum errors. */
346 			return;
347 	}
348 
349 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
350 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
351 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
352 		return;
353 	}
354 	if (type == ICMP_REDIRECT) {
355 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
356 			      IPPROTO_GRE, 0);
357 		return;
358 	}
359 
360 	ipgre_err(skb, info, &tpi);
361 }
362 
363 static __be64 key_to_tunnel_id(__be32 key)
364 {
365 #ifdef __BIG_ENDIAN
366 	return (__force __be64)((__force u32)key);
367 #else
368 	return (__force __be64)((__force u64)key << 32);
369 #endif
370 }
371 
372 /* Returns the least-significant 32 bits of a __be64. */
373 static __be32 tunnel_id_to_key(__be64 x)
374 {
375 #ifdef __BIG_ENDIAN
376 	return (__force __be32)x;
377 #else
378 	return (__force __be32)((__force u64)x >> 32);
379 #endif
380 }
381 
382 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
383 {
384 	struct net *net = dev_net(skb->dev);
385 	struct metadata_dst *tun_dst = NULL;
386 	struct ip_tunnel_net *itn;
387 	const struct iphdr *iph;
388 	struct ip_tunnel *tunnel;
389 
390 	if (tpi->proto == htons(ETH_P_TEB))
391 		itn = net_generic(net, gre_tap_net_id);
392 	else
393 		itn = net_generic(net, ipgre_net_id);
394 
395 	iph = ip_hdr(skb);
396 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
397 				  iph->saddr, iph->daddr, tpi->key);
398 
399 	if (tunnel) {
400 		skb_pop_mac_header(skb);
401 		if (tunnel->collect_md) {
402 			__be16 flags;
403 			__be64 tun_id;
404 
405 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
406 			tun_id = key_to_tunnel_id(tpi->key);
407 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
408 			if (!tun_dst)
409 				return PACKET_REJECT;
410 		}
411 
412 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
413 		return PACKET_RCVD;
414 	}
415 	return PACKET_REJECT;
416 }
417 
418 static int gre_rcv(struct sk_buff *skb)
419 {
420 	struct tnl_ptk_info tpi;
421 	bool csum_err = false;
422 
423 #ifdef CONFIG_NET_IPGRE_BROADCAST
424 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
425 		/* Looped back packet, drop it! */
426 		if (rt_is_output_route(skb_rtable(skb)))
427 			goto drop;
428 	}
429 #endif
430 
431 	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
432 		goto drop;
433 
434 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
435 		return 0;
436 
437 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
438 drop:
439 	kfree_skb(skb);
440 	return 0;
441 }
442 
443 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
444 			 __be16 proto, __be32 key, __be32 seq)
445 {
446 	struct gre_base_hdr *greh;
447 
448 	skb_push(skb, hdr_len);
449 
450 	skb_reset_transport_header(skb);
451 	greh = (struct gre_base_hdr *)skb->data;
452 	greh->flags = tnl_flags_to_gre_flags(flags);
453 	greh->protocol = proto;
454 
455 	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
456 		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
457 
458 		if (flags & TUNNEL_SEQ) {
459 			*ptr = seq;
460 			ptr--;
461 		}
462 		if (flags & TUNNEL_KEY) {
463 			*ptr = key;
464 			ptr--;
465 		}
466 		if (flags & TUNNEL_CSUM &&
467 		    !(skb_shinfo(skb)->gso_type &
468 		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
469 			*ptr = 0;
470 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
471 								 skb->len, 0));
472 		}
473 	}
474 }
475 
476 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
477 		       const struct iphdr *tnl_params,
478 		       __be16 proto)
479 {
480 	struct ip_tunnel *tunnel = netdev_priv(dev);
481 
482 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
483 		tunnel->o_seqno++;
484 
485 	/* Push GRE header. */
486 	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
487 		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
488 
489 	skb_set_inner_protocol(skb, proto);
490 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
491 }
492 
493 static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
494 					   bool csum)
495 {
496 	return iptunnel_handle_offloads(skb, csum,
497 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
498 }
499 
500 static struct rtable *gre_get_rt(struct sk_buff *skb,
501 				 struct net_device *dev,
502 				 struct flowi4 *fl,
503 				 const struct ip_tunnel_key *key)
504 {
505 	struct net *net = dev_net(dev);
506 
507 	memset(fl, 0, sizeof(*fl));
508 	fl->daddr = key->u.ipv4.dst;
509 	fl->saddr = key->u.ipv4.src;
510 	fl->flowi4_tos = RT_TOS(key->tos);
511 	fl->flowi4_mark = skb->mark;
512 	fl->flowi4_proto = IPPROTO_GRE;
513 
514 	return ip_route_output_key(net, fl);
515 }
516 
517 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
518 {
519 	struct ip_tunnel_info *tun_info;
520 	const struct ip_tunnel_key *key;
521 	struct flowi4 fl;
522 	struct rtable *rt;
523 	int min_headroom;
524 	int tunnel_hlen;
525 	__be16 df, flags;
526 	int err;
527 
528 	tun_info = skb_tunnel_info(skb);
529 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
530 		     ip_tunnel_info_af(tun_info) != AF_INET))
531 		goto err_free_skb;
532 
533 	key = &tun_info->key;
534 	rt = gre_get_rt(skb, dev, &fl, key);
535 	if (IS_ERR(rt))
536 		goto err_free_skb;
537 
538 	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
539 
540 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
541 			+ tunnel_hlen + sizeof(struct iphdr);
542 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
543 		int head_delta = SKB_DATA_ALIGN(min_headroom -
544 						skb_headroom(skb) +
545 						16);
546 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
547 				       0, GFP_ATOMIC);
548 		if (unlikely(err))
549 			goto err_free_rt;
550 	}
551 
552 	/* Push Tunnel header. */
553 	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
554 	if (IS_ERR(skb)) {
555 		skb = NULL;
556 		goto err_free_rt;
557 	}
558 
559 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
560 	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
561 		     tunnel_id_to_key(tun_info->key.tun_id), 0);
562 
563 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
564 
565 	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
566 		      key->tos, key->ttl, df, false);
567 	return;
568 
569 err_free_rt:
570 	ip_rt_put(rt);
571 err_free_skb:
572 	kfree_skb(skb);
573 	dev->stats.tx_dropped++;
574 }
575 
576 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
577 {
578 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
579 	struct rtable *rt;
580 	struct flowi4 fl4;
581 
582 	if (ip_tunnel_info_af(info) != AF_INET)
583 		return -EINVAL;
584 
585 	rt = gre_get_rt(skb, dev, &fl4, &info->key);
586 	if (IS_ERR(rt))
587 		return PTR_ERR(rt);
588 
589 	ip_rt_put(rt);
590 	info->key.u.ipv4.src = fl4.saddr;
591 	return 0;
592 }
593 
594 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
595 			      struct net_device *dev)
596 {
597 	struct ip_tunnel *tunnel = netdev_priv(dev);
598 	const struct iphdr *tnl_params;
599 
600 	if (tunnel->collect_md) {
601 		gre_fb_xmit(skb, dev);
602 		return NETDEV_TX_OK;
603 	}
604 
605 	if (dev->header_ops) {
606 		/* Need space for new headers */
607 		if (skb_cow_head(skb, dev->needed_headroom -
608 				      (tunnel->hlen + sizeof(struct iphdr))))
609 			goto free_skb;
610 
611 		tnl_params = (const struct iphdr *)skb->data;
612 
613 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
614 		 * to gre header.
615 		 */
616 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
617 		skb_reset_mac_header(skb);
618 	} else {
619 		if (skb_cow_head(skb, dev->needed_headroom))
620 			goto free_skb;
621 
622 		tnl_params = &tunnel->parms.iph;
623 	}
624 
625 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
626 	if (IS_ERR(skb))
627 		goto out;
628 
629 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
630 	return NETDEV_TX_OK;
631 
632 free_skb:
633 	kfree_skb(skb);
634 out:
635 	dev->stats.tx_dropped++;
636 	return NETDEV_TX_OK;
637 }
638 
639 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
640 				struct net_device *dev)
641 {
642 	struct ip_tunnel *tunnel = netdev_priv(dev);
643 
644 	if (tunnel->collect_md) {
645 		gre_fb_xmit(skb, dev);
646 		return NETDEV_TX_OK;
647 	}
648 
649 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
650 	if (IS_ERR(skb))
651 		goto out;
652 
653 	if (skb_cow_head(skb, dev->needed_headroom))
654 		goto free_skb;
655 
656 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
657 	return NETDEV_TX_OK;
658 
659 free_skb:
660 	kfree_skb(skb);
661 out:
662 	dev->stats.tx_dropped++;
663 	return NETDEV_TX_OK;
664 }
665 
666 static int ipgre_tunnel_ioctl(struct net_device *dev,
667 			      struct ifreq *ifr, int cmd)
668 {
669 	int err;
670 	struct ip_tunnel_parm p;
671 
672 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
673 		return -EFAULT;
674 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
675 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
676 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
677 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
678 			return -EINVAL;
679 	}
680 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
681 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
682 
683 	err = ip_tunnel_ioctl(dev, &p, cmd);
684 	if (err)
685 		return err;
686 
687 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
688 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
689 
690 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
691 		return -EFAULT;
692 	return 0;
693 }
694 
695 /* Nice toy. Unfortunately, useless in real life :-)
696    It allows to construct virtual multiprotocol broadcast "LAN"
697    over the Internet, provided multicast routing is tuned.
698 
699 
700    I have no idea was this bicycle invented before me,
701    so that I had to set ARPHRD_IPGRE to a random value.
702    I have an impression, that Cisco could make something similar,
703    but this feature is apparently missing in IOS<=11.2(8).
704 
705    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
706    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
707 
708    ping -t 255 224.66.66.66
709 
710    If nobody answers, mbone does not work.
711 
712    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
713    ip addr add 10.66.66.<somewhat>/24 dev Universe
714    ifconfig Universe up
715    ifconfig Universe add fe80::<Your_real_addr>/10
716    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
717    ftp 10.66.66.66
718    ...
719    ftp fec0:6666:6666::193.233.7.65
720    ...
721  */
722 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
723 			unsigned short type,
724 			const void *daddr, const void *saddr, unsigned int len)
725 {
726 	struct ip_tunnel *t = netdev_priv(dev);
727 	struct iphdr *iph;
728 	struct gre_base_hdr *greh;
729 
730 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
731 	greh = (struct gre_base_hdr *)(iph+1);
732 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
733 	greh->protocol = htons(type);
734 
735 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
736 
737 	/* Set the source hardware address. */
738 	if (saddr)
739 		memcpy(&iph->saddr, saddr, 4);
740 	if (daddr)
741 		memcpy(&iph->daddr, daddr, 4);
742 	if (iph->daddr)
743 		return t->hlen + sizeof(*iph);
744 
745 	return -(t->hlen + sizeof(*iph));
746 }
747 
748 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
749 {
750 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
751 	memcpy(haddr, &iph->saddr, 4);
752 	return 4;
753 }
754 
755 static const struct header_ops ipgre_header_ops = {
756 	.create	= ipgre_header,
757 	.parse	= ipgre_header_parse,
758 };
759 
760 #ifdef CONFIG_NET_IPGRE_BROADCAST
761 static int ipgre_open(struct net_device *dev)
762 {
763 	struct ip_tunnel *t = netdev_priv(dev);
764 
765 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
766 		struct flowi4 fl4;
767 		struct rtable *rt;
768 
769 		rt = ip_route_output_gre(t->net, &fl4,
770 					 t->parms.iph.daddr,
771 					 t->parms.iph.saddr,
772 					 t->parms.o_key,
773 					 RT_TOS(t->parms.iph.tos),
774 					 t->parms.link);
775 		if (IS_ERR(rt))
776 			return -EADDRNOTAVAIL;
777 		dev = rt->dst.dev;
778 		ip_rt_put(rt);
779 		if (!__in_dev_get_rtnl(dev))
780 			return -EADDRNOTAVAIL;
781 		t->mlink = dev->ifindex;
782 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
783 	}
784 	return 0;
785 }
786 
787 static int ipgre_close(struct net_device *dev)
788 {
789 	struct ip_tunnel *t = netdev_priv(dev);
790 
791 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
792 		struct in_device *in_dev;
793 		in_dev = inetdev_by_index(t->net, t->mlink);
794 		if (in_dev)
795 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
796 	}
797 	return 0;
798 }
799 #endif
800 
801 static const struct net_device_ops ipgre_netdev_ops = {
802 	.ndo_init		= ipgre_tunnel_init,
803 	.ndo_uninit		= ip_tunnel_uninit,
804 #ifdef CONFIG_NET_IPGRE_BROADCAST
805 	.ndo_open		= ipgre_open,
806 	.ndo_stop		= ipgre_close,
807 #endif
808 	.ndo_start_xmit		= ipgre_xmit,
809 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
810 	.ndo_change_mtu		= ip_tunnel_change_mtu,
811 	.ndo_get_stats64	= ip_tunnel_get_stats64,
812 	.ndo_get_iflink		= ip_tunnel_get_iflink,
813 };
814 
/* Feature bits every GRE device advertises (see __gre_tunnel_init()). */
#define GRE_FEATURES (NETIF_F_HW_CSUM |		\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_SG)
819 
820 static void ipgre_tunnel_setup(struct net_device *dev)
821 {
822 	dev->netdev_ops		= &ipgre_netdev_ops;
823 	dev->type		= ARPHRD_IPGRE;
824 	ip_tunnel_setup(dev, ipgre_net_id);
825 }
826 
827 static void __gre_tunnel_init(struct net_device *dev)
828 {
829 	struct ip_tunnel *tunnel;
830 	int t_hlen;
831 
832 	tunnel = netdev_priv(dev);
833 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
834 	tunnel->parms.iph.protocol = IPPROTO_GRE;
835 
836 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
837 
838 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
839 
840 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
841 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
842 
843 	dev->features		|= GRE_FEATURES;
844 	dev->hw_features	|= GRE_FEATURES;
845 
846 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
847 		/* TCP offload with GRE SEQ is not supported. */
848 		dev->features    |= NETIF_F_GSO_SOFTWARE;
849 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
850 		/* Can use a lockless transmit, unless we generate
851 		 * output sequences
852 		 */
853 		dev->features |= NETIF_F_LLTX;
854 	}
855 }
856 
857 static int ipgre_tunnel_init(struct net_device *dev)
858 {
859 	struct ip_tunnel *tunnel = netdev_priv(dev);
860 	struct iphdr *iph = &tunnel->parms.iph;
861 
862 	__gre_tunnel_init(dev);
863 
864 	memcpy(dev->dev_addr, &iph->saddr, 4);
865 	memcpy(dev->broadcast, &iph->daddr, 4);
866 
867 	dev->flags		= IFF_NOARP;
868 	netif_keep_dst(dev);
869 	dev->addr_len		= 4;
870 
871 	if (iph->daddr) {
872 #ifdef CONFIG_NET_IPGRE_BROADCAST
873 		if (ipv4_is_multicast(iph->daddr)) {
874 			if (!iph->saddr)
875 				return -EINVAL;
876 			dev->flags = IFF_BROADCAST;
877 			dev->header_ops = &ipgre_header_ops;
878 		}
879 #endif
880 	} else
881 		dev->header_ops = &ipgre_header_ops;
882 
883 	return ip_tunnel_init(dev);
884 }
885 
886 static const struct gre_protocol ipgre_protocol = {
887 	.handler     = gre_rcv,
888 	.err_handler = gre_err,
889 };
890 
891 static int __net_init ipgre_init_net(struct net *net)
892 {
893 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
894 }
895 
896 static void __net_exit ipgre_exit_net(struct net *net)
897 {
898 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
899 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
900 }
901 
902 static struct pernet_operations ipgre_net_ops = {
903 	.init = ipgre_init_net,
904 	.exit = ipgre_exit_net,
905 	.id   = &ipgre_net_id,
906 	.size = sizeof(struct ip_tunnel_net),
907 };
908 
909 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
910 {
911 	__be16 flags;
912 
913 	if (!data)
914 		return 0;
915 
916 	flags = 0;
917 	if (data[IFLA_GRE_IFLAGS])
918 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
919 	if (data[IFLA_GRE_OFLAGS])
920 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
921 	if (flags & (GRE_VERSION|GRE_ROUTING))
922 		return -EINVAL;
923 
924 	return 0;
925 }
926 
927 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
928 {
929 	__be32 daddr;
930 
931 	if (tb[IFLA_ADDRESS]) {
932 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
933 			return -EINVAL;
934 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
935 			return -EADDRNOTAVAIL;
936 	}
937 
938 	if (!data)
939 		goto out;
940 
941 	if (data[IFLA_GRE_REMOTE]) {
942 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
943 		if (!daddr)
944 			return -EINVAL;
945 	}
946 
947 out:
948 	return ipgre_tunnel_validate(tb, data);
949 }
950 
951 static void ipgre_netlink_parms(struct net_device *dev,
952 				struct nlattr *data[],
953 				struct nlattr *tb[],
954 				struct ip_tunnel_parm *parms)
955 {
956 	memset(parms, 0, sizeof(*parms));
957 
958 	parms->iph.protocol = IPPROTO_GRE;
959 
960 	if (!data)
961 		return;
962 
963 	if (data[IFLA_GRE_LINK])
964 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
965 
966 	if (data[IFLA_GRE_IFLAGS])
967 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
968 
969 	if (data[IFLA_GRE_OFLAGS])
970 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
971 
972 	if (data[IFLA_GRE_IKEY])
973 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
974 
975 	if (data[IFLA_GRE_OKEY])
976 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
977 
978 	if (data[IFLA_GRE_LOCAL])
979 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
980 
981 	if (data[IFLA_GRE_REMOTE])
982 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
983 
984 	if (data[IFLA_GRE_TTL])
985 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
986 
987 	if (data[IFLA_GRE_TOS])
988 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
989 
990 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
991 		parms->iph.frag_off = htons(IP_DF);
992 
993 	if (data[IFLA_GRE_COLLECT_METADATA]) {
994 		struct ip_tunnel *t = netdev_priv(dev);
995 
996 		t->collect_md = true;
997 	}
998 }
999 
1000 /* This function returns true when ENCAP attributes are present in the nl msg */
1001 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1002 				      struct ip_tunnel_encap *ipencap)
1003 {
1004 	bool ret = false;
1005 
1006 	memset(ipencap, 0, sizeof(*ipencap));
1007 
1008 	if (!data)
1009 		return ret;
1010 
1011 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1012 		ret = true;
1013 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1014 	}
1015 
1016 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1017 		ret = true;
1018 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1019 	}
1020 
1021 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1022 		ret = true;
1023 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1024 	}
1025 
1026 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1027 		ret = true;
1028 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1029 	}
1030 
1031 	return ret;
1032 }
1033 
1034 static int gre_tap_init(struct net_device *dev)
1035 {
1036 	__gre_tunnel_init(dev);
1037 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1038 
1039 	return ip_tunnel_init(dev);
1040 }
1041 
1042 static const struct net_device_ops gre_tap_netdev_ops = {
1043 	.ndo_init		= gre_tap_init,
1044 	.ndo_uninit		= ip_tunnel_uninit,
1045 	.ndo_start_xmit		= gre_tap_xmit,
1046 	.ndo_set_mac_address 	= eth_mac_addr,
1047 	.ndo_validate_addr	= eth_validate_addr,
1048 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1049 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1050 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1051 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1052 };
1053 
1054 static void ipgre_tap_setup(struct net_device *dev)
1055 {
1056 	ether_setup(dev);
1057 	dev->netdev_ops		= &gre_tap_netdev_ops;
1058 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
1059 	ip_tunnel_setup(dev, gre_tap_net_id);
1060 }
1061 
1062 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1063 			 struct nlattr *tb[], struct nlattr *data[])
1064 {
1065 	struct ip_tunnel_parm p;
1066 	struct ip_tunnel_encap ipencap;
1067 
1068 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1069 		struct ip_tunnel *t = netdev_priv(dev);
1070 		int err = ip_tunnel_encap_setup(t, &ipencap);
1071 
1072 		if (err < 0)
1073 			return err;
1074 	}
1075 
1076 	ipgre_netlink_parms(dev, data, tb, &p);
1077 	return ip_tunnel_newlink(dev, tb, &p);
1078 }
1079 
1080 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1081 			    struct nlattr *data[])
1082 {
1083 	struct ip_tunnel_parm p;
1084 	struct ip_tunnel_encap ipencap;
1085 
1086 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1087 		struct ip_tunnel *t = netdev_priv(dev);
1088 		int err = ip_tunnel_encap_setup(t, &ipencap);
1089 
1090 		if (err < 0)
1091 			return err;
1092 	}
1093 
1094 	ipgre_netlink_parms(dev, data, tb, &p);
1095 	return ip_tunnel_changelink(dev, tb, &p);
1096 }
1097 
1098 static size_t ipgre_get_size(const struct net_device *dev)
1099 {
1100 	return
1101 		/* IFLA_GRE_LINK */
1102 		nla_total_size(4) +
1103 		/* IFLA_GRE_IFLAGS */
1104 		nla_total_size(2) +
1105 		/* IFLA_GRE_OFLAGS */
1106 		nla_total_size(2) +
1107 		/* IFLA_GRE_IKEY */
1108 		nla_total_size(4) +
1109 		/* IFLA_GRE_OKEY */
1110 		nla_total_size(4) +
1111 		/* IFLA_GRE_LOCAL */
1112 		nla_total_size(4) +
1113 		/* IFLA_GRE_REMOTE */
1114 		nla_total_size(4) +
1115 		/* IFLA_GRE_TTL */
1116 		nla_total_size(1) +
1117 		/* IFLA_GRE_TOS */
1118 		nla_total_size(1) +
1119 		/* IFLA_GRE_PMTUDISC */
1120 		nla_total_size(1) +
1121 		/* IFLA_GRE_ENCAP_TYPE */
1122 		nla_total_size(2) +
1123 		/* IFLA_GRE_ENCAP_FLAGS */
1124 		nla_total_size(2) +
1125 		/* IFLA_GRE_ENCAP_SPORT */
1126 		nla_total_size(2) +
1127 		/* IFLA_GRE_ENCAP_DPORT */
1128 		nla_total_size(2) +
1129 		/* IFLA_GRE_COLLECT_METADATA */
1130 		nla_total_size(0) +
1131 		0;
1132 }
1133 
1134 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1135 {
1136 	struct ip_tunnel *t = netdev_priv(dev);
1137 	struct ip_tunnel_parm *p = &t->parms;
1138 
1139 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1140 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1141 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1142 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1143 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1144 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1145 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1146 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1147 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1148 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1149 		       !!(p->iph.frag_off & htons(IP_DF))))
1150 		goto nla_put_failure;
1151 
1152 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1153 			t->encap.type) ||
1154 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1155 			 t->encap.sport) ||
1156 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1157 			 t->encap.dport) ||
1158 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1159 			t->encap.flags))
1160 		goto nla_put_failure;
1161 
1162 	if (t->collect_md) {
1163 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1164 			goto nla_put_failure;
1165 	}
1166 
1167 	return 0;
1168 
1169 nla_put_failure:
1170 	return -EMSGSIZE;
1171 }
1172 
1173 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1174 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1175 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1176 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1177 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1178 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1179 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1180 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1181 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1182 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1183 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1184 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1185 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1186 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1187 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1188 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1189 };
1190 
1191 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1192 	.kind		= "gre",
1193 	.maxtype	= IFLA_GRE_MAX,
1194 	.policy		= ipgre_policy,
1195 	.priv_size	= sizeof(struct ip_tunnel),
1196 	.setup		= ipgre_tunnel_setup,
1197 	.validate	= ipgre_tunnel_validate,
1198 	.newlink	= ipgre_newlink,
1199 	.changelink	= ipgre_changelink,
1200 	.dellink	= ip_tunnel_dellink,
1201 	.get_size	= ipgre_get_size,
1202 	.fill_info	= ipgre_fill_info,
1203 	.get_link_net	= ip_tunnel_get_link_net,
1204 };
1205 
1206 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1207 	.kind		= "gretap",
1208 	.maxtype	= IFLA_GRE_MAX,
1209 	.policy		= ipgre_policy,
1210 	.priv_size	= sizeof(struct ip_tunnel),
1211 	.setup		= ipgre_tap_setup,
1212 	.validate	= ipgre_tap_validate,
1213 	.newlink	= ipgre_newlink,
1214 	.changelink	= ipgre_changelink,
1215 	.dellink	= ip_tunnel_dellink,
1216 	.get_size	= ipgre_get_size,
1217 	.fill_info	= ipgre_fill_info,
1218 	.get_link_net	= ip_tunnel_get_link_net,
1219 };
1220 
1221 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1222 					u8 name_assign_type)
1223 {
1224 	struct nlattr *tb[IFLA_MAX + 1];
1225 	struct net_device *dev;
1226 	struct ip_tunnel *t;
1227 	int err;
1228 
1229 	memset(&tb, 0, sizeof(tb));
1230 
1231 	dev = rtnl_create_link(net, name, name_assign_type,
1232 			       &ipgre_tap_ops, tb);
1233 	if (IS_ERR(dev))
1234 		return dev;
1235 
1236 	/* Configure flow based GRE device. */
1237 	t = netdev_priv(dev);
1238 	t->collect_md = true;
1239 
1240 	err = ipgre_newlink(net, dev, tb, NULL);
1241 	if (err < 0)
1242 		goto out;
1243 	return dev;
1244 out:
1245 	free_netdev(dev);
1246 	return ERR_PTR(err);
1247 }
1248 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1249 
1250 static int __net_init ipgre_tap_init_net(struct net *net)
1251 {
1252 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1253 }
1254 
1255 static void __net_exit ipgre_tap_exit_net(struct net *net)
1256 {
1257 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1258 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1259 }
1260 
1261 static struct pernet_operations ipgre_tap_net_ops = {
1262 	.init = ipgre_tap_init_net,
1263 	.exit = ipgre_tap_exit_net,
1264 	.id   = &gre_tap_net_id,
1265 	.size = sizeof(struct ip_tunnel_net),
1266 };
1267 
1268 static int __init ipgre_init(void)
1269 {
1270 	int err;
1271 
1272 	pr_info("GRE over IPv4 tunneling driver\n");
1273 
1274 	err = register_pernet_device(&ipgre_net_ops);
1275 	if (err < 0)
1276 		return err;
1277 
1278 	err = register_pernet_device(&ipgre_tap_net_ops);
1279 	if (err < 0)
1280 		goto pnet_tap_faied;
1281 
1282 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1283 	if (err < 0) {
1284 		pr_info("%s: can't add protocol\n", __func__);
1285 		goto add_proto_failed;
1286 	}
1287 
1288 	err = rtnl_link_register(&ipgre_link_ops);
1289 	if (err < 0)
1290 		goto rtnl_link_failed;
1291 
1292 	err = rtnl_link_register(&ipgre_tap_ops);
1293 	if (err < 0)
1294 		goto tap_ops_failed;
1295 
1296 	return 0;
1297 
1298 tap_ops_failed:
1299 	rtnl_link_unregister(&ipgre_link_ops);
1300 rtnl_link_failed:
1301 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1302 add_proto_failed:
1303 	unregister_pernet_device(&ipgre_tap_net_ops);
1304 pnet_tap_faied:
1305 	unregister_pernet_device(&ipgre_net_ops);
1306 	return err;
1307 }
1308 
1309 static void __exit ipgre_fini(void)
1310 {
1311 	rtnl_link_unregister(&ipgre_tap_ops);
1312 	rtnl_link_unregister(&ipgre_link_ops);
1313 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1314 	unregister_pernet_device(&ipgre_tap_net_ops);
1315 	unregister_pernet_device(&ipgre_net_ops);
1316 }
1317 
1318 module_init(ipgre_init);
1319 module_exit(ipgre_fini);
1320 MODULE_LICENSE("GPL");
1321 MODULE_ALIAS_RTNL_LINK("gre");
1322 MODULE_ALIAS_RTNL_LINK("gretap");
1323 MODULE_ALIAS_NETDEV("gre0");
1324 MODULE_ALIAS_NETDEV("gretap0");
1325