xref: /openbmc/linux/net/ipv4/ip_gre.c (revision efdbd7345f8836f7495f3ac6ee237d86cb3bb6b0)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/if_vlan.h>
29 #include <linux/init.h>
30 #include <linux/in6.h>
31 #include <linux/inetdevice.h>
32 #include <linux/igmp.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_ether.h>
36 
37 #include <net/sock.h>
38 #include <net/ip.h>
39 #include <net/icmp.h>
40 #include <net/protocol.h>
41 #include <net/ip_tunnels.h>
42 #include <net/arp.h>
43 #include <net/checksum.h>
44 #include <net/dsfield.h>
45 #include <net/inet_ecn.h>
46 #include <net/xfrm.h>
47 #include <net/net_namespace.h>
48 #include <net/netns/generic.h>
49 #include <net/rtnetlink.h>
50 #include <net/gre.h>
51 #include <net/dst_metadata.h>
52 
53 #if IS_ENABLED(CONFIG_IPV6)
54 #include <net/ipv6.h>
55 #include <net/ip6_fib.h>
56 #include <net/ip6_route.h>
57 #endif
58 
59 /*
60    Problems & solutions
61    --------------------
62 
63    1. The most important issue is detecting local dead loops.
64    They would cause complete host lockup in transmit, which
65    would be "resolved" by stack overflow or, if queueing is enabled,
66    with infinite looping in net_bh.
67 
   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep skb->encapsulation counter (sort of local ttl),
71    and silently drop packet when it expires. It is a good
72    solution, but it supposes maintaining new variable in ALL
73    skb, even if no tunneling is used.
74 
75    Current solution: xmit_recursion breaks dead loops. This is a percpu
76    counter, since when we enter the first ndo_xmit(), cpu migration is
77    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
78 
79    2. Networking dead loops would not kill routers, but would really
80    kill network. IP hop limit plays role of "t->recursion" in this case,
81    if we copy it from packet being encapsulated to upper header.
82    It is very good solution, but it introduces two problems:
83 
84    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
85      do not work over tunnels.
86    - traceroute does not work. I planned to relay ICMP from tunnel,
87      so that this problem would be solved and traceroute output
88      would even more informative. This idea appeared to be wrong:
89      only Linux complies to rfc1812 now (yes, guys, Linux is the only
90      true router now :-)), all routers (at least, in neighbourhood of mine)
91      return only 8 bytes of payload. It is the end.
92 
93    Hence, if we want that OSPF worked or traceroute said something reasonable,
94    we should search for another solution.
95 
   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking into account fragmentation. In short, ttl is not a solution at all.
99 
100    Current solution: The solution was UNEXPECTEDLY SIMPLE.
101    We force DF flag on tunnels with preconfigured hop limit,
102    that is ALL. :-) Well, it does not remove the problem completely,
103    but exponential growth of network traffic is changed to linear
104    (branches, that exceed pmtu are pruned) and tunnel mtu
105    rapidly degrades to value <68, where looping stops.
106    Yes, it is not good if there exists a router in the loop,
107    which does not force DF, even when encapsulating packets have DF set.
108    But it is not our problem! Nobody could accuse us, we made
109    all that we could make. Even if it is your gated who injected
110    fatal route to network, even if it were you who configured
111    fatal static route: you are innocent. :-)
112 
113    Alexey Kuznetsov.
114  */
115 
/* Module parameter: when true, packets received with corrupted ECN are
 * logged. The value is handed to ip_tunnel_rcv() on the receive path.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Forward declarations: ipgre_tunnel_init() is defined further down;
 * ipgre_link_ops is presumably initialised later in the file (not verified
 * from this section).
 */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

/* Per-namespace ids used with net_generic() to find the ip_tunnel_net of the
 * plain GRE devices and of the GRE tap (ETH_P_TEB) devices respectively.
 */
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
125 
126 static int ip_gre_calc_hlen(__be16 o_flags)
127 {
128 	int addend = 4;
129 
130 	if (o_flags & TUNNEL_CSUM)
131 		addend += 4;
132 	if (o_flags & TUNNEL_KEY)
133 		addend += 4;
134 	if (o_flags & TUNNEL_SEQ)
135 		addend += 4;
136 	return addend;
137 }
138 
139 static __be16 gre_flags_to_tnl_flags(__be16 flags)
140 {
141 	__be16 tflags = 0;
142 
143 	if (flags & GRE_CSUM)
144 		tflags |= TUNNEL_CSUM;
145 	if (flags & GRE_ROUTING)
146 		tflags |= TUNNEL_ROUTING;
147 	if (flags & GRE_KEY)
148 		tflags |= TUNNEL_KEY;
149 	if (flags & GRE_SEQ)
150 		tflags |= TUNNEL_SEQ;
151 	if (flags & GRE_STRICT)
152 		tflags |= TUNNEL_STRICT;
153 	if (flags & GRE_REC)
154 		tflags |= TUNNEL_REC;
155 	if (flags & GRE_VERSION)
156 		tflags |= TUNNEL_VERSION;
157 
158 	return tflags;
159 }
160 
161 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
162 {
163 	__be16 flags = 0;
164 
165 	if (tflags & TUNNEL_CSUM)
166 		flags |= GRE_CSUM;
167 	if (tflags & TUNNEL_ROUTING)
168 		flags |= GRE_ROUTING;
169 	if (tflags & TUNNEL_KEY)
170 		flags |= GRE_KEY;
171 	if (tflags & TUNNEL_SEQ)
172 		flags |= GRE_SEQ;
173 	if (tflags & TUNNEL_STRICT)
174 		flags |= GRE_STRICT;
175 	if (tflags & TUNNEL_REC)
176 		flags |= GRE_REC;
177 	if (tflags & TUNNEL_VERSION)
178 		flags |= GRE_VERSION;
179 
180 	return flags;
181 }
182 
183 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
184 			    bool *csum_err)
185 {
186 	const struct gre_base_hdr *greh;
187 	__be32 *options;
188 	int hdr_len;
189 
190 	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
191 		return -EINVAL;
192 
193 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
194 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
195 		return -EINVAL;
196 
197 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
198 	hdr_len = ip_gre_calc_hlen(tpi->flags);
199 
200 	if (!pskb_may_pull(skb, hdr_len))
201 		return -EINVAL;
202 
203 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
204 	tpi->proto = greh->protocol;
205 
206 	options = (__be32 *)(greh + 1);
207 	if (greh->flags & GRE_CSUM) {
208 		if (skb_checksum_simple_validate(skb)) {
209 			*csum_err = true;
210 			return -EINVAL;
211 		}
212 
213 		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
214 					 null_compute_pseudo);
215 		options++;
216 	}
217 
218 	if (greh->flags & GRE_KEY) {
219 		tpi->key = *options;
220 		options++;
221 	} else {
222 		tpi->key = 0;
223 	}
224 	if (unlikely(greh->flags & GRE_SEQ)) {
225 		tpi->seq = *options;
226 		options++;
227 	} else {
228 		tpi->seq = 0;
229 	}
230 	/* WCCP version 1 and 2 protocol decoding.
231 	 * - Change protocol to IP
232 	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
233 	 */
234 	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
235 		tpi->proto = htons(ETH_P_IP);
236 		if ((*(u8 *)options & 0xF0) != 0x40) {
237 			hdr_len += 4;
238 			if (!pskb_may_pull(skb, hdr_len))
239 				return -EINVAL;
240 		}
241 	}
242 	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
243 }
244 
245 static void ipgre_err(struct sk_buff *skb, u32 info,
246 		      const struct tnl_ptk_info *tpi)
247 {
248 
249 	/* All the routers (except for Linux) return only
250 	   8 bytes of packet payload. It means, that precise relaying of
251 	   ICMP in the real Internet is absolutely infeasible.
252 
253 	   Moreover, Cisco "wise men" put GRE key to the third word
254 	   in GRE header. It makes impossible maintaining even soft
255 	   state for keyed GRE tunnels with enabled checksum. Tell
256 	   them "thank you".
257 
258 	   Well, I wonder, rfc1812 was written by Cisco employee,
259 	   what the hell these idiots break standards established
260 	   by themselves???
261 	   */
262 	struct net *net = dev_net(skb->dev);
263 	struct ip_tunnel_net *itn;
264 	const struct iphdr *iph;
265 	const int type = icmp_hdr(skb)->type;
266 	const int code = icmp_hdr(skb)->code;
267 	struct ip_tunnel *t;
268 
269 	switch (type) {
270 	default:
271 	case ICMP_PARAMETERPROB:
272 		return;
273 
274 	case ICMP_DEST_UNREACH:
275 		switch (code) {
276 		case ICMP_SR_FAILED:
277 		case ICMP_PORT_UNREACH:
278 			/* Impossible event. */
279 			return;
280 		default:
281 			/* All others are translated to HOST_UNREACH.
282 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
283 			   I believe they are just ether pollution. --ANK
284 			 */
285 			break;
286 		}
287 		break;
288 
289 	case ICMP_TIME_EXCEEDED:
290 		if (code != ICMP_EXC_TTL)
291 			return;
292 		break;
293 
294 	case ICMP_REDIRECT:
295 		break;
296 	}
297 
298 	if (tpi->proto == htons(ETH_P_TEB))
299 		itn = net_generic(net, gre_tap_net_id);
300 	else
301 		itn = net_generic(net, ipgre_net_id);
302 
303 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
304 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
305 			     iph->daddr, iph->saddr, tpi->key);
306 
307 	if (!t)
308 		return;
309 
310 	if (t->parms.iph.daddr == 0 ||
311 	    ipv4_is_multicast(t->parms.iph.daddr))
312 		return;
313 
314 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
315 		return;
316 
317 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
318 		t->err_count++;
319 	else
320 		t->err_count = 1;
321 	t->err_time = jiffies;
322 }
323 
324 static void gre_err(struct sk_buff *skb, u32 info)
325 {
326 	/* All the routers (except for Linux) return only
327 	 * 8 bytes of packet payload. It means, that precise relaying of
328 	 * ICMP in the real Internet is absolutely infeasible.
329 	 *
330 	 * Moreover, Cisco "wise men" put GRE key to the third word
331 	 * in GRE header. It makes impossible maintaining even soft
332 	 * state for keyed
333 	 * GRE tunnels with enabled checksum. Tell them "thank you".
334 	 *
335 	 * Well, I wonder, rfc1812 was written by Cisco employee,
336 	 * what the hell these idiots break standards established
337 	 * by themselves???
338 	 */
339 
340 	const int type = icmp_hdr(skb)->type;
341 	const int code = icmp_hdr(skb)->code;
342 	struct tnl_ptk_info tpi;
343 	bool csum_err = false;
344 
345 	if (parse_gre_header(skb, &tpi, &csum_err)) {
346 		if (!csum_err)		/* ignore csum errors. */
347 			return;
348 	}
349 
350 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
351 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
352 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
353 		return;
354 	}
355 	if (type == ICMP_REDIRECT) {
356 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
357 			      IPPROTO_GRE, 0);
358 		return;
359 	}
360 
361 	ipgre_err(skb, info, &tpi);
362 }
363 
364 static __be64 key_to_tunnel_id(__be32 key)
365 {
366 #ifdef __BIG_ENDIAN
367 	return (__force __be64)((__force u32)key);
368 #else
369 	return (__force __be64)((__force u64)key << 32);
370 #endif
371 }
372 
373 /* Returns the least-significant 32 bits of a __be64. */
374 static __be32 tunnel_id_to_key(__be64 x)
375 {
376 #ifdef __BIG_ENDIAN
377 	return (__force __be32)x;
378 #else
379 	return (__force __be32)((__force u64)x >> 32);
380 #endif
381 }
382 
383 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
384 {
385 	struct net *net = dev_net(skb->dev);
386 	struct metadata_dst *tun_dst = NULL;
387 	struct ip_tunnel_net *itn;
388 	const struct iphdr *iph;
389 	struct ip_tunnel *tunnel;
390 
391 	if (tpi->proto == htons(ETH_P_TEB))
392 		itn = net_generic(net, gre_tap_net_id);
393 	else
394 		itn = net_generic(net, ipgre_net_id);
395 
396 	iph = ip_hdr(skb);
397 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
398 				  iph->saddr, iph->daddr, tpi->key);
399 
400 	if (tunnel) {
401 		skb_pop_mac_header(skb);
402 		if (tunnel->collect_md) {
403 			__be16 flags;
404 			__be64 tun_id;
405 
406 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
407 			tun_id = key_to_tunnel_id(tpi->key);
408 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
409 			if (!tun_dst)
410 				return PACKET_REJECT;
411 		}
412 
413 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
414 		return PACKET_RCVD;
415 	}
416 	return PACKET_REJECT;
417 }
418 
419 static int gre_rcv(struct sk_buff *skb)
420 {
421 	struct tnl_ptk_info tpi;
422 	bool csum_err = false;
423 
424 #ifdef CONFIG_NET_IPGRE_BROADCAST
425 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
426 		/* Looped back packet, drop it! */
427 		if (rt_is_output_route(skb_rtable(skb)))
428 			goto drop;
429 	}
430 #endif
431 
432 	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
433 		goto drop;
434 
435 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
436 		return 0;
437 
438 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
439 drop:
440 	kfree_skb(skb);
441 	return 0;
442 }
443 
444 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
445 			 __be16 proto, __be32 key, __be32 seq)
446 {
447 	struct gre_base_hdr *greh;
448 
449 	skb_push(skb, hdr_len);
450 
451 	skb_reset_transport_header(skb);
452 	greh = (struct gre_base_hdr *)skb->data;
453 	greh->flags = tnl_flags_to_gre_flags(flags);
454 	greh->protocol = proto;
455 
456 	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
457 		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
458 
459 		if (flags & TUNNEL_SEQ) {
460 			*ptr = seq;
461 			ptr--;
462 		}
463 		if (flags & TUNNEL_KEY) {
464 			*ptr = key;
465 			ptr--;
466 		}
467 		if (flags & TUNNEL_CSUM &&
468 		    !(skb_shinfo(skb)->gso_type &
469 		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
470 			*ptr = 0;
471 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
472 								 skb->len, 0));
473 		}
474 	}
475 }
476 
477 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
478 		       const struct iphdr *tnl_params,
479 		       __be16 proto)
480 {
481 	struct ip_tunnel *tunnel = netdev_priv(dev);
482 
483 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
484 		tunnel->o_seqno++;
485 
486 	/* Push GRE header. */
487 	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
488 		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
489 
490 	skb_set_inner_protocol(skb, proto);
491 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
492 }
493 
494 static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
495 					   bool csum)
496 {
497 	return iptunnel_handle_offloads(skb, csum,
498 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
499 }
500 
501 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
502 {
503 	struct ip_tunnel_info *tun_info;
504 	struct net *net = dev_net(dev);
505 	const struct ip_tunnel_key *key;
506 	struct flowi4 fl;
507 	struct rtable *rt;
508 	int min_headroom;
509 	int tunnel_hlen;
510 	__be16 df, flags;
511 	int err;
512 
513 	tun_info = skb_tunnel_info(skb);
514 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
515 		     ip_tunnel_info_af(tun_info) != AF_INET))
516 		goto err_free_skb;
517 
518 	key = &tun_info->key;
519 	memset(&fl, 0, sizeof(fl));
520 	fl.daddr = key->u.ipv4.dst;
521 	fl.saddr = key->u.ipv4.src;
522 	fl.flowi4_tos = RT_TOS(key->tos);
523 	fl.flowi4_mark = skb->mark;
524 	fl.flowi4_proto = IPPROTO_GRE;
525 
526 	rt = ip_route_output_key(net, &fl);
527 	if (IS_ERR(rt))
528 		goto err_free_skb;
529 
530 	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
531 
532 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
533 			+ tunnel_hlen + sizeof(struct iphdr);
534 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
535 		int head_delta = SKB_DATA_ALIGN(min_headroom -
536 						skb_headroom(skb) +
537 						16);
538 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
539 				       0, GFP_ATOMIC);
540 		if (unlikely(err))
541 			goto err_free_rt;
542 	}
543 
544 	/* Push Tunnel header. */
545 	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
546 	if (IS_ERR(skb)) {
547 		skb = NULL;
548 		goto err_free_rt;
549 	}
550 
551 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
552 	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
553 		     tunnel_id_to_key(tun_info->key.tun_id), 0);
554 
555 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
556 	err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
557 			    key->u.ipv4.dst, IPPROTO_GRE,
558 			    key->tos, key->ttl, df, false);
559 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
560 	return;
561 
562 err_free_rt:
563 	ip_rt_put(rt);
564 err_free_skb:
565 	kfree_skb(skb);
566 	dev->stats.tx_dropped++;
567 }
568 
569 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
570 			      struct net_device *dev)
571 {
572 	struct ip_tunnel *tunnel = netdev_priv(dev);
573 	const struct iphdr *tnl_params;
574 
575 	if (tunnel->collect_md) {
576 		gre_fb_xmit(skb, dev);
577 		return NETDEV_TX_OK;
578 	}
579 
580 	if (dev->header_ops) {
581 		/* Need space for new headers */
582 		if (skb_cow_head(skb, dev->needed_headroom -
583 				      (tunnel->hlen + sizeof(struct iphdr))))
584 			goto free_skb;
585 
586 		tnl_params = (const struct iphdr *)skb->data;
587 
588 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
589 		 * to gre header.
590 		 */
591 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
592 		skb_reset_mac_header(skb);
593 	} else {
594 		if (skb_cow_head(skb, dev->needed_headroom))
595 			goto free_skb;
596 
597 		tnl_params = &tunnel->parms.iph;
598 	}
599 
600 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
601 	if (IS_ERR(skb))
602 		goto out;
603 
604 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
605 	return NETDEV_TX_OK;
606 
607 free_skb:
608 	kfree_skb(skb);
609 out:
610 	dev->stats.tx_dropped++;
611 	return NETDEV_TX_OK;
612 }
613 
614 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
615 				struct net_device *dev)
616 {
617 	struct ip_tunnel *tunnel = netdev_priv(dev);
618 
619 	if (tunnel->collect_md) {
620 		gre_fb_xmit(skb, dev);
621 		return NETDEV_TX_OK;
622 	}
623 
624 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
625 	if (IS_ERR(skb))
626 		goto out;
627 
628 	if (skb_cow_head(skb, dev->needed_headroom))
629 		goto free_skb;
630 
631 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
632 	return NETDEV_TX_OK;
633 
634 free_skb:
635 	kfree_skb(skb);
636 out:
637 	dev->stats.tx_dropped++;
638 	return NETDEV_TX_OK;
639 }
640 
641 static int ipgre_tunnel_ioctl(struct net_device *dev,
642 			      struct ifreq *ifr, int cmd)
643 {
644 	int err;
645 	struct ip_tunnel_parm p;
646 
647 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
648 		return -EFAULT;
649 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
650 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
651 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
652 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
653 			return -EINVAL;
654 	}
655 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
656 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
657 
658 	err = ip_tunnel_ioctl(dev, &p, cmd);
659 	if (err)
660 		return err;
661 
662 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
663 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
664 
665 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
666 		return -EFAULT;
667 	return 0;
668 }
669 
670 /* Nice toy. Unfortunately, useless in real life :-)
671    It allows to construct virtual multiprotocol broadcast "LAN"
672    over the Internet, provided multicast routing is tuned.
673 
674 
675    I have no idea was this bicycle invented before me,
676    so that I had to set ARPHRD_IPGRE to a random value.
677    I have an impression, that Cisco could make something similar,
678    but this feature is apparently missing in IOS<=11.2(8).
679 
680    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
681    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
682 
683    ping -t 255 224.66.66.66
684 
685    If nobody answers, mbone does not work.
686 
687    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
688    ip addr add 10.66.66.<somewhat>/24 dev Universe
689    ifconfig Universe up
690    ifconfig Universe add fe80::<Your_real_addr>/10
691    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
692    ftp 10.66.66.66
693    ...
694    ftp fec0:6666:6666::193.233.7.65
695    ...
696  */
697 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
698 			unsigned short type,
699 			const void *daddr, const void *saddr, unsigned int len)
700 {
701 	struct ip_tunnel *t = netdev_priv(dev);
702 	struct iphdr *iph;
703 	struct gre_base_hdr *greh;
704 
705 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
706 	greh = (struct gre_base_hdr *)(iph+1);
707 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
708 	greh->protocol = htons(type);
709 
710 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
711 
712 	/* Set the source hardware address. */
713 	if (saddr)
714 		memcpy(&iph->saddr, saddr, 4);
715 	if (daddr)
716 		memcpy(&iph->daddr, daddr, 4);
717 	if (iph->daddr)
718 		return t->hlen + sizeof(*iph);
719 
720 	return -(t->hlen + sizeof(*iph));
721 }
722 
723 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
724 {
725 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
726 	memcpy(haddr, &iph->saddr, 4);
727 	return 4;
728 }
729 
/* Link-layer header operations for GRE devices that build the outer IP/GRE
 * header themselves; installed by ipgre_tunnel_init() where applicable.
 */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
734 
735 #ifdef CONFIG_NET_IPGRE_BROADCAST
736 static int ipgre_open(struct net_device *dev)
737 {
738 	struct ip_tunnel *t = netdev_priv(dev);
739 
740 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
741 		struct flowi4 fl4;
742 		struct rtable *rt;
743 
744 		rt = ip_route_output_gre(t->net, &fl4,
745 					 t->parms.iph.daddr,
746 					 t->parms.iph.saddr,
747 					 t->parms.o_key,
748 					 RT_TOS(t->parms.iph.tos),
749 					 t->parms.link);
750 		if (IS_ERR(rt))
751 			return -EADDRNOTAVAIL;
752 		dev = rt->dst.dev;
753 		ip_rt_put(rt);
754 		if (!__in_dev_get_rtnl(dev))
755 			return -EADDRNOTAVAIL;
756 		t->mlink = dev->ifindex;
757 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
758 	}
759 	return 0;
760 }
761 
762 static int ipgre_close(struct net_device *dev)
763 {
764 	struct ip_tunnel *t = netdev_priv(dev);
765 
766 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
767 		struct in_device *in_dev;
768 		in_dev = inetdev_by_index(t->net, t->mlink);
769 		if (in_dev)
770 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
771 	}
772 	return 0;
773 }
774 #endif
775 
/* Device operations for layer-3 GRE tunnel devices. The open/stop hooks are
 * only present when broadcast (multicast remote) support is configured.
 */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};
789 
/* Feature bits set on every GRE device by __gre_tunnel_init(). */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
794 
795 static void ipgre_tunnel_setup(struct net_device *dev)
796 {
797 	dev->netdev_ops		= &ipgre_netdev_ops;
798 	dev->type		= ARPHRD_IPGRE;
799 	ip_tunnel_setup(dev, ipgre_net_id);
800 }
801 
802 static void __gre_tunnel_init(struct net_device *dev)
803 {
804 	struct ip_tunnel *tunnel;
805 	int t_hlen;
806 
807 	tunnel = netdev_priv(dev);
808 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
809 	tunnel->parms.iph.protocol = IPPROTO_GRE;
810 
811 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
812 
813 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
814 
815 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
816 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
817 
818 	dev->features		|= GRE_FEATURES;
819 	dev->hw_features	|= GRE_FEATURES;
820 
821 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
822 		/* TCP offload with GRE SEQ is not supported. */
823 		dev->features    |= NETIF_F_GSO_SOFTWARE;
824 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
825 		/* Can use a lockless transmit, unless we generate
826 		 * output sequences
827 		 */
828 		dev->features |= NETIF_F_LLTX;
829 	}
830 }
831 
832 static int ipgre_tunnel_init(struct net_device *dev)
833 {
834 	struct ip_tunnel *tunnel = netdev_priv(dev);
835 	struct iphdr *iph = &tunnel->parms.iph;
836 
837 	__gre_tunnel_init(dev);
838 
839 	memcpy(dev->dev_addr, &iph->saddr, 4);
840 	memcpy(dev->broadcast, &iph->daddr, 4);
841 
842 	dev->flags		= IFF_NOARP;
843 	netif_keep_dst(dev);
844 	dev->addr_len		= 4;
845 
846 	if (iph->daddr) {
847 #ifdef CONFIG_NET_IPGRE_BROADCAST
848 		if (ipv4_is_multicast(iph->daddr)) {
849 			if (!iph->saddr)
850 				return -EINVAL;
851 			dev->flags = IFF_BROADCAST;
852 			dev->header_ops = &ipgre_header_ops;
853 		}
854 #endif
855 	} else
856 		dev->header_ops = &ipgre_header_ops;
857 
858 	return ip_tunnel_init(dev);
859 }
860 
/* Receive and ICMP-error entry points of this module, in the form of a
 * struct gre_protocol (registration is not part of this section).
 */
static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};
865 
/* Per-namespace init: set up the ipgre tunnel table for @net via
 * ip_tunnel_init_net() and return its result. The final NULL argument is
 * presumably the name of a fallback device (none here) — TODO confirm
 * against ip_tunnel_init_net().
 */
static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
870 
871 static void __net_exit ipgre_exit_net(struct net *net)
872 {
873 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
874 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
875 }
876 
/* Per-network-namespace operations for layer-3 GRE: .id and .size describe
 * the struct ip_tunnel_net storage looked up through net_generic() with
 * ipgre_net_id.
 */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
883 
884 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
885 {
886 	__be16 flags;
887 
888 	if (!data)
889 		return 0;
890 
891 	flags = 0;
892 	if (data[IFLA_GRE_IFLAGS])
893 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
894 	if (data[IFLA_GRE_OFLAGS])
895 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
896 	if (flags & (GRE_VERSION|GRE_ROUTING))
897 		return -EINVAL;
898 
899 	return 0;
900 }
901 
902 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
903 {
904 	__be32 daddr;
905 
906 	if (tb[IFLA_ADDRESS]) {
907 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
908 			return -EINVAL;
909 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
910 			return -EADDRNOTAVAIL;
911 	}
912 
913 	if (!data)
914 		goto out;
915 
916 	if (data[IFLA_GRE_REMOTE]) {
917 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
918 		if (!daddr)
919 			return -EINVAL;
920 	}
921 
922 out:
923 	return ipgre_tunnel_validate(tb, data);
924 }
925 
926 static void ipgre_netlink_parms(struct net_device *dev,
927 				struct nlattr *data[],
928 				struct nlattr *tb[],
929 				struct ip_tunnel_parm *parms)
930 {
931 	memset(parms, 0, sizeof(*parms));
932 
933 	parms->iph.protocol = IPPROTO_GRE;
934 
935 	if (!data)
936 		return;
937 
938 	if (data[IFLA_GRE_LINK])
939 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
940 
941 	if (data[IFLA_GRE_IFLAGS])
942 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
943 
944 	if (data[IFLA_GRE_OFLAGS])
945 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
946 
947 	if (data[IFLA_GRE_IKEY])
948 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
949 
950 	if (data[IFLA_GRE_OKEY])
951 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
952 
953 	if (data[IFLA_GRE_LOCAL])
954 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
955 
956 	if (data[IFLA_GRE_REMOTE])
957 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
958 
959 	if (data[IFLA_GRE_TTL])
960 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
961 
962 	if (data[IFLA_GRE_TOS])
963 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
964 
965 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
966 		parms->iph.frag_off = htons(IP_DF);
967 
968 	if (data[IFLA_GRE_COLLECT_METADATA]) {
969 		struct ip_tunnel *t = netdev_priv(dev);
970 
971 		t->collect_md = true;
972 	}
973 }
974 
975 /* This function returns true when ENCAP attributes are present in the nl msg */
976 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
977 				      struct ip_tunnel_encap *ipencap)
978 {
979 	bool ret = false;
980 
981 	memset(ipencap, 0, sizeof(*ipencap));
982 
983 	if (!data)
984 		return ret;
985 
986 	if (data[IFLA_GRE_ENCAP_TYPE]) {
987 		ret = true;
988 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
989 	}
990 
991 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
992 		ret = true;
993 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
994 	}
995 
996 	if (data[IFLA_GRE_ENCAP_SPORT]) {
997 		ret = true;
998 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
999 	}
1000 
1001 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1002 		ret = true;
1003 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1004 	}
1005 
1006 	return ret;
1007 }
1008 
1009 static int gre_tap_init(struct net_device *dev)
1010 {
1011 	__gre_tunnel_init(dev);
1012 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1013 
1014 	return ip_tunnel_init(dev);
1015 }
1016 
/* Device operations for Ethernet-over-GRE (tap) devices; MAC address
 * handling uses the generic Ethernet helpers.
 */
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address 	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};
1027 
1028 static void ipgre_tap_setup(struct net_device *dev)
1029 {
1030 	ether_setup(dev);
1031 	dev->netdev_ops		= &gre_tap_netdev_ops;
1032 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
1033 	ip_tunnel_setup(dev, gre_tap_net_id);
1034 }
1035 
1036 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1037 			 struct nlattr *tb[], struct nlattr *data[])
1038 {
1039 	struct ip_tunnel_parm p;
1040 	struct ip_tunnel_encap ipencap;
1041 
1042 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1043 		struct ip_tunnel *t = netdev_priv(dev);
1044 		int err = ip_tunnel_encap_setup(t, &ipencap);
1045 
1046 		if (err < 0)
1047 			return err;
1048 	}
1049 
1050 	ipgre_netlink_parms(dev, data, tb, &p);
1051 	return ip_tunnel_newlink(dev, tb, &p);
1052 }
1053 
1054 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1055 			    struct nlattr *data[])
1056 {
1057 	struct ip_tunnel_parm p;
1058 	struct ip_tunnel_encap ipencap;
1059 
1060 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1061 		struct ip_tunnel *t = netdev_priv(dev);
1062 		int err = ip_tunnel_encap_setup(t, &ipencap);
1063 
1064 		if (err < 0)
1065 			return err;
1066 	}
1067 
1068 	ipgre_netlink_parms(dev, data, tb, &p);
1069 	return ip_tunnel_changelink(dev, tb, &p);
1070 }
1071 
1072 static size_t ipgre_get_size(const struct net_device *dev)
1073 {
1074 	return
1075 		/* IFLA_GRE_LINK */
1076 		nla_total_size(4) +
1077 		/* IFLA_GRE_IFLAGS */
1078 		nla_total_size(2) +
1079 		/* IFLA_GRE_OFLAGS */
1080 		nla_total_size(2) +
1081 		/* IFLA_GRE_IKEY */
1082 		nla_total_size(4) +
1083 		/* IFLA_GRE_OKEY */
1084 		nla_total_size(4) +
1085 		/* IFLA_GRE_LOCAL */
1086 		nla_total_size(4) +
1087 		/* IFLA_GRE_REMOTE */
1088 		nla_total_size(4) +
1089 		/* IFLA_GRE_TTL */
1090 		nla_total_size(1) +
1091 		/* IFLA_GRE_TOS */
1092 		nla_total_size(1) +
1093 		/* IFLA_GRE_PMTUDISC */
1094 		nla_total_size(1) +
1095 		/* IFLA_GRE_ENCAP_TYPE */
1096 		nla_total_size(2) +
1097 		/* IFLA_GRE_ENCAP_FLAGS */
1098 		nla_total_size(2) +
1099 		/* IFLA_GRE_ENCAP_SPORT */
1100 		nla_total_size(2) +
1101 		/* IFLA_GRE_ENCAP_DPORT */
1102 		nla_total_size(2) +
1103 		/* IFLA_GRE_COLLECT_METADATA */
1104 		nla_total_size(0) +
1105 		0;
1106 }
1107 
1108 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1109 {
1110 	struct ip_tunnel *t = netdev_priv(dev);
1111 	struct ip_tunnel_parm *p = &t->parms;
1112 
1113 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1114 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1115 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1116 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1117 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1118 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1119 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1120 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1121 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1122 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1123 		       !!(p->iph.frag_off & htons(IP_DF))))
1124 		goto nla_put_failure;
1125 
1126 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1127 			t->encap.type) ||
1128 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1129 			 t->encap.sport) ||
1130 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1131 			 t->encap.dport) ||
1132 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1133 			t->encap.flags))
1134 		goto nla_put_failure;
1135 
1136 	if (t->collect_md) {
1137 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1138 			goto nla_put_failure;
1139 	}
1140 
1141 	return 0;
1142 
1143 nla_put_failure:
1144 	return -EMSGSIZE;
1145 }
1146 
1147 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1148 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1149 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1150 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1151 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1152 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1153 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1154 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1155 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1156 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1157 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1158 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1159 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1160 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1161 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1162 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1163 };
1164 
1165 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1166 	.kind		= "gre",
1167 	.maxtype	= IFLA_GRE_MAX,
1168 	.policy		= ipgre_policy,
1169 	.priv_size	= sizeof(struct ip_tunnel),
1170 	.setup		= ipgre_tunnel_setup,
1171 	.validate	= ipgre_tunnel_validate,
1172 	.newlink	= ipgre_newlink,
1173 	.changelink	= ipgre_changelink,
1174 	.dellink	= ip_tunnel_dellink,
1175 	.get_size	= ipgre_get_size,
1176 	.fill_info	= ipgre_fill_info,
1177 	.get_link_net	= ip_tunnel_get_link_net,
1178 };
1179 
1180 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1181 	.kind		= "gretap",
1182 	.maxtype	= IFLA_GRE_MAX,
1183 	.policy		= ipgre_policy,
1184 	.priv_size	= sizeof(struct ip_tunnel),
1185 	.setup		= ipgre_tap_setup,
1186 	.validate	= ipgre_tap_validate,
1187 	.newlink	= ipgre_newlink,
1188 	.changelink	= ipgre_changelink,
1189 	.dellink	= ip_tunnel_dellink,
1190 	.get_size	= ipgre_get_size,
1191 	.fill_info	= ipgre_fill_info,
1192 	.get_link_net	= ip_tunnel_get_link_net,
1193 };
1194 
1195 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1196 					u8 name_assign_type)
1197 {
1198 	struct nlattr *tb[IFLA_MAX + 1];
1199 	struct net_device *dev;
1200 	struct ip_tunnel *t;
1201 	int err;
1202 
1203 	memset(&tb, 0, sizeof(tb));
1204 
1205 	dev = rtnl_create_link(net, name, name_assign_type,
1206 			       &ipgre_tap_ops, tb);
1207 	if (IS_ERR(dev))
1208 		return dev;
1209 
1210 	/* Configure flow based GRE device. */
1211 	t = netdev_priv(dev);
1212 	t->collect_md = true;
1213 
1214 	err = ipgre_newlink(net, dev, tb, NULL);
1215 	if (err < 0)
1216 		goto out;
1217 	return dev;
1218 out:
1219 	free_netdev(dev);
1220 	return ERR_PTR(err);
1221 }
1222 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1223 
1224 static int __net_init ipgre_tap_init_net(struct net *net)
1225 {
1226 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1227 }
1228 
1229 static void __net_exit ipgre_tap_exit_net(struct net *net)
1230 {
1231 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1232 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1233 }
1234 
1235 static struct pernet_operations ipgre_tap_net_ops = {
1236 	.init = ipgre_tap_init_net,
1237 	.exit = ipgre_tap_exit_net,
1238 	.id   = &gre_tap_net_id,
1239 	.size = sizeof(struct ip_tunnel_net),
1240 };
1241 
1242 static int __init ipgre_init(void)
1243 {
1244 	int err;
1245 
1246 	pr_info("GRE over IPv4 tunneling driver\n");
1247 
1248 	err = register_pernet_device(&ipgre_net_ops);
1249 	if (err < 0)
1250 		return err;
1251 
1252 	err = register_pernet_device(&ipgre_tap_net_ops);
1253 	if (err < 0)
1254 		goto pnet_tap_faied;
1255 
1256 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1257 	if (err < 0) {
1258 		pr_info("%s: can't add protocol\n", __func__);
1259 		goto add_proto_failed;
1260 	}
1261 
1262 	err = rtnl_link_register(&ipgre_link_ops);
1263 	if (err < 0)
1264 		goto rtnl_link_failed;
1265 
1266 	err = rtnl_link_register(&ipgre_tap_ops);
1267 	if (err < 0)
1268 		goto tap_ops_failed;
1269 
1270 	return 0;
1271 
1272 tap_ops_failed:
1273 	rtnl_link_unregister(&ipgre_link_ops);
1274 rtnl_link_failed:
1275 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1276 add_proto_failed:
1277 	unregister_pernet_device(&ipgre_tap_net_ops);
1278 pnet_tap_faied:
1279 	unregister_pernet_device(&ipgre_net_ops);
1280 	return err;
1281 }
1282 
1283 static void __exit ipgre_fini(void)
1284 {
1285 	rtnl_link_unregister(&ipgre_tap_ops);
1286 	rtnl_link_unregister(&ipgre_link_ops);
1287 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1288 	unregister_pernet_device(&ipgre_tap_net_ops);
1289 	unregister_pernet_device(&ipgre_net_ops);
1290 }
1291 
1292 module_init(ipgre_init);
1293 module_exit(ipgre_fini);
1294 MODULE_LICENSE("GPL");
1295 MODULE_ALIAS_RTNL_LINK("gre");
1296 MODULE_ALIAS_RTNL_LINK("gretap");
1297 MODULE_ALIAS_NETDEV("gre0");
1298 MODULE_ALIAS_NETDEV("gretap0");
1299