1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux NET3: GRE over IP protocol decoder. 4 * 5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/types.h> 13 #include <linux/kernel.h> 14 #include <linux/slab.h> 15 #include <linux/uaccess.h> 16 #include <linux/skbuff.h> 17 #include <linux/netdevice.h> 18 #include <linux/in.h> 19 #include <linux/tcp.h> 20 #include <linux/udp.h> 21 #include <linux/if_arp.h> 22 #include <linux/if_vlan.h> 23 #include <linux/init.h> 24 #include <linux/in6.h> 25 #include <linux/inetdevice.h> 26 #include <linux/igmp.h> 27 #include <linux/netfilter_ipv4.h> 28 #include <linux/etherdevice.h> 29 #include <linux/if_ether.h> 30 31 #include <net/sock.h> 32 #include <net/ip.h> 33 #include <net/icmp.h> 34 #include <net/protocol.h> 35 #include <net/ip_tunnels.h> 36 #include <net/arp.h> 37 #include <net/checksum.h> 38 #include <net/dsfield.h> 39 #include <net/inet_ecn.h> 40 #include <net/xfrm.h> 41 #include <net/net_namespace.h> 42 #include <net/netns/generic.h> 43 #include <net/rtnetlink.h> 44 #include <net/gre.h> 45 #include <net/dst_metadata.h> 46 #include <net/erspan.h> 47 48 /* 49 Problems & solutions 50 -------------------- 51 52 1. The most important issue is detecting local dead loops. 53 They would cause complete host lockup in transmit, which 54 would be "resolved" by stack overflow or, if queueing is enabled, 55 with infinite looping in net_bh. 56 57 We cannot track such dead loops during route installation, 58 it is infeasible task. The most general solutions would be 59 to keep skb->encapsulation counter (sort of local ttl), 60 and silently drop packet when it expires. It is a good 61 solution, but it supposes maintaining new variable in ALL 62 skb, even if no tunneling is used. 63 64 Current solution: xmit_recursion breaks dead loops. This is a percpu 65 counter, since when we enter the first ndo_xmit(), cpu migration is 66 forbidden. We force an exit if this counter reaches RECURSION_LIMIT 67 68 2. Networking dead loops would not kill routers, but would really 69 kill network. IP hop limit plays role of "t->recursion" in this case, 70 if we copy it from packet being encapsulated to upper header. 71 It is very good solution, but it introduces two problems: 72 73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2), 74 do not work over tunnels. 75 - traceroute does not work. I planned to relay ICMP from tunnel, 76 so that this problem would be solved and traceroute output 77 would even more informative. This idea appeared to be wrong: 78 only Linux complies to rfc1812 now (yes, guys, Linux is the only 79 true router now :-)), all routers (at least, in neighbourhood of mine) 80 return only 8 bytes of payload. It is the end. 81 82 Hence, if we want that OSPF worked or traceroute said something reasonable, 83 we should search for another solution. 84 85 One of them is to parse packet trying to detect inner encapsulation 86 made by our node. It is difficult or even impossible, especially, 87 taking into account fragmentation. TO be short, ttl is not solution at all. 88 89 Current solution: The solution was UNEXPECTEDLY SIMPLE. 90 We force DF flag on tunnels with preconfigured hop limit, 91 that is ALL. :-) Well, it does not remove the problem completely, 92 but exponential growth of network traffic is changed to linear 93 (branches, that exceed pmtu are pruned) and tunnel mtu 94 rapidly degrades to value <68, where looping stops. 95 Yes, it is not good if there exists a router in the loop, 96 which does not force DF, even when encapsulating packets have DF set. 97 But it is not our problem! Nobody could accuse us, we made 98 all that we could make. Even if it is your gated who injected 99 fatal route to network, even if it were you who configured 100 fatal static route: you are innocent. :-) 101 102 Alexey Kuznetsov. 103 */ 104 105 static bool log_ecn_error = true; 106 module_param(log_ecn_error, bool, 0644); 107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 108 109 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 110 static const struct header_ops ipgre_header_ops; 111 112 static int ipgre_tunnel_init(struct net_device *dev); 113 static void erspan_build_header(struct sk_buff *skb, 114 u32 id, u32 index, 115 bool truncate, bool is_ipv4); 116 117 static unsigned int ipgre_net_id __read_mostly; 118 static unsigned int gre_tap_net_id __read_mostly; 119 static unsigned int erspan_net_id __read_mostly; 120 121 static int ipgre_err(struct sk_buff *skb, u32 info, 122 const struct tnl_ptk_info *tpi) 123 { 124 125 /* All the routers (except for Linux) return only 126 8 bytes of packet payload. It means, that precise relaying of 127 ICMP in the real Internet is absolutely infeasible. 128 129 Moreover, Cisco "wise men" put GRE key to the third word 130 in GRE header. It makes impossible maintaining even soft 131 state for keyed GRE tunnels with enabled checksum. Tell 132 them "thank you". 133 134 Well, I wonder, rfc1812 was written by Cisco employee, 135 what the hell these idiots break standards established 136 by themselves??? 137 */ 138 struct net *net = dev_net(skb->dev); 139 struct ip_tunnel_net *itn; 140 const struct iphdr *iph; 141 const int type = icmp_hdr(skb)->type; 142 const int code = icmp_hdr(skb)->code; 143 unsigned int data_len = 0; 144 struct ip_tunnel *t; 145 146 if (tpi->proto == htons(ETH_P_TEB)) 147 itn = net_generic(net, gre_tap_net_id); 148 else if (tpi->proto == htons(ETH_P_ERSPAN) || 149 tpi->proto == htons(ETH_P_ERSPAN2)) 150 itn = net_generic(net, erspan_net_id); 151 else 152 itn = net_generic(net, ipgre_net_id); 153 154 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 155 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 156 iph->daddr, iph->saddr, tpi->key); 157 158 if (!t) 159 return -ENOENT; 160 161 switch (type) { 162 default: 163 case ICMP_PARAMETERPROB: 164 return 0; 165 166 case ICMP_DEST_UNREACH: 167 switch (code) { 168 case ICMP_SR_FAILED: 169 case ICMP_PORT_UNREACH: 170 /* Impossible event. */ 171 return 0; 172 default: 173 /* All others are translated to HOST_UNREACH. 174 rfc2003 contains "deep thoughts" about NET_UNREACH, 175 I believe they are just ether pollution. --ANK 176 */ 177 break; 178 } 179 break; 180 181 case ICMP_TIME_EXCEEDED: 182 if (code != ICMP_EXC_TTL) 183 return 0; 184 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 185 break; 186 187 case ICMP_REDIRECT: 188 break; 189 } 190 191 #if IS_ENABLED(CONFIG_IPV6) 192 if (tpi->proto == htons(ETH_P_IPV6) && 193 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 194 type, data_len)) 195 return 0; 196 #endif 197 198 if (t->parms.iph.daddr == 0 || 199 ipv4_is_multicast(t->parms.iph.daddr)) 200 return 0; 201 202 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 203 return 0; 204 205 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 206 t->err_count++; 207 else 208 t->err_count = 1; 209 t->err_time = jiffies; 210 211 return 0; 212 } 213 214 static void gre_err(struct sk_buff *skb, u32 info) 215 { 216 /* All the routers (except for Linux) return only 217 * 8 bytes of packet payload. It means, that precise relaying of 218 * ICMP in the real Internet is absolutely infeasible. 219 * 220 * Moreover, Cisco "wise men" put GRE key to the third word 221 * in GRE header. It makes impossible maintaining even soft 222 * state for keyed 223 * GRE tunnels with enabled checksum. Tell them "thank you". 224 * 225 * Well, I wonder, rfc1812 was written by Cisco employee, 226 * what the hell these idiots break standards established 227 * by themselves??? 228 */ 229 230 const struct iphdr *iph = (struct iphdr *)skb->data; 231 const int type = icmp_hdr(skb)->type; 232 const int code = icmp_hdr(skb)->code; 233 struct tnl_ptk_info tpi; 234 235 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), 236 iph->ihl * 4) < 0) 237 return; 238 239 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 240 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 241 skb->dev->ifindex, IPPROTO_GRE); 242 return; 243 } 244 if (type == ICMP_REDIRECT) { 245 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 246 IPPROTO_GRE); 247 return; 248 } 249 250 ipgre_err(skb, info, &tpi); 251 } 252 253 static bool is_erspan_type1(int gre_hdr_len) 254 { 255 /* Both ERSPAN type I (version 0) and type II (version 1) use 256 * protocol 0x88BE, but the type I has only 4-byte GRE header, 257 * while type II has 8-byte. 258 */ 259 return gre_hdr_len == 4; 260 } 261 262 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, 263 int gre_hdr_len) 264 { 265 struct net *net = dev_net(skb->dev); 266 struct metadata_dst *tun_dst = NULL; 267 struct erspan_base_hdr *ershdr; 268 struct ip_tunnel_net *itn; 269 struct ip_tunnel *tunnel; 270 const struct iphdr *iph; 271 struct erspan_md2 *md2; 272 int ver; 273 int len; 274 275 itn = net_generic(net, erspan_net_id); 276 iph = ip_hdr(skb); 277 if (is_erspan_type1(gre_hdr_len)) { 278 ver = 0; 279 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 280 tpi->flags | TUNNEL_NO_KEY, 281 iph->saddr, iph->daddr, 0); 282 } else { 283 if (unlikely(!pskb_may_pull(skb, 284 gre_hdr_len + sizeof(*ershdr)))) 285 return PACKET_REJECT; 286 287 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 288 ver = ershdr->ver; 289 iph = ip_hdr(skb); 290 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 291 tpi->flags | TUNNEL_KEY, 292 iph->saddr, iph->daddr, tpi->key); 293 } 294 295 if (tunnel) { 296 if (is_erspan_type1(gre_hdr_len)) 297 len = gre_hdr_len; 298 else 299 len = gre_hdr_len + erspan_hdr_len(ver); 300 301 if (unlikely(!pskb_may_pull(skb, len))) 302 return PACKET_REJECT; 303 304 if (__iptunnel_pull_header(skb, 305 len, 306 htons(ETH_P_TEB), 307 false, false) < 0) 308 goto drop; 309 310 if (tunnel->collect_md) { 311 struct erspan_metadata *pkt_md, *md; 312 struct ip_tunnel_info *info; 313 unsigned char *gh; 314 __be64 tun_id; 315 __be16 flags; 316 317 tpi->flags |= TUNNEL_KEY; 318 flags = tpi->flags; 319 tun_id = key32_to_tunnel_id(tpi->key); 320 321 tun_dst = ip_tun_rx_dst(skb, flags, 322 tun_id, sizeof(*md)); 323 if (!tun_dst) 324 return PACKET_REJECT; 325 326 /* skb can be uncloned in __iptunnel_pull_header, so 327 * old pkt_md is no longer valid and we need to reset 328 * it 329 */ 330 gh = skb_network_header(skb) + 331 skb_network_header_len(skb); 332 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + 333 sizeof(*ershdr)); 334 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 335 md->version = ver; 336 md2 = &md->u.md2; 337 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : 338 ERSPAN_V2_MDSIZE); 339 340 info = &tun_dst->u.tun_info; 341 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 342 info->options_len = sizeof(*md); 343 } 344 345 skb_reset_mac_header(skb); 346 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 347 return PACKET_RCVD; 348 } 349 return PACKET_REJECT; 350 351 drop: 352 kfree_skb(skb); 353 return PACKET_RCVD; 354 } 355 356 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 357 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) 358 { 359 struct metadata_dst *tun_dst = NULL; 360 const struct iphdr *iph; 361 struct ip_tunnel *tunnel; 362 363 iph = ip_hdr(skb); 364 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 365 iph->saddr, iph->daddr, tpi->key); 366 367 if (tunnel) { 368 const struct iphdr *tnl_params; 369 370 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, 371 raw_proto, false) < 0) 372 goto drop; 373 374 /* Special case for ipgre_header_parse(), which expects the 375 * mac_header to point to the outer IP header. 376 */ 377 if (tunnel->dev->header_ops == &ipgre_header_ops) 378 skb_pop_mac_header(skb); 379 else 380 skb_reset_mac_header(skb); 381 382 tnl_params = &tunnel->parms.iph; 383 if (tunnel->collect_md || tnl_params->daddr == 0) { 384 __be16 flags; 385 __be64 tun_id; 386 387 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); 388 tun_id = key32_to_tunnel_id(tpi->key); 389 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); 390 if (!tun_dst) 391 return PACKET_REJECT; 392 } 393 394 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 395 return PACKET_RCVD; 396 } 397 return PACKET_NEXT; 398 399 drop: 400 kfree_skb(skb); 401 return PACKET_RCVD; 402 } 403 404 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 405 int hdr_len) 406 { 407 struct net *net = dev_net(skb->dev); 408 struct ip_tunnel_net *itn; 409 int res; 410 411 if (tpi->proto == htons(ETH_P_TEB)) 412 itn = net_generic(net, gre_tap_net_id); 413 else 414 itn = net_generic(net, ipgre_net_id); 415 416 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); 417 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { 418 /* ipgre tunnels in collect metadata mode should receive 419 * also ETH_P_TEB traffic. 420 */ 421 itn = net_generic(net, ipgre_net_id); 422 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); 423 } 424 return res; 425 } 426 427 static int gre_rcv(struct sk_buff *skb) 428 { 429 struct tnl_ptk_info tpi; 430 bool csum_err = false; 431 int hdr_len; 432 433 #ifdef CONFIG_NET_IPGRE_BROADCAST 434 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 435 /* Looped back packet, drop it! */ 436 if (rt_is_output_route(skb_rtable(skb))) 437 goto drop; 438 } 439 #endif 440 441 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); 442 if (hdr_len < 0) 443 goto drop; 444 445 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || 446 tpi.proto == htons(ETH_P_ERSPAN2))) { 447 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 448 return 0; 449 goto out; 450 } 451 452 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 453 return 0; 454 455 out: 456 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 457 drop: 458 kfree_skb(skb); 459 return 0; 460 } 461 462 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 463 const struct iphdr *tnl_params, 464 __be16 proto) 465 { 466 struct ip_tunnel *tunnel = netdev_priv(dev); 467 __be16 flags = tunnel->parms.o_flags; 468 469 /* Push GRE header. */ 470 gre_build_header(skb, tunnel->tun_hlen, 471 flags, proto, tunnel->parms.o_key, 472 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); 473 474 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 475 } 476 477 static int gre_handle_offloads(struct sk_buff *skb, bool csum) 478 { 479 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 480 } 481 482 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, 483 __be16 proto) 484 { 485 struct ip_tunnel *tunnel = netdev_priv(dev); 486 struct ip_tunnel_info *tun_info; 487 const struct ip_tunnel_key *key; 488 int tunnel_hlen; 489 __be16 flags; 490 491 tun_info = skb_tunnel_info(skb); 492 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 493 ip_tunnel_info_af(tun_info) != AF_INET)) 494 goto err_free_skb; 495 496 key = &tun_info->key; 497 tunnel_hlen = gre_calc_hlen(key->tun_flags); 498 499 if (skb_cow_head(skb, dev->needed_headroom)) 500 goto err_free_skb; 501 502 /* Push Tunnel header. */ 503 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) 504 goto err_free_skb; 505 506 flags = tun_info->key.tun_flags & 507 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); 508 gre_build_header(skb, tunnel_hlen, flags, proto, 509 tunnel_id_to_key32(tun_info->key.tun_id), 510 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); 511 512 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); 513 514 return; 515 516 err_free_skb: 517 kfree_skb(skb); 518 DEV_STATS_INC(dev, tx_dropped); 519 } 520 521 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) 522 { 523 struct ip_tunnel *tunnel = netdev_priv(dev); 524 struct ip_tunnel_info *tun_info; 525 const struct ip_tunnel_key *key; 526 struct erspan_metadata *md; 527 bool truncate = false; 528 __be16 proto; 529 int tunnel_hlen; 530 int version; 531 int nhoff; 532 533 tun_info = skb_tunnel_info(skb); 534 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 535 ip_tunnel_info_af(tun_info) != AF_INET)) 536 goto err_free_skb; 537 538 key = &tun_info->key; 539 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) 540 goto err_free_skb; 541 if (tun_info->options_len < sizeof(*md)) 542 goto err_free_skb; 543 md = ip_tunnel_info_opts(tun_info); 544 545 /* ERSPAN has fixed 8 byte GRE header */ 546 version = md->version; 547 tunnel_hlen = 8 + erspan_hdr_len(version); 548 549 if (skb_cow_head(skb, dev->needed_headroom)) 550 goto err_free_skb; 551 552 if (gre_handle_offloads(skb, false)) 553 goto err_free_skb; 554 555 if (skb->len > dev->mtu + dev->hard_header_len) { 556 if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) 557 goto err_free_skb; 558 truncate = true; 559 } 560 561 nhoff = skb_network_offset(skb); 562 if (skb->protocol == htons(ETH_P_IP) && 563 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) 564 truncate = true; 565 566 if (skb->protocol == htons(ETH_P_IPV6)) { 567 int thoff; 568 569 if (skb_transport_header_was_set(skb)) 570 thoff = skb_transport_offset(skb); 571 else 572 thoff = nhoff + sizeof(struct ipv6hdr); 573 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) 574 truncate = true; 575 } 576 577 if (version == 1) { 578 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), 579 ntohl(md->u.index), truncate, true); 580 proto = htons(ETH_P_ERSPAN); 581 } else if (version == 2) { 582 erspan_build_header_v2(skb, 583 ntohl(tunnel_id_to_key32(key->tun_id)), 584 md->u.md2.dir, 585 get_hwid(&md->u.md2), 586 truncate, true); 587 proto = htons(ETH_P_ERSPAN2); 588 } else { 589 goto err_free_skb; 590 } 591 592 gre_build_header(skb, 8, TUNNEL_SEQ, 593 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); 594 595 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); 596 597 return; 598 599 err_free_skb: 600 kfree_skb(skb); 601 DEV_STATS_INC(dev, tx_dropped); 602 } 603 604 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 605 { 606 struct ip_tunnel_info *info = skb_tunnel_info(skb); 607 const struct ip_tunnel_key *key; 608 struct rtable *rt; 609 struct flowi4 fl4; 610 611 if (ip_tunnel_info_af(info) != AF_INET) 612 return -EINVAL; 613 614 key = &info->key; 615 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, 616 tunnel_id_to_key32(key->tun_id), 617 key->tos & ~INET_ECN_MASK, dev_net(dev), 0, 618 skb->mark, skb_get_hash(skb), key->flow_flags); 619 rt = ip_route_output_key(dev_net(dev), &fl4); 620 if (IS_ERR(rt)) 621 return PTR_ERR(rt); 622 623 ip_rt_put(rt); 624 info->key.u.ipv4.src = fl4.saddr; 625 return 0; 626 } 627 628 static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 629 struct net_device *dev) 630 { 631 struct ip_tunnel *tunnel = netdev_priv(dev); 632 const struct iphdr *tnl_params; 633 634 if (!pskb_inet_may_pull(skb)) 635 goto free_skb; 636 637 if (tunnel->collect_md) { 638 gre_fb_xmit(skb, dev, skb->protocol); 639 return NETDEV_TX_OK; 640 } 641 642 if (dev->header_ops) { 643 int pull_len = tunnel->hlen + sizeof(struct iphdr); 644 645 if (skb_cow_head(skb, 0)) 646 goto free_skb; 647 648 tnl_params = (const struct iphdr *)skb->data; 649 650 if (!pskb_network_may_pull(skb, pull_len)) 651 goto free_skb; 652 653 /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ 654 skb_pull(skb, pull_len); 655 skb_reset_mac_header(skb); 656 657 if (skb->ip_summed == CHECKSUM_PARTIAL && 658 skb_checksum_start(skb) < skb->data) 659 goto free_skb; 660 } else { 661 if (skb_cow_head(skb, dev->needed_headroom)) 662 goto free_skb; 663 664 tnl_params = &tunnel->parms.iph; 665 } 666 667 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 668 goto free_skb; 669 670 __gre_xmit(skb, dev, tnl_params, skb->protocol); 671 return NETDEV_TX_OK; 672 673 free_skb: 674 kfree_skb(skb); 675 DEV_STATS_INC(dev, tx_dropped); 676 return NETDEV_TX_OK; 677 } 678 679 static netdev_tx_t erspan_xmit(struct sk_buff *skb, 680 struct net_device *dev) 681 { 682 struct ip_tunnel *tunnel = netdev_priv(dev); 683 bool truncate = false; 684 __be16 proto; 685 686 if (!pskb_inet_may_pull(skb)) 687 goto free_skb; 688 689 if (tunnel->collect_md) { 690 erspan_fb_xmit(skb, dev); 691 return NETDEV_TX_OK; 692 } 693 694 if (gre_handle_offloads(skb, false)) 695 goto free_skb; 696 697 if (skb_cow_head(skb, dev->needed_headroom)) 698 goto free_skb; 699 700 if (skb->len > dev->mtu + dev->hard_header_len) { 701 if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) 702 goto free_skb; 703 truncate = true; 704 } 705 706 /* Push ERSPAN header */ 707 if (tunnel->erspan_ver == 0) { 708 proto = htons(ETH_P_ERSPAN); 709 tunnel->parms.o_flags &= ~TUNNEL_SEQ; 710 } else if (tunnel->erspan_ver == 1) { 711 erspan_build_header(skb, ntohl(tunnel->parms.o_key), 712 tunnel->index, 713 truncate, true); 714 proto = htons(ETH_P_ERSPAN); 715 } else if (tunnel->erspan_ver == 2) { 716 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), 717 tunnel->dir, tunnel->hwid, 718 truncate, true); 719 proto = htons(ETH_P_ERSPAN2); 720 } else { 721 goto free_skb; 722 } 723 724 tunnel->parms.o_flags &= ~TUNNEL_KEY; 725 __gre_xmit(skb, dev, &tunnel->parms.iph, proto); 726 return NETDEV_TX_OK; 727 728 free_skb: 729 kfree_skb(skb); 730 DEV_STATS_INC(dev, tx_dropped); 731 return NETDEV_TX_OK; 732 } 733 734 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, 735 struct net_device *dev) 736 { 737 struct ip_tunnel *tunnel = netdev_priv(dev); 738 739 if (!pskb_inet_may_pull(skb)) 740 goto free_skb; 741 742 if (tunnel->collect_md) { 743 gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); 744 return NETDEV_TX_OK; 745 } 746 747 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 748 goto free_skb; 749 750 if (skb_cow_head(skb, dev->needed_headroom)) 751 goto free_skb; 752 753 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); 754 return NETDEV_TX_OK; 755 756 free_skb: 757 kfree_skb(skb); 758 DEV_STATS_INC(dev, tx_dropped); 759 return NETDEV_TX_OK; 760 } 761 762 static void ipgre_link_update(struct net_device *dev, bool set_mtu) 763 { 764 struct ip_tunnel *tunnel = netdev_priv(dev); 765 __be16 flags; 766 int len; 767 768 len = tunnel->tun_hlen; 769 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 770 len = tunnel->tun_hlen - len; 771 tunnel->hlen = tunnel->hlen + len; 772 773 if (dev->header_ops) 774 dev->hard_header_len += len; 775 else 776 dev->needed_headroom += len; 777 778 if (set_mtu) 779 dev->mtu = max_t(int, dev->mtu - len, 68); 780 781 flags = tunnel->parms.o_flags; 782 783 if (flags & TUNNEL_SEQ || 784 (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) { 785 dev->features &= ~NETIF_F_GSO_SOFTWARE; 786 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; 787 } else { 788 dev->features |= NETIF_F_GSO_SOFTWARE; 789 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 790 } 791 } 792 793 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, 794 int cmd) 795 { 796 int err; 797 798 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 799 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || 800 p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || 801 ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) 802 return -EINVAL; 803 } 804 805 p->i_flags = gre_flags_to_tnl_flags(p->i_flags); 806 p->o_flags = gre_flags_to_tnl_flags(p->o_flags); 807 808 err = ip_tunnel_ctl(dev, p, cmd); 809 if (err) 810 return err; 811 812 if (cmd == SIOCCHGTUNNEL) { 813 struct ip_tunnel *t = netdev_priv(dev); 814 815 t->parms.i_flags = p->i_flags; 816 t->parms.o_flags = p->o_flags; 817 818 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 819 ipgre_link_update(dev, true); 820 } 821 822 p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); 823 p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); 824 return 0; 825 } 826 827 /* Nice toy. Unfortunately, useless in real life :-) 828 It allows to construct virtual multiprotocol broadcast "LAN" 829 over the Internet, provided multicast routing is tuned. 830 831 832 I have no idea was this bicycle invented before me, 833 so that I had to set ARPHRD_IPGRE to a random value. 834 I have an impression, that Cisco could make something similar, 835 but this feature is apparently missing in IOS<=11.2(8). 836 837 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 838 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 839 840 ping -t 255 224.66.66.66 841 842 If nobody answers, mbone does not work. 843 844 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 845 ip addr add 10.66.66.<somewhat>/24 dev Universe 846 ifconfig Universe up 847 ifconfig Universe add fe80::<Your_real_addr>/10 848 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 849 ftp 10.66.66.66 850 ... 851 ftp fec0:6666:6666::193.233.7.65 852 ... 853 */ 854 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 855 unsigned short type, 856 const void *daddr, const void *saddr, unsigned int len) 857 { 858 struct ip_tunnel *t = netdev_priv(dev); 859 struct iphdr *iph; 860 struct gre_base_hdr *greh; 861 862 iph = skb_push(skb, t->hlen + sizeof(*iph)); 863 greh = (struct gre_base_hdr *)(iph+1); 864 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); 865 greh->protocol = htons(type); 866 867 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 868 869 /* Set the source hardware address. */ 870 if (saddr) 871 memcpy(&iph->saddr, saddr, 4); 872 if (daddr) 873 memcpy(&iph->daddr, daddr, 4); 874 if (iph->daddr) 875 return t->hlen + sizeof(*iph); 876 877 return -(t->hlen + sizeof(*iph)); 878 } 879 880 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 881 { 882 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); 883 memcpy(haddr, &iph->saddr, 4); 884 return 4; 885 } 886 887 static const struct header_ops ipgre_header_ops = { 888 .create = ipgre_header, 889 .parse = ipgre_header_parse, 890 }; 891 892 #ifdef CONFIG_NET_IPGRE_BROADCAST 893 static int ipgre_open(struct net_device *dev) 894 { 895 struct ip_tunnel *t = netdev_priv(dev); 896 897 if (ipv4_is_multicast(t->parms.iph.daddr)) { 898 struct flowi4 fl4; 899 struct rtable *rt; 900 901 rt = ip_route_output_gre(t->net, &fl4, 902 t->parms.iph.daddr, 903 t->parms.iph.saddr, 904 t->parms.o_key, 905 RT_TOS(t->parms.iph.tos), 906 t->parms.link); 907 if (IS_ERR(rt)) 908 return -EADDRNOTAVAIL; 909 dev = rt->dst.dev; 910 ip_rt_put(rt); 911 if (!__in_dev_get_rtnl(dev)) 912 return -EADDRNOTAVAIL; 913 t->mlink = dev->ifindex; 914 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 915 } 916 return 0; 917 } 918 919 static int ipgre_close(struct net_device *dev) 920 { 921 struct ip_tunnel *t = netdev_priv(dev); 922 923 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 924 struct in_device *in_dev; 925 in_dev = inetdev_by_index(t->net, t->mlink); 926 if (in_dev) 927 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 928 } 929 return 0; 930 } 931 #endif 932 933 static const struct net_device_ops ipgre_netdev_ops = { 934 .ndo_init = ipgre_tunnel_init, 935 .ndo_uninit = ip_tunnel_uninit, 936 #ifdef CONFIG_NET_IPGRE_BROADCAST 937 .ndo_open = ipgre_open, 938 .ndo_stop = ipgre_close, 939 #endif 940 .ndo_start_xmit = ipgre_xmit, 941 .ndo_siocdevprivate = ip_tunnel_siocdevprivate, 942 .ndo_change_mtu = ip_tunnel_change_mtu, 943 .ndo_get_stats64 = dev_get_tstats64, 944 .ndo_get_iflink = ip_tunnel_get_iflink, 945 .ndo_tunnel_ctl = ipgre_tunnel_ctl, 946 }; 947 948 #define GRE_FEATURES (NETIF_F_SG | \ 949 NETIF_F_FRAGLIST | \ 950 NETIF_F_HIGHDMA | \ 951 NETIF_F_HW_CSUM) 952 953 static void ipgre_tunnel_setup(struct net_device *dev) 954 { 955 dev->netdev_ops = &ipgre_netdev_ops; 956 dev->type = ARPHRD_IPGRE; 957 ip_tunnel_setup(dev, ipgre_net_id); 958 } 959 960 static void __gre_tunnel_init(struct net_device *dev) 961 { 962 struct ip_tunnel *tunnel; 963 __be16 flags; 964 965 tunnel = netdev_priv(dev); 966 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 967 tunnel->parms.iph.protocol = IPPROTO_GRE; 968 969 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 970 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph); 971 972 dev->features |= GRE_FEATURES | NETIF_F_LLTX; 973 dev->hw_features |= GRE_FEATURES; 974 975 flags = tunnel->parms.o_flags; 976 977 /* TCP offload with GRE SEQ is not supported, nor can we support 2 978 * levels of outer headers requiring an update. 979 */ 980 if (flags & TUNNEL_SEQ) 981 return; 982 if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE) 983 return; 984 985 dev->features |= NETIF_F_GSO_SOFTWARE; 986 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 987 } 988 989 static int ipgre_tunnel_init(struct net_device *dev) 990 { 991 struct ip_tunnel *tunnel = netdev_priv(dev); 992 struct iphdr *iph = &tunnel->parms.iph; 993 994 __gre_tunnel_init(dev); 995 996 __dev_addr_set(dev, &iph->saddr, 4); 997 memcpy(dev->broadcast, &iph->daddr, 4); 998 999 dev->flags = IFF_NOARP; 1000 netif_keep_dst(dev); 1001 dev->addr_len = 4; 1002 1003 if (iph->daddr && !tunnel->collect_md) { 1004 #ifdef CONFIG_NET_IPGRE_BROADCAST 1005 if (ipv4_is_multicast(iph->daddr)) { 1006 if (!iph->saddr) 1007 return -EINVAL; 1008 dev->flags = IFF_BROADCAST; 1009 dev->header_ops = &ipgre_header_ops; 1010 dev->hard_header_len = tunnel->hlen + sizeof(*iph); 1011 dev->needed_headroom = 0; 1012 } 1013 #endif 1014 } else if (!tunnel->collect_md) { 1015 dev->header_ops = &ipgre_header_ops; 1016 dev->hard_header_len = tunnel->hlen + sizeof(*iph); 1017 dev->needed_headroom = 0; 1018 } 1019 1020 return ip_tunnel_init(dev); 1021 } 1022 1023 static const struct gre_protocol ipgre_protocol = { 1024 .handler = gre_rcv, 1025 .err_handler = gre_err, 1026 }; 1027 1028 static int __net_init ipgre_init_net(struct net *net) 1029 { 1030 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1031 } 1032 1033 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) 1034 { 1035 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); 1036 } 1037 1038 static struct pernet_operations ipgre_net_ops = { 1039 .init = ipgre_init_net, 1040 .exit_batch = ipgre_exit_batch_net, 1041 .id = &ipgre_net_id, 1042 .size = sizeof(struct ip_tunnel_net), 1043 }; 1044 1045 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], 1046 struct netlink_ext_ack *extack) 1047 { 1048 __be16 flags; 1049 1050 if (!data) 1051 return 0; 1052 1053 flags = 0; 1054 if (data[IFLA_GRE_IFLAGS]) 1055 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1056 if (data[IFLA_GRE_OFLAGS]) 1057 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1058 if (flags & (GRE_VERSION|GRE_ROUTING)) 1059 return -EINVAL; 1060 1061 if (data[IFLA_GRE_COLLECT_METADATA] && 1062 data[IFLA_GRE_ENCAP_TYPE] && 1063 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) 1064 return -EINVAL; 1065 1066 return 0; 1067 } 1068 1069 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], 1070 struct netlink_ext_ack *extack) 1071 { 1072 __be32 daddr; 1073 1074 if (tb[IFLA_ADDRESS]) { 1075 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1076 return -EINVAL; 1077 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1078 return -EADDRNOTAVAIL; 1079 } 1080 1081 if (!data) 1082 goto out; 1083 1084 if (data[IFLA_GRE_REMOTE]) { 1085 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1086 if (!daddr) 1087 return -EINVAL; 1088 } 1089 1090 out: 1091 return ipgre_tunnel_validate(tb, data, extack); 1092 } 1093 1094 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], 1095 struct netlink_ext_ack *extack) 1096 { 1097 __be16 flags = 0; 1098 int ret; 1099 1100 if (!data) 1101 return 0; 1102 1103 ret = ipgre_tap_validate(tb, data, extack); 1104 if (ret) 1105 return ret; 1106 1107 if (data[IFLA_GRE_ERSPAN_VER] && 1108 nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0) 1109 return 0; 1110 1111 /* ERSPAN type II/III should only have GRE sequence and key flag */ 1112 if (data[IFLA_GRE_OFLAGS]) 1113 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1114 if (data[IFLA_GRE_IFLAGS]) 1115 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1116 if (!data[IFLA_GRE_COLLECT_METADATA] && 1117 flags != (GRE_SEQ | GRE_KEY)) 1118 return -EINVAL; 1119 1120 /* ERSPAN Session ID only has 10-bit. Since we reuse 1121 * 32-bit key field as ID, check it's range. 1122 */ 1123 if (data[IFLA_GRE_IKEY] && 1124 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) 1125 return -EINVAL; 1126 1127 if (data[IFLA_GRE_OKEY] && 1128 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) 1129 return -EINVAL; 1130 1131 return 0; 1132 } 1133 1134 static int ipgre_netlink_parms(struct net_device *dev, 1135 struct nlattr *data[], 1136 struct nlattr *tb[], 1137 struct ip_tunnel_parm *parms, 1138 __u32 *fwmark) 1139 { 1140 struct ip_tunnel *t = netdev_priv(dev); 1141 1142 memset(parms, 0, sizeof(*parms)); 1143 1144 parms->iph.protocol = IPPROTO_GRE; 1145 1146 if (!data) 1147 return 0; 1148 1149 if (data[IFLA_GRE_LINK]) 1150 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1151 1152 if (data[IFLA_GRE_IFLAGS]) 1153 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); 1154 1155 if (data[IFLA_GRE_OFLAGS]) 1156 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); 1157 1158 if (data[IFLA_GRE_IKEY]) 1159 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1160 1161 if (data[IFLA_GRE_OKEY]) 1162 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1163 1164 if (data[IFLA_GRE_LOCAL]) 1165 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); 1166 1167 if (data[IFLA_GRE_REMOTE]) 1168 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); 1169 1170 if (data[IFLA_GRE_TTL]) 1171 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1172 1173 if (data[IFLA_GRE_TOS]) 1174 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1175 1176 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) { 1177 if (t->ignore_df) 1178 return -EINVAL; 1179 parms->iph.frag_off = htons(IP_DF); 1180 } 1181 1182 if (data[IFLA_GRE_COLLECT_METADATA]) { 1183 t->collect_md = true; 1184 if (dev->type == ARPHRD_IPGRE) 1185 dev->type = ARPHRD_NONE; 1186 } 1187 1188 if (data[IFLA_GRE_IGNORE_DF]) { 1189 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) 1190 && (parms->iph.frag_off & htons(IP_DF))) 1191 return -EINVAL; 1192 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); 1193 } 1194 1195 if (data[IFLA_GRE_FWMARK]) 1196 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1197 1198 return 0; 1199 } 1200 1201 static int erspan_netlink_parms(struct net_device *dev, 1202 struct nlattr *data[], 1203 struct nlattr *tb[], 1204 struct ip_tunnel_parm *parms, 1205 __u32 *fwmark) 1206 { 1207 struct ip_tunnel *t = netdev_priv(dev); 1208 int err; 1209 1210 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); 1211 if (err) 1212 return err; 1213 if (!data) 1214 return 0; 1215 1216 if (data[IFLA_GRE_ERSPAN_VER]) { 1217 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); 1218 1219 if (t->erspan_ver > 2) 1220 return -EINVAL; 1221 } 1222 1223 if (t->erspan_ver == 1) { 1224 if (data[IFLA_GRE_ERSPAN_INDEX]) { 1225 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); 1226 if (t->index & ~INDEX_MASK) 1227 return -EINVAL; 1228 } 1229 } else if (t->erspan_ver == 2) { 1230 if (data[IFLA_GRE_ERSPAN_DIR]) { 1231 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); 1232 if (t->dir & ~(DIR_MASK >> DIR_OFFSET)) 1233 return -EINVAL; 1234 } 1235 if (data[IFLA_GRE_ERSPAN_HWID]) { 1236 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); 1237 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET)) 1238 return -EINVAL; 1239 } 1240 } 1241 1242 return 0; 1243 } 1244 1245 /* This function returns true when ENCAP attributes are present in the nl msg */ 1246 static bool ipgre_netlink_encap_parms(struct nlattr *data[], 1247 struct ip_tunnel_encap *ipencap) 1248 { 1249 bool ret = false; 1250 1251 memset(ipencap, 0, sizeof(*ipencap)); 1252 1253 if (!data) 1254 return ret; 1255 1256 if (data[IFLA_GRE_ENCAP_TYPE]) { 1257 ret = true; 1258 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); 1259 } 1260 1261 if (data[IFLA_GRE_ENCAP_FLAGS]) { 1262 ret = true; 1263 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); 1264 } 1265 1266 if (data[IFLA_GRE_ENCAP_SPORT]) { 1267 ret = true; 1268 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); 1269 } 1270 1271 if (data[IFLA_GRE_ENCAP_DPORT]) { 1272 ret = true; 1273 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); 1274 } 1275 1276 return ret; 1277 } 1278 1279 static int gre_tap_init(struct net_device *dev) 1280 { 1281 __gre_tunnel_init(dev); 1282 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1283 netif_keep_dst(dev); 1284 1285 return ip_tunnel_init(dev); 1286 } 1287 1288 static const struct net_device_ops gre_tap_netdev_ops = { 1289 .ndo_init = gre_tap_init, 1290 .ndo_uninit = ip_tunnel_uninit, 1291 .ndo_start_xmit = gre_tap_xmit, 1292 .ndo_set_mac_address = eth_mac_addr, 1293 .ndo_validate_addr = eth_validate_addr, 1294 .ndo_change_mtu = ip_tunnel_change_mtu, 1295 .ndo_get_stats64 = dev_get_tstats64, 1296 .ndo_get_iflink = ip_tunnel_get_iflink, 1297 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1298 }; 1299 1300 static int erspan_tunnel_init(struct net_device *dev) 1301 { 1302 struct ip_tunnel *tunnel = netdev_priv(dev); 1303 1304 if (tunnel->erspan_ver == 0) 1305 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */ 1306 else 1307 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */ 1308 1309 tunnel->parms.iph.protocol = IPPROTO_GRE; 1310 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + 1311 erspan_hdr_len(tunnel->erspan_ver); 1312 1313 dev->features |= GRE_FEATURES; 1314 dev->hw_features |= GRE_FEATURES; 1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1316 netif_keep_dst(dev); 1317 1318 return ip_tunnel_init(dev); 1319 } 1320 1321 static const struct net_device_ops erspan_netdev_ops = { 1322 .ndo_init = erspan_tunnel_init, 1323 .ndo_uninit = ip_tunnel_uninit, 1324 .ndo_start_xmit = erspan_xmit, 1325 .ndo_set_mac_address = eth_mac_addr, 1326 .ndo_validate_addr = eth_validate_addr, 1327 .ndo_change_mtu = ip_tunnel_change_mtu, 1328 .ndo_get_stats64 = dev_get_tstats64, 1329 .ndo_get_iflink = ip_tunnel_get_iflink, 1330 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1331 }; 1332 1333 static void ipgre_tap_setup(struct net_device *dev) 1334 { 1335 ether_setup(dev); 1336 dev->max_mtu = 0; 1337 dev->netdev_ops = &gre_tap_netdev_ops; 1338 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1339 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1340 ip_tunnel_setup(dev, gre_tap_net_id); 1341 } 1342 1343 static int 1344 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) 1345 { 1346 struct ip_tunnel_encap ipencap; 1347 1348 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1349 struct ip_tunnel *t = netdev_priv(dev); 1350 int err = ip_tunnel_encap_setup(t, &ipencap); 1351 1352 if (err < 0) 1353 return err; 1354 } 1355 1356 return 0; 1357 } 1358 1359 static int ipgre_newlink(struct net *src_net, struct net_device *dev, 1360 struct nlattr *tb[], struct nlattr *data[], 1361 struct netlink_ext_ack *extack) 1362 { 1363 struct ip_tunnel_parm p; 1364 __u32 fwmark = 0; 1365 int err; 1366 1367 err = ipgre_newlink_encap_setup(dev, data); 1368 if (err) 1369 return err; 1370 1371 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1372 if (err < 0) 1373 return err; 1374 return ip_tunnel_newlink(dev, tb, &p, fwmark); 1375 } 1376 1377 static int erspan_newlink(struct net *src_net, struct net_device *dev, 1378 struct nlattr *tb[], struct nlattr *data[], 1379 struct netlink_ext_ack *extack) 1380 { 1381 struct ip_tunnel_parm p; 1382 __u32 fwmark = 0; 1383 int err; 1384 1385 err = ipgre_newlink_encap_setup(dev, data); 1386 if (err) 1387 return err; 1388 1389 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); 1390 if (err) 1391 return err; 1392 return ip_tunnel_newlink(dev, tb, &p, fwmark); 1393 } 1394 1395 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1396 struct nlattr *data[], 1397 struct netlink_ext_ack *extack) 1398 { 1399 struct ip_tunnel *t = netdev_priv(dev); 1400 __u32 fwmark = t->fwmark; 1401 struct ip_tunnel_parm p; 1402 int err; 1403 1404 err = ipgre_newlink_encap_setup(dev, data); 1405 if (err) 1406 return err; 1407 1408 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1409 if (err < 0) 1410 return err; 1411 1412 err = ip_tunnel_changelink(dev, tb, &p, fwmark); 1413 if (err < 0) 1414 return err; 1415 1416 t->parms.i_flags = p.i_flags; 1417 t->parms.o_flags = p.o_flags; 1418 1419 ipgre_link_update(dev, !tb[IFLA_MTU]); 1420 1421 return 0; 1422 } 1423 1424 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], 1425 struct nlattr *data[], 1426 struct netlink_ext_ack *extack) 1427 { 1428 struct ip_tunnel *t = netdev_priv(dev); 1429 __u32 fwmark = t->fwmark; 1430 struct ip_tunnel_parm p; 1431 int err; 1432 1433 err = ipgre_newlink_encap_setup(dev, data); 1434 if (err) 1435 return err; 1436 1437 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); 1438 if (err < 0) 1439 return err; 1440 1441 err = ip_tunnel_changelink(dev, tb, &p, fwmark); 1442 if (err < 0) 1443 return err; 1444 1445 t->parms.i_flags = p.i_flags; 1446 t->parms.o_flags = p.o_flags; 1447 1448 return 0; 1449 } 1450 1451 static size_t ipgre_get_size(const struct net_device *dev) 1452 { 1453 return 1454 /* IFLA_GRE_LINK */ 1455 nla_total_size(4) + 1456 /* IFLA_GRE_IFLAGS */ 1457 nla_total_size(2) + 1458 /* IFLA_GRE_OFLAGS */ 1459 nla_total_size(2) + 1460 /* IFLA_GRE_IKEY */ 1461 nla_total_size(4) + 1462 /* IFLA_GRE_OKEY */ 1463 nla_total_size(4) + 1464 /* IFLA_GRE_LOCAL */ 1465 nla_total_size(4) + 1466 /* IFLA_GRE_REMOTE */ 1467 nla_total_size(4) + 1468 /* IFLA_GRE_TTL */ 1469 nla_total_size(1) + 1470 /* IFLA_GRE_TOS */ 1471 nla_total_size(1) + 1472 /* IFLA_GRE_PMTUDISC */ 1473 nla_total_size(1) + 1474 /* IFLA_GRE_ENCAP_TYPE */ 1475 nla_total_size(2) + 1476 /* IFLA_GRE_ENCAP_FLAGS */ 1477 nla_total_size(2) + 1478 /* IFLA_GRE_ENCAP_SPORT */ 1479 nla_total_size(2) + 1480 /* IFLA_GRE_ENCAP_DPORT */ 1481 nla_total_size(2) + 1482 /* IFLA_GRE_COLLECT_METADATA */ 1483 nla_total_size(0) + 1484 /* IFLA_GRE_IGNORE_DF */ 1485 nla_total_size(1) + 1486 /* IFLA_GRE_FWMARK */ 1487 nla_total_size(4) + 1488 /* IFLA_GRE_ERSPAN_INDEX */ 1489 nla_total_size(4) + 1490 /* IFLA_GRE_ERSPAN_VER */ 1491 nla_total_size(1) + 1492 /* IFLA_GRE_ERSPAN_DIR */ 1493 nla_total_size(1) + 1494 /* IFLA_GRE_ERSPAN_HWID */ 1495 nla_total_size(2) + 1496 0; 1497 } 1498 1499 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1500 { 1501 struct ip_tunnel *t = netdev_priv(dev); 1502 struct ip_tunnel_parm *p = &t->parms; 1503 __be16 o_flags = p->o_flags; 1504 1505 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1506 nla_put_be16(skb, IFLA_GRE_IFLAGS, 1507 gre_tnl_flags_to_gre_flags(p->i_flags)) || 1508 nla_put_be16(skb, IFLA_GRE_OFLAGS, 1509 gre_tnl_flags_to_gre_flags(o_flags)) || 1510 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1511 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1512 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 1513 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || 1514 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || 1515 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || 1516 nla_put_u8(skb, IFLA_GRE_PMTUDISC, 1517 !!(p->iph.frag_off & htons(IP_DF))) || 1518 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) 1519 goto nla_put_failure; 1520 1521 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, 1522 t->encap.type) || 1523 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, 1524 t->encap.sport) || 1525 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, 1526 t->encap.dport) || 1527 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, 1528 t->encap.flags)) 1529 goto nla_put_failure; 1530 1531 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df)) 1532 goto nla_put_failure; 1533 1534 if (t->collect_md) { 1535 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) 1536 goto nla_put_failure; 1537 } 1538 1539 return 0; 1540 1541 nla_put_failure: 1542 return -EMSGSIZE; 1543 } 1544 1545 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) 1546 { 1547 struct ip_tunnel *t = netdev_priv(dev); 1548 1549 if (t->erspan_ver <= 2) { 1550 if (t->erspan_ver != 0 && !t->collect_md) 1551 t->parms.o_flags |= TUNNEL_KEY; 1552 1553 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) 1554 goto nla_put_failure; 1555 1556 if (t->erspan_ver == 1) { 1557 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) 1558 goto nla_put_failure; 1559 } else if (t->erspan_ver == 2) { 1560 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) 1561 goto nla_put_failure; 1562 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) 1563 goto nla_put_failure; 1564 } 1565 } 1566 1567 return ipgre_fill_info(skb, dev); 1568 1569 nla_put_failure: 1570 return -EMSGSIZE; 1571 } 1572 1573 static void erspan_setup(struct net_device *dev) 1574 { 1575 struct ip_tunnel *t = netdev_priv(dev); 1576 1577 ether_setup(dev); 1578 dev->max_mtu = 0; 1579 dev->netdev_ops = &erspan_netdev_ops; 1580 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1581 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1582 ip_tunnel_setup(dev, erspan_net_id); 1583 t->erspan_ver = 1; 1584 } 1585 1586 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1587 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1588 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1589 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1590 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1591 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1592 [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) }, 1593 [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1594 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1595 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1596 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1597 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, 1598 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, 1599 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, 1600 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, 1601 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1602 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, 1603 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 1604 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, 1605 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, 1606 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, 1607 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, 1608 }; 1609 1610 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1611 .kind = "gre", 1612 .maxtype = IFLA_GRE_MAX, 1613 .policy = ipgre_policy, 1614 .priv_size = sizeof(struct ip_tunnel), 1615 .setup = ipgre_tunnel_setup, 1616 .validate = ipgre_tunnel_validate, 1617 .newlink = ipgre_newlink, 1618 .changelink = ipgre_changelink, 1619 .dellink = ip_tunnel_dellink, 1620 .get_size = ipgre_get_size, 1621 .fill_info = ipgre_fill_info, 1622 .get_link_net = ip_tunnel_get_link_net, 1623 }; 1624 1625 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1626 .kind = "gretap", 1627 .maxtype = IFLA_GRE_MAX, 1628 .policy = ipgre_policy, 1629 .priv_size = sizeof(struct ip_tunnel), 1630 .setup = ipgre_tap_setup, 1631 .validate = ipgre_tap_validate, 1632 .newlink = ipgre_newlink, 1633 .changelink = ipgre_changelink, 1634 .dellink = ip_tunnel_dellink, 1635 .get_size = ipgre_get_size, 1636 .fill_info = ipgre_fill_info, 1637 .get_link_net = ip_tunnel_get_link_net, 1638 }; 1639 1640 static struct rtnl_link_ops erspan_link_ops __read_mostly = { 1641 .kind = "erspan", 1642 .maxtype = IFLA_GRE_MAX, 1643 .policy = ipgre_policy, 1644 .priv_size = sizeof(struct ip_tunnel), 1645 .setup = erspan_setup, 1646 .validate = erspan_validate, 1647 .newlink = erspan_newlink, 1648 .changelink = erspan_changelink, 1649 .dellink = ip_tunnel_dellink, 1650 .get_size = ipgre_get_size, 1651 .fill_info = erspan_fill_info, 1652 .get_link_net = ip_tunnel_get_link_net, 1653 }; 1654 1655 struct net_device *gretap_fb_dev_create(struct net *net, const char *name, 1656 u8 name_assign_type) 1657 { 1658 struct nlattr *tb[IFLA_MAX + 1]; 1659 struct net_device *dev; 1660 LIST_HEAD(list_kill); 1661 struct ip_tunnel *t; 1662 int err; 1663 1664 memset(&tb, 0, sizeof(tb)); 1665 1666 dev = rtnl_create_link(net, name, name_assign_type, 1667 &ipgre_tap_ops, tb, NULL); 1668 if (IS_ERR(dev)) 1669 return dev; 1670 1671 /* Configure flow based GRE device. */ 1672 t = netdev_priv(dev); 1673 t->collect_md = true; 1674 1675 err = ipgre_newlink(net, dev, tb, NULL, NULL); 1676 if (err < 0) { 1677 free_netdev(dev); 1678 return ERR_PTR(err); 1679 } 1680 1681 /* openvswitch users expect packet sizes to be unrestricted, 1682 * so set the largest MTU we can. 1683 */ 1684 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); 1685 if (err) 1686 goto out; 1687 1688 err = rtnl_configure_link(dev, NULL, 0, NULL); 1689 if (err < 0) 1690 goto out; 1691 1692 return dev; 1693 out: 1694 ip_tunnel_dellink(dev, &list_kill); 1695 unregister_netdevice_many(&list_kill); 1696 return ERR_PTR(err); 1697 } 1698 EXPORT_SYMBOL_GPL(gretap_fb_dev_create); 1699 1700 static int __net_init ipgre_tap_init_net(struct net *net) 1701 { 1702 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1703 } 1704 1705 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) 1706 { 1707 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); 1708 } 1709 1710 static struct pernet_operations ipgre_tap_net_ops = { 1711 .init = ipgre_tap_init_net, 1712 .exit_batch = ipgre_tap_exit_batch_net, 1713 .id = &gre_tap_net_id, 1714 .size = sizeof(struct ip_tunnel_net), 1715 }; 1716 1717 static int __net_init erspan_init_net(struct net *net) 1718 { 1719 return ip_tunnel_init_net(net, erspan_net_id, 1720 &erspan_link_ops, "erspan0"); 1721 } 1722 1723 static void __net_exit erspan_exit_batch_net(struct list_head *net_list) 1724 { 1725 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); 1726 } 1727 1728 static struct pernet_operations erspan_net_ops = { 1729 .init = erspan_init_net, 1730 .exit_batch = erspan_exit_batch_net, 1731 .id = &erspan_net_id, 1732 .size = sizeof(struct ip_tunnel_net), 1733 }; 1734 1735 static int __init ipgre_init(void) 1736 { 1737 int err; 1738 1739 pr_info("GRE over IPv4 tunneling driver\n"); 1740 1741 err = register_pernet_device(&ipgre_net_ops); 1742 if (err < 0) 1743 return err; 1744 1745 err = register_pernet_device(&ipgre_tap_net_ops); 1746 if (err < 0) 1747 goto pnet_tap_failed; 1748 1749 err = register_pernet_device(&erspan_net_ops); 1750 if (err < 0) 1751 goto pnet_erspan_failed; 1752 1753 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 1754 if (err < 0) { 1755 pr_info("%s: can't add protocol\n", __func__); 1756 goto add_proto_failed; 1757 } 1758 1759 err = rtnl_link_register(&ipgre_link_ops); 1760 if (err < 0) 1761 goto rtnl_link_failed; 1762 1763 err = rtnl_link_register(&ipgre_tap_ops); 1764 if (err < 0) 1765 goto tap_ops_failed; 1766 1767 err = rtnl_link_register(&erspan_link_ops); 1768 if (err < 0) 1769 goto erspan_link_failed; 1770 1771 return 0; 1772 1773 erspan_link_failed: 1774 rtnl_link_unregister(&ipgre_tap_ops); 1775 tap_ops_failed: 1776 rtnl_link_unregister(&ipgre_link_ops); 1777 rtnl_link_failed: 1778 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1779 add_proto_failed: 1780 unregister_pernet_device(&erspan_net_ops); 1781 pnet_erspan_failed: 1782 unregister_pernet_device(&ipgre_tap_net_ops); 1783 pnet_tap_failed: 1784 unregister_pernet_device(&ipgre_net_ops); 1785 return err; 1786 } 1787 1788 static void __exit ipgre_fini(void) 1789 { 1790 rtnl_link_unregister(&ipgre_tap_ops); 1791 rtnl_link_unregister(&ipgre_link_ops); 1792 rtnl_link_unregister(&erspan_link_ops); 1793 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1794 unregister_pernet_device(&ipgre_tap_net_ops); 1795 unregister_pernet_device(&ipgre_net_ops); 1796 unregister_pernet_device(&erspan_net_ops); 1797 } 1798 1799 module_init(ipgre_init); 1800 module_exit(ipgre_fini); 1801 MODULE_LICENSE("GPL"); 1802 MODULE_ALIAS_RTNL_LINK("gre"); 1803 MODULE_ALIAS_RTNL_LINK("gretap"); 1804 MODULE_ALIAS_RTNL_LINK("erspan"); 1805 MODULE_ALIAS_NETDEV("gre0"); 1806 MODULE_ALIAS_NETDEV("gretap0"); 1807 MODULE_ALIAS_NETDEV("erspan0"); 1808