1 /* 2 * Linux NET3: GRE over IP protocol decoder. 3 * 4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/capability.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/slab.h> 20 #include <linux/uaccess.h> 21 #include <linux/skbuff.h> 22 #include <linux/netdevice.h> 23 #include <linux/in.h> 24 #include <linux/tcp.h> 25 #include <linux/udp.h> 26 #include <linux/if_arp.h> 27 #include <linux/if_vlan.h> 28 #include <linux/init.h> 29 #include <linux/in6.h> 30 #include <linux/inetdevice.h> 31 #include <linux/igmp.h> 32 #include <linux/netfilter_ipv4.h> 33 #include <linux/etherdevice.h> 34 #include <linux/if_ether.h> 35 36 #include <net/sock.h> 37 #include <net/ip.h> 38 #include <net/icmp.h> 39 #include <net/protocol.h> 40 #include <net/ip_tunnels.h> 41 #include <net/arp.h> 42 #include <net/checksum.h> 43 #include <net/dsfield.h> 44 #include <net/inet_ecn.h> 45 #include <net/xfrm.h> 46 #include <net/net_namespace.h> 47 #include <net/netns/generic.h> 48 #include <net/rtnetlink.h> 49 #include <net/gre.h> 50 #include <net/dst_metadata.h> 51 #include <net/erspan.h> 52 53 /* 54 Problems & solutions 55 -------------------- 56 57 1. The most important issue is detecting local dead loops. 58 They would cause complete host lockup in transmit, which 59 would be "resolved" by stack overflow or, if queueing is enabled, 60 with infinite looping in net_bh. 61 62 We cannot track such dead loops during route installation, 63 it is infeasible task. The most general solutions would be 64 to keep skb->encapsulation counter (sort of local ttl), 65 and silently drop packet when it expires. 
It is a good
   solution, but it supposes maintaining new variable in ALL
   skb, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT

   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, TTL is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem!
Nobody could accuse us, we made 103 all that we could make. Even if it is your gated who injected 104 fatal route to network, even if it were you who configured 105 fatal static route: you are innocent. :-) 106 107 Alexey Kuznetsov. 108 */ 109 110 static bool log_ecn_error = true; 111 module_param(log_ecn_error, bool, 0644); 112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 113 114 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 115 static int ipgre_tunnel_init(struct net_device *dev); 116 static void erspan_build_header(struct sk_buff *skb, 117 u32 id, u32 index, 118 bool truncate, bool is_ipv4); 119 120 static unsigned int ipgre_net_id __read_mostly; 121 static unsigned int gre_tap_net_id __read_mostly; 122 static unsigned int erspan_net_id __read_mostly; 123 124 static int ipgre_err(struct sk_buff *skb, u32 info, 125 const struct tnl_ptk_info *tpi) 126 { 127 128 /* All the routers (except for Linux) return only 129 8 bytes of packet payload. It means, that precise relaying of 130 ICMP in the real Internet is absolutely infeasible. 131 132 Moreover, Cisco "wise men" put GRE key to the third word 133 in GRE header. It makes impossible maintaining even soft 134 state for keyed GRE tunnels with enabled checksum. Tell 135 them "thank you". 136 137 Well, I wonder, rfc1812 was written by Cisco employee, 138 what the hell these idiots break standards established 139 by themselves??? 
140 */ 141 struct net *net = dev_net(skb->dev); 142 struct ip_tunnel_net *itn; 143 const struct iphdr *iph; 144 const int type = icmp_hdr(skb)->type; 145 const int code = icmp_hdr(skb)->code; 146 unsigned int data_len = 0; 147 struct ip_tunnel *t; 148 149 if (tpi->proto == htons(ETH_P_TEB)) 150 itn = net_generic(net, gre_tap_net_id); 151 else if (tpi->proto == htons(ETH_P_ERSPAN) || 152 tpi->proto == htons(ETH_P_ERSPAN2)) 153 itn = net_generic(net, erspan_net_id); 154 else 155 itn = net_generic(net, ipgre_net_id); 156 157 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 158 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 159 iph->daddr, iph->saddr, tpi->key); 160 161 if (!t) 162 return -ENOENT; 163 164 switch (type) { 165 default: 166 case ICMP_PARAMETERPROB: 167 return 0; 168 169 case ICMP_DEST_UNREACH: 170 switch (code) { 171 case ICMP_SR_FAILED: 172 case ICMP_PORT_UNREACH: 173 /* Impossible event. */ 174 return 0; 175 default: 176 /* All others are translated to HOST_UNREACH. 177 rfc2003 contains "deep thoughts" about NET_UNREACH, 178 I believe they are just ether pollution. 
--ANK 179 */ 180 break; 181 } 182 break; 183 184 case ICMP_TIME_EXCEEDED: 185 if (code != ICMP_EXC_TTL) 186 return 0; 187 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 188 break; 189 190 case ICMP_REDIRECT: 191 break; 192 } 193 194 #if IS_ENABLED(CONFIG_IPV6) 195 if (tpi->proto == htons(ETH_P_IPV6) && 196 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 197 type, data_len)) 198 return 0; 199 #endif 200 201 if (t->parms.iph.daddr == 0 || 202 ipv4_is_multicast(t->parms.iph.daddr)) 203 return 0; 204 205 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 206 return 0; 207 208 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 209 t->err_count++; 210 else 211 t->err_count = 1; 212 t->err_time = jiffies; 213 214 return 0; 215 } 216 217 static void gre_err(struct sk_buff *skb, u32 info) 218 { 219 /* All the routers (except for Linux) return only 220 * 8 bytes of packet payload. It means, that precise relaying of 221 * ICMP in the real Internet is absolutely infeasible. 222 * 223 * Moreover, Cisco "wise men" put GRE key to the third word 224 * in GRE header. It makes impossible maintaining even soft 225 * state for keyed 226 * GRE tunnels with enabled checksum. Tell them "thank you". 227 * 228 * Well, I wonder, rfc1812 was written by Cisco employee, 229 * what the hell these idiots break standards established 230 * by themselves??? 
231 */ 232 233 const struct iphdr *iph = (struct iphdr *)skb->data; 234 const int type = icmp_hdr(skb)->type; 235 const int code = icmp_hdr(skb)->code; 236 struct tnl_ptk_info tpi; 237 238 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), 239 iph->ihl * 4) < 0) 240 return; 241 242 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 243 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 244 skb->dev->ifindex, IPPROTO_GRE); 245 return; 246 } 247 if (type == ICMP_REDIRECT) { 248 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 249 IPPROTO_GRE); 250 return; 251 } 252 253 ipgre_err(skb, info, &tpi); 254 } 255 256 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, 257 int gre_hdr_len) 258 { 259 struct net *net = dev_net(skb->dev); 260 struct metadata_dst *tun_dst = NULL; 261 struct erspan_base_hdr *ershdr; 262 struct erspan_metadata *pkt_md; 263 struct ip_tunnel_net *itn; 264 struct ip_tunnel *tunnel; 265 const struct iphdr *iph; 266 struct erspan_md2 *md2; 267 int ver; 268 int len; 269 270 itn = net_generic(net, erspan_net_id); 271 len = gre_hdr_len + sizeof(*ershdr); 272 273 /* Check based hdr len */ 274 if (unlikely(!pskb_may_pull(skb, len))) 275 return PACKET_REJECT; 276 277 iph = ip_hdr(skb); 278 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 279 ver = ershdr->ver; 280 281 /* The original GRE header does not have key field, 282 * Use ERSPAN 10-bit session ID as key. 
283 */ 284 tpi->key = cpu_to_be32(get_session_id(ershdr)); 285 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 286 tpi->flags | TUNNEL_KEY, 287 iph->saddr, iph->daddr, tpi->key); 288 289 if (tunnel) { 290 len = gre_hdr_len + erspan_hdr_len(ver); 291 if (unlikely(!pskb_may_pull(skb, len))) 292 return PACKET_REJECT; 293 294 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 295 pkt_md = (struct erspan_metadata *)(ershdr + 1); 296 297 if (__iptunnel_pull_header(skb, 298 len, 299 htons(ETH_P_TEB), 300 false, false) < 0) 301 goto drop; 302 303 if (tunnel->collect_md) { 304 struct ip_tunnel_info *info; 305 struct erspan_metadata *md; 306 __be64 tun_id; 307 __be16 flags; 308 309 tpi->flags |= TUNNEL_KEY; 310 flags = tpi->flags; 311 tun_id = key32_to_tunnel_id(tpi->key); 312 313 tun_dst = ip_tun_rx_dst(skb, flags, 314 tun_id, sizeof(*md)); 315 if (!tun_dst) 316 return PACKET_REJECT; 317 318 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 319 md->version = ver; 320 md2 = &md->u.md2; 321 memcpy(md2, pkt_md, ver == 1 ? 
ERSPAN_V1_MDSIZE : 322 ERSPAN_V2_MDSIZE); 323 324 info = &tun_dst->u.tun_info; 325 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 326 info->options_len = sizeof(*md); 327 } 328 329 skb_reset_mac_header(skb); 330 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 331 return PACKET_RCVD; 332 } 333 return PACKET_REJECT; 334 335 drop: 336 kfree_skb(skb); 337 return PACKET_RCVD; 338 } 339 340 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 341 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) 342 { 343 struct metadata_dst *tun_dst = NULL; 344 const struct iphdr *iph; 345 struct ip_tunnel *tunnel; 346 347 iph = ip_hdr(skb); 348 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 349 iph->saddr, iph->daddr, tpi->key); 350 351 if (tunnel) { 352 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, 353 raw_proto, false) < 0) 354 goto drop; 355 356 if (tunnel->dev->type != ARPHRD_NONE) 357 skb_pop_mac_header(skb); 358 else 359 skb_reset_mac_header(skb); 360 if (tunnel->collect_md) { 361 __be16 flags; 362 __be64 tun_id; 363 364 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); 365 tun_id = key32_to_tunnel_id(tpi->key); 366 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); 367 if (!tun_dst) 368 return PACKET_REJECT; 369 } 370 371 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 372 return PACKET_RCVD; 373 } 374 return PACKET_NEXT; 375 376 drop: 377 kfree_skb(skb); 378 return PACKET_RCVD; 379 } 380 381 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 382 int hdr_len) 383 { 384 struct net *net = dev_net(skb->dev); 385 struct ip_tunnel_net *itn; 386 int res; 387 388 if (tpi->proto == htons(ETH_P_TEB)) 389 itn = net_generic(net, gre_tap_net_id); 390 else 391 itn = net_generic(net, ipgre_net_id); 392 393 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); 394 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { 395 /* ipgre tunnels in collect metadata mode should receive 396 * also ETH_P_TEB traffic. 
397 */ 398 itn = net_generic(net, ipgre_net_id); 399 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); 400 } 401 return res; 402 } 403 404 static int gre_rcv(struct sk_buff *skb) 405 { 406 struct tnl_ptk_info tpi; 407 bool csum_err = false; 408 int hdr_len; 409 410 #ifdef CONFIG_NET_IPGRE_BROADCAST 411 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 412 /* Looped back packet, drop it! */ 413 if (rt_is_output_route(skb_rtable(skb))) 414 goto drop; 415 } 416 #endif 417 418 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); 419 if (hdr_len < 0) 420 goto drop; 421 422 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || 423 tpi.proto == htons(ETH_P_ERSPAN2))) { 424 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 425 return 0; 426 goto out; 427 } 428 429 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 430 return 0; 431 432 out: 433 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 434 drop: 435 kfree_skb(skb); 436 return 0; 437 } 438 439 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 440 const struct iphdr *tnl_params, 441 __be16 proto) 442 { 443 struct ip_tunnel *tunnel = netdev_priv(dev); 444 445 if (tunnel->parms.o_flags & TUNNEL_SEQ) 446 tunnel->o_seqno++; 447 448 /* Push GRE header. */ 449 gre_build_header(skb, tunnel->tun_hlen, 450 tunnel->parms.o_flags, proto, tunnel->parms.o_key, 451 htonl(tunnel->o_seqno)); 452 453 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 454 } 455 456 static int gre_handle_offloads(struct sk_buff *skb, bool csum) 457 { 458 return iptunnel_handle_offloads(skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 459 } 460 461 static struct rtable *gre_get_rt(struct sk_buff *skb, 462 struct net_device *dev, 463 struct flowi4 *fl, 464 const struct ip_tunnel_key *key) 465 { 466 struct net *net = dev_net(dev); 467 468 memset(fl, 0, sizeof(*fl)); 469 fl->daddr = key->u.ipv4.dst; 470 fl->saddr = key->u.ipv4.src; 471 fl->flowi4_tos = RT_TOS(key->tos); 472 fl->flowi4_mark = skb->mark; 473 fl->flowi4_proto = IPPROTO_GRE; 474 475 return ip_route_output_key(net, fl); 476 } 477 478 static struct rtable *prepare_fb_xmit(struct sk_buff *skb, 479 struct net_device *dev, 480 struct flowi4 *fl, 481 int tunnel_hlen) 482 { 483 struct ip_tunnel_info *tun_info; 484 const struct ip_tunnel_key *key; 485 struct rtable *rt = NULL; 486 int min_headroom; 487 bool use_cache; 488 int err; 489 490 tun_info = skb_tunnel_info(skb); 491 key = &tun_info->key; 492 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); 493 494 if (use_cache) 495 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); 496 if (!rt) { 497 rt = gre_get_rt(skb, dev, fl, key); 498 if (IS_ERR(rt)) 499 goto err_free_skb; 500 if (use_cache) 501 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, 502 fl->saddr); 503 } 504 505 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 506 + tunnel_hlen + sizeof(struct iphdr); 507 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { 508 int head_delta = SKB_DATA_ALIGN(min_headroom - 509 skb_headroom(skb) + 510 16); 511 err = pskb_expand_head(skb, max_t(int, head_delta, 0), 512 0, GFP_ATOMIC); 513 if (unlikely(err)) 514 goto err_free_rt; 515 } 516 return rt; 517 518 err_free_rt: 519 ip_rt_put(rt); 520 err_free_skb: 521 kfree_skb(skb); 522 dev->stats.tx_dropped++; 523 return NULL; 524 } 525 526 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, 527 __be16 proto) 528 { 529 struct ip_tunnel *tunnel = netdev_priv(dev); 530 struct ip_tunnel_info *tun_info; 531 const struct ip_tunnel_key *key; 532 struct rtable *rt = NULL; 
533 struct flowi4 fl; 534 int tunnel_hlen; 535 __be16 df, flags; 536 537 tun_info = skb_tunnel_info(skb); 538 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 539 ip_tunnel_info_af(tun_info) != AF_INET)) 540 goto err_free_skb; 541 542 key = &tun_info->key; 543 tunnel_hlen = gre_calc_hlen(key->tun_flags); 544 545 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 546 if (!rt) 547 return; 548 549 /* Push Tunnel header. */ 550 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) 551 goto err_free_rt; 552 553 flags = tun_info->key.tun_flags & 554 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); 555 gre_build_header(skb, tunnel_hlen, flags, proto, 556 tunnel_id_to_key32(tun_info->key.tun_id), 557 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); 558 559 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 560 561 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 562 key->tos, key->ttl, df, false); 563 return; 564 565 err_free_rt: 566 ip_rt_put(rt); 567 err_free_skb: 568 kfree_skb(skb); 569 dev->stats.tx_dropped++; 570 } 571 572 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) 573 { 574 struct ip_tunnel *tunnel = netdev_priv(dev); 575 struct ip_tunnel_info *tun_info; 576 const struct ip_tunnel_key *key; 577 struct erspan_metadata *md; 578 struct rtable *rt = NULL; 579 bool truncate = false; 580 __be16 df, proto; 581 struct flowi4 fl; 582 int tunnel_hlen; 583 int version; 584 int nhoff; 585 int thoff; 586 587 tun_info = skb_tunnel_info(skb); 588 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 589 ip_tunnel_info_af(tun_info) != AF_INET)) 590 goto err_free_skb; 591 592 key = &tun_info->key; 593 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) 594 goto err_free_rt; 595 md = ip_tunnel_info_opts(tun_info); 596 if (!md) 597 goto err_free_rt; 598 599 /* ERSPAN has fixed 8 byte GRE header */ 600 version = md->version; 601 tunnel_hlen = 8 + erspan_hdr_len(version); 602 603 
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 604 if (!rt) 605 return; 606 607 if (gre_handle_offloads(skb, false)) 608 goto err_free_rt; 609 610 if (skb->len > dev->mtu + dev->hard_header_len) { 611 pskb_trim(skb, dev->mtu + dev->hard_header_len); 612 truncate = true; 613 } 614 615 nhoff = skb_network_header(skb) - skb_mac_header(skb); 616 if (skb->protocol == htons(ETH_P_IP) && 617 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) 618 truncate = true; 619 620 thoff = skb_transport_header(skb) - skb_mac_header(skb); 621 if (skb->protocol == htons(ETH_P_IPV6) && 622 (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) 623 truncate = true; 624 625 if (version == 1) { 626 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), 627 ntohl(md->u.index), truncate, true); 628 proto = htons(ETH_P_ERSPAN); 629 } else if (version == 2) { 630 erspan_build_header_v2(skb, 631 ntohl(tunnel_id_to_key32(key->tun_id)), 632 md->u.md2.dir, 633 get_hwid(&md->u.md2), 634 truncate, true); 635 proto = htons(ETH_P_ERSPAN2); 636 } else { 637 goto err_free_rt; 638 } 639 640 gre_build_header(skb, 8, TUNNEL_SEQ, 641 proto, 0, htonl(tunnel->o_seqno++)); 642 643 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; 644 645 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 646 key->tos, key->ttl, df, false); 647 return; 648 649 err_free_rt: 650 ip_rt_put(rt); 651 err_free_skb: 652 kfree_skb(skb); 653 dev->stats.tx_dropped++; 654 } 655 656 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 657 { 658 struct ip_tunnel_info *info = skb_tunnel_info(skb); 659 struct rtable *rt; 660 struct flowi4 fl4; 661 662 if (ip_tunnel_info_af(info) != AF_INET) 663 return -EINVAL; 664 665 rt = gre_get_rt(skb, dev, &fl4, &info->key); 666 if (IS_ERR(rt)) 667 return PTR_ERR(rt); 668 669 ip_rt_put(rt); 670 info->key.u.ipv4.src = fl4.saddr; 671 return 0; 672 } 673 674 static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 675 struct net_device *dev) 676 { 677 struct ip_tunnel *tunnel = netdev_priv(dev); 678 const struct iphdr *tnl_params; 679 680 if (!pskb_inet_may_pull(skb)) 681 goto free_skb; 682 683 if (tunnel->collect_md) { 684 gre_fb_xmit(skb, dev, skb->protocol); 685 return NETDEV_TX_OK; 686 } 687 688 if (dev->header_ops) { 689 /* Need space for new headers */ 690 if (skb_cow_head(skb, dev->needed_headroom - 691 (tunnel->hlen + sizeof(struct iphdr)))) 692 goto free_skb; 693 694 tnl_params = (const struct iphdr *)skb->data; 695 696 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing 697 * to gre header. 
698 */ 699 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); 700 skb_reset_mac_header(skb); 701 } else { 702 if (skb_cow_head(skb, dev->needed_headroom)) 703 goto free_skb; 704 705 tnl_params = &tunnel->parms.iph; 706 } 707 708 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 709 goto free_skb; 710 711 __gre_xmit(skb, dev, tnl_params, skb->protocol); 712 return NETDEV_TX_OK; 713 714 free_skb: 715 kfree_skb(skb); 716 dev->stats.tx_dropped++; 717 return NETDEV_TX_OK; 718 } 719 720 static netdev_tx_t erspan_xmit(struct sk_buff *skb, 721 struct net_device *dev) 722 { 723 struct ip_tunnel *tunnel = netdev_priv(dev); 724 bool truncate = false; 725 __be16 proto; 726 727 if (!pskb_inet_may_pull(skb)) 728 goto free_skb; 729 730 if (tunnel->collect_md) { 731 erspan_fb_xmit(skb, dev); 732 return NETDEV_TX_OK; 733 } 734 735 if (gre_handle_offloads(skb, false)) 736 goto free_skb; 737 738 if (skb_cow_head(skb, dev->needed_headroom)) 739 goto free_skb; 740 741 if (skb->len > dev->mtu + dev->hard_header_len) { 742 pskb_trim(skb, dev->mtu + dev->hard_header_len); 743 truncate = true; 744 } 745 746 /* Push ERSPAN header */ 747 if (tunnel->erspan_ver == 1) { 748 erspan_build_header(skb, ntohl(tunnel->parms.o_key), 749 tunnel->index, 750 truncate, true); 751 proto = htons(ETH_P_ERSPAN); 752 } else if (tunnel->erspan_ver == 2) { 753 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), 754 tunnel->dir, tunnel->hwid, 755 truncate, true); 756 proto = htons(ETH_P_ERSPAN2); 757 } else { 758 goto free_skb; 759 } 760 761 tunnel->parms.o_flags &= ~TUNNEL_KEY; 762 __gre_xmit(skb, dev, &tunnel->parms.iph, proto); 763 return NETDEV_TX_OK; 764 765 free_skb: 766 kfree_skb(skb); 767 dev->stats.tx_dropped++; 768 return NETDEV_TX_OK; 769 } 770 771 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, 772 struct net_device *dev) 773 { 774 struct ip_tunnel *tunnel = netdev_priv(dev); 775 776 if (!pskb_inet_may_pull(skb)) 777 goto free_skb; 778 779 if (tunnel->collect_md) { 780 
gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); 781 return NETDEV_TX_OK; 782 } 783 784 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 785 goto free_skb; 786 787 if (skb_cow_head(skb, dev->needed_headroom)) 788 goto free_skb; 789 790 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); 791 return NETDEV_TX_OK; 792 793 free_skb: 794 kfree_skb(skb); 795 dev->stats.tx_dropped++; 796 return NETDEV_TX_OK; 797 } 798 799 static void ipgre_link_update(struct net_device *dev, bool set_mtu) 800 { 801 struct ip_tunnel *tunnel = netdev_priv(dev); 802 int len; 803 804 len = tunnel->tun_hlen; 805 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 806 len = tunnel->tun_hlen - len; 807 tunnel->hlen = tunnel->hlen + len; 808 809 dev->needed_headroom = dev->needed_headroom + len; 810 if (set_mtu) 811 dev->mtu = max_t(int, dev->mtu - len, 68); 812 813 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 814 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 815 tunnel->encap.type == TUNNEL_ENCAP_NONE) { 816 dev->features |= NETIF_F_GSO_SOFTWARE; 817 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 818 } else { 819 dev->features &= ~NETIF_F_GSO_SOFTWARE; 820 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; 821 } 822 dev->features |= NETIF_F_LLTX; 823 } else { 824 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; 825 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE); 826 } 827 } 828 829 static int ipgre_tunnel_ioctl(struct net_device *dev, 830 struct ifreq *ifr, int cmd) 831 { 832 struct ip_tunnel_parm p; 833 int err; 834 835 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 836 return -EFAULT; 837 838 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 839 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 840 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || 841 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) 842 return -EINVAL; 843 } 844 845 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 846 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 847 848 err 
= ip_tunnel_ioctl(dev, &p, cmd); 849 if (err) 850 return err; 851 852 if (cmd == SIOCCHGTUNNEL) { 853 struct ip_tunnel *t = netdev_priv(dev); 854 855 t->parms.i_flags = p.i_flags; 856 t->parms.o_flags = p.o_flags; 857 858 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 859 ipgre_link_update(dev, true); 860 } 861 862 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 863 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 864 865 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 866 return -EFAULT; 867 868 return 0; 869 } 870 871 /* Nice toy. Unfortunately, useless in real life :-) 872 It allows to construct virtual multiprotocol broadcast "LAN" 873 over the Internet, provided multicast routing is tuned. 874 875 876 I have no idea was this bicycle invented before me, 877 so that I had to set ARPHRD_IPGRE to a random value. 878 I have an impression, that Cisco could make something similar, 879 but this feature is apparently missing in IOS<=11.2(8). 880 881 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 882 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 883 884 ping -t 255 224.66.66.66 885 886 If nobody answers, mbone does not work. 887 888 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 889 ip addr add 10.66.66.<somewhat>/24 dev Universe 890 ifconfig Universe up 891 ifconfig Universe add fe80::<Your_real_addr>/10 892 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 893 ftp 10.66.66.66 894 ... 895 ftp fec0:6666:6666::193.233.7.65 896 ... 
897 */ 898 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 899 unsigned short type, 900 const void *daddr, const void *saddr, unsigned int len) 901 { 902 struct ip_tunnel *t = netdev_priv(dev); 903 struct iphdr *iph; 904 struct gre_base_hdr *greh; 905 906 iph = skb_push(skb, t->hlen + sizeof(*iph)); 907 greh = (struct gre_base_hdr *)(iph+1); 908 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); 909 greh->protocol = htons(type); 910 911 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 912 913 /* Set the source hardware address. */ 914 if (saddr) 915 memcpy(&iph->saddr, saddr, 4); 916 if (daddr) 917 memcpy(&iph->daddr, daddr, 4); 918 if (iph->daddr) 919 return t->hlen + sizeof(*iph); 920 921 return -(t->hlen + sizeof(*iph)); 922 } 923 924 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 925 { 926 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); 927 memcpy(haddr, &iph->saddr, 4); 928 return 4; 929 } 930 931 static const struct header_ops ipgre_header_ops = { 932 .create = ipgre_header, 933 .parse = ipgre_header_parse, 934 }; 935 936 #ifdef CONFIG_NET_IPGRE_BROADCAST 937 static int ipgre_open(struct net_device *dev) 938 { 939 struct ip_tunnel *t = netdev_priv(dev); 940 941 if (ipv4_is_multicast(t->parms.iph.daddr)) { 942 struct flowi4 fl4; 943 struct rtable *rt; 944 945 rt = ip_route_output_gre(t->net, &fl4, 946 t->parms.iph.daddr, 947 t->parms.iph.saddr, 948 t->parms.o_key, 949 RT_TOS(t->parms.iph.tos), 950 t->parms.link); 951 if (IS_ERR(rt)) 952 return -EADDRNOTAVAIL; 953 dev = rt->dst.dev; 954 ip_rt_put(rt); 955 if (!__in_dev_get_rtnl(dev)) 956 return -EADDRNOTAVAIL; 957 t->mlink = dev->ifindex; 958 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 959 } 960 return 0; 961 } 962 963 static int ipgre_close(struct net_device *dev) 964 { 965 struct ip_tunnel *t = netdev_priv(dev); 966 967 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 968 struct in_device *in_dev; 969 
in_dev = inetdev_by_index(t->net, t->mlink); 970 if (in_dev) 971 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 972 } 973 return 0; 974 } 975 #endif 976 977 static const struct net_device_ops ipgre_netdev_ops = { 978 .ndo_init = ipgre_tunnel_init, 979 .ndo_uninit = ip_tunnel_uninit, 980 #ifdef CONFIG_NET_IPGRE_BROADCAST 981 .ndo_open = ipgre_open, 982 .ndo_stop = ipgre_close, 983 #endif 984 .ndo_start_xmit = ipgre_xmit, 985 .ndo_do_ioctl = ipgre_tunnel_ioctl, 986 .ndo_change_mtu = ip_tunnel_change_mtu, 987 .ndo_get_stats64 = ip_tunnel_get_stats64, 988 .ndo_get_iflink = ip_tunnel_get_iflink, 989 }; 990 991 #define GRE_FEATURES (NETIF_F_SG | \ 992 NETIF_F_FRAGLIST | \ 993 NETIF_F_HIGHDMA | \ 994 NETIF_F_HW_CSUM) 995 996 static void ipgre_tunnel_setup(struct net_device *dev) 997 { 998 dev->netdev_ops = &ipgre_netdev_ops; 999 dev->type = ARPHRD_IPGRE; 1000 ip_tunnel_setup(dev, ipgre_net_id); 1001 } 1002 1003 static void __gre_tunnel_init(struct net_device *dev) 1004 { 1005 struct ip_tunnel *tunnel; 1006 1007 tunnel = netdev_priv(dev); 1008 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 1009 tunnel->parms.iph.protocol = IPPROTO_GRE; 1010 1011 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 1012 1013 dev->features |= GRE_FEATURES; 1014 dev->hw_features |= GRE_FEATURES; 1015 1016 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 1017 /* TCP offload with GRE SEQ is not supported, nor 1018 * can we support 2 levels of outer headers requiring 1019 * an update. 
1020 */ 1021 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 1022 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { 1023 dev->features |= NETIF_F_GSO_SOFTWARE; 1024 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1025 } 1026 1027 /* Can use a lockless transmit, unless we generate 1028 * output sequences 1029 */ 1030 dev->features |= NETIF_F_LLTX; 1031 } 1032 } 1033 1034 static int ipgre_tunnel_init(struct net_device *dev) 1035 { 1036 struct ip_tunnel *tunnel = netdev_priv(dev); 1037 struct iphdr *iph = &tunnel->parms.iph; 1038 1039 __gre_tunnel_init(dev); 1040 1041 memcpy(dev->dev_addr, &iph->saddr, 4); 1042 memcpy(dev->broadcast, &iph->daddr, 4); 1043 1044 dev->flags = IFF_NOARP; 1045 netif_keep_dst(dev); 1046 dev->addr_len = 4; 1047 1048 if (iph->daddr && !tunnel->collect_md) { 1049 #ifdef CONFIG_NET_IPGRE_BROADCAST 1050 if (ipv4_is_multicast(iph->daddr)) { 1051 if (!iph->saddr) 1052 return -EINVAL; 1053 dev->flags = IFF_BROADCAST; 1054 dev->header_ops = &ipgre_header_ops; 1055 } 1056 #endif 1057 } else if (!tunnel->collect_md) { 1058 dev->header_ops = &ipgre_header_ops; 1059 } 1060 1061 return ip_tunnel_init(dev); 1062 } 1063 1064 static const struct gre_protocol ipgre_protocol = { 1065 .handler = gre_rcv, 1066 .err_handler = gre_err, 1067 }; 1068 1069 static int __net_init ipgre_init_net(struct net *net) 1070 { 1071 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1072 } 1073 1074 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) 1075 { 1076 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); 1077 } 1078 1079 static struct pernet_operations ipgre_net_ops = { 1080 .init = ipgre_init_net, 1081 .exit_batch = ipgre_exit_batch_net, 1082 .id = &ipgre_net_id, 1083 .size = sizeof(struct ip_tunnel_net), 1084 }; 1085 1086 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], 1087 struct netlink_ext_ack *extack) 1088 { 1089 __be16 flags; 1090 1091 if (!data) 1092 return 0; 1093 1094 flags = 0; 1095 if 
(data[IFLA_GRE_IFLAGS]) 1096 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1097 if (data[IFLA_GRE_OFLAGS]) 1098 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1099 if (flags & (GRE_VERSION|GRE_ROUTING)) 1100 return -EINVAL; 1101 1102 if (data[IFLA_GRE_COLLECT_METADATA] && 1103 data[IFLA_GRE_ENCAP_TYPE] && 1104 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) 1105 return -EINVAL; 1106 1107 return 0; 1108 } 1109 1110 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], 1111 struct netlink_ext_ack *extack) 1112 { 1113 __be32 daddr; 1114 1115 if (tb[IFLA_ADDRESS]) { 1116 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1117 return -EINVAL; 1118 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1119 return -EADDRNOTAVAIL; 1120 } 1121 1122 if (!data) 1123 goto out; 1124 1125 if (data[IFLA_GRE_REMOTE]) { 1126 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1127 if (!daddr) 1128 return -EINVAL; 1129 } 1130 1131 out: 1132 return ipgre_tunnel_validate(tb, data, extack); 1133 } 1134 1135 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], 1136 struct netlink_ext_ack *extack) 1137 { 1138 __be16 flags = 0; 1139 int ret; 1140 1141 if (!data) 1142 return 0; 1143 1144 ret = ipgre_tap_validate(tb, data, extack); 1145 if (ret) 1146 return ret; 1147 1148 /* ERSPAN should only have GRE sequence and key flag */ 1149 if (data[IFLA_GRE_OFLAGS]) 1150 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1151 if (data[IFLA_GRE_IFLAGS]) 1152 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1153 if (!data[IFLA_GRE_COLLECT_METADATA] && 1154 flags != (GRE_SEQ | GRE_KEY)) 1155 return -EINVAL; 1156 1157 /* ERSPAN Session ID only has 10-bit. Since we reuse 1158 * 32-bit key field as ID, check it's range. 
 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

/* Translate IFLA_GRE_* netlink attributes into *parms and *fwmark.
 *
 * NOTE(review): besides filling *parms, this writes collect_md,
 * ignore_df and the ERSPAN fields straight into the tunnel private
 * area (t->...), so an -EINVAL return part-way through leaves the
 * device partially updated on the changelink path — confirm callers
 * do not rely on all-or-nothing semantics.
 */
static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* PMTU discovery defaults to on; forcing DF conflicts with an
	 * already-set ignore_df.
	 */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		/* ignore_df cannot be enabled while DF is forced on */
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df =
 !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	/* only ERSPAN versions 1 and 2 are implemented */
	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver != 1 && t->erspan_ver != 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		/* v1 carries a 20-bit index */
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		/* v2 carries direction and hardware ID instead */
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

/* ndo_init for "gretap" (Ethernet-over-GRE) devices */
static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}
/* Device ops for "gretap"; transmit path is gre_tap_xmit */
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

/* ndo_init for "erspan": fixed 8-byte GRE header plus the
 * version-dependent ERSPAN header length.
 */
static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

/* Device ops for "erspan"; transmit path is erspan_xmit */
static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

/* rtnl ->setup for "gretap": Ethernet semantics on top of the tunnel;
 * max_mtu = 0 lifts the core's upper MTU bound.
 */
static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags	&= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

/* rtnl ->newlink shared by gre, gretap and erspan */
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	/* apply fou/gue UDP encapsulation settings first, if supplied */
	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

/* rtnl ->changelink shared by gre, gretap and erspan */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	/* flag changes affect header length and offload setup */
	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	/* header-length/needed_headroom update is skipped for erspan;
	 * NOTE(review): erspan appears to derive its header length in
	 * erspan_tunnel_init() instead — confirm.
	 */
	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

/* Worst-case netlink attribute space needed by ipgre_fill_info() */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/*
 IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

/* Dump the tunnel configuration to userspace; the counterpart of
 * ipgre_netlink_parms().
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	/* collect_md is a flag attribute: present only when enabled */
	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
		goto nla_put_failure;

	if (t->erspan_ver
 == 1) {
		/* v1: index only; v2: direction + hardware ID */
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;
	} else if (t->erspan_ver == 2) {
		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
			goto nla_put_failure;
		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* rtnl ->setup for "erspan"; devices default to ERSPAN version 1 */
static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

/* Netlink attribute policy shared by gre, gretap and erspan */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy
 = ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

/* Create and register a flow-based (collect_md) gretap device for
 * in-kernel callers.  Returns the netdev or an ERR_PTR().
 */
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device.
 */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		/* device not yet registered: a plain free suffices */
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	/* registered by now: tear down via dellink + batched unregister */
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

/* Per-netns ops for "gretap"; creates the gretap0 fallback device */
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

/* Per-netns ops for "erspan"; creates the erspan0 fallback device */
static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

/* Module init: register the three pernet device classes, the GRE
 * protocol hook and the three rtnl link types; unwind in reverse
 * order on failure.
 */
static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err =
 register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

	/* unwind in reverse registration order */
erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

/* Module exit: drop the rtnl link types first so no new devices can
 * be created, then the protocol hook, then the pernet state.
 */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");