/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local TTL)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), we force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would even be more informative. This idea appeared to be wrong:
     only Linux complies with RFC 1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload. That is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, TTL is no solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit;
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the PMTU are pruned) and the tunnel MTU
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it is your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
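
/* A minimal sketch of why forcing DF makes looping self-limiting, under the
 * assumption that each pass through the tunnel adds hlen bytes of
 * encapsulation and PMTU feedback shrinks the tunnel MTU accordingly. The
 * MTU then reaches the 68-byte floor, where the tunnel stops forwarding,
 * after a bounded number of passes instead of traffic growing exponentially:
 *
 *	passes ~ (initial_mtu - 68) / hlen
 *
 * e.g. with initial_mtu = 1500 and hlen = 24 (outer IP plus GRE with a key),
 * roughly 60 passes. The numbers are illustrative, not taken from this file.
 */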

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. This means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. This makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: RFC 1812 was written by a Cisco employee,
	   so why the hell do these idiots break standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   RFC 2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return 0;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}
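
/* For reference, the on-the-wire GRE header layout (RFC 2784/2890) that the
 * comment above complains about; a sketch, not a definition used by this
 * file. Each optional word is present only when its flag bit is set, so with
 * checksums enabled the key lands in the third 32-bit word:
 *
 *	word 0:	C|K|S flags, version, protocol type
 *	word 1:	checksum + reserved	(if C)
 *	word 2:	key			(if K)
 *	word 3:	sequence number		(if S)
 *
 * An ICMP error quoting only 8 bytes of payload covers just words 0 and 1,
 * which is why the key of a checksummed, keyed tunnel cannot be recovered
 * from such errors.
 */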

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. This means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. This makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled. Tell
	 * them "thank you".
	 *
	 * Well, I wonder: RFC 1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct erspan_metadata *pkt_md;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		pkt_md = (struct erspan_metadata *)(ershdr + 1);

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}
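
/* Illustrative only: how a tunnel handled by erspan_rcv() above is typically
 * created from userspace with iproute2. Addresses and IDs are placeholders,
 * and the option names are iproute2's, not this file's:
 *
 *	ip link add dev erspan1 type erspan \
 *		local 10.0.0.1 remote 10.0.0.2 \
 *		seq key 100 erspan_ver 1 erspan 123
 *	ip link set erspan1 up
 *
 * A device created with "external" instead of fixed parameters runs in
 * collect-metadata mode, which is the tunnel->collect_md branch above.
 */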

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect-metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
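
/* A sketch of what the tun_hlen passed to gre_build_header() above amounts
 * to. gre_calc_hlen() (defined in net/gre.h, not here) starts from the
 * 4-byte base header and adds 4 bytes per enabled option, e.g.:
 *
 *	flags					hlen
 *	(none)					 4
 *	TUNNEL_KEY				 8
 *	TUNNEL_KEY | TUNNEL_SEQ			12
 *	TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ	16
 *
 * The table is illustrative; the authoritative computation lives in
 * gre_calc_hlen().
 */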

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags &
		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
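
/* Illustrative only: gre_fb_xmit() above is the flow-based
 * (collect-metadata, "external") transmit path. Such a device carries no
 * fixed endpoints of its own; each skb brings its destination in
 * skb_tunnel_info(). A sketch of creating one with iproute2, the device
 * name being a placeholder:
 *
 *	ip link add dev gretap1 type gretap external
 *
 * The per-packet metadata is then supplied by a consumer such as Open
 * vSwitch, or by tc/eBPF via bpf_skb_set_tunnel_key().
 */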

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	__be16 df, proto;
	struct flowi4 fl;
	int tunnel_hlen;
	int version;
	int nhoff;
	int thoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
		goto err_free_rt;
	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	/* ERSPAN has a fixed 8-byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	thoff = skb_transport_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IPV6) &&
	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
		truncate = true;

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_rt;
	}

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 proto, 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone,
   play with me. :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences.
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver != 1 && t->erspan_ver != 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}
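
/* Illustrative only: the IFLA_GRE_ENCAP_* attributes parsed above describe a
 * secondary UDP encapsulation such as FOU. A sketch of configuring it with
 * iproute2, with placeholder addresses and ports (GRE is IP protocol 47):
 *
 *	ip fou add port 5555 ipproto 47
 *	ip link add gre1 type gre remote 192.0.2.2 local 192.0.2.1 ttl 64 \
 *		encap fou encap-sport auto encap-dport 5555
 */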

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
		goto nla_put_failure;

	if (t->erspan_ver == 1) {
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;
	} else if (t->erspan_ver == 2) {
		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
			goto nla_put_failure;
		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}
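
/* Illustrative only: once this module is loaded (or built in), the three
 * link types registered above are driven through rtnetlink. A minimal
 * smoke test with iproute2, all names and addresses being placeholders:
 *
 *	modprobe ip_gre
 *	ip link add gre1 type gre remote 192.0.2.2 local 192.0.2.1 ttl 64
 *	ip link add gretap1 type gretap remote 192.0.2.2 local 192.0.2.1
 *	ip link del gre1 && ip link del gretap1
 *
 * The MODULE_ALIAS_RTNL_LINK() entries below are what let
 * "ip link add ... type gre|gretap|erspan" auto-load this module.
 */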

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");