1 /* 2 * Linux NET3: GRE over IP protocol decoder. 3 * 4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/capability.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/slab.h> 20 #include <linux/uaccess.h> 21 #include <linux/skbuff.h> 22 #include <linux/netdevice.h> 23 #include <linux/in.h> 24 #include <linux/tcp.h> 25 #include <linux/udp.h> 26 #include <linux/if_arp.h> 27 #include <linux/if_vlan.h> 28 #include <linux/init.h> 29 #include <linux/in6.h> 30 #include <linux/inetdevice.h> 31 #include <linux/igmp.h> 32 #include <linux/netfilter_ipv4.h> 33 #include <linux/etherdevice.h> 34 #include <linux/if_ether.h> 35 36 #include <net/sock.h> 37 #include <net/ip.h> 38 #include <net/icmp.h> 39 #include <net/protocol.h> 40 #include <net/ip_tunnels.h> 41 #include <net/arp.h> 42 #include <net/checksum.h> 43 #include <net/dsfield.h> 44 #include <net/inet_ecn.h> 45 #include <net/xfrm.h> 46 #include <net/net_namespace.h> 47 #include <net/netns/generic.h> 48 #include <net/rtnetlink.h> 49 #include <net/gre.h> 50 #include <net/dst_metadata.h> 51 #include <net/erspan.h> 52 53 /* 54 Problems & solutions 55 -------------------- 56 57 1. The most important issue is detecting local dead loops. 58 They would cause complete host lockup in transmit, which 59 would be "resolved" by stack overflow or, if queueing is enabled, 60 with infinite looping in net_bh. 61 62 We cannot track such dead loops during route installation, 63 it is infeasible task. The most general solutions would be 64 to keep skb->encapsulation counter (sort of local ttl), 65 and silently drop packet when it expires. It is a good 66 solution, but it supposes maintaining new variable in ALL 67 skb, even if no tunneling is used. 68 69 Current solution: xmit_recursion breaks dead loops. This is a percpu 70 counter, since when we enter the first ndo_xmit(), cpu migration is 71 forbidden. We force an exit if this counter reaches RECURSION_LIMIT 72 73 2. Networking dead loops would not kill routers, but would really 74 kill network. IP hop limit plays role of "t->recursion" in this case, 75 if we copy it from packet being encapsulated to upper header. 76 It is very good solution, but it introduces two problems: 77 78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2), 79 do not work over tunnels. 80 - traceroute does not work. I planned to relay ICMP from tunnel, 81 so that this problem would be solved and traceroute output 82 would even more informative. This idea appeared to be wrong: 83 only Linux complies to rfc1812 now (yes, guys, Linux is the only 84 true router now :-)), all routers (at least, in neighbourhood of mine) 85 return only 8 bytes of payload. It is the end. 86 87 Hence, if we want that OSPF worked or traceroute said something reasonable, 88 we should search for another solution. 89 90 One of them is to parse packet trying to detect inner encapsulation 91 made by our node. It is difficult or even impossible, especially, 92 taking into account fragmentation. TO be short, ttl is not solution at all. 93 94 Current solution: The solution was UNEXPECTEDLY SIMPLE. 95 We force DF flag on tunnels with preconfigured hop limit, 96 that is ALL. :-) Well, it does not remove the problem completely, 97 but exponential growth of network traffic is changed to linear 98 (branches, that exceed pmtu are pruned) and tunnel mtu 99 rapidly degrades to value <68, where looping stops. 100 Yes, it is not good if there exists a router in the loop, 101 which does not force DF, even when encapsulating packets have DF set. 102 But it is not our problem! Nobody could accuse us, we made 103 all that we could make. Even if it is your gated who injected 104 fatal route to network, even if it were you who configured 105 fatal static route: you are innocent. :-) 106 107 Alexey Kuznetsov. 108 */ 109 110 static bool log_ecn_error = true; 111 module_param(log_ecn_error, bool, 0644); 112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 113 114 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 115 static int ipgre_tunnel_init(struct net_device *dev); 116 static void erspan_build_header(struct sk_buff *skb, 117 __be32 id, u32 index, 118 bool truncate, bool is_ipv4); 119 120 static unsigned int ipgre_net_id __read_mostly; 121 static unsigned int gre_tap_net_id __read_mostly; 122 static unsigned int erspan_net_id __read_mostly; 123 124 static void ipgre_err(struct sk_buff *skb, u32 info, 125 const struct tnl_ptk_info *tpi) 126 { 127 128 /* All the routers (except for Linux) return only 129 8 bytes of packet payload. It means, that precise relaying of 130 ICMP in the real Internet is absolutely infeasible. 131 132 Moreover, Cisco "wise men" put GRE key to the third word 133 in GRE header. It makes impossible maintaining even soft 134 state for keyed GRE tunnels with enabled checksum. Tell 135 them "thank you". 136 137 Well, I wonder, rfc1812 was written by Cisco employee, 138 what the hell these idiots break standards established 139 by themselves??? 140 */ 141 struct net *net = dev_net(skb->dev); 142 struct ip_tunnel_net *itn; 143 const struct iphdr *iph; 144 const int type = icmp_hdr(skb)->type; 145 const int code = icmp_hdr(skb)->code; 146 unsigned int data_len = 0; 147 struct ip_tunnel *t; 148 149 switch (type) { 150 default: 151 case ICMP_PARAMETERPROB: 152 return; 153 154 case ICMP_DEST_UNREACH: 155 switch (code) { 156 case ICMP_SR_FAILED: 157 case ICMP_PORT_UNREACH: 158 /* Impossible event. */ 159 return; 160 default: 161 /* All others are translated to HOST_UNREACH. 162 rfc2003 contains "deep thoughts" about NET_UNREACH, 163 I believe they are just ether pollution. --ANK 164 */ 165 break; 166 } 167 break; 168 169 case ICMP_TIME_EXCEEDED: 170 if (code != ICMP_EXC_TTL) 171 return; 172 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 173 break; 174 175 case ICMP_REDIRECT: 176 break; 177 } 178 179 if (tpi->proto == htons(ETH_P_TEB)) 180 itn = net_generic(net, gre_tap_net_id); 181 else 182 itn = net_generic(net, ipgre_net_id); 183 184 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 185 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 186 iph->daddr, iph->saddr, tpi->key); 187 188 if (!t) 189 return; 190 191 #if IS_ENABLED(CONFIG_IPV6) 192 if (tpi->proto == htons(ETH_P_IPV6) && 193 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 194 type, data_len)) 195 return; 196 #endif 197 198 if (t->parms.iph.daddr == 0 || 199 ipv4_is_multicast(t->parms.iph.daddr)) 200 return; 201 202 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 203 return; 204 205 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 206 t->err_count++; 207 else 208 t->err_count = 1; 209 t->err_time = jiffies; 210 } 211 212 static void gre_err(struct sk_buff *skb, u32 info) 213 { 214 /* All the routers (except for Linux) return only 215 * 8 bytes of packet payload. It means, that precise relaying of 216 * ICMP in the real Internet is absolutely infeasible. 217 * 218 * Moreover, Cisco "wise men" put GRE key to the third word 219 * in GRE header. It makes impossible maintaining even soft 220 * state for keyed 221 * GRE tunnels with enabled checksum. Tell them "thank you". 222 * 223 * Well, I wonder, rfc1812 was written by Cisco employee, 224 * what the hell these idiots break standards established 225 * by themselves??? 226 */ 227 228 const struct iphdr *iph = (struct iphdr *)skb->data; 229 const int type = icmp_hdr(skb)->type; 230 const int code = icmp_hdr(skb)->code; 231 struct tnl_ptk_info tpi; 232 bool csum_err = false; 233 234 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 235 iph->ihl * 4) < 0) { 236 if (!csum_err) /* ignore csum errors. */ 237 return; 238 } 239 240 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 241 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 242 skb->dev->ifindex, 0, IPPROTO_GRE, 0); 243 return; 244 } 245 if (type == ICMP_REDIRECT) { 246 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, 247 IPPROTO_GRE, 0); 248 return; 249 } 250 251 ipgre_err(skb, info, &tpi); 252 } 253 254 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, 255 int gre_hdr_len) 256 { 257 struct net *net = dev_net(skb->dev); 258 struct metadata_dst *tun_dst = NULL; 259 struct erspan_base_hdr *ershdr; 260 struct erspan_metadata *pkt_md; 261 struct ip_tunnel_net *itn; 262 struct ip_tunnel *tunnel; 263 const struct iphdr *iph; 264 int ver; 265 int len; 266 267 itn = net_generic(net, erspan_net_id); 268 len = gre_hdr_len + sizeof(*ershdr); 269 270 /* Check based hdr len */ 271 if (unlikely(!pskb_may_pull(skb, len))) 272 return PACKET_REJECT; 273 274 iph = ip_hdr(skb); 275 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 276 ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; 277 278 /* The original GRE header does not have key field, 279 * Use ERSPAN 10-bit session ID as key. 280 */ 281 tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); 282 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 283 tpi->flags | TUNNEL_KEY, 284 iph->saddr, iph->daddr, tpi->key); 285 286 if (tunnel) { 287 len = gre_hdr_len + erspan_hdr_len(ver); 288 if (unlikely(!pskb_may_pull(skb, len))) 289 return PACKET_REJECT; 290 291 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 292 pkt_md = (struct erspan_metadata *)(ershdr + 1); 293 294 if (__iptunnel_pull_header(skb, 295 len, 296 htons(ETH_P_TEB), 297 false, false) < 0) 298 goto drop; 299 300 if (tunnel->collect_md) { 301 struct ip_tunnel_info *info; 302 struct erspan_metadata *md; 303 __be64 tun_id; 304 __be16 flags; 305 306 tpi->flags |= TUNNEL_KEY; 307 flags = tpi->flags; 308 tun_id = key32_to_tunnel_id(tpi->key); 309 310 tun_dst = ip_tun_rx_dst(skb, flags, 311 tun_id, sizeof(*md)); 312 if (!tun_dst) 313 return PACKET_REJECT; 314 315 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 316 memcpy(md, pkt_md, sizeof(*md)); 317 md->version = ver; 318 319 info = &tun_dst->u.tun_info; 320 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 321 info->options_len = sizeof(*md); 322 } else { 323 tunnel->erspan_ver = ver; 324 if (ver == 1) { 325 tunnel->index = ntohl(pkt_md->u.index); 326 } else { 327 u16 md2_flags; 328 u16 dir, hwid; 329 330 md2_flags = ntohs(pkt_md->u.md2.flags); 331 dir = (md2_flags & DIR_MASK) >> DIR_OFFSET; 332 hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; 333 tunnel->dir = dir; 334 tunnel->hwid = hwid; 335 } 336 337 } 338 339 skb_reset_mac_header(skb); 340 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 341 return PACKET_RCVD; 342 } 343 drop: 344 kfree_skb(skb); 345 return PACKET_RCVD; 346 } 347 348 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 349 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) 350 { 351 struct metadata_dst *tun_dst = NULL; 352 const struct iphdr *iph; 353 struct ip_tunnel *tunnel; 354 355 iph = ip_hdr(skb); 356 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 357 iph->saddr, iph->daddr, tpi->key); 358 359 if (tunnel) { 360 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, 361 raw_proto, false) < 0) 362 goto drop; 363 364 if (tunnel->dev->type != ARPHRD_NONE) 365 skb_pop_mac_header(skb); 366 else 367 skb_reset_mac_header(skb); 368 if (tunnel->collect_md) { 369 __be16 flags; 370 __be64 tun_id; 371 372 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); 373 tun_id = key32_to_tunnel_id(tpi->key); 374 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); 375 if (!tun_dst) 376 return PACKET_REJECT; 377 } 378 379 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 380 return PACKET_RCVD; 381 } 382 return PACKET_NEXT; 383 384 drop: 385 kfree_skb(skb); 386 return PACKET_RCVD; 387 } 388 389 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 390 int hdr_len) 391 { 392 struct net *net = dev_net(skb->dev); 393 struct ip_tunnel_net *itn; 394 int res; 395 396 if (tpi->proto == htons(ETH_P_TEB)) 397 itn = net_generic(net, gre_tap_net_id); 398 else 399 itn = net_generic(net, ipgre_net_id); 400 401 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); 402 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { 403 /* ipgre tunnels in collect metadata mode should receive 404 * also ETH_P_TEB traffic. 405 */ 406 itn = net_generic(net, ipgre_net_id); 407 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); 408 } 409 return res; 410 } 411 412 static int gre_rcv(struct sk_buff *skb) 413 { 414 struct tnl_ptk_info tpi; 415 bool csum_err = false; 416 int hdr_len; 417 418 #ifdef CONFIG_NET_IPGRE_BROADCAST 419 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 420 /* Looped back packet, drop it! */ 421 if (rt_is_output_route(skb_rtable(skb))) 422 goto drop; 423 } 424 #endif 425 426 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); 427 if (hdr_len < 0) 428 goto drop; 429 430 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || 431 tpi.proto == htons(ETH_P_ERSPAN2))) { 432 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 433 return 0; 434 goto out; 435 } 436 437 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 438 return 0; 439 440 out: 441 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 442 drop: 443 kfree_skb(skb); 444 return 0; 445 } 446 447 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 448 const struct iphdr *tnl_params, 449 __be16 proto) 450 { 451 struct ip_tunnel *tunnel = netdev_priv(dev); 452 453 if (tunnel->parms.o_flags & TUNNEL_SEQ) 454 tunnel->o_seqno++; 455 456 /* Push GRE header. */ 457 gre_build_header(skb, tunnel->tun_hlen, 458 tunnel->parms.o_flags, proto, tunnel->parms.o_key, 459 htonl(tunnel->o_seqno)); 460 461 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 462 } 463 464 static int gre_handle_offloads(struct sk_buff *skb, bool csum) 465 { 466 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 467 } 468 469 static struct rtable *gre_get_rt(struct sk_buff *skb, 470 struct net_device *dev, 471 struct flowi4 *fl, 472 const struct ip_tunnel_key *key) 473 { 474 struct net *net = dev_net(dev); 475 476 memset(fl, 0, sizeof(*fl)); 477 fl->daddr = key->u.ipv4.dst; 478 fl->saddr = key->u.ipv4.src; 479 fl->flowi4_tos = RT_TOS(key->tos); 480 fl->flowi4_mark = skb->mark; 481 fl->flowi4_proto = IPPROTO_GRE; 482 483 return ip_route_output_key(net, fl); 484 } 485 486 static struct rtable *prepare_fb_xmit(struct sk_buff *skb, 487 struct net_device *dev, 488 struct flowi4 *fl, 489 int tunnel_hlen) 490 { 491 struct ip_tunnel_info *tun_info; 492 const struct ip_tunnel_key *key; 493 struct rtable *rt = NULL; 494 int min_headroom; 495 bool use_cache; 496 int err; 497 498 tun_info = skb_tunnel_info(skb); 499 key = &tun_info->key; 500 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); 501 502 if (use_cache) 503 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); 504 if (!rt) { 505 rt = gre_get_rt(skb, dev, fl, key); 506 if (IS_ERR(rt)) 507 goto err_free_skb; 508 if (use_cache) 509 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, 510 fl->saddr); 511 } 512 513 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 514 + tunnel_hlen + sizeof(struct iphdr); 515 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { 516 int head_delta = SKB_DATA_ALIGN(min_headroom - 517 skb_headroom(skb) + 518 16); 519 err = pskb_expand_head(skb, max_t(int, head_delta, 0), 520 0, GFP_ATOMIC); 521 if (unlikely(err)) 522 goto err_free_rt; 523 } 524 return rt; 525 526 err_free_rt: 527 ip_rt_put(rt); 528 err_free_skb: 529 kfree_skb(skb); 530 dev->stats.tx_dropped++; 531 return NULL; 532 } 533 534 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, 535 __be16 proto) 536 { 537 struct ip_tunnel_info *tun_info; 538 const struct ip_tunnel_key *key; 539 struct rtable *rt = NULL; 540 struct flowi4 fl; 541 int tunnel_hlen; 542 __be16 df, flags; 543 544 tun_info = skb_tunnel_info(skb); 545 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 546 ip_tunnel_info_af(tun_info) != AF_INET)) 547 goto err_free_skb; 548 549 key = &tun_info->key; 550 tunnel_hlen = gre_calc_hlen(key->tun_flags); 551 552 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 553 if (!rt) 554 return; 555 556 /* Push Tunnel header. */ 557 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) 558 goto err_free_rt; 559 560 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); 561 gre_build_header(skb, tunnel_hlen, flags, proto, 562 tunnel_id_to_key32(tun_info->key.tun_id), 0); 563 564 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 565 566 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 567 key->tos, key->ttl, df, false); 568 return; 569 570 err_free_rt: 571 ip_rt_put(rt); 572 err_free_skb: 573 kfree_skb(skb); 574 dev->stats.tx_dropped++; 575 } 576 577 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, 578 __be16 proto) 579 { 580 struct ip_tunnel *tunnel = netdev_priv(dev); 581 struct ip_tunnel_info *tun_info; 582 const struct ip_tunnel_key *key; 583 struct erspan_metadata *md; 584 struct rtable *rt = NULL; 585 bool truncate = false; 586 struct flowi4 fl; 587 int tunnel_hlen; 588 int version; 589 __be16 df; 590 591 tun_info = skb_tunnel_info(skb); 592 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 593 ip_tunnel_info_af(tun_info) != AF_INET)) 594 goto err_free_skb; 595 596 key = &tun_info->key; 597 md = ip_tunnel_info_opts(tun_info); 598 if (!md) 599 goto err_free_rt; 600 601 /* ERSPAN has fixed 8 byte GRE header */ 602 version = md->version; 603 tunnel_hlen = 8 + erspan_hdr_len(version); 604 605 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 606 if (!rt) 607 return; 608 609 if (gre_handle_offloads(skb, false)) 610 goto err_free_rt; 611 612 if (skb->len > dev->mtu + dev->hard_header_len) { 613 pskb_trim(skb, dev->mtu + dev->hard_header_len); 614 truncate = true; 615 } 616 617 if (version == 1) { 618 erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), 619 ntohl(md->u.index), truncate, true); 620 } else if (version == 2) { 621 u16 md2_flags; 622 u8 direction; 623 u16 hwid; 624 625 md2_flags = ntohs(md->u.md2.flags); 626 direction = (md2_flags & DIR_MASK) >> DIR_OFFSET; 627 hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; 628 629 erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id), 630 direction, hwid, truncate, true); 631 } else { 632 goto err_free_rt; 633 } 634 635 gre_build_header(skb, 8, TUNNEL_SEQ, 636 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); 637 638 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 639 640 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 641 key->tos, key->ttl, df, false); 642 return; 643 644 err_free_rt: 645 ip_rt_put(rt); 646 err_free_skb: 647 kfree_skb(skb); 648 dev->stats.tx_dropped++; 649 } 650 651 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 652 { 653 struct ip_tunnel_info *info = skb_tunnel_info(skb); 654 struct rtable *rt; 655 struct flowi4 fl4; 656 657 if (ip_tunnel_info_af(info) != AF_INET) 658 return -EINVAL; 659 660 rt = gre_get_rt(skb, dev, &fl4, &info->key); 661 if (IS_ERR(rt)) 662 return PTR_ERR(rt); 663 664 ip_rt_put(rt); 665 info->key.u.ipv4.src = fl4.saddr; 666 return 0; 667 } 668 669 static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 670 struct net_device *dev) 671 { 672 struct ip_tunnel *tunnel = netdev_priv(dev); 673 const struct iphdr *tnl_params; 674 675 if (tunnel->collect_md) { 676 gre_fb_xmit(skb, dev, skb->protocol); 677 return NETDEV_TX_OK; 678 } 679 680 if (dev->header_ops) { 681 /* Need space for new headers */ 682 if (skb_cow_head(skb, dev->needed_headroom - 683 (tunnel->hlen + sizeof(struct iphdr)))) 684 goto free_skb; 685 686 tnl_params = (const struct iphdr *)skb->data; 687 688 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing 689 * to gre header. 690 */ 691 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); 692 skb_reset_mac_header(skb); 693 } else { 694 if (skb_cow_head(skb, dev->needed_headroom)) 695 goto free_skb; 696 697 tnl_params = &tunnel->parms.iph; 698 } 699 700 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 701 goto free_skb; 702 703 __gre_xmit(skb, dev, tnl_params, skb->protocol); 704 return NETDEV_TX_OK; 705 706 free_skb: 707 kfree_skb(skb); 708 dev->stats.tx_dropped++; 709 return NETDEV_TX_OK; 710 } 711 712 static netdev_tx_t erspan_xmit(struct sk_buff *skb, 713 struct net_device *dev) 714 { 715 struct ip_tunnel *tunnel = netdev_priv(dev); 716 bool truncate = false; 717 718 if (tunnel->collect_md) { 719 erspan_fb_xmit(skb, dev, skb->protocol); 720 return NETDEV_TX_OK; 721 } 722 723 if (gre_handle_offloads(skb, false)) 724 goto free_skb; 725 726 if (skb_cow_head(skb, dev->needed_headroom)) 727 goto free_skb; 728 729 if (skb->len > dev->mtu + dev->hard_header_len) { 730 pskb_trim(skb, dev->mtu + dev->hard_header_len); 731 truncate = true; 732 } 733 734 /* Push ERSPAN header */ 735 if (tunnel->erspan_ver == 1) 736 erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, 737 truncate, true); 738 else 739 erspan_build_header_v2(skb, tunnel->parms.o_key, 740 tunnel->dir, tunnel->hwid, 741 truncate, true); 742 743 tunnel->parms.o_flags &= ~TUNNEL_KEY; 744 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); 745 return NETDEV_TX_OK; 746 747 free_skb: 748 kfree_skb(skb); 749 dev->stats.tx_dropped++; 750 return NETDEV_TX_OK; 751 } 752 753 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, 754 struct net_device *dev) 755 { 756 struct ip_tunnel *tunnel = netdev_priv(dev); 757 758 if (tunnel->collect_md) { 759 gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); 760 return NETDEV_TX_OK; 761 } 762 763 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 764 goto free_skb; 765 766 if (skb_cow_head(skb, dev->needed_headroom)) 767 goto free_skb; 768 769 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); 770 return NETDEV_TX_OK; 771 772 free_skb: 773 kfree_skb(skb); 774 dev->stats.tx_dropped++; 775 return NETDEV_TX_OK; 776 } 777 778 static void ipgre_link_update(struct net_device *dev, bool set_mtu) 779 { 780 struct ip_tunnel *tunnel = netdev_priv(dev); 781 int len; 782 783 len = tunnel->tun_hlen; 784 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 785 len = tunnel->tun_hlen - len; 786 tunnel->hlen = tunnel->hlen + len; 787 788 dev->needed_headroom = dev->needed_headroom + len; 789 if (set_mtu) 790 dev->mtu = max_t(int, dev->mtu - len, 68); 791 792 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 793 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 794 tunnel->encap.type == TUNNEL_ENCAP_NONE) { 795 dev->features |= NETIF_F_GSO_SOFTWARE; 796 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 797 } 798 dev->features |= NETIF_F_LLTX; 799 } 800 } 801 802 static int ipgre_tunnel_ioctl(struct net_device *dev, 803 struct ifreq *ifr, int cmd) 804 { 805 struct ip_tunnel_parm p; 806 int err; 807 808 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 809 return -EFAULT; 810 811 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 812 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 813 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || 814 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) 815 return -EINVAL; 816 } 817 818 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 819 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 820 821 err = ip_tunnel_ioctl(dev, &p, cmd); 822 if (err) 823 return err; 824 825 if (cmd == SIOCCHGTUNNEL) { 826 struct ip_tunnel *t = netdev_priv(dev); 827 828 t->parms.i_flags = p.i_flags; 829 t->parms.o_flags = p.o_flags; 830 831 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 832 ipgre_link_update(dev, true); 833 } 834 835 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 836 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 837 838 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 839 return -EFAULT; 840 841 return 0; 842 } 843 844 /* Nice toy. Unfortunately, useless in real life :-) 845 It allows to construct virtual multiprotocol broadcast "LAN" 846 over the Internet, provided multicast routing is tuned. 847 848 849 I have no idea was this bicycle invented before me, 850 so that I had to set ARPHRD_IPGRE to a random value. 851 I have an impression, that Cisco could make something similar, 852 but this feature is apparently missing in IOS<=11.2(8). 853 854 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 855 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 856 857 ping -t 255 224.66.66.66 858 859 If nobody answers, mbone does not work. 860 861 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 862 ip addr add 10.66.66.<somewhat>/24 dev Universe 863 ifconfig Universe up 864 ifconfig Universe add fe80::<Your_real_addr>/10 865 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 866 ftp 10.66.66.66 867 ... 868 ftp fec0:6666:6666::193.233.7.65 869 ... 870 */ 871 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 872 unsigned short type, 873 const void *daddr, const void *saddr, unsigned int len) 874 { 875 struct ip_tunnel *t = netdev_priv(dev); 876 struct iphdr *iph; 877 struct gre_base_hdr *greh; 878 879 iph = skb_push(skb, t->hlen + sizeof(*iph)); 880 greh = (struct gre_base_hdr *)(iph+1); 881 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); 882 greh->protocol = htons(type); 883 884 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 885 886 /* Set the source hardware address. */ 887 if (saddr) 888 memcpy(&iph->saddr, saddr, 4); 889 if (daddr) 890 memcpy(&iph->daddr, daddr, 4); 891 if (iph->daddr) 892 return t->hlen + sizeof(*iph); 893 894 return -(t->hlen + sizeof(*iph)); 895 } 896 897 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 898 { 899 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); 900 memcpy(haddr, &iph->saddr, 4); 901 return 4; 902 } 903 904 static const struct header_ops ipgre_header_ops = { 905 .create = ipgre_header, 906 .parse = ipgre_header_parse, 907 }; 908 909 #ifdef CONFIG_NET_IPGRE_BROADCAST 910 static int ipgre_open(struct net_device *dev) 911 { 912 struct ip_tunnel *t = netdev_priv(dev); 913 914 if (ipv4_is_multicast(t->parms.iph.daddr)) { 915 struct flowi4 fl4; 916 struct rtable *rt; 917 918 rt = ip_route_output_gre(t->net, &fl4, 919 t->parms.iph.daddr, 920 t->parms.iph.saddr, 921 t->parms.o_key, 922 RT_TOS(t->parms.iph.tos), 923 t->parms.link); 924 if (IS_ERR(rt)) 925 return -EADDRNOTAVAIL; 926 dev = rt->dst.dev; 927 ip_rt_put(rt); 928 if (!__in_dev_get_rtnl(dev)) 929 return -EADDRNOTAVAIL; 930 t->mlink = dev->ifindex; 931 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 932 } 933 return 0; 934 } 935 936 static int ipgre_close(struct net_device *dev) 937 { 938 struct ip_tunnel *t = netdev_priv(dev); 939 940 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 941 struct in_device *in_dev; 942 in_dev = inetdev_by_index(t->net, t->mlink); 943 if (in_dev) 944 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 945 } 946 return 0; 947 } 948 #endif 949 950 static const struct net_device_ops ipgre_netdev_ops = { 951 .ndo_init = ipgre_tunnel_init, 952 .ndo_uninit = ip_tunnel_uninit, 953 #ifdef CONFIG_NET_IPGRE_BROADCAST 954 .ndo_open = ipgre_open, 955 .ndo_stop = ipgre_close, 956 #endif 957 .ndo_start_xmit = ipgre_xmit, 958 .ndo_do_ioctl = ipgre_tunnel_ioctl, 959 .ndo_change_mtu = ip_tunnel_change_mtu, 960 .ndo_get_stats64 = ip_tunnel_get_stats64, 961 .ndo_get_iflink = ip_tunnel_get_iflink, 962 }; 963 964 #define GRE_FEATURES (NETIF_F_SG | \ 965 NETIF_F_FRAGLIST | \ 966 NETIF_F_HIGHDMA | \ 967 NETIF_F_HW_CSUM) 968 969 static void ipgre_tunnel_setup(struct net_device *dev) 970 { 971 dev->netdev_ops = &ipgre_netdev_ops; 972 dev->type = ARPHRD_IPGRE; 973 ip_tunnel_setup(dev, ipgre_net_id); 974 } 975 976 static void __gre_tunnel_init(struct net_device *dev) 977 { 978 struct ip_tunnel *tunnel; 979 int t_hlen; 980 981 tunnel = netdev_priv(dev); 982 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 983 tunnel->parms.iph.protocol = IPPROTO_GRE; 984 985 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 986 987 t_hlen = tunnel->hlen + sizeof(struct iphdr); 988 989 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 990 dev->mtu = ETH_DATA_LEN - t_hlen - 4; 991 992 dev->features |= GRE_FEATURES; 993 dev->hw_features |= GRE_FEATURES; 994 995 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 996 /* TCP offload with GRE SEQ is not supported, nor 997 * can we support 2 levels of outer headers requiring 998 * an update. 999 */ 1000 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 1001 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { 1002 dev->features |= NETIF_F_GSO_SOFTWARE; 1003 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1004 } 1005 1006 /* Can use a lockless transmit, unless we generate 1007 * output sequences 1008 */ 1009 dev->features |= NETIF_F_LLTX; 1010 } 1011 } 1012 1013 static int ipgre_tunnel_init(struct net_device *dev) 1014 { 1015 struct ip_tunnel *tunnel = netdev_priv(dev); 1016 struct iphdr *iph = &tunnel->parms.iph; 1017 1018 __gre_tunnel_init(dev); 1019 1020 memcpy(dev->dev_addr, &iph->saddr, 4); 1021 memcpy(dev->broadcast, &iph->daddr, 4); 1022 1023 dev->flags = IFF_NOARP; 1024 netif_keep_dst(dev); 1025 dev->addr_len = 4; 1026 1027 if (iph->daddr && !tunnel->collect_md) { 1028 #ifdef CONFIG_NET_IPGRE_BROADCAST 1029 if (ipv4_is_multicast(iph->daddr)) { 1030 if (!iph->saddr) 1031 return -EINVAL; 1032 dev->flags = IFF_BROADCAST; 1033 dev->header_ops = &ipgre_header_ops; 1034 } 1035 #endif 1036 } else if (!tunnel->collect_md) { 1037 dev->header_ops = &ipgre_header_ops; 1038 } 1039 1040 return ip_tunnel_init(dev); 1041 } 1042 1043 static const struct gre_protocol ipgre_protocol = { 1044 .handler = gre_rcv, 1045 .err_handler = gre_err, 1046 }; 1047 1048 static int __net_init ipgre_init_net(struct net *net) 1049 { 1050 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1051 } 1052 1053 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) 1054 { 1055 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); 1056 } 1057 1058 static struct pernet_operations ipgre_net_ops = { 1059 .init = ipgre_init_net, 1060 .exit_batch = ipgre_exit_batch_net, 1061 .id = &ipgre_net_id, 1062 .size = sizeof(struct ip_tunnel_net), 1063 }; 1064 1065 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], 1066 struct netlink_ext_ack *extack) 1067 { 1068 __be16 flags; 1069 1070 if (!data) 1071 return 0; 1072 1073 flags = 0; 1074 if (data[IFLA_GRE_IFLAGS]) 1075 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1076 if (data[IFLA_GRE_OFLAGS]) 1077 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1078 if (flags & (GRE_VERSION|GRE_ROUTING)) 1079 return -EINVAL; 1080 1081 if (data[IFLA_GRE_COLLECT_METADATA] && 1082 data[IFLA_GRE_ENCAP_TYPE] && 1083 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) 1084 return -EINVAL; 1085 1086 return 0; 1087 } 1088 1089 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], 1090 struct netlink_ext_ack *extack) 1091 { 1092 __be32 daddr; 1093 1094 if (tb[IFLA_ADDRESS]) { 1095 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1096 return -EINVAL; 1097 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1098 return -EADDRNOTAVAIL; 1099 } 1100 1101 if (!data) 1102 goto out; 1103 1104 if (data[IFLA_GRE_REMOTE]) { 1105 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1106 if (!daddr) 1107 return -EINVAL; 1108 } 1109 1110 out: 1111 return ipgre_tunnel_validate(tb, data, extack); 1112 } 1113 1114 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], 1115 struct netlink_ext_ack *extack) 1116 { 1117 __be16 flags = 0; 1118 int ret; 1119 1120 if (!data) 1121 return 0; 1122 1123 ret = ipgre_tap_validate(tb, data, extack); 1124 if (ret) 1125 return ret; 1126 1127 /* ERSPAN should only have GRE sequence and key flag */ 1128 if (data[IFLA_GRE_OFLAGS]) 1129 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1130 if (data[IFLA_GRE_IFLAGS]) 1131 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1132 if (!data[IFLA_GRE_COLLECT_METADATA] && 1133 flags != (GRE_SEQ | GRE_KEY)) 1134 return -EINVAL; 1135 1136 /* ERSPAN Session ID only has 10-bit. Since we reuse 1137 * 32-bit key field as ID, check it's range. 1138 */ 1139 if (data[IFLA_GRE_IKEY] && 1140 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) 1141 return -EINVAL; 1142 1143 if (data[IFLA_GRE_OKEY] && 1144 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) 1145 return -EINVAL; 1146 1147 return 0; 1148 } 1149 1150 static int ipgre_netlink_parms(struct net_device *dev, 1151 struct nlattr *data[], 1152 struct nlattr *tb[], 1153 struct ip_tunnel_parm *parms, 1154 __u32 *fwmark) 1155 { 1156 struct ip_tunnel *t = netdev_priv(dev); 1157 1158 memset(parms, 0, sizeof(*parms)); 1159 1160 parms->iph.protocol = IPPROTO_GRE; 1161 1162 if (!data) 1163 return 0; 1164 1165 if (data[IFLA_GRE_LINK]) 1166 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1167 1168 if (data[IFLA_GRE_IFLAGS]) 1169 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); 1170 1171 if (data[IFLA_GRE_OFLAGS]) 1172 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); 1173 1174 if (data[IFLA_GRE_IKEY]) 1175 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1176 1177 if (data[IFLA_GRE_OKEY]) 1178 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1179 1180 if (data[IFLA_GRE_LOCAL]) 1181 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); 1182 1183 if (data[IFLA_GRE_REMOTE]) 1184 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); 1185 1186 if (data[IFLA_GRE_TTL]) 1187 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1188 1189 if (data[IFLA_GRE_TOS]) 1190 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1191 1192 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) { 1193 if (t->ignore_df) 1194 return -EINVAL; 1195 parms->iph.frag_off = htons(IP_DF); 1196 } 1197 1198 if (data[IFLA_GRE_COLLECT_METADATA]) { 1199 t->collect_md = true; 1200 if (dev->type == ARPHRD_IPGRE) 1201 dev->type = ARPHRD_NONE; 1202 } 1203 1204 if (data[IFLA_GRE_IGNORE_DF]) { 1205 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) 1206 && (parms->iph.frag_off & htons(IP_DF))) 1207 return -EINVAL; 1208 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); 1209 } 1210 1211 if (data[IFLA_GRE_FWMARK]) 1212 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1213 1214 if (data[IFLA_GRE_ERSPAN_VER]) { 1215 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); 1216 1217 if (t->erspan_ver != 1 && t->erspan_ver != 2) 1218 return -EINVAL; 1219 } 1220 1221 if (t->erspan_ver == 1) { 1222 if (data[IFLA_GRE_ERSPAN_INDEX]) { 1223 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); 1224 if (t->index & ~INDEX_MASK) 1225 return -EINVAL; 1226 } 1227 } else if (t->erspan_ver == 2) { 1228 if (data[IFLA_GRE_ERSPAN_DIR]) { 1229 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); 1230 if (t->dir & ~(DIR_MASK >> DIR_OFFSET)) 1231 return -EINVAL; 1232 } 1233 if (data[IFLA_GRE_ERSPAN_HWID]) { 1234 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); 1235 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET)) 1236 return -EINVAL; 1237 } 1238 } 1239 1240 return 0; 1241 } 1242 1243 /* This function returns true when ENCAP attributes are present in the nl msg */ 1244 static bool ipgre_netlink_encap_parms(struct nlattr *data[], 1245 struct ip_tunnel_encap *ipencap) 1246 { 1247 bool ret = false; 1248 1249 memset(ipencap, 0, sizeof(*ipencap)); 1250 1251 if (!data) 1252 return ret; 1253 1254 if (data[IFLA_GRE_ENCAP_TYPE]) { 1255 ret = true; 1256 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); 1257 } 1258 1259 if (data[IFLA_GRE_ENCAP_FLAGS]) { 1260 ret = true; 1261 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); 1262 } 1263 1264 if (data[IFLA_GRE_ENCAP_SPORT]) { 1265 ret = true; 1266 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); 1267 } 1268 1269 if (data[IFLA_GRE_ENCAP_DPORT]) { 1270 ret = true; 1271 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); 1272 } 1273 1274 return ret; 1275 } 1276 1277 static int gre_tap_init(struct net_device *dev) 1278 { 1279 __gre_tunnel_init(dev); 1280 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1281 netif_keep_dst(dev); 1282 1283 return ip_tunnel_init(dev); 1284 } 1285 1286 static const struct net_device_ops gre_tap_netdev_ops = { 1287 .ndo_init = gre_tap_init, 1288 .ndo_uninit = ip_tunnel_uninit, 1289 .ndo_start_xmit = gre_tap_xmit, 1290 .ndo_set_mac_address = eth_mac_addr, 1291 .ndo_validate_addr = eth_validate_addr, 1292 .ndo_change_mtu = ip_tunnel_change_mtu, 1293 .ndo_get_stats64 = ip_tunnel_get_stats64, 1294 .ndo_get_iflink = ip_tunnel_get_iflink, 1295 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1296 }; 1297 1298 static int erspan_tunnel_init(struct net_device *dev) 1299 { 1300 struct ip_tunnel *tunnel = netdev_priv(dev); 1301 int t_hlen; 1302 1303 tunnel->tun_hlen = 8; 1304 tunnel->parms.iph.protocol = IPPROTO_GRE; 1305 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + 1306 erspan_hdr_len(tunnel->erspan_ver); 1307 t_hlen = tunnel->hlen + sizeof(struct iphdr); 1308 1309 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 1310 dev->mtu = ETH_DATA_LEN - t_hlen - 4; 1311 dev->features |= GRE_FEATURES; 1312 dev->hw_features |= GRE_FEATURES; 1313 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1314 netif_keep_dst(dev); 1315 1316 return ip_tunnel_init(dev); 1317 } 1318 1319 static const struct net_device_ops erspan_netdev_ops = { 1320 .ndo_init = erspan_tunnel_init, 1321 .ndo_uninit = ip_tunnel_uninit, 1322 .ndo_start_xmit = erspan_xmit, 1323 .ndo_set_mac_address = eth_mac_addr, 1324 .ndo_validate_addr = eth_validate_addr, 1325 .ndo_change_mtu = ip_tunnel_change_mtu, 1326 .ndo_get_stats64 = ip_tunnel_get_stats64, 1327 .ndo_get_iflink = ip_tunnel_get_iflink, 1328 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1329 }; 1330 1331 static void ipgre_tap_setup(struct net_device *dev) 1332 { 1333 ether_setup(dev); 1334 dev->max_mtu = 0; 1335 dev->netdev_ops = &gre_tap_netdev_ops; 1336 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1337 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1338 ip_tunnel_setup(dev, gre_tap_net_id); 1339 } 1340 1341 static int ipgre_newlink(struct net *src_net, struct net_device *dev, 1342 struct nlattr *tb[], struct nlattr *data[], 1343 struct netlink_ext_ack *extack) 1344 { 1345 struct ip_tunnel_parm p; 1346 struct ip_tunnel_encap ipencap; 1347 __u32 fwmark = 0; 1348 int err; 1349 1350 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1351 struct ip_tunnel *t = netdev_priv(dev); 1352 err = ip_tunnel_encap_setup(t, &ipencap); 1353 1354 if (err < 0) 1355 return err; 1356 } 1357 1358 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1359 if (err < 0) 1360 return err; 1361 return ip_tunnel_newlink(dev, tb, &p, fwmark); 1362 } 1363 1364 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1365 struct nlattr *data[], 1366 struct netlink_ext_ack *extack) 1367 { 1368 struct ip_tunnel *t = netdev_priv(dev); 1369 struct ip_tunnel_encap ipencap; 1370 __u32 fwmark = t->fwmark; 1371 struct ip_tunnel_parm p; 1372 int err; 1373 1374 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1375 err = ip_tunnel_encap_setup(t, &ipencap); 1376 1377 if (err < 0) 1378 return err; 1379 } 1380 1381 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1382 if (err < 0) 1383 return err; 1384 1385 err = ip_tunnel_changelink(dev, tb, &p, fwmark); 1386 if (err < 0) 1387 return err; 1388 1389 t->parms.i_flags = p.i_flags; 1390 t->parms.o_flags = p.o_flags; 1391 1392 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 1393 ipgre_link_update(dev, !tb[IFLA_MTU]); 1394 1395 return 0; 1396 } 1397 1398 static size_t ipgre_get_size(const struct net_device *dev) 1399 { 1400 return 1401 /* IFLA_GRE_LINK */ 1402 nla_total_size(4) + 1403 /* IFLA_GRE_IFLAGS */ 1404 nla_total_size(2) + 1405 /* IFLA_GRE_OFLAGS */ 1406 nla_total_size(2) + 1407 /* IFLA_GRE_IKEY */ 1408 nla_total_size(4) + 1409 /* IFLA_GRE_OKEY */ 1410 nla_total_size(4) + 1411 /* IFLA_GRE_LOCAL */ 1412 nla_total_size(4) + 1413 /* IFLA_GRE_REMOTE */ 1414 nla_total_size(4) + 1415 /* IFLA_GRE_TTL */ 1416 nla_total_size(1) + 1417 /* IFLA_GRE_TOS */ 1418 nla_total_size(1) + 1419 /* IFLA_GRE_PMTUDISC */ 1420 nla_total_size(1) + 1421 /* IFLA_GRE_ENCAP_TYPE */ 1422 nla_total_size(2) + 1423 /* IFLA_GRE_ENCAP_FLAGS */ 1424 nla_total_size(2) + 1425 /* IFLA_GRE_ENCAP_SPORT */ 1426 nla_total_size(2) + 1427 /* IFLA_GRE_ENCAP_DPORT */ 1428 nla_total_size(2) + 1429 /* IFLA_GRE_COLLECT_METADATA */ 1430 nla_total_size(0) + 1431 /* IFLA_GRE_IGNORE_DF */ 1432 nla_total_size(1) + 1433 /* IFLA_GRE_FWMARK */ 1434 nla_total_size(4) + 1435 /* IFLA_GRE_ERSPAN_INDEX */ 1436 nla_total_size(4) + 1437 /* IFLA_GRE_ERSPAN_VER */ 1438 nla_total_size(1) + 1439 /* IFLA_GRE_ERSPAN_DIR */ 1440 nla_total_size(1) + 1441 /* IFLA_GRE_ERSPAN_HWID */ 1442 nla_total_size(2) + 1443 0; 1444 } 1445 1446 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1447 { 1448 struct ip_tunnel *t = netdev_priv(dev); 1449 struct ip_tunnel_parm *p = &t->parms; 1450 1451 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1452 nla_put_be16(skb, IFLA_GRE_IFLAGS, 1453 gre_tnl_flags_to_gre_flags(p->i_flags)) || 1454 nla_put_be16(skb, IFLA_GRE_OFLAGS, 1455 gre_tnl_flags_to_gre_flags(p->o_flags)) || 1456 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1457 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1458 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 1459 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || 1460 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || 1461 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || 1462 nla_put_u8(skb, IFLA_GRE_PMTUDISC, 1463 !!(p->iph.frag_off & htons(IP_DF))) || 1464 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) 1465 goto nla_put_failure; 1466 1467 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, 1468 t->encap.type) || 1469 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, 1470 t->encap.sport) || 1471 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, 1472 t->encap.dport) || 1473 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, 1474 t->encap.flags)) 1475 goto nla_put_failure; 1476 1477 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df)) 1478 goto nla_put_failure; 1479 1480 if (t->collect_md) { 1481 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) 1482 goto nla_put_failure; 1483 } 1484 1485 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) 1486 goto nla_put_failure; 1487 1488 if (t->erspan_ver == 1) { 1489 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) 1490 goto nla_put_failure; 1491 } else if (t->erspan_ver == 2) { 1492 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) 1493 goto nla_put_failure; 1494 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) 1495 goto nla_put_failure; 1496 } 1497 1498 return 0; 1499 1500 nla_put_failure: 1501 return -EMSGSIZE; 1502 } 1503 1504 static void erspan_setup(struct net_device *dev) 1505 { 1506 ether_setup(dev); 1507 dev->netdev_ops = &erspan_netdev_ops; 1508 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1509 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1510 ip_tunnel_setup(dev, erspan_net_id); 1511 } 1512 1513 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1514 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1515 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1516 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1517 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1518 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1519 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1520 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1521 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1522 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1523 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1524 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, 1525 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, 1526 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, 1527 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, 1528 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1529 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, 1530 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 1531 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, 1532 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, 1533 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, 1534 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 }, 1535 }; 1536 1537 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1538 .kind = "gre", 1539 .maxtype = IFLA_GRE_MAX, 1540 .policy = ipgre_policy, 1541 .priv_size = sizeof(struct ip_tunnel), 1542 .setup = ipgre_tunnel_setup, 1543 .validate = ipgre_tunnel_validate, 1544 .newlink = ipgre_newlink, 1545 .changelink = ipgre_changelink, 1546 .dellink = ip_tunnel_dellink, 1547 .get_size = ipgre_get_size, 1548 .fill_info = ipgre_fill_info, 1549 .get_link_net = ip_tunnel_get_link_net, 1550 }; 1551 1552 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1553 .kind = "gretap", 1554 .maxtype = IFLA_GRE_MAX, 1555 .policy = ipgre_policy, 1556 .priv_size = sizeof(struct ip_tunnel), 1557 .setup = ipgre_tap_setup, 1558 .validate = ipgre_tap_validate, 1559 .newlink = ipgre_newlink, 1560 .changelink = ipgre_changelink, 1561 .dellink = ip_tunnel_dellink, 1562 .get_size = ipgre_get_size, 1563 .fill_info = ipgre_fill_info, 1564 .get_link_net = ip_tunnel_get_link_net, 1565 }; 1566 1567 static struct rtnl_link_ops erspan_link_ops __read_mostly = { 1568 .kind = "erspan", 1569 .maxtype = IFLA_GRE_MAX, 1570 .policy = ipgre_policy, 1571 .priv_size = sizeof(struct ip_tunnel), 1572 .setup = erspan_setup, 1573 .validate = erspan_validate, 1574 .newlink = ipgre_newlink, 1575 .changelink = ipgre_changelink, 1576 .dellink = ip_tunnel_dellink, 1577 .get_size = ipgre_get_size, 1578 .fill_info = ipgre_fill_info, 1579 .get_link_net = ip_tunnel_get_link_net, 1580 }; 1581 1582 struct net_device *gretap_fb_dev_create(struct net *net, const char *name, 1583 u8 name_assign_type) 1584 { 1585 struct nlattr *tb[IFLA_MAX + 1]; 1586 struct net_device *dev; 1587 LIST_HEAD(list_kill); 1588 struct ip_tunnel *t; 1589 int err; 1590 1591 memset(&tb, 0, sizeof(tb)); 1592 1593 dev = rtnl_create_link(net, name, name_assign_type, 1594 &ipgre_tap_ops, tb); 1595 if (IS_ERR(dev)) 1596 return dev; 1597 1598 /* Configure flow based GRE device. */ 1599 t = netdev_priv(dev); 1600 t->collect_md = true; 1601 1602 err = ipgre_newlink(net, dev, tb, NULL, NULL); 1603 if (err < 0) { 1604 free_netdev(dev); 1605 return ERR_PTR(err); 1606 } 1607 1608 /* openvswitch users expect packet sizes to be unrestricted, 1609 * so set the largest MTU we can. 1610 */ 1611 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); 1612 if (err) 1613 goto out; 1614 1615 err = rtnl_configure_link(dev, NULL); 1616 if (err < 0) 1617 goto out; 1618 1619 return dev; 1620 out: 1621 ip_tunnel_dellink(dev, &list_kill); 1622 unregister_netdevice_many(&list_kill); 1623 return ERR_PTR(err); 1624 } 1625 EXPORT_SYMBOL_GPL(gretap_fb_dev_create); 1626 1627 static int __net_init ipgre_tap_init_net(struct net *net) 1628 { 1629 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1630 } 1631 1632 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) 1633 { 1634 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); 1635 } 1636 1637 static struct pernet_operations ipgre_tap_net_ops = { 1638 .init = ipgre_tap_init_net, 1639 .exit_batch = ipgre_tap_exit_batch_net, 1640 .id = &gre_tap_net_id, 1641 .size = sizeof(struct ip_tunnel_net), 1642 }; 1643 1644 static int __net_init erspan_init_net(struct net *net) 1645 { 1646 return ip_tunnel_init_net(net, erspan_net_id, 1647 &erspan_link_ops, "erspan0"); 1648 } 1649 1650 static void __net_exit erspan_exit_batch_net(struct list_head *net_list) 1651 { 1652 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); 1653 } 1654 1655 static struct pernet_operations erspan_net_ops = { 1656 .init = erspan_init_net, 1657 .exit_batch = erspan_exit_batch_net, 1658 .id = &erspan_net_id, 1659 .size = sizeof(struct ip_tunnel_net), 1660 }; 1661 1662 static int __init ipgre_init(void) 1663 { 1664 int err; 1665 1666 pr_info("GRE over IPv4 tunneling driver\n"); 1667 1668 err = register_pernet_device(&ipgre_net_ops); 1669 if (err < 0) 1670 return err; 1671 1672 err = register_pernet_device(&ipgre_tap_net_ops); 1673 if (err < 0) 1674 goto pnet_tap_failed; 1675 1676 err = register_pernet_device(&erspan_net_ops); 1677 if (err < 0) 1678 goto pnet_erspan_failed; 1679 1680 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 1681 if (err < 0) { 1682 pr_info("%s: can't add protocol\n", __func__); 1683 goto add_proto_failed; 1684 } 1685 1686 err = rtnl_link_register(&ipgre_link_ops); 1687 if (err < 0) 1688 goto rtnl_link_failed; 1689 1690 err = rtnl_link_register(&ipgre_tap_ops); 1691 if (err < 0) 1692 goto tap_ops_failed; 1693 1694 err = rtnl_link_register(&erspan_link_ops); 1695 if (err < 0) 1696 goto erspan_link_failed; 1697 1698 return 0; 1699 1700 erspan_link_failed: 1701 rtnl_link_unregister(&ipgre_tap_ops); 1702 tap_ops_failed: 1703 rtnl_link_unregister(&ipgre_link_ops); 1704 rtnl_link_failed: 1705 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1706 add_proto_failed: 1707 unregister_pernet_device(&erspan_net_ops); 1708 pnet_erspan_failed: 1709 unregister_pernet_device(&ipgre_tap_net_ops); 1710 pnet_tap_failed: 1711 unregister_pernet_device(&ipgre_net_ops); 1712 return err; 1713 } 1714 1715 static void __exit ipgre_fini(void) 1716 { 1717 rtnl_link_unregister(&ipgre_tap_ops); 1718 rtnl_link_unregister(&ipgre_link_ops); 1719 rtnl_link_unregister(&erspan_link_ops); 1720 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1721 unregister_pernet_device(&ipgre_tap_net_ops); 1722 unregister_pernet_device(&ipgre_net_ops); 1723 unregister_pernet_device(&erspan_net_ops); 1724 } 1725 1726 module_init(ipgre_init); 1727 module_exit(ipgre_fini); 1728 MODULE_LICENSE("GPL"); 1729 MODULE_ALIAS_RTNL_LINK("gre"); 1730 MODULE_ALIAS_RTNL_LINK("gretap"); 1731 MODULE_ALIAS_RTNL_LINK("erspan"); 1732 MODULE_ALIAS_NETDEV("gre0"); 1733 MODULE_ALIAS_NETDEV("gretap0"); 1734 MODULE_ALIAS_NETDEV("erspan0"); 1735