/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit;
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   a fatal route to the network, even if it were you who configured
   a fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
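/*
 * Illustrative aside, not part of the original driver: a minimal sketch
 * of the percpu recursion counter described above. The names
 * demo_xmit_recursion, DEMO_RECURSION_LIMIT and the two helpers are
 * hypothetical; the real counter lives on the generic xmit path in
 * net/core/dev.c.
 */
static DEFINE_PER_CPU(unsigned int, demo_xmit_recursion);
#define DEMO_RECURSION_LIMIT 8

static inline bool demo_xmit_enter(void)
{
	/* No migration worries: ndo_start_xmit() runs with bottom
	 * halves disabled, so we stay on one cpu.
	 */
	if (__this_cpu_read(demo_xmit_recursion) > DEMO_RECURSION_LIMIT)
		return false;	/* dead loop suspected: caller should drop */
	__this_cpu_inc(demo_xmit_recursion);
	return true;
}

static inline void demo_xmit_exit(void)
{
	__this_cpu_dec(demo_xmit_recursion);
}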
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key to the third word
	 * in the GRE header. It makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with enabled checksum. Tell
	 * them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee;
	 * why the hell do these idiots break standards established
	 * by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			 * rfc2003 contains "deep thoughts" about NET_UNREACH;
			 * I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
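/*
 * Illustrative aside (demo_icmp4_orig_dgram_len() is a hypothetical
 * helper, not part of this file): RFC 4884 transmits the length of the
 * embedded "original datagram" in the second reserved byte of the ICMP
 * header, counted in 32-bit words; that is where the "* 4" in
 * ipgre_err() above comes from.
 */
static inline unsigned int demo_icmp4_orig_dgram_len(const struct icmphdr *icmph)
{
	return icmph->un.reserved[1] * 4;	/* words -> bytes */
}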
static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key to the third word
	 * in the GRE header. It makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with enabled checksum. Tell
	 * them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee;
	 * why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
			     iph->ihl * 4) < 0) {
		if (!csum_err)		/* ignore csum errors. */
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}
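/*
 * Illustrative aside (demo_icmp4_frag_mtu() is a hypothetical helper):
 * for ICMP_FRAG_NEEDED the next-hop MTU travels inside the ICMP header
 * itself; the "info" value handed to ipv4_update_pmtu() above already
 * carries it, but reading the field directly would look like this.
 */
static inline u16 demo_icmp4_frag_mtu(const struct sk_buff *skb)
{
	return ntohs(icmp_hdr(skb)->un.frag.mtu);
}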
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;
	__be32 session_id;
	__be32 index;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	iph = ip_hdr(skb);
	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field,
	 * so reuse the ERSPAN 10-bit session ID as the key.
	 */
	session_id = cpu_to_be32(ntohs(ershdr->session_id));
	tpi->key = session_id;
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md)
				return PACKET_REJECT;

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}
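/*
 * Illustrative aside (demo_gre_key_roundtrip() is hypothetical): the
 * 32-bit GRE key is widened into the generic 64-bit tunnel id carried
 * by metadata dsts, as done in __ipgre_rcv() above; for IPv4 GRE the
 * round trip is lossless.
 */
static inline bool demo_gre_key_roundtrip(__be32 key)
{
	return tunnel_id_to_key32(key32_to_tunnel_id(key)) == key;
}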
static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}
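/*
 * Illustrative numbers for the min_headroom computation above (assuming
 * a plain Ethernet underlay and a keyed, checksummed GRE header): about
 * 16 bytes of link-layer reserve (14 rounded up), 0 bytes of extra dst
 * header, 12 bytes of GRE (base + csum + key) and 20 bytes of outer
 * IPv4, i.e. roughly 48 bytes; pskb_expand_head() is only called when
 * the skb cannot already absorb that.
 */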
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu) {
		pskb_trim(skb, dev->mtu);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static inline u8 tos_to_cos(u8 tos)
{
	u8 dscp, cos;

	dscp = tos >> 2;
	cos = dscp >> 3;
	return cos;
}
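/*
 * Illustrative aside: tos_to_cos() keeps the top three DSCP bits. For
 * example, TOS 0xb8 (DSCP 46, EF) maps to CoS 5: 0xb8 >> 2 = 46 and
 * 46 >> 3 = 5.
 */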
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)
{
	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;
	struct qtag_prefix {
		__be16 eth_type;
		__be16 tci;
	} *qp;
	u16 vlan_tci = 0;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;
	}

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
			   (enc_type << EN_OFFSET & EN_MASK) |
			   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu) {
		pskb_trim(skb, dev->mtu);
		truncate = true;
	}

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone,
   play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}
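/*
 * Illustrative aside (demo_push_ipgre_header() is a hypothetical
 * wrapper): ipgre_header() above is not called directly; it is reached
 * through dev_hard_header(), roughly like this.
 */
static inline int demo_push_ipgre_header(struct sk_buff *skb,
					 struct net_device *dev, __be32 daddr)
{
	return dev_hard_header(skb, dev, ETH_P_IP, &daddr, NULL, skb->len);
}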
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
	dev->mtu = ETH_DATA_LEN - t_hlen - 4;

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor can we
		 * support two levels of outer headers requiring an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}
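/*
 * Illustrative numbers for the sizing above (assuming no extra
 * encapsulation): a plain GRE header is 4 bytes, so t_hlen = 4 + 20 = 24
 * and the default dev->mtu becomes 1500 - 24 - 4 = 1472; a keyed and
 * checksummed tunnel (tun_hlen = 12) would get 1464 instead.
 */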
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN Session ID only has 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
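/*
 * Illustrative aside (demo_valid_erspan_id() is hypothetical): ID_MASK
 * keeps the low 10 bits, so the largest valid ERSPAN session ID is
 * 0x3ff (1023); an IKEY/OKEY of 1024 fails the checks above.
 */
static inline bool demo_valid_erspan_id(u32 id)
{
	return !(id & ~ID_MASK);
}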
static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);

	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_changelink(dev, tb, &p, fwmark);
}
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (t->index)
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};
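/*
 * Illustrative aside: a typical netlink user (iproute2) exercises the
 * attributes above roughly like this (syntax from memory, so treat it
 * as a sketch):
 *
 *	ip link add dev ersp0 type erspan local 10.0.0.1 remote 10.0.0.2 \
 *		key 100 seq erspan 123
 *
 * where "key" feeds IFLA_GRE_IKEY/IFLA_GRE_OKEY and "erspan" feeds
 * IFLA_GRE_ERSPAN_INDEX.
 */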
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
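/*
 * Illustrative aside (demo_make_flowbased_gretap() is a hypothetical
 * wrapper): gretap_fb_dev_create() is what flow-based users such as
 * openvswitch call, with the RTNL lock held, to obtain a collect_md
 * gretap device; "%d" lets the core pick a free suffix.
 */
static struct net_device *demo_make_flowbased_gretap(struct net *net)
{
	ASSERT_RTNL();
	return gretap_fb_dev_create(net, "gretap%d", NET_NAME_USER);
}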
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");