/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/gue.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
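
/*
 * Usage sketch (illustrative only, not an API defined here): callers in
 * this file consult the per-CPU route cache before doing a full route
 * lookup, roughly as follows; "fl4" is assumed to be an already
 * initialized flow key.
 *
 *	__be32 saddr;
 *	struct rtable *rt;
 *
 *	rt = tunnel_rtable_get(tunnel, 0, &saddr);
 *	if (!rt) {
 *		rt = ip_route_output_key(tunnel->net, &fl4);
 *		if (!IS_ERR(rt))
 *			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
 *	}
 *
 * tunnel_rtable_get() only hands back a dst whose refcount could be
 * taken and that still passes dst->ops->check(); a stale entry resets
 * the cache slot and forces the slow path above.
 */
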
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched by a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
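
/*
 * Receive-path sketch (illustrative): a decapsulating protocol handler
 * such as GRE resolves the tunnel from the outer header before calling
 * ip_tunnel_rcv(); "itn" would come from net_generic() and "tpi" from
 * the caller's tunnel-header parsing.
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	struct ip_tunnel *tunnel;
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
 *
 * Note the argument order: "remote" is the peer, i.e. the outer source
 * address of a received packet.
 */
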
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
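
/*
 * Worked example for the MTU math in ip_tunnel_bind_dev() (numbers
 * illustrative): for a plain GRE tunnel routed over an Ethernet
 * underlay, tunnel->hlen is 4, so t_hlen = 4 + 20 = 24. With
 * tdev->mtu == 1500 and hard_header_len == 0 on the tunnel device,
 * the device MTU becomes 1500 - 24 = 1476, clamped to a floor of 68
 * (the minimum IPv4 MTU).
 */
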
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	switch (e->type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
		return sizeof(struct udphdr);
	case TUNNEL_ENCAP_GUE:
		return sizeof(struct udphdr) + sizeof(struct guehdr);
	default:
		return -EINVAL;
	}
}

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
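
/*
 * Configuration sketch (illustrative; the field values are examples):
 * a tunnel driver that parsed encapsulation attributes from netlink
 * would hand them to ip_tunnel_encap_setup() like this.
 *
 *	struct ip_tunnel_encap ipencap = {
 *		.type  = TUNNEL_ENCAP_FOU,
 *		.sport = 0,			(0 = pick per-flow source port)
 *		.dport = htons(5555),
 *		.flags = TUNNEL_ENCAP_FLAG_CSUM,
 *	};
 *	int err = ip_tunnel_encap_setup(t, &ipencap);
 *
 * On success t->hlen grows by the encap header size: 8 bytes of UDP
 * for FOU, 8 plus a 4-byte GUE header for GUE. That feeds the headroom
 * and MTU calculations elsewhere in this file.
 */
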
static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
{
	struct udphdr *uh;
	__be16 sport;
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get length and hash before making space in skb */

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	if (e->type == TUNNEL_ENCAP_GUE) {
		struct guehdr *guehdr = (struct guehdr *)&uh[1];

		guehdr->version = 0;
		guehdr->hlen = 0;
		guehdr->flags = 0;
		guehdr->next_hdr = *protocol;
	}

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;

	return 0;
}

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	switch (t->encap.type) {
	case TUNNEL_ENCAP_NONE:
		return 0;
	case TUNNEL_ENCAP_FOU:
	case TUNNEL_ENCAP_GUE:
		return fou_build_header(skb, &t->encap, t->encap_hlen,
					protocol, fl4);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(ip_tunnel_encap);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
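
/*
 * Example of the path-MTU arithmetic above (numbers illustrative):
 * with a route MTU of 1500, hard_header_len == 0 and tunnel->hlen == 4
 * (plain GRE), a DF-marked inner IPv4 packet may carry at most
 * 1500 - 20 - 4 = 1476 bytes. A larger non-GSO packet triggers
 * ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG for inner
 * IPv6) with that value as the suggested MTU, and -E2BIG is returned.
 */
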
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
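
/*
 * Note on the tos handling in ip_tunnel_xmit() (example values
 * illustrative): bit 0 of the configured tunnel tos acts as an
 * "inherit" flag. A parms.iph.tos of 0x1 copies the inner packet's
 * tos/dsfield into the outer header and makes the flow per-packet, so
 * the per-CPU dst cache is bypassed ("connected" is cleared); a value
 * such as 0x28 stamps that fixed tos on every encapsulated packet.
 */
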
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
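
/*
 * Userspace sketch (illustrative, not compiled here): the ioctl
 * handler above is reached via the device's ndo_do_ioctl with a
 * struct ip_tunnel_parm copied in from user memory, roughly:
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr;
 *
 *	strncpy(p.name, "gre1", IFNAMSIZ - 1);
 *	p.iph.version = 4;
 *	p.iph.ihl = 5;
 *	p.iph.protocol = IPPROTO_GRE;
 *	p.iph.saddr = local_addr;	(local_addr/remote_addr are
 *	p.iph.daddr = remote_addr;	 placeholder __be32 values)
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	strncpy(ifr.ifr_name, "gre0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 *
 * SIOCADDTUNNEL/SIOCCHGTUNNEL/SIOCDELTUNNEL require CAP_NET_ADMIN in
 * the device's user namespace, as checked above.
 */
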
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
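
/*
 * Registration sketch (illustrative; the "my_*" names are
 * placeholders): a tunnel module typically calls ip_tunnel_init_net()
 * and ip_tunnel_delete_net() (below) from its pernet operations.
 *
 *	static int __net_init my_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, my_net_id,
 *					  &my_link_ops, "mytun0");
 *	}
 *
 *	static void __net_exit my_exit_net(struct net *net)
 *	{
 *		struct ip_tunnel_net *itn = net_generic(net, my_net_id);
 *
 *		ip_tunnel_delete_net(itn, &my_link_ops);
 *	}
 *
 * Passing a NULL ops skips creation of the per-netns fallback device,
 * as the !ops branch above shows.
 */
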
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
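
/*
 * Wiring sketch (illustrative; the "my_*" names are placeholders):
 * ip_tunnel_newlink()/ip_tunnel_changelink() are intended to be called
 * from a driver's rtnl_link_ops handlers once the driver has parsed
 * its own netlink attributes into a struct ip_tunnel_parm.
 *
 *	static int my_newlink(struct net *src_net, struct net_device *dev,
 *			      struct nlattr *tb[], struct nlattr *data[])
 *	{
 *		struct ip_tunnel_parm p;
 *
 *		my_parse_netlink(data, &p);
 *		return ip_tunnel_newlink(dev, tb, &p);
 *	}
 *
 * ip_tunnel_init() above is likewise meant to serve as the driver's
 * ndo_init, allocating the per-CPU stats, dst cache and GRO cells.
 */
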
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");