/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/* Hash a tunnel (key, remote address) pair into the tunnel hash table. */
static unsigned int ip_tunnel_hash(__be32 key,
				   __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

/* Install @dst (and its matching source address) in one per-cpu cache
 * slot, releasing whatever was cached there before.  A reference is
 * taken on @dst via dst_clone() before publishing it with xchg().
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

/* Cache @dst in the current CPU's slot of the tunnel's dst cache. */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

/* Drop the current CPU's cached route. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel parameters change and old routes may no longer apply.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

/* Return this CPU's cached route with a reference held, or NULL if the
 * cache is empty, the entry is being freed (refcount already zero), or
 * the dst is obsolete and fails its ops->check() validation (in which
 * case the stale entry is also dropped from the cache).
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}

/* Does a packet's key/flags combination match the tunnel's configured
 * input key?  A keyed tunnel only matches packets carrying the same
 * key; a keyless tunnel only matches keyless packets.
 */
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e.
   if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	/* Pass 1: exact (remote, local) match on the keyed bucket.
	 * A tunnel bound to the wrong link is remembered as a candidate
	 * in case no exact-link match exists in any pass.
	 */
	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel has no local address bound. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 3: wildcard-remote bucket — tunnel bound only to our local
	 * address, or a multicast tunnel whose group address we received on.
	 */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match — tunnel with neither address bound. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

/* Return the hash bucket a tunnel with parameters @parms belongs in.
 * Multicast destinations hash like wildcard remotes, and VTI tunnels
 * without TUNNEL_KEY hash with a zero key.
 */
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

/* Insert a tunnel into the per-netns hash table (RCU-safe). */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

/* Remove a tunnel from the per-netns hash table (RCU-safe). */
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

/* Find a configured tunnel whose parameters exactly match @parms and
 * whose device type is @type; returns NULL if none exists.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key)) 296 break; 297 } 298 return t; 299 } 300 301 static struct net_device *__ip_tunnel_create(struct net *net, 302 const struct rtnl_link_ops *ops, 303 struct ip_tunnel_parm *parms) 304 { 305 int err; 306 struct ip_tunnel *tunnel; 307 struct net_device *dev; 308 char name[IFNAMSIZ]; 309 310 if (parms->name[0]) 311 strlcpy(name, parms->name, IFNAMSIZ); 312 else { 313 if (strlen(ops->kind) > (IFNAMSIZ - 3)) { 314 err = -E2BIG; 315 goto failed; 316 } 317 strlcpy(name, ops->kind, IFNAMSIZ); 318 strncat(name, "%d", 2); 319 } 320 321 ASSERT_RTNL(); 322 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); 323 if (!dev) { 324 err = -ENOMEM; 325 goto failed; 326 } 327 dev_net_set(dev, net); 328 329 dev->rtnl_link_ops = ops; 330 331 tunnel = netdev_priv(dev); 332 tunnel->parms = *parms; 333 tunnel->net = net; 334 335 err = register_netdevice(dev); 336 if (err) 337 goto failed_free; 338 339 return dev; 340 341 failed_free: 342 free_netdev(dev); 343 failed: 344 return ERR_PTR(err); 345 } 346 347 static inline void init_tunnel_flow(struct flowi4 *fl4, 348 int proto, 349 __be32 daddr, __be32 saddr, 350 __be32 key, __u8 tos, int oif) 351 { 352 memset(fl4, 0, sizeof(*fl4)); 353 fl4->flowi4_oif = oif; 354 fl4->daddr = daddr; 355 fl4->saddr = saddr; 356 fl4->flowi4_tos = tos; 357 fl4->flowi4_proto = proto; 358 fl4->fl4_gre_key = key; 359 } 360 361 static int ip_tunnel_bind_dev(struct net_device *dev) 362 { 363 struct net_device *tdev = NULL; 364 struct ip_tunnel *tunnel = netdev_priv(dev); 365 const struct iphdr *iph; 366 int hlen = LL_MAX_HEADER; 367 int mtu = ETH_DATA_LEN; 368 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 369 370 iph = &tunnel->parms.iph; 371 372 /* Guess output device to choose reasonable mtu and needed_headroom */ 373 if (iph->daddr) { 374 struct flowi4 fl4; 375 struct rtable *rt; 376 377 init_tunnel_flow(&fl4, iph->protocol, iph->daddr, 378 iph->saddr, tunnel->parms.o_key, 379 RT_TOS(iph->tos), 
tunnel->parms.link); 380 rt = ip_route_output_key(tunnel->net, &fl4); 381 382 if (!IS_ERR(rt)) { 383 tdev = rt->dst.dev; 384 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 385 ip_rt_put(rt); 386 } 387 if (dev->type != ARPHRD_ETHER) 388 dev->flags |= IFF_POINTOPOINT; 389 } 390 391 if (!tdev && tunnel->parms.link) 392 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); 393 394 if (tdev) { 395 hlen = tdev->hard_header_len + tdev->needed_headroom; 396 mtu = tdev->mtu; 397 } 398 399 dev->needed_headroom = t_hlen + hlen; 400 mtu -= (dev->hard_header_len + t_hlen); 401 402 if (mtu < 68) 403 mtu = 68; 404 405 return mtu; 406 } 407 408 static struct ip_tunnel *ip_tunnel_create(struct net *net, 409 struct ip_tunnel_net *itn, 410 struct ip_tunnel_parm *parms) 411 { 412 struct ip_tunnel *nt; 413 struct net_device *dev; 414 415 BUG_ON(!itn->fb_tunnel_dev); 416 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 417 if (IS_ERR(dev)) 418 return ERR_CAST(dev); 419 420 dev->mtu = ip_tunnel_bind_dev(dev); 421 422 nt = netdev_priv(dev); 423 ip_tunnel_add(itn, nt); 424 return nt; 425 } 426 427 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, 428 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, 429 bool log_ecn_error) 430 { 431 struct pcpu_sw_netstats *tstats; 432 const struct iphdr *iph = ip_hdr(skb); 433 int err; 434 435 #ifdef CONFIG_NET_IPGRE_BROADCAST 436 if (ipv4_is_multicast(iph->daddr)) { 437 tunnel->dev->stats.multicast++; 438 skb->pkt_type = PACKET_BROADCAST; 439 } 440 #endif 441 442 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || 443 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { 444 tunnel->dev->stats.rx_crc_errors++; 445 tunnel->dev->stats.rx_errors++; 446 goto drop; 447 } 448 449 if (tunnel->parms.i_flags&TUNNEL_SEQ) { 450 if (!(tpi->flags&TUNNEL_SEQ) || 451 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { 452 tunnel->dev->stats.rx_fifo_errors++; 
453 tunnel->dev->stats.rx_errors++; 454 goto drop; 455 } 456 tunnel->i_seqno = ntohl(tpi->seq) + 1; 457 } 458 459 skb_reset_network_header(skb); 460 461 err = IP_ECN_decapsulate(iph, skb); 462 if (unlikely(err)) { 463 if (log_ecn_error) 464 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", 465 &iph->saddr, iph->tos); 466 if (err > 1) { 467 ++tunnel->dev->stats.rx_frame_errors; 468 ++tunnel->dev->stats.rx_errors; 469 goto drop; 470 } 471 } 472 473 tstats = this_cpu_ptr(tunnel->dev->tstats); 474 u64_stats_update_begin(&tstats->syncp); 475 tstats->rx_packets++; 476 tstats->rx_bytes += skb->len; 477 u64_stats_update_end(&tstats->syncp); 478 479 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); 480 481 if (tunnel->dev->type == ARPHRD_ETHER) { 482 skb->protocol = eth_type_trans(skb, tunnel->dev); 483 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 484 } else { 485 skb->dev = tunnel->dev; 486 } 487 488 if (tun_dst) 489 skb_dst_set(skb, (struct dst_entry *)tun_dst); 490 491 gro_cells_receive(&tunnel->gro_cells, skb); 492 return 0; 493 494 drop: 495 kfree_skb(skb); 496 return 0; 497 } 498 EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 499 500 static int ip_encap_hlen(struct ip_tunnel_encap *e) 501 { 502 const struct ip_tunnel_encap_ops *ops; 503 int hlen = -EINVAL; 504 505 if (e->type == TUNNEL_ENCAP_NONE) 506 return 0; 507 508 if (e->type >= MAX_IPTUN_ENCAP_OPS) 509 return -EINVAL; 510 511 rcu_read_lock(); 512 ops = rcu_dereference(iptun_encaps[e->type]); 513 if (likely(ops && ops->encap_hlen)) 514 hlen = ops->encap_hlen(e); 515 rcu_read_unlock(); 516 517 return hlen; 518 } 519 520 const struct ip_tunnel_encap_ops __rcu * 521 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; 522 523 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, 524 unsigned int num) 525 { 526 if (num >= MAX_IPTUN_ENCAP_OPS) 527 return -ERANGE; 528 529 return !cmpxchg((const struct ip_tunnel_encap_ops **) 530 &iptun_encaps[num], 531 NULL, ops) ? 
0 : -1; 532 } 533 EXPORT_SYMBOL(ip_tunnel_encap_add_ops); 534 535 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, 536 unsigned int num) 537 { 538 int ret; 539 540 if (num >= MAX_IPTUN_ENCAP_OPS) 541 return -ERANGE; 542 543 ret = (cmpxchg((const struct ip_tunnel_encap_ops **) 544 &iptun_encaps[num], 545 ops, NULL) == ops) ? 0 : -1; 546 547 synchronize_net(); 548 549 return ret; 550 } 551 EXPORT_SYMBOL(ip_tunnel_encap_del_ops); 552 553 int ip_tunnel_encap_setup(struct ip_tunnel *t, 554 struct ip_tunnel_encap *ipencap) 555 { 556 int hlen; 557 558 memset(&t->encap, 0, sizeof(t->encap)); 559 560 hlen = ip_encap_hlen(ipencap); 561 if (hlen < 0) 562 return hlen; 563 564 t->encap.type = ipencap->type; 565 t->encap.sport = ipencap->sport; 566 t->encap.dport = ipencap->dport; 567 t->encap.flags = ipencap->flags; 568 569 t->encap_hlen = hlen; 570 t->hlen = t->encap_hlen + t->tun_hlen; 571 572 return 0; 573 } 574 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); 575 576 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, 577 u8 *protocol, struct flowi4 *fl4) 578 { 579 const struct ip_tunnel_encap_ops *ops; 580 int ret = -EINVAL; 581 582 if (t->encap.type == TUNNEL_ENCAP_NONE) 583 return 0; 584 585 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) 586 return -EINVAL; 587 588 rcu_read_lock(); 589 ops = rcu_dereference(iptun_encaps[t->encap.type]); 590 if (likely(ops && ops->build_header)) 591 ret = ops->build_header(skb, &t->encap, protocol, fl4); 592 rcu_read_unlock(); 593 594 return ret; 595 } 596 EXPORT_SYMBOL(ip_tunnel_encap); 597 598 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 599 struct rtable *rt, __be16 df, 600 const struct iphdr *inner_iph) 601 { 602 struct ip_tunnel *tunnel = netdev_priv(dev); 603 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 604 int mtu; 605 606 if (df) 607 mtu = dst_mtu(&rt->dst) - dev->hard_header_len 608 - sizeof(struct iphdr) - tunnel->hlen; 609 else 610 mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; 611 612 if (skb_dst(skb)) 613 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 614 615 if (skb->protocol == htons(ETH_P_IP)) { 616 if (!skb_is_gso(skb) && 617 (inner_iph->frag_off & htons(IP_DF)) && 618 mtu < pkt_size) { 619 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 620 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 621 return -E2BIG; 622 } 623 } 624 #if IS_ENABLED(CONFIG_IPV6) 625 else if (skb->protocol == htons(ETH_P_IPV6)) { 626 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 627 628 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && 629 mtu >= IPV6_MIN_MTU) { 630 if ((tunnel->parms.iph.daddr && 631 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 632 rt6->rt6i_dst.plen == 128) { 633 rt6->rt6i_flags |= RTF_MODIFIED; 634 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); 635 } 636 } 637 638 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && 639 mtu < pkt_size) { 640 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 641 return -E2BIG; 642 } 643 } 644 #endif 645 return 0; 646 } 647 648 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 649 const struct iphdr *tnl_params, u8 protocol) 650 { 651 struct ip_tunnel *tunnel = netdev_priv(dev); 652 const struct iphdr *inner_iph; 653 struct flowi4 fl4; 654 u8 tos, ttl; 655 __be16 df; 656 struct rtable *rt; /* Route to the other host */ 657 unsigned int max_headroom; /* The extra header space needed */ 658 __be32 dst; 659 bool connected; 660 661 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 662 connected = (tunnel->parms.iph.daddr != 0); 663 664 dst = tnl_params->daddr; 665 if (dst == 0) { 666 /* NBMA tunnel */ 667 668 if (!skb_dst(skb)) { 669 dev->stats.tx_fifo_errors++; 670 goto tx_error; 671 } 672 673 if (skb->protocol == htons(ETH_P_IP)) { 674 rt = skb_rtable(skb); 675 dst = rt_nexthop(rt, inner_iph->daddr); 676 } 677 #if IS_ENABLED(CONFIG_IPV6) 678 else if (skb->protocol == htons(ETH_P_IPV6)) { 679 const struct in6_addr *addr6; 680 struct 
neighbour *neigh; 681 bool do_tx_error_icmp; 682 int addr_type; 683 684 neigh = dst_neigh_lookup(skb_dst(skb), 685 &ipv6_hdr(skb)->daddr); 686 if (!neigh) 687 goto tx_error; 688 689 addr6 = (const struct in6_addr *)&neigh->primary_key; 690 addr_type = ipv6_addr_type(addr6); 691 692 if (addr_type == IPV6_ADDR_ANY) { 693 addr6 = &ipv6_hdr(skb)->daddr; 694 addr_type = ipv6_addr_type(addr6); 695 } 696 697 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 698 do_tx_error_icmp = true; 699 else { 700 do_tx_error_icmp = false; 701 dst = addr6->s6_addr32[3]; 702 } 703 neigh_release(neigh); 704 if (do_tx_error_icmp) 705 goto tx_error_icmp; 706 } 707 #endif 708 else 709 goto tx_error; 710 711 connected = false; 712 } 713 714 tos = tnl_params->tos; 715 if (tos & 0x1) { 716 tos &= ~0x1; 717 if (skb->protocol == htons(ETH_P_IP)) { 718 tos = inner_iph->tos; 719 connected = false; 720 } else if (skb->protocol == htons(ETH_P_IPV6)) { 721 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 722 connected = false; 723 } 724 } 725 726 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 727 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 728 729 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 730 goto tx_error; 731 732 rt = connected ? 
tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; 733 734 if (!rt) { 735 rt = ip_route_output_key(tunnel->net, &fl4); 736 737 if (IS_ERR(rt)) { 738 dev->stats.tx_carrier_errors++; 739 goto tx_error; 740 } 741 if (connected) 742 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 743 } 744 745 if (rt->dst.dev == dev) { 746 ip_rt_put(rt); 747 dev->stats.collisions++; 748 goto tx_error; 749 } 750 751 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { 752 ip_rt_put(rt); 753 goto tx_error; 754 } 755 756 if (tunnel->err_count > 0) { 757 if (time_before(jiffies, 758 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 759 tunnel->err_count--; 760 761 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 762 dst_link_failure(skb); 763 } else 764 tunnel->err_count = 0; 765 } 766 767 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); 768 ttl = tnl_params->ttl; 769 if (ttl == 0) { 770 if (skb->protocol == htons(ETH_P_IP)) 771 ttl = inner_iph->ttl; 772 #if IS_ENABLED(CONFIG_IPV6) 773 else if (skb->protocol == htons(ETH_P_IPV6)) 774 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; 775 #endif 776 else 777 ttl = ip4_dst_hoplimit(&rt->dst); 778 } 779 780 df = tnl_params->frag_off; 781 if (skb->protocol == htons(ETH_P_IP)) 782 df |= (inner_iph->frag_off&htons(IP_DF)); 783 784 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 785 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); 786 if (max_headroom > dev->needed_headroom) 787 dev->needed_headroom = max_headroom; 788 789 if (skb_cow_head(skb, dev->needed_headroom)) { 790 ip_rt_put(rt); 791 dev->stats.tx_dropped++; 792 kfree_skb(skb); 793 return; 794 } 795 796 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, 797 df, !net_eq(tunnel->net, dev_net(dev))); 798 return; 799 800 #if IS_ENABLED(CONFIG_IPV6) 801 tx_error_icmp: 802 dst_link_failure(skb); 803 #endif 804 tx_error: 805 dev->stats.tx_errors++; 806 kfree_skb(skb); 807 } 808 EXPORT_SYMBOL_GPL(ip_tunnel_xmit); 809 810 static void 
ip_tunnel_update(struct ip_tunnel_net *itn, 811 struct ip_tunnel *t, 812 struct net_device *dev, 813 struct ip_tunnel_parm *p, 814 bool set_mtu) 815 { 816 ip_tunnel_del(itn, t); 817 t->parms.iph.saddr = p->iph.saddr; 818 t->parms.iph.daddr = p->iph.daddr; 819 t->parms.i_key = p->i_key; 820 t->parms.o_key = p->o_key; 821 if (dev->type != ARPHRD_ETHER) { 822 memcpy(dev->dev_addr, &p->iph.saddr, 4); 823 memcpy(dev->broadcast, &p->iph.daddr, 4); 824 } 825 ip_tunnel_add(itn, t); 826 827 t->parms.iph.ttl = p->iph.ttl; 828 t->parms.iph.tos = p->iph.tos; 829 t->parms.iph.frag_off = p->iph.frag_off; 830 831 if (t->parms.link != p->link) { 832 int mtu; 833 834 t->parms.link = p->link; 835 mtu = ip_tunnel_bind_dev(dev); 836 if (set_mtu) 837 dev->mtu = mtu; 838 } 839 ip_tunnel_dst_reset_all(t); 840 netdev_state_change(dev); 841 } 842 843 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) 844 { 845 int err = 0; 846 struct ip_tunnel *t = netdev_priv(dev); 847 struct net *net = t->net; 848 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); 849 850 BUG_ON(!itn->fb_tunnel_dev); 851 switch (cmd) { 852 case SIOCGETTUNNEL: 853 if (dev == itn->fb_tunnel_dev) { 854 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 855 if (!t) 856 t = netdev_priv(dev); 857 } 858 memcpy(p, &t->parms, sizeof(*p)); 859 break; 860 861 case SIOCADDTUNNEL: 862 case SIOCCHGTUNNEL: 863 err = -EPERM; 864 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 865 goto done; 866 if (p->iph.ttl) 867 p->iph.frag_off |= htons(IP_DF); 868 if (!(p->i_flags & VTI_ISVTI)) { 869 if (!(p->i_flags & TUNNEL_KEY)) 870 p->i_key = 0; 871 if (!(p->o_flags & TUNNEL_KEY)) 872 p->o_key = 0; 873 } 874 875 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 876 877 if (cmd == SIOCADDTUNNEL) { 878 if (!t) { 879 t = ip_tunnel_create(net, itn, p); 880 err = PTR_ERR_OR_ZERO(t); 881 break; 882 } 883 884 err = -EEXIST; 885 break; 886 } 887 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 888 if 
(t) { 889 if (t->dev != dev) { 890 err = -EEXIST; 891 break; 892 } 893 } else { 894 unsigned int nflags = 0; 895 896 if (ipv4_is_multicast(p->iph.daddr)) 897 nflags = IFF_BROADCAST; 898 else if (p->iph.daddr) 899 nflags = IFF_POINTOPOINT; 900 901 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 902 err = -EINVAL; 903 break; 904 } 905 906 t = netdev_priv(dev); 907 } 908 } 909 910 if (t) { 911 err = 0; 912 ip_tunnel_update(itn, t, dev, p, true); 913 } else { 914 err = -ENOENT; 915 } 916 break; 917 918 case SIOCDELTUNNEL: 919 err = -EPERM; 920 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 921 goto done; 922 923 if (dev == itn->fb_tunnel_dev) { 924 err = -ENOENT; 925 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 926 if (!t) 927 goto done; 928 err = -EPERM; 929 if (t == netdev_priv(itn->fb_tunnel_dev)) 930 goto done; 931 dev = t->dev; 932 } 933 unregister_netdevice(dev); 934 err = 0; 935 break; 936 937 default: 938 err = -EINVAL; 939 } 940 941 done: 942 return err; 943 } 944 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); 945 946 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) 947 { 948 struct ip_tunnel *tunnel = netdev_priv(dev); 949 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 950 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; 951 952 if (new_mtu < 68) 953 return -EINVAL; 954 955 if (new_mtu > max_mtu) { 956 if (strict) 957 return -EINVAL; 958 959 new_mtu = max_mtu; 960 } 961 962 dev->mtu = new_mtu; 963 return 0; 964 } 965 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); 966 967 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 968 { 969 return __ip_tunnel_change_mtu(dev, new_mtu, true); 970 } 971 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); 972 973 static void ip_tunnel_dev_free(struct net_device *dev) 974 { 975 struct ip_tunnel *tunnel = netdev_priv(dev); 976 977 gro_cells_destroy(&tunnel->gro_cells); 978 free_percpu(tunnel->dst_cache); 979 free_percpu(dev->tstats); 980 free_netdev(dev); 981 } 982 983 void 
ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 984 { 985 struct ip_tunnel *tunnel = netdev_priv(dev); 986 struct ip_tunnel_net *itn; 987 988 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); 989 990 if (itn->fb_tunnel_dev != dev) { 991 ip_tunnel_del(itn, netdev_priv(dev)); 992 unregister_netdevice_queue(dev, head); 993 } 994 } 995 EXPORT_SYMBOL_GPL(ip_tunnel_dellink); 996 997 struct net *ip_tunnel_get_link_net(const struct net_device *dev) 998 { 999 struct ip_tunnel *tunnel = netdev_priv(dev); 1000 1001 return tunnel->net; 1002 } 1003 EXPORT_SYMBOL(ip_tunnel_get_link_net); 1004 1005 int ip_tunnel_get_iflink(const struct net_device *dev) 1006 { 1007 struct ip_tunnel *tunnel = netdev_priv(dev); 1008 1009 return tunnel->parms.link; 1010 } 1011 EXPORT_SYMBOL(ip_tunnel_get_iflink); 1012 1013 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 1014 struct rtnl_link_ops *ops, char *devname) 1015 { 1016 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 1017 struct ip_tunnel_parm parms; 1018 unsigned int i; 1019 1020 for (i = 0; i < IP_TNL_HASH_SIZE; i++) 1021 INIT_HLIST_HEAD(&itn->tunnels[i]); 1022 1023 if (!ops) { 1024 itn->fb_tunnel_dev = NULL; 1025 return 0; 1026 } 1027 1028 memset(&parms, 0, sizeof(parms)); 1029 if (devname) 1030 strlcpy(parms.name, devname, IFNAMSIZ); 1031 1032 rtnl_lock(); 1033 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 1034 /* FB netdevice is special: we have one, and only one per netns. 1035 * Allowing to move it to another netns is clearly unsafe. 
1036 */ 1037 if (!IS_ERR(itn->fb_tunnel_dev)) { 1038 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 1039 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); 1040 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 1041 } 1042 rtnl_unlock(); 1043 1044 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); 1045 } 1046 EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 1047 1048 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, 1049 struct rtnl_link_ops *ops) 1050 { 1051 struct net *net = dev_net(itn->fb_tunnel_dev); 1052 struct net_device *dev, *aux; 1053 int h; 1054 1055 for_each_netdev_safe(net, dev, aux) 1056 if (dev->rtnl_link_ops == ops) 1057 unregister_netdevice_queue(dev, head); 1058 1059 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 1060 struct ip_tunnel *t; 1061 struct hlist_node *n; 1062 struct hlist_head *thead = &itn->tunnels[h]; 1063 1064 hlist_for_each_entry_safe(t, n, thead, hash_node) 1065 /* If dev is in the same netns, it has already 1066 * been added to the list by the previous loop. 
1067 */ 1068 if (!net_eq(dev_net(t->dev), net)) 1069 unregister_netdevice_queue(t->dev, head); 1070 } 1071 } 1072 1073 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1074 { 1075 LIST_HEAD(list); 1076 1077 rtnl_lock(); 1078 ip_tunnel_destroy(itn, &list, ops); 1079 unregister_netdevice_many(&list); 1080 rtnl_unlock(); 1081 } 1082 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1083 1084 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1085 struct ip_tunnel_parm *p) 1086 { 1087 struct ip_tunnel *nt; 1088 struct net *net = dev_net(dev); 1089 struct ip_tunnel_net *itn; 1090 int mtu; 1091 int err; 1092 1093 nt = netdev_priv(dev); 1094 itn = net_generic(net, nt->ip_tnl_net_id); 1095 1096 if (nt->collect_md) { 1097 if (rtnl_dereference(itn->collect_md_tun)) 1098 return -EEXIST; 1099 } else { 1100 if (ip_tunnel_find(itn, p, dev->type)) 1101 return -EEXIST; 1102 } 1103 1104 nt->net = net; 1105 nt->parms = *p; 1106 err = register_netdevice(dev); 1107 if (err) 1108 goto out; 1109 1110 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1111 eth_hw_addr_random(dev); 1112 1113 mtu = ip_tunnel_bind_dev(dev); 1114 if (!tb[IFLA_MTU]) 1115 dev->mtu = mtu; 1116 1117 ip_tunnel_add(itn, nt); 1118 out: 1119 return err; 1120 } 1121 EXPORT_SYMBOL_GPL(ip_tunnel_newlink); 1122 1123 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 1124 struct ip_tunnel_parm *p) 1125 { 1126 struct ip_tunnel *t; 1127 struct ip_tunnel *tunnel = netdev_priv(dev); 1128 struct net *net = tunnel->net; 1129 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 1130 1131 if (dev == itn->fb_tunnel_dev) 1132 return -EINVAL; 1133 1134 t = ip_tunnel_find(itn, p, dev->type); 1135 1136 if (t) { 1137 if (t->dev != dev) 1138 return -EEXIST; 1139 } else { 1140 t = tunnel; 1141 1142 if (dev->type != ARPHRD_ETHER) { 1143 unsigned int nflags = 0; 1144 1145 if (ipv4_is_multicast(p->iph.daddr)) 1146 nflags = IFF_BROADCAST; 1147 else if (p->iph.daddr) 
1148 nflags = IFF_POINTOPOINT; 1149 1150 if ((dev->flags ^ nflags) & 1151 (IFF_POINTOPOINT | IFF_BROADCAST)) 1152 return -EINVAL; 1153 } 1154 } 1155 1156 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); 1157 return 0; 1158 } 1159 EXPORT_SYMBOL_GPL(ip_tunnel_changelink); 1160 1161 int ip_tunnel_init(struct net_device *dev) 1162 { 1163 struct ip_tunnel *tunnel = netdev_priv(dev); 1164 struct iphdr *iph = &tunnel->parms.iph; 1165 int err; 1166 1167 dev->destructor = ip_tunnel_dev_free; 1168 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 1169 if (!dev->tstats) 1170 return -ENOMEM; 1171 1172 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1173 if (!tunnel->dst_cache) { 1174 free_percpu(dev->tstats); 1175 return -ENOMEM; 1176 } 1177 1178 err = gro_cells_init(&tunnel->gro_cells, dev); 1179 if (err) { 1180 free_percpu(tunnel->dst_cache); 1181 free_percpu(dev->tstats); 1182 return err; 1183 } 1184 1185 tunnel->dev = dev; 1186 tunnel->net = dev_net(dev); 1187 strcpy(tunnel->parms.name, dev->name); 1188 iph->version = 4; 1189 iph->ihl = 5; 1190 1191 if (tunnel->collect_md) { 1192 dev->features |= NETIF_F_NETNS_LOCAL; 1193 netif_keep_dst(dev); 1194 } 1195 return 0; 1196 } 1197 EXPORT_SYMBOL_GPL(ip_tunnel_init); 1198 1199 void ip_tunnel_uninit(struct net_device *dev) 1200 { 1201 struct ip_tunnel *tunnel = netdev_priv(dev); 1202 struct net *net = tunnel->net; 1203 struct ip_tunnel_net *itn; 1204 1205 itn = net_generic(net, tunnel->ip_tnl_net_id); 1206 /* fb_tunnel_dev will be unregisted in net-exit call. 
*/ 1207 if (itn->fb_tunnel_dev != dev) 1208 ip_tunnel_del(itn, netdev_priv(dev)); 1209 1210 ip_tunnel_dst_reset_all(tunnel); 1211 } 1212 EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1213 1214 /* Do least required initialization, rest of init is done in tunnel_init call */ 1215 void ip_tunnel_setup(struct net_device *dev, int net_id) 1216 { 1217 struct ip_tunnel *tunnel = netdev_priv(dev); 1218 tunnel->ip_tnl_net_id = net_id; 1219 } 1220 EXPORT_SYMBOL_GPL(ip_tunnel_setup); 1221 1222 MODULE_LICENSE("GPL"); 1223