/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
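/* Match a packet's key (or absence of one) against a tunnel's
 * configuration: a tunnel with TUNNEL_KEY set accepts only packets
 * carrying the same i_key; a keyless tunnel accepts only keyless
 * packets. (See the hash-table rules documented below.)
 */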
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
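/* Unlike ip_tunnel_lookup(), which classifies incoming packets,
 * ip_tunnel_find() matches a tunnel against a full configuration:
 * exact source/destination address, link, device type and key.
 * Used by the ioctl and netlink configuration paths.
 */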
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)	/* 68 is the IPv4 minimum link MTU (RFC 791) */
		mtu = 68;

	return mtu;
}
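/* Create a new tunnel device from @parms, reusing the fallback device's
 * rtnl_link_ops, derive an initial MTU from the underlay route, and
 * hash the tunnel into @itn. Must be called under RTNL
 * (__ip_tunnel_create() asserts it).
 */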
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}

const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
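/* Usage sketch for the encap-ops registry. The "foo" names below are
 * purely illustrative (nothing in this file defines them); an
 * encapsulation module would do the equivalent of:
 *
 *	static const struct ip_tunnel_encap_ops foo_encap_ops = {
 *		.encap_hlen	= foo_encap_hlen,
 *		.build_header	= foo_build_header,
 *	};
 *
 *	err = ip_tunnel_encap_add_ops(&foo_encap_ops, TUNNEL_ENCAP_FOO);
 *	...
 *	ip_tunnel_encap_del_ops(&foo_encap_ops, TUNNEL_ENCAP_FOO);
 *
 * cmpxchg() keeps the registry race-free without a lock: a slot is
 * claimed only while it is NULL, and released only by its owner.
 */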
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
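/* Transmit path shared by all IPv4 tunnels: resolve the destination
 * (including the NBMA case, where it is derived from the inner packet),
 * apply optional encapsulation, inherit ToS/TTL where configured,
 * enforce path MTU, then hand the packet to iptunnel_xmit(). The skb
 * is always consumed, even on error.
 */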
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
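/* Apply a new configuration to an existing tunnel. The tunnel must be
 * unhashed and re-hashed around the address/key update because
 * ip_bucket() depends on daddr and i_key, and the per-cpu dst cache is
 * flushed since the cached route may no longer be valid.
 */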
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
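/* Queue a tunnel device for unregistration. The per-netns fallback
 * device is deliberately skipped: it is torn down only when its netns
 * exits (see ip_tunnel_destroy()).
 */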
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
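/* Collect every device owned by @ops for batched unregistration: first
 * all matching devices registered in this netns, then tunnels hashed
 * here whose device lives in a different netns and was therefore missed
 * by the first loop.
 */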
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
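/* ndo_uninit counterpart of ip_tunnel_init(): unhash the tunnel (the
 * fallback device is left alone) and drop any cached routes.
 */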
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * ip_tunnel_init() call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");