/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}
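/* Match a packet's (flags, key) pair against a tunnel's configuration:
 * a keyed tunnel accepts only packets carrying its exact key, while a
 * keyless tunnel accepts only packets that carry no key at all.
 */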
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
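/* Unlike ip_tunnel_lookup(), this is an exact-match lookup for the
 * control path (ioctl/netlink): source, destination, link, device type
 * and key must all match the requested parameters.
 */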
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}
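/* Common receive path for decapsulated packets: enforce the configured
 * TUNNEL_CSUM/TUNNEL_SEQ expectations, decapsulate ECN, bump per-CPU
 * stats and hand the skb to the stack through the GRO cells. The skb
 * (and tun_dst) is consumed on both success and failure.
 */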
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
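/* Encapsulation modules (e.g. FOU/GUE in net/ipv4/fou.c) register their
 * ops in the iptun_encaps array. A minimal sketch of how a hypothetical
 * module would plug in (the my_* names below are illustrative, not part
 * of this file):
 *
 *	static const struct ip_tunnel_encap_ops my_encap_ops = {
 *		.encap_hlen	= my_encap_hlen,
 *		.build_header	= my_build_header,
 *	};
 *
 *	err = ip_tunnel_encap_add_ops(&my_encap_ops, TUNNEL_ENCAP_FOU);
 *
 * The cmpxchg() below makes registration fail if the slot is already
 * taken.
 */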
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	skb_dst_update_pmtu(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;
	rt = ip_route_output_key(tunnel->net, &fl4);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}
	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}
	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	else if (skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
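/* Transmit path for classically configured tunnels: resolve the outer
 * destination (including the NBMA case where it comes from the inner
 * headers), derive tos/ttl/df from tnl_params or the inner packet,
 * check PMTU, then encapsulate and send.
 */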
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
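/* Re-key an existing tunnel with new parameters: the tunnel is removed
 * from the hash table and re-added because a change of addresses or key
 * can move it to a different bucket.
 */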
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
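/* The largest IPv4 datagram is 0xFFFF bytes; 0xFFF8 keeps fragment
 * offsets a multiple of 8. What remains after subtracting link and
 * tunnel header overhead is the ceiling for the device MTU.
 */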
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
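/* Per-netns initialization: set up the hash table and, unless fallback
 * tunnels are disabled for this netns, create the fallback device that
 * catches otherwise-unmatched packets in ip_tunnel_lookup().
 */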
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
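/* Changelink counterpart of ip_tunnel_newlink(): refuse to reconfigure
 * the fallback device, refuse parameters that collide with another
 * tunnel, and keep a non-Ethernet device's point-to-point/broadcast
 * flags consistent with the new destination.
 */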
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * tunnel_init callback.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");