/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
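
/* Lookup order, as implemented below: fully specified tunnels (saddr,
 * daddr, key) first, then daddr-only, then local/multicast address, then
 * key-only. In each pass an entry whose parms.link matches wins outright;
 * an up entry with a mismatched link is kept as a candidate and used if
 * no exact link match is found anywhere. Failing all that, fall back to
 * the collect_md tunnel and finally to the fallback device.
 */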
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
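
/* Unlike ip_tunnel_lookup(), which classifies received packets, this
 * matches a tunnel by its configured parameters (including the device
 * type) on the slow configuration path.
 */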
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif,
				    __u32 mark)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	fl4->flowi4_mark = mark;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link,
				 tunnel->fwmark);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}
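
/* Create a tunnel device from @parms using the fallback device's
 * rtnl_link_ops, size its MTU from the underlay via ip_tunnel_bind_dev()
 * and hash it into the per-netns table.
 */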
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
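
/* Encapsulation handlers (e.g. the FOU module) claim a slot in the
 * iptun_encaps array at init time. A minimal registration sketch,
 * following the pattern used by fou.c:
 *
 *	static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 *		.encap_hlen	= fou_encap_hlen,
 *		.build_header	= fou_build_header,
 *	};
 *	err = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 *
 * The slot is claimed with cmpxchg(), so two concurrent registrations
 * for the same encap type cannot both succeed.
 */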
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	skb_dst_update_pmtu(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
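
/* Transmit path for metadata-mode (collect_md) tunnels: destination,
 * TOS, TTL and DF come from the ip_tunnel_info attached to the skb
 * rather than from the device's own parms.
 */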
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
			 RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;
	rt = ip_route_output_key(tunnel->net, &fl4);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}
	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}
	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	else if (skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
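
/* Classic transmit path: addressing comes from the configured
 * tnl_params. Handles NBMA mode (daddr == 0, destination taken from the
 * inner headers or neighbour entry), inherited TOS/TTL, PMTU updates and
 * the per-tunnel dst cache for connected tunnels.
 */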
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			 tunnel->fwmark);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
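
/* Legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl interface. Operating on the
 * fallback device selects a tunnel by parms rather than by the device
 * itself; the fallback device can never be deleted.
 */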
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
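
/* Per-netns initialisation: set up the hash table and, unless @ops is
 * NULL, create the fallback device (exactly one per netns).
 */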
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
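
/* Reconfigure an existing tunnel from netlink. Refuses to touch the
 * fallback device, and rejects parms that already belong to another
 * device or that conflict with the device's point-to-point/broadcast
 * flags.
 */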
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest of the init is done
 * in the tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");