/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
}

static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}

static void tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
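
/* Usage note: the per-cpu dst cache above avoids a full route lookup on
 * every transmitted packet.  A transmit path typically tries the cache
 * first and falls back to the routing table (this mirrors the use in
 * ip_tunnel_xmit() below):
 *
 *	rt = tunnel_rtable_get(tunnel, 0);
 *	if (!rt)
 *		rt = ip_route_output_key(tunnel->net, &fl4);
 *
 * Cached entries are validated against dst->obsolete via dst->ops->check()
 * before reuse and dropped lazily when the check fails; DST_NOCACHE
 * routes are never cached in the first place.
 */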

/* Often modified stats are per cpu, others are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_sw_netstats *tstats =
						per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}
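
/* Key matching, summarized (tunnel config vs. packet flags):
 *
 *	tunnel has TUNNEL_KEY, packet has TUNNEL_KEY -> keys must be equal
 *	tunnel has TUNNEL_KEY, packet has no key     -> no match
 *	tunnel keyless,        packet has TUNNEL_KEY -> no match
 *	tunnel keyless,        packet has no key     -> match
 */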

/* Fallback tunnel: no source, no destination, no key, no options
 *
 * Tunnel hash table:
 * We require an exact key match, i.e. if a key is present in the packet
 * it will match only a tunnel with the same key; if it is not present,
 * it will match only a keyless tunnel.
 *
 * All keyless packets, if not matched against configured keyless tunnels,
 * will match the fallback tunnel.
 *
 * Given src, dst and key, find the appropriate tunnel for an input packet.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	h = ip_tunnel_hash(parms->i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}
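
/* Allocate and register a tunnel net_device; the caller must hold RTNL
 * (hence the ASSERT_RTNL() below).  The name is taken from parms->name
 * when set; otherwise it is derived from ops->kind plus a "%d" template
 * that register_netdevice() expands to a free index (e.g. "gre%d" ->
 * "gre0", "gre1", ...).
 */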

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt, *fbt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	fbt = netdev_priv(itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return NULL;

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
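
/* ip_tunnel_rcv() is the common receive path: protocol handlers (e.g.
 * GRE, IPIP) call it after parsing the outer encapsulation into *tpi.
 * It validates csum/seq expectations against the tunnel configuration,
 * decapsulates ECN, updates per-cpu stats and hands the inner packet to
 * the stack via GRO cells.
 */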

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
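
/* Path MTU bookkeeping for the transmit path.  With DF set, the room
 * available for the inner packet is the route MTU minus the outer IPv4
 * header and the tunnel header.  As an illustration (not taken from the
 * code): plain GRE (4-byte header) over a 1500-byte route leaves
 * 1500 - 20 - 4 = 1476 bytes for the inner packet; anything larger
 * triggers ICMP FRAG_NEEDED (IPv4) or PKT_TOOBIG (IPv6) to the sender.
 */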

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected = true;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
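
/* Apply a new parameter set to an existing tunnel.  saddr, daddr and
 * i_key feed the hash, so the tunnel is unhashed and re-hashed around
 * the change, and the per-cpu dst cache is flushed afterwards.
 */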
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
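
/* Per-netns initialization: the hash buckets plus, for protocols that
 * want one, the fallback device (e.g. "gre0") that catches otherwise
 * unmatched packets.  Passing ops == NULL skips fallback creation.
 */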
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
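
/* Shared ndo_init hook for the tunnel drivers: allocates the per-cpu
 * stats and dst cache and the GRO cells, and seeds the outer IPv4
 * header template (version 4, ihl 5).
 */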
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int i, err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		struct pcpu_sw_netstats *ipt_stats;

		ipt_stats = per_cpu_ptr(dev->tstats, i);
		u64_stats_init(&ipt_stats->syncp);
	}

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");