/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
				   __be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

/* Frequently modified stats are per-cpu; others are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
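
/* Key matching rules (a summary of the logic below, not new behavior):
 *
 *	tunnel i_flags	packet flags	result
 *	TUNNEL_KEY	TUNNEL_KEY	match iff keys are equal
 *	TUNNEL_KEY	no key		no match (key expected, none present)
 *	no key		TUNNEL_KEY	no match (key present, none expected)
 *	no key		no key		match
 */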
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for an input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(itn, key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact match on saddr, daddr and key */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: match on daddr and key, wildcard saddr */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(itn, key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: match on saddr (or multicast daddr) and key */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: match on key alone */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	h = ip_tunnel_hash(itn, parms->i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
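
/* Exact-match lookup used by the management paths (ioctl/netlink): unlike
 * ip_tunnel_lookup() above, there is no fallback, wildcarding or candidate
 * selection; saddr, daddr, key, link and device type must all match.
 */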
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline struct rtable *ip_route_output_tunnel(struct net *net,
						    struct flowi4 *fl4,
						    int proto,
						    __be32 daddr, __be32 saddr,
						    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	return ip_route_output_key(net, fl4);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)	/* 68 is the minimum IPv4 MTU (RFC 791) */
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return NULL;

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
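
/* Receive path for a decapsulated tunnel packet.  At this point the caller
 * has already parsed the tunnel header into @tpi and located @tunnel via
 * ip_tunnel_lookup().  The steps below: validate csum/seq flags against the
 * tunnel configuration, strip the tunnel encapsulation, decapsulate ECN,
 * bump the per-cpu stats and hand the skb to GRO.
 */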
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	skb->mac_header = skb->network_header;
	__pskb_pull(skb, tunnel->hlen);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Presence of the csum flag must match the tunnel configuration */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
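
/* Transmit path: route the outer packet, enforce path MTU against the inner
 * packet (sending ICMP/ICMPv6 "too big" errors back when DF applies), pick
 * tos/ttl (inheriting from the inner header when configured to do so), then
 * push and fill the outer IPv4 header and hand off to iptunnel_xmit().
 */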
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int mtu;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low-order bit set: inherit TOS from the inner packet */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr);
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (inner_iph->frag_off&htons(IP_DF));

		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(inner_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < skb->len) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
					       + rt->dst.header_len;
	if (max_headroom > dev->needed_headroom) {
		dev->needed_headroom = max_headroom;
		if (skb_cow_head(skb, dev->needed_headroom)) {
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return;
		}
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = tnl_params->protocol;
	iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr = fl4.daddr;
	iph->saddr = fl4.saddr;
	iph->ttl = ttl;
	tunnel_ip_select_ident(skb, inner_iph, &rt->dst);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
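
/* Update an existing tunnel in place.  saddr/daddr/i_key affect the hash
 * bucket, so the tunnel is unlinked first and re-added after the new
 * endpoints are set.  Callers hold RTNL (ioctl and netlink paths).
 */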
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
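
/* Legacy ioctl-based configuration (SIOCGETTUNNEL/SIOCADDTUNNEL/
 * SIOCCHGTUNNEL/SIOCDELTUNNEL), reached via the device's ndo_do_ioctl.
 * A minimal userspace sketch of the calling convention, assuming a GRE
 * fallback device named "gre0" (names and addresses here are illustrative
 * only, not part of this file):
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr = { };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(p.name, "gre1", IFNAMSIZ);	// new tunnel name
 *	p.iph.version = 4;
 *	p.iph.ihl = 5;
 *	p.iph.protocol = IPPROTO_GRE;
 *	p.iph.saddr = ...;			// local endpoint
 *	p.iph.daddr = ...;			// remote endpoint
 *	strncpy(ifr.ifr_name, "gre0", IFNAMSIZ);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;	// parms passed via ifreq
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */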
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	/* 68 is the minimum IPv4 MTU; 0xFFF8 keeps the outer tot_len valid */
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;

	itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head),
			       GFP_KERNEL);
	if (!itn->tunnels)
		return -ENOMEM;

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}
	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	rtnl_unlock();
	if (IS_ERR(itn->fb_tunnel_dev)) {
		kfree(itn->tunnels);
		return PTR_ERR(itn->fb_tunnel_dev);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
{
	int h;

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			unregister_netdevice_queue(t->dev, head);
	}
	if (itn->fb_tunnel_dev)
		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
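
/* Netlink (rtnl_link_ops) counterparts of the ioctl paths above.  The
 * protocol module (e.g. ip_gre) translates its IFLA_* attributes into a
 * struct ip_tunnel_parm before calling these helpers.
 */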
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");