/*
 *	IPv6 tunneling device
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
 *
 *	Based on:
 *	linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
 *
 *	RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/etherdevice.h>

#include <asm/uaccess.h>
#include <linux/atomic.h>

#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");

#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif
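
/*
 * Tunnel table layout (summary): tunnels with at least one configured
 * endpoint live in HASH_SIZE (32) RCU-protected buckets, indexed by
 * folding ipv6_addr_hash(remote) ^ ipv6_addr_hash(local) down to
 * HASH_SIZE_SHIFT bits in HASH() below.  A tunnel with both endpoints
 * wildcarded goes to the single-entry tnls_wc list instead; see
 * ip6_tnl_bucket().
 */
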
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);

	return hash_32(hash, HASH_SIZE_SHIFT);
}

static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;

static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
};

static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_sw_netstats tmp, sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		unsigned int start;
		const struct pcpu_sw_netstats *tstats =
						   per_cpu_ptr(dev->tstats, i);

		do {
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
			tmp.rx_packets = tstats->rx_packets;
			tmp.rx_bytes = tstats->rx_bytes;
			tmp.tx_packets = tstats->tx_packets;
			tmp.tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));

		sum.rx_packets += tmp.rx_packets;
		sum.rx_bytes += tmp.rx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.tx_bytes += tmp.tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes = sum.tx_bytes;
	return &dev->stats;
}

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
	struct dst_entry *dst = t->dst_cache;

	if (dst && dst->obsolete &&
	    dst->ops->check(dst, t->dst_cookie) == NULL) {
		t->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
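
/*
 * Note on the cache above: ip6_tnl_dst_store() (below) records the
 * routing tree serial number (fn_sernum) as a cookie when a dst is
 * cached; ip6_tnl_dst_check() hands that cookie back to
 * dst->ops->check() and drops the cached entry if the route has since
 * been obsoleted, so the transmit path falls back to a fresh
 * ip6_route_output().
 */
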
void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
	dst_release(t->dst_cache);
	t->dst_cache = NULL;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);

void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
	dst_release(t->dst_cache);
	t->dst_cache = dst;
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);

/**
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 *   @net: network namespace in which the tunnels are looked up
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
 *
 * Return:
 *   tunnel matching given end-points if found,
 *   else fallback tunnel if its device is up,
 *   else %NULL
 **/

#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
	unsigned int hash = HASH(remote, local);
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	return NULL;
}

/**
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 *   @p: parameters containing tunnel end-points
 *
 * Description:
 *   ip6_tnl_bucket() returns the head of the list matching the
 *   &struct in6_addr entries laddr and raddr in @p.
 *
 * Return: head of IPv6 tunnel list
 **/

static struct ip6_tnl __rcu **
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	unsigned int h = 0;
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
		h = HASH(remote, local);
	}
	return &ip6n->tnls[prio][h];
}

/**
 * ip6_tnl_link - add tunnel to hash table
 *   @t: tunnel to be added
 **/

static void
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

/**
 * ip6_tnl_unlink - remove tunnel from hash table
 *   @t: tunnel to be removed
 **/

static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

static void ip6_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	err = ip6_tnl_dev_init(dev);
	if (err < 0)
		goto out;

	err = register_netdevice(dev);
	if (err < 0)
		goto out;

	strcpy(t->parms.name, dev->name);
	dev->rtnl_link_ops = &ip6_link_ops;

	dev_hold(dev);
	ip6_tnl_link(ip6n, t);
	return 0;

out:
	return err;
}
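
/*
 * Device lifetime note: the dev_hold() taken in ip6_tnl_create2() is
 * paired with the dev_put() in ip6_tnl_dev_uninit(), which also
 * unlinks the tunnel from its hash bucket when the device is
 * unregistered.
 */
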
/**
 * ip6_tnl_create - create a new tunnel
 *   @net: network namespace for the new tunnel
 *   @p: tunnel parameters
 *
 * Description:
 *   Create tunnel matching given parameters.
 *
 * Return:
 *   created tunnel or NULL
 **/

static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err;

	if (p->name[0])
		strlcpy(name, p->name, IFNAMSIZ);
	else
		sprintf(name, "ip6tnl%%d");

	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
			   ip6_tnl_dev_setup);
	if (dev == NULL)
		goto failed;

	dev_net_set(dev, net);

	t = netdev_priv(dev);
	t->parms = *p;
	t->net = dev_net(dev);
	err = ip6_tnl_create2(dev);
	if (err < 0)
		goto failed_free;

	return t;

failed_free:
	ip6_dev_free(dev);
failed:
	return NULL;
}

/**
 * ip6_tnl_locate - find or create tunnel matching given parameters
 *   @net: network namespace to search
 *   @p: tunnel parameters
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
 *   ip6_tnl_locate() first tries to locate an existing tunnel
 *   based on @p. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
 *   matching tunnel or NULL
 **/

static struct ip6_tnl *ip6_tnl_locate(struct net *net,
		struct __ip6_tnl_parm *p, int create)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr))
			return t;
	}
	if (!create)
		return NULL;
	return ip6_tnl_create(net, p);
}

/**
 * ip6_tnl_dev_uninit - tunnel device uninitializer
 *   @dev: the device to be destroyed
 *
 * Description:
 *   ip6_tnl_dev_uninit() removes tunnel from its list
 **/

static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
	else
		ip6_tnl_unlink(ip6n, t);
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}

/**
 * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *   @raw: pointer to the outer IPv6 header within @skb
 *
 * Return:
 *   0 if none was found,
 *   else index to encapsulation limit
 **/

__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
	__u8 nexthdr = ipv6h->nexthdr;
	__u16 off = sizeof(*ipv6h);

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		struct ipv6_opt_hdr *hdr;
		if (raw + off + sizeof(*hdr) > skb->data &&
		    !pskb_may_pull(skb, raw - skb->data + off + sizeof(*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *) (raw + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		if (nexthdr == NEXTHDR_DEST) {
			__u16 i = off + 2;
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;
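
				/* Destination options are a chain of TLVs:
				 * one type byte, one length byte (counting
				 * the data only, not the two header bytes),
				 * then the data.  Pad1 (type 0) is a lone
				 * byte with no length field, hence the i++
				 * below.
				 */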
				/* No more room for encapsulation limit */
				if (i + sizeof(*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);

/**
 * ip6_tnl_err - tunnel error handler
 *
 * Description:
 *   ip6_tnl_err() should handle errors in the tunnel according
 *   to the specifications in RFC 2473.
 **/

static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
	struct ip6_tnl *t;
	int rel_msg = 0;
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
	__u32 rel_info = 0;
	__u16 len;
	int err = -ENOENT;

	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
	   processing of the error. */

	rcu_read_lock();
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
				&ipv6h->saddr)) == NULL)
		goto out;

	if (t->parms.proto != ipproto && t->parms.proto != 0)
		goto out;

	err = 0;

	switch (*type) {
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
		teli = 0;
		if ((*code) == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		if (teli && teli == *info - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
				rel_msg = 1;
			}
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		mtu = *info - offset;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;

		if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
	rcu_read_unlock();
	return err;
}
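
/*
 * The two wrappers below relay an error seen on the tunnel path back
 * towards the original sender, as outlined in RFC 2473 section 8:
 * ip6_tnl_err() decides whether the inner packet merits a relayed
 * error (rel_msg), and the wrappers translate it into an ICMP
 * (ip4ip6) or ICMPv6 (ip6ip6) message sent on a clone of the
 * offending packet.
 */
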
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;
	struct sk_buff *skb2;
	const struct iphdr *eiph;
	struct rtable *rt;
	struct flowi4 fl4;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	case NDISC_REDIRECT:
		rel_type = ICMP_REDIRECT;
		rel_code = ICMP_REDIR_HOST;
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
				   eiph->saddr, 0,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(eiph->tos), 0);
		if (IS_ERR(rt) ||
		    rt->dst.dev->type != ARPHRD_TUNNEL) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
	}
	if (rel_type == ICMP_REDIRECT)
		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}

static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		if (!skb2)
			return 0;

		skb_dst_drop(skb2);
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);

		/* Try to guess incoming interface */
		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
				NULL, 0, 0);

		if (rt && rt->dst.dev)
			skb2->dev = rt->dst.dev;

		icmpv6_send(skb2, rel_type, rel_code, rel_info);

		ip6_rt_put(rt);

		kfree_skb(skb2);
	}

	return 0;
}

static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);

	return IP6_ECN_decapsulate(ipv6h, skb);
}

static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
{
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));

	return IP6_ECN_decapsulate(ipv6h, skb);
}
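
/*
 * IP6_ECN_decapsulate() above follows the usual tunnel ECN rules: 0
 * means the packet is fine (CE is copied to the inner header when
 * both headers are ECN-capable), 1 means the outer header was ECT
 * while the inner one was not (logged), and 2 means the outer header
 * signalled congestion that the non-ECT inner packet cannot carry;
 * the receive path drops that last case as a frame error (see
 * ip6_tnl_rcv()).
 */
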
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
		const struct in6_addr *laddr,
		const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
EXPORT_SYMBOL(ip6_tnl_get_cap);

/* called with rcu_read_lock() */
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
		    const struct in6_addr *laddr,
		    const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
		struct net_device *ldev = NULL;

		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
			ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);

/**
 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
 *   @skb: received socket buffer
 *   @protocol: ethernet protocol ID
 *   @ipproto: payload protocol to match against the tunnel's configured proto
 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
 *
 * Return: 0
 **/

static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
		       __u8 ipproto,
		       int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						   const struct ipv6hdr *ipv6h,
						   struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int err;

	rcu_read_lock();

	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
				&ipv6h->daddr)) != NULL) {
		struct pcpu_sw_netstats *tstats;

		if (t->parms.proto != ipproto && t->parms.proto != 0) {
			rcu_read_unlock();
			goto discard;
		}

		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
			goto discard;
		}

		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
			t->dev->stats.rx_dropped++;
			rcu_read_unlock();
			goto discard;
		}
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(protocol);
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

		__skb_tunnel_rx(skb, t->dev, t->net);

		err = dscp_ecn_decapsulate(t, ipv6h, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
						     &ipv6h->saddr,
						     ipv6_get_dsfield(ipv6h));
			if (err > 1) {
				++t->dev->stats.rx_frame_errors;
				++t->dev->stats.rx_errors;
				rcu_read_unlock();
				goto discard;
			}
		}

		tstats = this_cpu_ptr(t->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);

		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();
	return 1;

discard:
	kfree_skb(skb);
	return 0;
}

static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
			   ip4ip6_dscp_ecn_decapsulate);
}

static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
			   ip6ip6_dscp_ecn_decapsulate);
}
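
/*
 * Layout of the destination options header built by init_tel_txopt()
 * below, carrying the tunnel encapsulation limit (RFC 2473, sec. 5.1):
 *
 *   dst_opt[0]  next header   (filled in when the option is pushed)
 *   dst_opt[1]  hdr ext len   0 (i.e. 8 bytes total)
 *   dst_opt[2]  option type   IPV6_TLV_TNL_ENCAP_LIMIT
 *   dst_opt[3]  option len    1
 *   dst_opt[4]  encap limit value
 *   dst_opt[5]  option type   IPV6_TLV_PADN
 *   dst_opt[6]  option len    1
 *   dst_opt[7]  padding byte
 */
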
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};

static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}

/**
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 *   @t: the outgoing tunnel device
 *   @hdr: IPv6 header from the incoming packet
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 *   doesn't match source of incoming packet.
 *
 * Return:
 *   1 if conflict,
 *   0 else
 **/

static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		struct net_device *ldev = NULL;

		rcu_read_lock();
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
		else if (!ipv6_addr_is_multicast(&p->raddr) &&
			 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
		else
			ret = 1;
		rcu_read_unlock();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
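
/*
 * Worked example for the MTU/headroom bookkeeping in ip6_tnl_xmit2()
 * below: with a 1500-byte route MTU, the inner packet may be at most
 * 1500 - 40 (outer IPv6 header) = 1460 bytes, or 1452 when the 8-byte
 * encapsulation limit option is inserted; max_headroom grows by the
 * same amounts plus LL_RESERVED_SPACE() of the underlying device, and
 * the skb is reallocated only when it is shared or lacks that
 * headroom.
 */
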
/**
 * ip6_tnl_xmit2 - encapsulate packet and send
 *   @skb: the outgoing socket buffer
 *   @dev: the outgoing tunnel device
 *   @dsfield: dscp code for outer header
 *   @fl6: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 * Return:
 *   0 on success
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
 **/

static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
			 struct flowi6 *fl6,
			 int encap_limit,
			 __u32 *pmtu)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
	u8 proto;
	int err = -1;

	if (!fl6->flowi6_mark)
		dst = ip6_tnl_dst_check(t);
	if (!dst) {
		ndst = ip6_route_output(net, NULL, fl6);

		if (ndst->error)
			goto tx_err_link_failure;
		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(ndst)) {
			err = PTR_ERR(ndst);
			ndst = NULL;
			goto tx_err_link_failure;
		}
		dst = ndst;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - sizeof(*ipv6h);
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
	if (skb->len > mtu) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}
	if (fl6->flowi6_mark) {
		skb_dst_set(skb, dst);
		ndst = NULL;
	} else {
		skb_dst_set_noref(skb, dst);
	}
	skb->transport_header = skb->network_header;

	proto = fl6->flowi6_proto;
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
		     ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(skb, dev);
	if (ndst)
		ip6_tnl_dst_store(t, ndst);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(ndst);
	return err;
}
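
/*
 * The two protocol-specific wrappers below fill a flowi6 from the
 * tunnel's template (t->fl.u.ip6), optionally importing the inner
 * packet's TOS/traffic class, flow label, or fwmark, and translate an
 * -EMSGSIZE result from ip6_tnl_xmit2() into an ICMP "fragmentation
 * needed" (IPv4 payload) or ICMPv6 "packet too big" (IPv6 payload)
 * error back to the sender, using the path MTU reported via mtu.
 */
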
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	const struct iphdr *iph = ip_hdr(skb);
	int encap_limit = -1;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	int err;

	if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
	    !ip6_tnl_xmit_ctl(t))
		return -1;

	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPIP;

	dsfield = ipv4_get_dsfield(iph);

	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
					  & IPV6_TCLASS_MASK;
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}

static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	int err;

	if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
		return -1;

	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
	if (offset > 0) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
		if (tel->encap_limit == 0) {
			icmpv6_send(skb, ICMPV6_PARAMPROB,
				    ICMPV6_HDR_FIELD, offset + 2);
			return -1;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPV6;

	dsfield = ipv6_get_dsfield(ipv6h);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
		fl6.flowlabel |= ip6_flowlabel(ipv6h);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
	if (err != 0) {
		if (err == -EMSGSIZE)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		return -1;
	}

	return 0;
}

static netdev_tx_t
ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	int ret;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ret = ip4ip6_tnl_xmit(skb, dev);
		break;
	case htons(ETH_P_IPV6):
		ret = ip6ip6_tnl_xmit(skb, dev);
		break;
	default:
		goto tx_err;
	}

	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
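
/*
 * Worked example for the device MTU computed in ip6_tnl_link_config()
 * below: if the route towards the remote endpoint goes over a
 * 1500-byte Ethernet device, dev->mtu becomes 1500 - 40 = 1460, or
 * 1452 when the encapsulation limit option is in use, and is never
 * allowed to drop below IPV6_MIN_MTU (1280).
 */
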
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
	struct net_device *dev = t->dev;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;

	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

	/* Set up flowi template */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	dev->iflink = p->link;

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(t->net,
						 &p->raddr, &p->laddr,
						 p->link, strict);

		if (rt == NULL)
			return;

		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len +
				sizeof(struct ipv6hdr);

			dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr);
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				dev->mtu -= 8;

			if (dev->mtu < IPV6_MIN_MTU)
				dev->mtu = IPV6_MIN_MTU;
		}
		ip6_rt_put(rt);
	}
}

/**
 * ip6_tnl_change - update the tunnel parameters
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
 *   ip6_tnl_change() updates the tunnel parameters
 **/

static int
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	ip6_tnl_dst_reset(t);
	ip6_tnl_link_config(t);
	return 0;
}

static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	err = ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
	return err;
}

static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
	p->laddr = u->laddr;
	p->raddr = u->raddr;
	p->flags = u->flags;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
	p->flowinfo = u->flowinfo;
	p->link = u->link;
	p->proto = u->proto;
	memcpy(p->name, u->name, sizeof(u->name));
}

static void
ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
{
	u->laddr = p->laddr;
	u->raddr = p->raddr;
	u->flags = p->flags;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
	u->flowinfo = p->flowinfo;
	u->link = p->link;
	u->proto = p->proto;
	memcpy(u->name, p->name, sizeof(u->name));
}
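
/*
 * Illustrative userspace sketch for the ioctl interface handled below
 * (hypothetical variable names; error handling omitted):
 *
 *	struct ip6_tnl_parm p = { 0 };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "mytun");
 *	p.proto = IPPROTO_IPV6;
 *	inet_pton(AF_INET6, "2001:db8::1", &p.laddr);
 *	inet_pton(AF_INET6, "2001:db8::2", &p.raddr);
 *	strcpy(ifr.ifr_name, "ip6tnl0");	// fallback device
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */
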
/**
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   The fallback device "ip6tnl0", created during module
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set,
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict,
 *   %-ENODEV if attempting to change or delete a nonexistent device
 **/

static int
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == ip6n->fb_tnl_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (t == NULL)
				t = netdev_priv(dev);
		} else {
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			break;
		err = -EINVAL;
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);

			err = ip6_tnl_update(t, &p1);
		}
		if (t) {
			err = 0;
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;

		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		if (dev == ip6n->fb_tnl_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (t == NULL)
				break;
			err = -EPERM;
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}
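
/*
 * MTU bounds enforced by ip6_tnl_change_mtu() below: at least 68
 * bytes (the IPv4 minimum) when the tunnel carries IPv4, at least
 * IPV6_MIN_MTU (1280) otherwise, and at most 0xFFF8 minus the
 * device's hard_header_len (presumably leaving room for a fragment
 * header within the 16-bit IPv6 payload length).
 */
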
/**
 * ip6_tnl_change_mtu - change mtu manually for tunnel device
 *   @dev: virtual device associated with tunnel
 *   @new_mtu: the new mtu
 *
 * Return:
 *   0 on success,
 *   %-EINVAL if mtu too small
 **/

static int
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip6_tnl *tnl = netdev_priv(dev);

	if (tnl->parms.proto == IPPROTO_IPIP) {
		if (new_mtu < 68)
			return -EINVAL;
	} else {
		if (new_mtu < IPV6_MIN_MTU)
			return -EINVAL;
	}
	if (new_mtu > 0xFFF8 - dev->hard_header_len)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}


static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_uninit	= ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_xmit,
	.ndo_do_ioctl	= ip6_tnl_ioctl,
	.ndo_change_mtu = ip6_tnl_change_mtu,
	.ndo_get_stats	= ip6_get_stats,
};


/**
 * ip6_tnl_dev_setup - setup virtual tunnel device
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

static void ip6_tnl_dev_setup(struct net_device *dev)
{
	struct ip6_tnl *t;

	dev->netdev_ops = &ip6_tnl_netdev_ops;
	dev->destructor = ip6_dev_free;

	dev->type = ARPHRD_TUNNEL6;
	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
	dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr);
	t = netdev_priv(dev);
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu -= 8;
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
	/* This perm addr will be used as interface identifier by IPv6 */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
}


/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 *   @dev: virtual device associated with tunnel
 **/

static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	t->dev = dev;
	t->net = dev_net(dev);
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;
	return 0;
}

/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;
	ip6_tnl_link_config(t);
	return 0;
}
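
/*
 * The fallback device initialized below backs the "ip6tnl0" interface
 * created for every network namespace: ip6_tnl_lookup() returns it for
 * packets that match no configured tunnel, so it acts as a catch-all
 * decapsulator while it is administratively up.
 */
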
/**
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
 *   @dev: fallback device
 *
 * Return: 0
 **/

static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;

	t->parms.proto = IPPROTO_IPV6;
	dev_hold(dev);

	ip6_tnl_link_config(t);

	rcu_assign_pointer(ip6n->tnls_wc[0], t);
	return 0;
}

static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
{
	u8 proto;

	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
	if (proto != IPPROTO_IPV6 &&
	    proto != IPPROTO_IPIP &&
	    proto != 0)
		return -EINVAL;

	return 0;
}

static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL],
			   sizeof(struct in6_addr));

	if (data[IFLA_IPTUN_REMOTE])
		nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE],
			   sizeof(struct in6_addr));

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
}

static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[])
{
	struct net *net = dev_net(dev);
	struct ip6_tnl *nt;

	nt = netdev_priv(dev);
	ip6_tnl_netlink_parms(data, &nt->parms);

	if (ip6_tnl_locate(net, &nt->parms, 0))
		return -EEXIST;

	return ip6_tnl_create2(dev);
}

static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[])
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm p;
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	ip6_tnl_netlink_parms(data, &p);

	t = ip6_tnl_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}

static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev != ip6n->fb_tnl_dev)
		unregister_netdevice_queue(dev, head);
}
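
/*
 * The rtnl_link_ops below expose the same parameters as the ioctl
 * interface through IFLA_IPTUN_* netlink attributes.  E.g., with
 * iproute2 (illustrative):
 *
 *	ip link add ip6tnl1 type ip6tnl mode ip6ip6 \
 *		local 2001:db8::1 remote 2001:db8::2
 */
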
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
		0;
}

static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
	    nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
		    &parm->laddr) ||
	    nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
		    &parm->raddr) ||
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
};

static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6_tnl_dev_setup,
	.validate	= ip6_tnl_validate,
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	.dellink	= ip6_tnl_dellink,
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
};

static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	= 1,
};

static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	= 1,
};
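
/*
 * Note: the handlers above are registered with xfrm6_tunnel_register()
 * in ip6_tunnel_init(); the priority field orders them relative to
 * other claimants of IPPROTO_IPIP/IPPROTO_IPV6 payloads, and a handler
 * that returns nonzero passes the packet on to the next one, as
 * ip6_tnl_rcv() does when no tunnel matches.
 */
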
static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *dev, *aux;
	int h;
	struct ip6_tnl *t;
	LIST_HEAD(list);

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &ip6_link_ops)
			unregister_netdevice_queue(dev, &list);

	for (h = 0; h < HASH_SIZE; h++) {
		t = rtnl_dereference(ip6n->tnls_r_l[h]);
		while (t != NULL) {
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, &list);
			t = rtnl_dereference(t->next);
		}
	}

	unregister_netdevice_many(&list);
}

static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip6_tnl *t = NULL;
	int err;

	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	err = -ENOMEM;
	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);

	if (!ip6n->fb_tnl_dev)
		goto err_alloc_dev;
	dev_net_set(ip6n->fb_tnl_dev, net);
	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;

	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	err = register_netdev(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	t = netdev_priv(ip6n->fb_tnl_dev);

	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
	return 0;

err_register:
	ip6_dev_free(ip6n->fb_tnl_dev);
err_alloc_dev:
	return err;
}

static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	rtnl_lock();
	ip6_tnl_destroy_tunnels(net);
	rtnl_unlock();
}

static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};

/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Return: 0 on success
 **/

static int __init ip6_tunnel_init(void)
{
	int err;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out_pernet;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto out_ip4ip6;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}
	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	return 0;

rtnl_link_failed:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
	unregister_pernet_device(&ip6_tnl_net_ops);
out_pernet:
	return err;
}

/**
 * ip6_tunnel_cleanup - free resources and unregister protocol
 **/

static void __exit ip6_tunnel_cleanup(void)
{
	rtnl_link_unregister(&ip6_link_ops);
	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
		pr_info("%s: can't deregister ip4ip6\n", __func__);

	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	unregister_pernet_device(&ip6_tnl_net_ops);
}

module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);