/*
 *	IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	Roger Venning <r.venning@telstra.com>:	6to4 support
 *	Nate Thompson <nate@thebog.net>:	6to4 support
 *	Fred Templin <fred.l.templin@boeing.com>:	isatap support
 */

#include <linux/module.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <asm/uaccess.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
   This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static int ipip6_fb_tunnel_init(struct net_device *dev);
static int ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);

static int sit_net_id;
struct sit_net {
	struct ip_tunnel *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel *tunnels_r[HASH_SIZE];
	struct ip_tunnel *tunnels_l[HASH_SIZE];
	struct ip_tunnel *tunnels_wc[1];
	struct ip_tunnel **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static DEFINE_RWLOCK(ipip6_lock);

static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;
	struct sit_net *sitn = net_generic(net, sit_net_id);

	for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = sitn->tunnels_r[h0]; t; t = t->next) {
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	}
	for (t = sitn->tunnels_l[h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;
	}
	if ((t = sitn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}
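/*
 * Tunnels are kept in four hash tables according to which endpoints are
 * configured: tunnels_r_l (both remote and local set), tunnels_r (remote
 * only), tunnels_l (local only) and tunnels_wc (neither).  __ipip6_bucket()
 * below encodes this as prio = (remote ? 2 : 0) | (local ? 1 : 0), matching
 * the tunnels[] pointer array wired up in sit_init_net().
 */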
static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &sitn->tunnels[prio][h];
}

static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
		struct ip_tunnel *t)
{
	return __ipip6_bucket(sitn, &t->parms);
}

static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip6_lock);
			*tp = t->next;
			write_unlock_bh(&ipip6_lock);
			break;
		}
	}
}

static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip6_bucket(sitn, t);

	t->next = *tp;
	write_lock_bh(&ipip6_lock);
	*tp = t;
	write_unlock_bh(&ipip6_lock);
}

static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct sit_net *sitn = net_generic(net, sit_net_id);

	for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		goto failed;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "sit%%d");

	dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	dev->init = ipip6_tunnel_init;
	nt->parms = *parms;

	if (parms->i_flags & SIT_ISATAP)
		dev->priv_flags |= IFF_ISATAP;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);

	ipip6_tunnel_link(sitn, nt);
	return nt;

failed_free:
	free_netdev(dev);
failed:
	return NULL;
}

static struct ip_tunnel_prl_entry *
__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
{
	struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL;

	for (p = t->prl; p; p = p->next)
		if (p->addr == addr)
			break;
	return p;

}
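/*
 * Copy the tunnel's potential router list (PRL) to user space.  When
 * kprl.addr names a specific router only that entry is returned; otherwise
 * up to kprl.datalen / sizeof(kprl) entries are copied just past the request
 * header (a + 1), and the number of bytes written is reported back in
 * a->datalen.
 */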
static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
				struct ip_tunnel_prl __user *a)
{
	struct ip_tunnel_prl kprl, *kp;
	struct ip_tunnel_prl_entry *prl;
	unsigned int cmax, c = 0, ca, len;
	int ret = 0;

	if (copy_from_user(&kprl, a, sizeof(kprl)))
		return -EFAULT;
	cmax = kprl.datalen / sizeof(kprl);
	if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
		cmax = 1;

	/* For simple GET or for root users,
	 * we try harder to allocate.
	 */
	kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
		kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
		NULL;

	read_lock(&ipip6_lock);

	ca = t->prl_count < cmax ? t->prl_count : cmax;

	if (!kp) {
		/* We don't try hard to allocate much memory for
		 * non-root users.
		 * For root users, retry allocating enough memory for
		 * the answer.
		 */
		kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
		if (!kp) {
			ret = -ENOMEM;
			goto out;
		}
	}

	c = 0;
	for (prl = t->prl; prl; prl = prl->next) {
		if (c >= cmax)
			break;
		if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
			continue;
		kp[c].addr = prl->addr;
		kp[c].flags = prl->flags;
		c++;
		if (kprl.addr != htonl(INADDR_ANY))
			break;
	}
out:
	read_unlock(&ipip6_lock);

	len = sizeof(*kp) * c;
	ret = 0;
	if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
		ret = -EFAULT;

	kfree(kp);

	return ret;
}

static int
ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
{
	struct ip_tunnel_prl_entry *p;
	int err = 0;

	if (a->addr == htonl(INADDR_ANY))
		return -EINVAL;

	write_lock(&ipip6_lock);

	for (p = t->prl; p; p = p->next) {
		if (p->addr == a->addr) {
			if (chg)
				goto update;
			err = -EEXIST;
			goto out;
		}
	}

	if (chg) {
		err = -ENXIO;
		goto out;
	}

	p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
	if (!p) {
		err = -ENOBUFS;
		goto out;
	}

	p->next = t->prl;
	t->prl = p;
	t->prl_count++;
update:
	p->addr = a->addr;
	p->flags = a->flags;
out:
	write_unlock(&ipip6_lock);
	return err;
}

static int
ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
{
	struct ip_tunnel_prl_entry *x, **p;
	int err = 0;

	write_lock(&ipip6_lock);

	if (a && a->addr != htonl(INADDR_ANY)) {
		for (p = &t->prl; *p; p = &(*p)->next) {
			if ((*p)->addr == a->addr) {
				x = *p;
				*p = x->next;
				kfree(x);
				t->prl_count--;
				goto out;
			}
		}
		err = -ENXIO;
	} else {
		while (t->prl) {
			x = t->prl;
			t->prl = t->prl->next;
			kfree(x);
			t->prl_count--;
		}
	}
out:
	write_unlock(&ipip6_lock);
	return err;
}

static int
isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t)
{
	struct ip_tunnel_prl_entry *p;
	int ok = 1;

	read_lock(&ipip6_lock);
	p = __ipip6_tunnel_locate_prl(t, iph->saddr);
	if (p) {
		if (p->flags & PRL_DEFAULT)
			skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
		else
			skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
	} else {
		struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
		if (ipv6_addr_is_isatap(addr6) &&
		    (addr6->s6_addr32[3] == iph->saddr) &&
		    ipv6_chk_prefix(addr6, t->dev))
			skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
		else
			ok = 0;
	}
	read_unlock(&ipip6_lock);
	return ok;
}

static void ipip6_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct sit_net *sitn = net_generic(net, sit_net_id);

	if (dev == sitn->fb_tunnel_dev) {
		write_lock_bh(&ipip6_lock);
		sitn->tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip6_lock);
		dev_put(dev);
	} else {
		ipip6_tunnel_unlink(sitn, netdev_priv(dev));
		ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
		dev_put(dev);
	}
}


static int ipip6_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip6_lock);
	t = ipip6_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip6_lock);
	return err;
}

static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos))
		IP6_ECN_set_ce(ipv6_hdr(skb));
}

static int ipip6_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto out;

	iph = ip_hdr(skb);

	read_lock(&ipip6_lock);
	if ((tunnel = ipip6_tunnel_lookup(dev_net(skb->dev),
					iph->saddr, iph->daddr)) != NULL) {
		secpath_reset(skb);
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		IPCB(skb)->flags = 0;
		skb->protocol = htons(ETH_P_IPV6);
		skb->pkt_type = PACKET_HOST;

		if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
		    !isatap_chksrc(skb, iph, tunnel)) {
			tunnel->dev->stats.rx_errors++;
			read_unlock(&ipip6_lock);
			kfree_skb(skb);
			return 0;
		}
		tunnel->dev->stats.rx_packets++;
		tunnel->dev->stats.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip6_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip6_lock);
		return 0;
	}

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
	read_unlock(&ipip6_lock);
out:
	kfree_skb(skb);
	return 0;
}

/* Returns the embedded IPv4 address if the IPv6 address
   comes from 6to4 (RFC 3056) addr space */

static inline __be32 try_6to4(struct in6_addr *v6dst)
{
	__be32 dst = 0;

	if (v6dst->s6_addr16[0] == htons(0x2002)) {
		/* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
		memcpy(&dst, &v6dst->s6_addr16[1], 4);
	}
	return dst;
}
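/*
 * Under the 6to4 prefix 2002::/16 the embedded IPv4 address occupies
 * bits 16..47 of the IPv6 address, e.g. 2002:c000:0204::/48 embeds
 * 192.0.2.4 (0xc0000204).
 */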
/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->dev->stats;
	struct iphdr *tiph = &tunnel->parms.iph;
	struct ipv6hdr *iph6 = ipv6_hdr(skb);
	u8 tos = tunnel->parms.iph.tos;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int mtu;
	struct in6_addr *addr6;
	int addr_type;

	if (tunnel->recursion++) {
		stats->collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IPV6))
		goto tx_error;

	/* ISATAP (RFC4214) - must come before 6to4 */
	if (dev->priv_flags & IFF_ISATAP) {
		struct neighbour *neigh = NULL;

		if (skb->dst)
			neigh = skb->dst->neighbour;

		if (neigh == NULL) {
			if (net_ratelimit())
				printk(KERN_DEBUG "sit: nexthop == NULL\n");
			goto tx_error;
		}

		addr6 = (struct in6_addr*)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if ((addr_type & IPV6_ADDR_UNICAST) &&
		     ipv6_addr_is_isatap(addr6))
			dst = addr6->s6_addr32[3];
		else
			goto tx_error;
	}

	if (!dst)
		dst = try_6to4(&iph6->daddr);

	if (!dst) {
		struct neighbour *neigh = NULL;

		if (skb->dst)
			neigh = skb->dst->neighbour;

		if (neigh == NULL) {
			if (net_ratelimit())
				printk(KERN_DEBUG "sit: nexthop == NULL\n");
			goto tx_error;
		}

		addr6 = (struct in6_addr*)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if (addr_type == IPV6_ADDR_ANY) {
			addr6 = &ipv6_hdr(skb)->daddr;
			addr_type = ipv6_addr_type(addr6);
		}

		if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
			goto tx_error_icmp;

		dst = addr6->s6_addr32[3];
	}

	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	if (rt->rt_type != RTN_UNICAST) {
		ip_rt_put(rt);
		stats->tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {
		stats->collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (tunnel->parms.iph.daddr && skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		iph6 = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr)>>2;
	if (mtu > IPV6_MIN_MTU)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;

	iph->protocol = IPPROTO_IPV6;
	iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl = iph6->hop_limit;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .oif = tunnel->parms.link,
				    .proto = IPPROTO_IPV6 };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
		if (dev->mtu < IPV6_MIN_MTU)
			dev->mtu = IPV6_MIN_MTU;
	}
	dev->iflink = tunnel->parms.link;
}

static int
ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel_prl prl;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct sit_net *sitn = net_generic(net, sit_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == sitn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip6_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 ||
		    p.iph.ihl != 5 ||
		    (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip6_tunnel_unlink(sitn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip6_tunnel_link(sitn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip6_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == sitn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(sitn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	case SIOCGETPRL:
		err = -EINVAL;
		if (dev == sitn->fb_tunnel_dev)
			goto done;
		err = -ENOENT;
		if (!(t = netdev_priv(dev)))
			goto done;
		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
		break;

	case SIOCADDPRL:
	case SIOCDELPRL:
	case SIOCCHGPRL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;
		err = -EINVAL;
		if (dev == sitn->fb_tunnel_dev)
			goto done;
		err = -EFAULT;
		if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
			goto done;
		err = -ENOENT;
		if (!(t = netdev_priv(dev)))
			goto done;

		switch (cmd) {
		case SIOCDELPRL:
			err = ipip6_tunnel_del_prl(t, &prl);
			break;
		case SIOCADDPRL:
		case SIOCCHGPRL:
			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
			break;
		}
		netdev_state_change(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static void ipip6_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipip6_tunnel_uninit;
	dev->destructor		= free_netdev;
	dev->hard_start_xmit	= ipip6_tunnel_xmit;
	dev->do_ioctl		= ipip6_tunnel_ioctl;
	dev->change_mtu		= ipip6_tunnel_change_mtu;

	dev->type		= ARPHRD_SIT;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static int ipip6_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip6_tunnel_bind_dev(dev);

	return 0;
}

static int ipip6_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct net *net = dev_net(dev);
	struct sit_net *sitn = net_generic(net, sit_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_IPV6;
	iph->ihl = 5;
	iph->ttl = 64;

	dev_hold(dev);
	sitn->tunnels_wc[0] = tunnel;
	return 0;
}

static struct xfrm_tunnel sit_handler = {
	.handler	= ipip6_rcv,
	.err_handler	= ipip6_err,
	.priority	= 1,
};

static void sit_destroy_tunnels(struct sit_net *sitn)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = sitn->tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

static int sit_init_net(struct net *net)
{
	int err;
	struct sit_net *sitn;

	err = -ENOMEM;
	sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
	if (sitn == NULL)
		goto err_alloc;

	err = net_assign_generic(net, sit_net_id, sitn);
	if (err < 0)
		goto err_assign;

	sitn->tunnels[0] = sitn->tunnels_wc;
	sitn->tunnels[1] = sitn->tunnels_l;
	sitn->tunnels[2] = sitn->tunnels_r;
	sitn->tunnels[3] = sitn->tunnels_r_l;

	sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
					   ipip6_tunnel_setup);
	if (!sitn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	sitn->fb_tunnel_dev->init = ipip6_fb_tunnel_init;
	dev_net_set(sitn->fb_tunnel_dev, net);

	if ((err = register_netdev(sitn->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(sitn);
err_alloc:
	return err;
}

static void sit_exit_net(struct net *net)
{
	struct sit_net *sitn;

	sitn = net_generic(net, sit_net_id);
	rtnl_lock();
	sit_destroy_tunnels(sitn);
	unregister_netdevice(sitn->fb_tunnel_dev);
	rtnl_unlock();
	kfree(sitn);
}

static struct pernet_operations sit_net_ops = {
	.init = sit_init_net,
	.exit = sit_exit_net,
};

static void __exit sit_cleanup(void)
{
	xfrm4_tunnel_deregister(&sit_handler, AF_INET6);

	unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
}

static int __init sit_init(void)
{
	int err;

	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");

	if (xfrm4_tunnel_register(&sit_handler, AF_INET6) < 0) {
		printk(KERN_INFO "sit init: Can't add protocol\n");
		return -EAGAIN;
	}

	err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
	if (err < 0)
		xfrm4_tunnel_deregister(&sit_handler, AF_INET6);

	return err;
}

module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
MODULE_ALIAS("sit0");