1 /* 2 * TUN - Universal TUN/TAP device driver. 3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ 16 */ 17 18 /* 19 * Changes: 20 * 21 * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 22 * Add TUNSETLINK ioctl to set the link encapsulation 23 * 24 * Mark Smith <markzzzsmith@yahoo.com.au> 25 * Use random_ether_addr() for tap MAC address. 26 * 27 * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 28 * Fixes in packet dropping, queue length setting and queue wakeup. 29 * Increased default tx queue length. 30 * Added ethtool API. 31 * Minor cleanups 32 * 33 * Daniel Podlejski <underley@underley.eu.org> 34 * Modifications for 2.3.99-pre5 kernel. 35 */ 36 37 #define DRV_NAME "tun" 38 #define DRV_VERSION "1.6" 39 #define DRV_DESCRIPTION "Universal TUN/TAP device driver" 40 #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>" 41 42 #include <linux/module.h> 43 #include <linux/errno.h> 44 #include <linux/kernel.h> 45 #include <linux/major.h> 46 #include <linux/slab.h> 47 #include <linux/smp_lock.h> 48 #include <linux/poll.h> 49 #include <linux/fcntl.h> 50 #include <linux/init.h> 51 #include <linux/skbuff.h> 52 #include <linux/netdevice.h> 53 #include <linux/etherdevice.h> 54 #include <linux/miscdevice.h> 55 #include <linux/ethtool.h> 56 #include <linux/rtnetlink.h> 57 #include <linux/if.h> 58 #include <linux/if_arp.h> 59 #include <linux/if_ether.h> 60 #include <linux/if_tun.h> 61 #include <linux/crc32.h> 62 #include <linux/nsproxy.h> 63 #include <linux/virtio_net.h> 64 #include <net/net_namespace.h> 65 #include <net/netns/generic.h> 66 67 #include <asm/system.h> 68 #include <asm/uaccess.h> 69 70 /* Uncomment to enable debugging */ 71 /* #define TUN_DEBUG 1 */ 72 73 #ifdef TUN_DEBUG 74 static int debug; 75 76 #define DBG if(tun->debug)printk 77 #define DBG1 if(debug==2)printk 78 #else 79 #define DBG( a... ) 80 #define DBG1( a... ) 81 #endif 82 83 #define FLT_EXACT_COUNT 8 84 struct tap_filter { 85 unsigned int count; /* Number of addrs. Zero means disabled */ 86 u32 mask[2]; /* Mask of the hashed addrs */ 87 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; 88 }; 89 90 struct tun_struct { 91 unsigned int flags; 92 int attached; 93 uid_t owner; 94 gid_t group; 95 96 wait_queue_head_t read_wait; 97 struct sk_buff_head readq; 98 99 struct net_device *dev; 100 struct fasync_struct *fasync; 101 102 struct tap_filter txflt; 103 104 #ifdef TUN_DEBUG 105 int debug; 106 #endif 107 }; 108 109 static int tun_attach(struct tun_struct *tun, struct file *file) 110 { 111 const struct cred *cred = current_cred(); 112 113 ASSERT_RTNL(); 114 115 if (file->private_data) 116 return -EINVAL; 117 118 if (tun->attached) 119 return -EBUSY; 120 121 /* Check permissions */ 122 if (((tun->owner != -1 && cred->euid != tun->owner) || 123 (tun->group != -1 && cred->egid != tun->group)) && 124 !capable(CAP_NET_ADMIN)) 125 return -EPERM; 126 127 file->private_data = tun; 128 tun->attached = 1; 129 get_net(dev_net(tun->dev)); 130 131 return 0; 132 } 133 134 /* TAP filterting */ 135 static void addr_hash_set(u32 *mask, const u8 *addr) 136 { 137 int n = ether_crc(ETH_ALEN, addr) >> 26; 138 mask[n >> 5] |= (1 << (n & 31)); 139 } 140 141 static unsigned int addr_hash_test(const u32 *mask, const u8 *addr) 142 { 143 int n = ether_crc(ETH_ALEN, addr) >> 26; 144 return mask[n >> 5] & (1 << (n & 31)); 145 } 146 147 static int update_filter(struct tap_filter *filter, void __user *arg) 148 { 149 struct { u8 u[ETH_ALEN]; } *addr; 150 struct tun_filter uf; 151 int err, alen, n, nexact; 152 153 if (copy_from_user(&uf, arg, sizeof(uf))) 154 return -EFAULT; 155 156 if (!uf.count) { 157 /* Disabled */ 158 filter->count = 0; 159 return 0; 160 } 161 162 alen = ETH_ALEN * uf.count; 163 addr = kmalloc(alen, GFP_KERNEL); 164 if (!addr) 165 return -ENOMEM; 166 167 if (copy_from_user(addr, arg + sizeof(uf), alen)) { 168 err = -EFAULT; 169 goto done; 170 } 171 172 /* The filter is updated without holding any locks. Which is 173 * perfectly safe. We disable it first and in the worst 174 * case we'll accept a few undesired packets. */ 175 filter->count = 0; 176 wmb(); 177 178 /* Use first set of addresses as an exact filter */ 179 for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++) 180 memcpy(filter->addr[n], addr[n].u, ETH_ALEN); 181 182 nexact = n; 183 184 /* The rest is hashed */ 185 memset(filter->mask, 0, sizeof(filter->mask)); 186 for (; n < uf.count; n++) 187 addr_hash_set(filter->mask, addr[n].u); 188 189 /* For ALLMULTI just set the mask to all ones. 190 * This overrides the mask populated above. */ 191 if ((uf.flags & TUN_FLT_ALLMULTI)) 192 memset(filter->mask, ~0, sizeof(filter->mask)); 193 194 /* Now enable the filter */ 195 wmb(); 196 filter->count = nexact; 197 198 /* Return the number of exact filters */ 199 err = nexact; 200 201 done: 202 kfree(addr); 203 return err; 204 } 205 206 /* Returns: 0 - drop, !=0 - accept */ 207 static int run_filter(struct tap_filter *filter, const struct sk_buff *skb) 208 { 209 /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect 210 * at this point. */ 211 struct ethhdr *eh = (struct ethhdr *) skb->data; 212 int i; 213 214 /* Exact match */ 215 for (i = 0; i < filter->count; i++) 216 if (!compare_ether_addr(eh->h_dest, filter->addr[i])) 217 return 1; 218 219 /* Inexact match (multicast only) */ 220 if (is_multicast_ether_addr(eh->h_dest)) 221 return addr_hash_test(filter->mask, eh->h_dest); 222 223 return 0; 224 } 225 226 /* 227 * Checks whether the packet is accepted or not. 228 * Returns: 0 - drop, !=0 - accept 229 */ 230 static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) 231 { 232 if (!filter->count) 233 return 1; 234 235 return run_filter(filter, skb); 236 } 237 238 /* Network device part of the driver */ 239 240 static const struct ethtool_ops tun_ethtool_ops; 241 242 /* Net device open. */ 243 static int tun_net_open(struct net_device *dev) 244 { 245 netif_start_queue(dev); 246 return 0; 247 } 248 249 /* Net device close. */ 250 static int tun_net_close(struct net_device *dev) 251 { 252 netif_stop_queue(dev); 253 return 0; 254 } 255 256 /* Net device start xmit */ 257 static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) 258 { 259 struct tun_struct *tun = netdev_priv(dev); 260 261 DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); 262 263 /* Drop packet if interface is not attached */ 264 if (!tun->attached) 265 goto drop; 266 267 /* Drop if the filter does not like it. 268 * This is a noop if the filter is disabled. 269 * Filter can be enabled only for the TAP devices. */ 270 if (!check_filter(&tun->txflt, skb)) 271 goto drop; 272 273 if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) { 274 if (!(tun->flags & TUN_ONE_QUEUE)) { 275 /* Normal queueing mode. */ 276 /* Packet scheduler handles dropping of further packets. */ 277 netif_stop_queue(dev); 278 279 /* We won't see all dropped packets individually, so overrun 280 * error is more appropriate. */ 281 dev->stats.tx_fifo_errors++; 282 } else { 283 /* Single queue mode. 284 * Driver handles dropping of all packets itself. */ 285 goto drop; 286 } 287 } 288 289 /* Enqueue packet */ 290 skb_queue_tail(&tun->readq, skb); 291 dev->trans_start = jiffies; 292 293 /* Notify and wake up reader process */ 294 if (tun->flags & TUN_FASYNC) 295 kill_fasync(&tun->fasync, SIGIO, POLL_IN); 296 wake_up_interruptible(&tun->read_wait); 297 return 0; 298 299 drop: 300 dev->stats.tx_dropped++; 301 kfree_skb(skb); 302 return 0; 303 } 304 305 static void tun_net_mclist(struct net_device *dev) 306 { 307 /* 308 * This callback is supposed to deal with mc filter in 309 * _rx_ path and has nothing to do with the _tx_ path. 310 * In rx path we always accept everything userspace gives us. 311 */ 312 return; 313 } 314 315 #define MIN_MTU 68 316 #define MAX_MTU 65535 317 318 static int 319 tun_net_change_mtu(struct net_device *dev, int new_mtu) 320 { 321 if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU) 322 return -EINVAL; 323 dev->mtu = new_mtu; 324 return 0; 325 } 326 327 static const struct net_device_ops tun_netdev_ops = { 328 .ndo_open = tun_net_open, 329 .ndo_stop = tun_net_close, 330 .ndo_start_xmit = tun_net_xmit, 331 .ndo_change_mtu = tun_net_change_mtu, 332 }; 333 334 static const struct net_device_ops tap_netdev_ops = { 335 .ndo_open = tun_net_open, 336 .ndo_stop = tun_net_close, 337 .ndo_start_xmit = tun_net_xmit, 338 .ndo_change_mtu = tun_net_change_mtu, 339 .ndo_set_multicast_list = tun_net_mclist, 340 .ndo_set_mac_address = eth_mac_addr, 341 .ndo_validate_addr = eth_validate_addr, 342 }; 343 344 /* Initialize net device. */ 345 static void tun_net_init(struct net_device *dev) 346 { 347 struct tun_struct *tun = netdev_priv(dev); 348 349 switch (tun->flags & TUN_TYPE_MASK) { 350 case TUN_TUN_DEV: 351 dev->netdev_ops = &tun_netdev_ops; 352 353 /* Point-to-Point TUN Device */ 354 dev->hard_header_len = 0; 355 dev->addr_len = 0; 356 dev->mtu = 1500; 357 358 /* Zero header length */ 359 dev->type = ARPHRD_NONE; 360 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; 361 dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ 362 break; 363 364 case TUN_TAP_DEV: 365 dev->netdev_ops = &tap_netdev_ops; 366 /* Ethernet TAP Device */ 367 ether_setup(dev); 368 369 random_ether_addr(dev->dev_addr); 370 371 dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ 372 break; 373 } 374 } 375 376 /* Character device part */ 377 378 /* Poll */ 379 static unsigned int tun_chr_poll(struct file *file, poll_table * wait) 380 { 381 struct tun_struct *tun = file->private_data; 382 unsigned int mask = POLLOUT | POLLWRNORM; 383 384 if (!tun) 385 return POLLERR; 386 387 DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); 388 389 poll_wait(file, &tun->read_wait, wait); 390 391 if (!skb_queue_empty(&tun->readq)) 392 mask |= POLLIN | POLLRDNORM; 393 394 return mask; 395 } 396 397 /* prepad is the amount to reserve at front. len is length after that. 398 * linear is a hint as to how much to copy (usually headers). */ 399 static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear, 400 gfp_t gfp) 401 { 402 struct sk_buff *skb; 403 unsigned int i; 404 405 skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN); 406 if (skb) { 407 skb_reserve(skb, prepad); 408 skb_put(skb, len); 409 return skb; 410 } 411 412 /* Under a page? Don't bother with paged skb. */ 413 if (prepad + len < PAGE_SIZE) 414 return NULL; 415 416 /* Start with a normal skb, and add pages. */ 417 skb = alloc_skb(prepad + linear, gfp); 418 if (!skb) 419 return NULL; 420 421 skb_reserve(skb, prepad); 422 skb_put(skb, linear); 423 424 len -= linear; 425 426 for (i = 0; i < MAX_SKB_FRAGS; i++) { 427 skb_frag_t *f = &skb_shinfo(skb)->frags[i]; 428 429 f->page = alloc_page(gfp|__GFP_ZERO); 430 if (!f->page) 431 break; 432 433 f->page_offset = 0; 434 f->size = PAGE_SIZE; 435 436 skb->data_len += PAGE_SIZE; 437 skb->len += PAGE_SIZE; 438 skb->truesize += PAGE_SIZE; 439 skb_shinfo(skb)->nr_frags++; 440 441 if (len < PAGE_SIZE) { 442 len = 0; 443 break; 444 } 445 len -= PAGE_SIZE; 446 } 447 448 /* Too large, or alloc fail? */ 449 if (unlikely(len)) { 450 kfree_skb(skb); 451 skb = NULL; 452 } 453 454 return skb; 455 } 456 457 /* Get packet from user space buffer */ 458 static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count) 459 { 460 struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) }; 461 struct sk_buff *skb; 462 size_t len = count, align = 0; 463 struct virtio_net_hdr gso = { 0 }; 464 465 if (!(tun->flags & TUN_NO_PI)) { 466 if ((len -= sizeof(pi)) > count) 467 return -EINVAL; 468 469 if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi))) 470 return -EFAULT; 471 } 472 473 if (tun->flags & TUN_VNET_HDR) { 474 if ((len -= sizeof(gso)) > count) 475 return -EINVAL; 476 477 if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso))) 478 return -EFAULT; 479 480 if (gso.hdr_len > len) 481 return -EINVAL; 482 } 483 484 if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) { 485 align = NET_IP_ALIGN; 486 if (unlikely(len < ETH_HLEN)) 487 return -EINVAL; 488 } 489 490 if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) { 491 tun->dev->stats.rx_dropped++; 492 return -ENOMEM; 493 } 494 495 if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) { 496 tun->dev->stats.rx_dropped++; 497 kfree_skb(skb); 498 return -EFAULT; 499 } 500 501 if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 502 if (!skb_partial_csum_set(skb, gso.csum_start, 503 gso.csum_offset)) { 504 tun->dev->stats.rx_frame_errors++; 505 kfree_skb(skb); 506 return -EINVAL; 507 } 508 } else if (tun->flags & TUN_NOCHECKSUM) 509 skb->ip_summed = CHECKSUM_UNNECESSARY; 510 511 switch (tun->flags & TUN_TYPE_MASK) { 512 case TUN_TUN_DEV: 513 if (tun->flags & TUN_NO_PI) { 514 switch (skb->data[0] & 0xf0) { 515 case 0x40: 516 pi.proto = htons(ETH_P_IP); 517 break; 518 case 0x60: 519 pi.proto = htons(ETH_P_IPV6); 520 break; 521 default: 522 tun->dev->stats.rx_dropped++; 523 kfree_skb(skb); 524 return -EINVAL; 525 } 526 } 527 528 skb_reset_mac_header(skb); 529 skb->protocol = pi.proto; 530 skb->dev = tun->dev; 531 break; 532 case TUN_TAP_DEV: 533 skb->protocol = eth_type_trans(skb, tun->dev); 534 break; 535 }; 536 537 if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { 538 pr_debug("GSO!\n"); 539 switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 540 case VIRTIO_NET_HDR_GSO_TCPV4: 541 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 542 break; 543 case VIRTIO_NET_HDR_GSO_TCPV6: 544 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 545 break; 546 default: 547 tun->dev->stats.rx_frame_errors++; 548 kfree_skb(skb); 549 return -EINVAL; 550 } 551 552 if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) 553 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 554 555 skb_shinfo(skb)->gso_size = gso.gso_size; 556 if (skb_shinfo(skb)->gso_size == 0) { 557 tun->dev->stats.rx_frame_errors++; 558 kfree_skb(skb); 559 return -EINVAL; 560 } 561 562 /* Header must be checked, and gso_segs computed. */ 563 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 564 skb_shinfo(skb)->gso_segs = 0; 565 } 566 567 netif_rx_ni(skb); 568 569 tun->dev->stats.rx_packets++; 570 tun->dev->stats.rx_bytes += len; 571 572 return count; 573 } 574 575 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, 576 unsigned long count, loff_t pos) 577 { 578 struct tun_struct *tun = iocb->ki_filp->private_data; 579 580 if (!tun) 581 return -EBADFD; 582 583 DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); 584 585 return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); 586 } 587 588 /* Put packet to the user space buffer */ 589 static __inline__ ssize_t tun_put_user(struct tun_struct *tun, 590 struct sk_buff *skb, 591 struct iovec *iv, int len) 592 { 593 struct tun_pi pi = { 0, skb->protocol }; 594 ssize_t total = 0; 595 596 if (!(tun->flags & TUN_NO_PI)) { 597 if ((len -= sizeof(pi)) < 0) 598 return -EINVAL; 599 600 if (len < skb->len) { 601 /* Packet will be striped */ 602 pi.flags |= TUN_PKT_STRIP; 603 } 604 605 if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi))) 606 return -EFAULT; 607 total += sizeof(pi); 608 } 609 610 if (tun->flags & TUN_VNET_HDR) { 611 struct virtio_net_hdr gso = { 0 }; /* no info leak */ 612 if ((len -= sizeof(gso)) < 0) 613 return -EINVAL; 614 615 if (skb_is_gso(skb)) { 616 struct skb_shared_info *sinfo = skb_shinfo(skb); 617 618 /* This is a hint as to how much should be linear. */ 619 gso.hdr_len = skb_headlen(skb); 620 gso.gso_size = sinfo->gso_size; 621 if (sinfo->gso_type & SKB_GSO_TCPV4) 622 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 623 else if (sinfo->gso_type & SKB_GSO_TCPV6) 624 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 625 else 626 BUG(); 627 if (sinfo->gso_type & SKB_GSO_TCP_ECN) 628 gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN; 629 } else 630 gso.gso_type = VIRTIO_NET_HDR_GSO_NONE; 631 632 if (skb->ip_summed == CHECKSUM_PARTIAL) { 633 gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 634 gso.csum_start = skb->csum_start - skb_headroom(skb); 635 gso.csum_offset = skb->csum_offset; 636 } /* else everything is zero */ 637 638 if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso)))) 639 return -EFAULT; 640 total += sizeof(gso); 641 } 642 643 len = min_t(int, skb->len, len); 644 645 skb_copy_datagram_iovec(skb, 0, iv, len); 646 total += len; 647 648 tun->dev->stats.tx_packets++; 649 tun->dev->stats.tx_bytes += len; 650 651 return total; 652 } 653 654 static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, 655 unsigned long count, loff_t pos) 656 { 657 struct file *file = iocb->ki_filp; 658 struct tun_struct *tun = file->private_data; 659 DECLARE_WAITQUEUE(wait, current); 660 struct sk_buff *skb; 661 ssize_t len, ret = 0; 662 663 if (!tun) 664 return -EBADFD; 665 666 DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); 667 668 len = iov_length(iv, count); 669 if (len < 0) 670 return -EINVAL; 671 672 add_wait_queue(&tun->read_wait, &wait); 673 while (len) { 674 current->state = TASK_INTERRUPTIBLE; 675 676 /* Read frames from the queue */ 677 if (!(skb=skb_dequeue(&tun->readq))) { 678 if (file->f_flags & O_NONBLOCK) { 679 ret = -EAGAIN; 680 break; 681 } 682 if (signal_pending(current)) { 683 ret = -ERESTARTSYS; 684 break; 685 } 686 687 /* Nothing to read, let's sleep */ 688 schedule(); 689 continue; 690 } 691 netif_wake_queue(tun->dev); 692 693 ret = tun_put_user(tun, skb, (struct iovec *) iv, len); 694 kfree_skb(skb); 695 break; 696 } 697 698 current->state = TASK_RUNNING; 699 remove_wait_queue(&tun->read_wait, &wait); 700 701 return ret; 702 } 703 704 static void tun_setup(struct net_device *dev) 705 { 706 struct tun_struct *tun = netdev_priv(dev); 707 708 skb_queue_head_init(&tun->readq); 709 init_waitqueue_head(&tun->read_wait); 710 711 tun->owner = -1; 712 tun->group = -1; 713 714 dev->ethtool_ops = &tun_ethtool_ops; 715 dev->destructor = free_netdev; 716 dev->features |= NETIF_F_NETNS_LOCAL; 717 } 718 719 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) 720 { 721 struct tun_struct *tun; 722 struct net_device *dev; 723 int err; 724 725 dev = __dev_get_by_name(net, ifr->ifr_name); 726 if (dev) { 727 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) 728 tun = netdev_priv(dev); 729 else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) 730 tun = netdev_priv(dev); 731 else 732 return -EINVAL; 733 734 err = tun_attach(tun, file); 735 if (err < 0) 736 return err; 737 } 738 else { 739 char *name; 740 unsigned long flags = 0; 741 742 err = -EINVAL; 743 744 if (!capable(CAP_NET_ADMIN)) 745 return -EPERM; 746 747 /* Set dev type */ 748 if (ifr->ifr_flags & IFF_TUN) { 749 /* TUN device */ 750 flags |= TUN_TUN_DEV; 751 name = "tun%d"; 752 } else if (ifr->ifr_flags & IFF_TAP) { 753 /* TAP device */ 754 flags |= TUN_TAP_DEV; 755 name = "tap%d"; 756 } else 757 goto failed; 758 759 if (*ifr->ifr_name) 760 name = ifr->ifr_name; 761 762 dev = alloc_netdev(sizeof(struct tun_struct), name, 763 tun_setup); 764 if (!dev) 765 return -ENOMEM; 766 767 dev_net_set(dev, net); 768 769 tun = netdev_priv(dev); 770 tun->dev = dev; 771 tun->flags = flags; 772 tun->txflt.count = 0; 773 774 tun_net_init(dev); 775 776 if (strchr(dev->name, '%')) { 777 err = dev_alloc_name(dev, dev->name); 778 if (err < 0) 779 goto err_free_dev; 780 } 781 782 err = register_netdevice(tun->dev); 783 if (err < 0) 784 goto err_free_dev; 785 786 err = tun_attach(tun, file); 787 if (err < 0) 788 goto err_free_dev; 789 } 790 791 DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name); 792 793 if (ifr->ifr_flags & IFF_NO_PI) 794 tun->flags |= TUN_NO_PI; 795 else 796 tun->flags &= ~TUN_NO_PI; 797 798 if (ifr->ifr_flags & IFF_ONE_QUEUE) 799 tun->flags |= TUN_ONE_QUEUE; 800 else 801 tun->flags &= ~TUN_ONE_QUEUE; 802 803 if (ifr->ifr_flags & IFF_VNET_HDR) 804 tun->flags |= TUN_VNET_HDR; 805 else 806 tun->flags &= ~TUN_VNET_HDR; 807 808 /* Make sure persistent devices do not get stuck in 809 * xoff state. 810 */ 811 if (netif_running(tun->dev)) 812 netif_wake_queue(tun->dev); 813 814 strcpy(ifr->ifr_name, tun->dev->name); 815 return 0; 816 817 err_free_dev: 818 free_netdev(dev); 819 failed: 820 return err; 821 } 822 823 static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) 824 { 825 struct tun_struct *tun = file->private_data; 826 827 if (!tun) 828 return -EBADFD; 829 830 DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name); 831 832 strcpy(ifr->ifr_name, tun->dev->name); 833 834 ifr->ifr_flags = 0; 835 836 if (ifr->ifr_flags & TUN_TUN_DEV) 837 ifr->ifr_flags |= IFF_TUN; 838 else 839 ifr->ifr_flags |= IFF_TAP; 840 841 if (tun->flags & TUN_NO_PI) 842 ifr->ifr_flags |= IFF_NO_PI; 843 844 if (tun->flags & TUN_ONE_QUEUE) 845 ifr->ifr_flags |= IFF_ONE_QUEUE; 846 847 if (tun->flags & TUN_VNET_HDR) 848 ifr->ifr_flags |= IFF_VNET_HDR; 849 850 return 0; 851 } 852 853 /* This is like a cut-down ethtool ops, except done via tun fd so no 854 * privs required. */ 855 static int set_offload(struct net_device *dev, unsigned long arg) 856 { 857 unsigned int old_features, features; 858 859 old_features = dev->features; 860 /* Unset features, set them as we chew on the arg. */ 861 features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST 862 |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6)); 863 864 if (arg & TUN_F_CSUM) { 865 features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; 866 arg &= ~TUN_F_CSUM; 867 868 if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { 869 if (arg & TUN_F_TSO_ECN) { 870 features |= NETIF_F_TSO_ECN; 871 arg &= ~TUN_F_TSO_ECN; 872 } 873 if (arg & TUN_F_TSO4) 874 features |= NETIF_F_TSO; 875 if (arg & TUN_F_TSO6) 876 features |= NETIF_F_TSO6; 877 arg &= ~(TUN_F_TSO4|TUN_F_TSO6); 878 } 879 } 880 881 /* This gives the user a way to test for new features in future by 882 * trying to set them. */ 883 if (arg) 884 return -EINVAL; 885 886 dev->features = features; 887 if (old_features != dev->features) 888 netdev_features_change(dev); 889 890 return 0; 891 } 892 893 static int tun_chr_ioctl(struct inode *inode, struct file *file, 894 unsigned int cmd, unsigned long arg) 895 { 896 struct tun_struct *tun = file->private_data; 897 void __user* argp = (void __user*)arg; 898 struct ifreq ifr; 899 int ret; 900 901 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) 902 if (copy_from_user(&ifr, argp, sizeof ifr)) 903 return -EFAULT; 904 905 if (cmd == TUNSETIFF && !tun) { 906 int err; 907 908 ifr.ifr_name[IFNAMSIZ-1] = '\0'; 909 910 rtnl_lock(); 911 err = tun_set_iff(current->nsproxy->net_ns, file, &ifr); 912 rtnl_unlock(); 913 914 if (err) 915 return err; 916 917 if (copy_to_user(argp, &ifr, sizeof(ifr))) 918 return -EFAULT; 919 return 0; 920 } 921 922 if (cmd == TUNGETFEATURES) { 923 /* Currently this just means: "what IFF flags are valid?". 924 * This is needed because we never checked for invalid flags on 925 * TUNSETIFF. */ 926 return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | 927 IFF_VNET_HDR, 928 (unsigned int __user*)argp); 929 } 930 931 if (!tun) 932 return -EBADFD; 933 934 DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd); 935 936 switch (cmd) { 937 case TUNGETIFF: 938 ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr); 939 if (ret) 940 return ret; 941 942 if (copy_to_user(argp, &ifr, sizeof(ifr))) 943 return -EFAULT; 944 break; 945 946 case TUNSETNOCSUM: 947 /* Disable/Enable checksum */ 948 if (arg) 949 tun->flags |= TUN_NOCHECKSUM; 950 else 951 tun->flags &= ~TUN_NOCHECKSUM; 952 953 DBG(KERN_INFO "%s: checksum %s\n", 954 tun->dev->name, arg ? "disabled" : "enabled"); 955 break; 956 957 case TUNSETPERSIST: 958 /* Disable/Enable persist mode */ 959 if (arg) 960 tun->flags |= TUN_PERSIST; 961 else 962 tun->flags &= ~TUN_PERSIST; 963 964 DBG(KERN_INFO "%s: persist %s\n", 965 tun->dev->name, arg ? "enabled" : "disabled"); 966 break; 967 968 case TUNSETOWNER: 969 /* Set owner of the device */ 970 tun->owner = (uid_t) arg; 971 972 DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner); 973 break; 974 975 case TUNSETGROUP: 976 /* Set group of the device */ 977 tun->group= (gid_t) arg; 978 979 DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group); 980 break; 981 982 case TUNSETLINK: 983 /* Only allow setting the type when the interface is down */ 984 rtnl_lock(); 985 if (tun->dev->flags & IFF_UP) { 986 DBG(KERN_INFO "%s: Linktype set failed because interface is up\n", 987 tun->dev->name); 988 ret = -EBUSY; 989 } else { 990 tun->dev->type = (int) arg; 991 DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type); 992 ret = 0; 993 } 994 rtnl_unlock(); 995 return ret; 996 997 #ifdef TUN_DEBUG 998 case TUNSETDEBUG: 999 tun->debug = arg; 1000 break; 1001 #endif 1002 case TUNSETOFFLOAD: 1003 rtnl_lock(); 1004 ret = set_offload(tun->dev, arg); 1005 rtnl_unlock(); 1006 return ret; 1007 1008 case TUNSETTXFILTER: 1009 /* Can be set only for TAPs */ 1010 if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) 1011 return -EINVAL; 1012 rtnl_lock(); 1013 ret = update_filter(&tun->txflt, (void __user *)arg); 1014 rtnl_unlock(); 1015 return ret; 1016 1017 case SIOCGIFHWADDR: 1018 /* Get hw addres */ 1019 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); 1020 ifr.ifr_hwaddr.sa_family = tun->dev->type; 1021 if (copy_to_user(argp, &ifr, sizeof ifr)) 1022 return -EFAULT; 1023 return 0; 1024 1025 case SIOCSIFHWADDR: 1026 /* Set hw address */ 1027 DBG(KERN_DEBUG "%s: set hw address: %pM\n", 1028 tun->dev->name, ifr.ifr_hwaddr.sa_data); 1029 1030 rtnl_lock(); 1031 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); 1032 rtnl_unlock(); 1033 return ret; 1034 1035 default: 1036 return -EINVAL; 1037 }; 1038 1039 return 0; 1040 } 1041 1042 static int tun_chr_fasync(int fd, struct file *file, int on) 1043 { 1044 struct tun_struct *tun = file->private_data; 1045 int ret; 1046 1047 if (!tun) 1048 return -EBADFD; 1049 1050 DBG(KERN_INFO "%s: tun_chr_fasync %d\n", tun->dev->name, on); 1051 1052 lock_kernel(); 1053 if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0) 1054 goto out; 1055 1056 if (on) { 1057 ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); 1058 if (ret) 1059 goto out; 1060 tun->flags |= TUN_FASYNC; 1061 } else 1062 tun->flags &= ~TUN_FASYNC; 1063 ret = 0; 1064 out: 1065 unlock_kernel(); 1066 return ret; 1067 } 1068 1069 static int tun_chr_open(struct inode *inode, struct file * file) 1070 { 1071 cycle_kernel_lock(); 1072 DBG1(KERN_INFO "tunX: tun_chr_open\n"); 1073 file->private_data = NULL; 1074 return 0; 1075 } 1076 1077 static int tun_chr_close(struct inode *inode, struct file *file) 1078 { 1079 struct tun_struct *tun = file->private_data; 1080 1081 if (!tun) 1082 return 0; 1083 1084 DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); 1085 1086 rtnl_lock(); 1087 1088 /* Detach from net device */ 1089 file->private_data = NULL; 1090 tun->attached = 0; 1091 put_net(dev_net(tun->dev)); 1092 1093 /* Drop read queue */ 1094 skb_queue_purge(&tun->readq); 1095 1096 if (!(tun->flags & TUN_PERSIST)) 1097 unregister_netdevice(tun->dev); 1098 1099 rtnl_unlock(); 1100 1101 return 0; 1102 } 1103 1104 static const struct file_operations tun_fops = { 1105 .owner = THIS_MODULE, 1106 .llseek = no_llseek, 1107 .read = do_sync_read, 1108 .aio_read = tun_chr_aio_read, 1109 .write = do_sync_write, 1110 .aio_write = tun_chr_aio_write, 1111 .poll = tun_chr_poll, 1112 .ioctl = tun_chr_ioctl, 1113 .open = tun_chr_open, 1114 .release = tun_chr_close, 1115 .fasync = tun_chr_fasync 1116 }; 1117 1118 static struct miscdevice tun_miscdev = { 1119 .minor = TUN_MINOR, 1120 .name = "tun", 1121 .fops = &tun_fops, 1122 }; 1123 1124 /* ethtool interface */ 1125 1126 static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 1127 { 1128 cmd->supported = 0; 1129 cmd->advertising = 0; 1130 cmd->speed = SPEED_10; 1131 cmd->duplex = DUPLEX_FULL; 1132 cmd->port = PORT_TP; 1133 cmd->phy_address = 0; 1134 cmd->transceiver = XCVR_INTERNAL; 1135 cmd->autoneg = AUTONEG_DISABLE; 1136 cmd->maxtxpkt = 0; 1137 cmd->maxrxpkt = 0; 1138 return 0; 1139 } 1140 1141 static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 1142 { 1143 struct tun_struct *tun = netdev_priv(dev); 1144 1145 strcpy(info->driver, DRV_NAME); 1146 strcpy(info->version, DRV_VERSION); 1147 strcpy(info->fw_version, "N/A"); 1148 1149 switch (tun->flags & TUN_TYPE_MASK) { 1150 case TUN_TUN_DEV: 1151 strcpy(info->bus_info, "tun"); 1152 break; 1153 case TUN_TAP_DEV: 1154 strcpy(info->bus_info, "tap"); 1155 break; 1156 } 1157 } 1158 1159 static u32 tun_get_msglevel(struct net_device *dev) 1160 { 1161 #ifdef TUN_DEBUG 1162 struct tun_struct *tun = netdev_priv(dev); 1163 return tun->debug; 1164 #else 1165 return -EOPNOTSUPP; 1166 #endif 1167 } 1168 1169 static void tun_set_msglevel(struct net_device *dev, u32 value) 1170 { 1171 #ifdef TUN_DEBUG 1172 struct tun_struct *tun = netdev_priv(dev); 1173 tun->debug = value; 1174 #endif 1175 } 1176 1177 static u32 tun_get_link(struct net_device *dev) 1178 { 1179 struct tun_struct *tun = netdev_priv(dev); 1180 return tun->attached; 1181 } 1182 1183 static u32 tun_get_rx_csum(struct net_device *dev) 1184 { 1185 struct tun_struct *tun = netdev_priv(dev); 1186 return (tun->flags & TUN_NOCHECKSUM) == 0; 1187 } 1188 1189 static int tun_set_rx_csum(struct net_device *dev, u32 data) 1190 { 1191 struct tun_struct *tun = netdev_priv(dev); 1192 if (data) 1193 tun->flags &= ~TUN_NOCHECKSUM; 1194 else 1195 tun->flags |= TUN_NOCHECKSUM; 1196 return 0; 1197 } 1198 1199 static const struct ethtool_ops tun_ethtool_ops = { 1200 .get_settings = tun_get_settings, 1201 .get_drvinfo = tun_get_drvinfo, 1202 .get_msglevel = tun_get_msglevel, 1203 .set_msglevel = tun_set_msglevel, 1204 .get_link = tun_get_link, 1205 .get_rx_csum = tun_get_rx_csum, 1206 .set_rx_csum = tun_set_rx_csum 1207 }; 1208 1209 static int tun_init_net(struct net *net) 1210 { 1211 return 0; 1212 } 1213 1214 static void tun_exit_net(struct net *net) 1215 { 1216 struct net_device *dev, *next; 1217 1218 rtnl_lock(); 1219 for_each_netdev_safe(net, dev, next) { 1220 if (dev->ethtool_ops != &tun_ethtool_ops) 1221 continue; 1222 DBG(KERN_INFO "%s cleaned up\n", dev->name); 1223 unregister_netdevice(dev); 1224 } 1225 rtnl_unlock(); 1226 } 1227 1228 static struct pernet_operations tun_net_ops = { 1229 .init = tun_init_net, 1230 .exit = tun_exit_net, 1231 }; 1232 1233 static int __init tun_init(void) 1234 { 1235 int ret = 0; 1236 1237 printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); 1238 printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT); 1239 1240 ret = register_pernet_device(&tun_net_ops); 1241 if (ret) { 1242 printk(KERN_ERR "tun: Can't register pernet ops\n"); 1243 goto err_pernet; 1244 } 1245 1246 ret = misc_register(&tun_miscdev); 1247 if (ret) { 1248 printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR); 1249 goto err_misc; 1250 } 1251 return 0; 1252 1253 err_misc: 1254 unregister_pernet_device(&tun_net_ops); 1255 err_pernet: 1256 return ret; 1257 } 1258 1259 static void tun_cleanup(void) 1260 { 1261 misc_deregister(&tun_miscdev); 1262 unregister_pernet_device(&tun_net_ops); 1263 } 1264 1265 module_init(tun_init); 1266 module_exit(tun_cleanup); 1267 MODULE_DESCRIPTION(DRV_DESCRIPTION); 1268 MODULE_AUTHOR(DRV_COPYRIGHT); 1269 MODULE_LICENSE("GPL"); 1270 MODULE_ALIAS_MISCDEV(TUN_MINOR); 1271