1 /* 2 * TUN - Universal TUN/TAP device driver. 3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ 16 */ 17 18 /* 19 * Changes: 20 * 21 * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 22 * Add TUNSETLINK ioctl to set the link encapsulation 23 * 24 * Mark Smith <markzzzsmith@yahoo.com.au> 25 * Use eth_random_addr() for tap MAC address. 26 * 27 * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 28 * Fixes in packet dropping, queue length setting and queue wakeup. 29 * Increased default tx queue length. 30 * Added ethtool API. 31 * Minor cleanups 32 * 33 * Daniel Podlejski <underley@underley.eu.org> 34 * Modifications for 2.3.99-pre5 kernel. 35 */ 36 37 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 38 39 #define DRV_NAME "tun" 40 #define DRV_VERSION "1.6" 41 #define DRV_DESCRIPTION "Universal TUN/TAP device driver" 42 #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>" 43 44 #include <linux/module.h> 45 #include <linux/errno.h> 46 #include <linux/kernel.h> 47 #include <linux/major.h> 48 #include <linux/slab.h> 49 #include <linux/poll.h> 50 #include <linux/fcntl.h> 51 #include <linux/init.h> 52 #include <linux/skbuff.h> 53 #include <linux/netdevice.h> 54 #include <linux/etherdevice.h> 55 #include <linux/miscdevice.h> 56 #include <linux/ethtool.h> 57 #include <linux/rtnetlink.h> 58 #include <linux/compat.h> 59 #include <linux/if.h> 60 #include <linux/if_arp.h> 61 #include <linux/if_ether.h> 62 #include <linux/if_tun.h> 63 #include <linux/if_vlan.h> 64 #include <linux/crc32.h> 65 #include <linux/nsproxy.h> 66 #include <linux/virtio_net.h> 67 #include <linux/rcupdate.h> 68 #include <net/net_namespace.h> 69 #include <net/netns/generic.h> 70 #include <net/rtnetlink.h> 71 #include <net/sock.h> 72 #include <linux/seq_file.h> 73 #include <linux/uio.h> 74 75 #include <asm/uaccess.h> 76 77 /* Uncomment to enable debugging */ 78 /* #define TUN_DEBUG 1 */ 79 80 #ifdef TUN_DEBUG 81 static int debug; 82 83 #define tun_debug(level, tun, fmt, args...) \ 84 do { \ 85 if (tun->debug) \ 86 netdev_printk(level, tun->dev, fmt, ##args); \ 87 } while (0) 88 #define DBG1(level, fmt, args...) \ 89 do { \ 90 if (debug == 2) \ 91 printk(level fmt, ##args); \ 92 } while (0) 93 #else 94 #define tun_debug(level, tun, fmt, args...) \ 95 do { \ 96 if (0) \ 97 netdev_printk(level, tun->dev, fmt, ##args); \ 98 } while (0) 99 #define DBG1(level, fmt, args...) \ 100 do { \ 101 if (0) \ 102 printk(level fmt, ##args); \ 103 } while (0) 104 #endif 105 106 /* TUN device flags */ 107 108 /* IFF_ATTACH_QUEUE is never stored in device flags, 109 * overload it to mean fasync when stored there. 110 */ 111 #define TUN_FASYNC IFF_ATTACH_QUEUE 112 /* High bits in flags field are unused. */ 113 #define TUN_VNET_LE 0x80000000 114 #define TUN_VNET_BE 0x40000000 115 116 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ 117 IFF_MULTI_QUEUE) 118 #define GOODCOPY_LEN 128 119 120 #define FLT_EXACT_COUNT 8 121 struct tap_filter { 122 unsigned int count; /* Number of addrs. Zero means disabled */ 123 u32 mask[2]; /* Mask of the hashed addrs */ 124 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; 125 }; 126 127 /* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal 128 * to max number of VCPUs in guest. */ 129 #define MAX_TAP_QUEUES 256 130 #define MAX_TAP_FLOWS 4096 131 132 #define TUN_FLOW_EXPIRE (3 * HZ) 133 134 /* A tun_file connects an open character device to a tuntap netdevice. It 135 * also contains all socket related structures (except sock_fprog and tap_filter) 136 * to serve as one transmit queue for tuntap device. The sock_fprog and 137 * tap_filter were kept in tun_struct since they were used for filtering for the 138 * netdevice not for a specific queue (at least I didn't see the requirement for 139 * this). 140 * 141 * RCU usage: 142 * The tun_file and tun_struct are loosely coupled, the pointer from one to the 143 * other can only be read while rcu_read_lock or rtnl_lock is held. 144 */ 145 struct tun_file { 146 struct sock sk; 147 struct socket socket; 148 struct socket_wq wq; 149 struct tun_struct __rcu *tun; 150 struct fasync_struct *fasync; 151 /* only used for fasnyc */ 152 unsigned int flags; 153 union { 154 u16 queue_index; 155 unsigned int ifindex; 156 }; 157 struct list_head next; 158 struct tun_struct *detached; 159 }; 160 161 struct tun_flow_entry { 162 struct hlist_node hash_link; 163 struct rcu_head rcu; 164 struct tun_struct *tun; 165 166 u32 rxhash; 167 u32 rps_rxhash; 168 int queue_index; 169 unsigned long updated; 170 }; 171 172 #define TUN_NUM_FLOW_ENTRIES 1024 173 174 /* Since the socket were moved to tun_file, to preserve the behavior of persist 175 * device, socket filter, sndbuf and vnet header size were restore when the 176 * file were attached to a persist device. 177 */ 178 struct tun_struct { 179 struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; 180 unsigned int numqueues; 181 unsigned int flags; 182 kuid_t owner; 183 kgid_t group; 184 185 struct net_device *dev; 186 netdev_features_t set_features; 187 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ 188 NETIF_F_TSO6|NETIF_F_UFO) 189 190 int align; 191 int vnet_hdr_sz; 192 int sndbuf; 193 struct tap_filter txflt; 194 struct sock_fprog fprog; 195 /* protected by rtnl lock */ 196 bool filter_attached; 197 #ifdef TUN_DEBUG 198 int debug; 199 #endif 200 spinlock_t lock; 201 struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; 202 struct timer_list flow_gc_timer; 203 unsigned long ageing_time; 204 unsigned int numdisabled; 205 struct list_head disabled; 206 void *security; 207 u32 flow_count; 208 }; 209 210 #ifdef CONFIG_TUN_VNET_CROSS_LE 211 static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 212 { 213 return tun->flags & TUN_VNET_BE ? false : 214 virtio_legacy_is_little_endian(); 215 } 216 217 static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) 218 { 219 int be = !!(tun->flags & TUN_VNET_BE); 220 221 if (put_user(be, argp)) 222 return -EFAULT; 223 224 return 0; 225 } 226 227 static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) 228 { 229 int be; 230 231 if (get_user(be, argp)) 232 return -EFAULT; 233 234 if (be) 235 tun->flags |= TUN_VNET_BE; 236 else 237 tun->flags &= ~TUN_VNET_BE; 238 239 return 0; 240 } 241 #else 242 static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 243 { 244 return virtio_legacy_is_little_endian(); 245 } 246 247 static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) 248 { 249 return -EINVAL; 250 } 251 252 static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) 253 { 254 return -EINVAL; 255 } 256 #endif /* CONFIG_TUN_VNET_CROSS_LE */ 257 258 static inline bool tun_is_little_endian(struct tun_struct *tun) 259 { 260 return tun->flags & TUN_VNET_LE || 261 tun_legacy_is_little_endian(tun); 262 } 263 264 static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) 265 { 266 return __virtio16_to_cpu(tun_is_little_endian(tun), val); 267 } 268 269 static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) 270 { 271 return __cpu_to_virtio16(tun_is_little_endian(tun), val); 272 } 273 274 static inline u32 tun_hashfn(u32 rxhash) 275 { 276 return rxhash & 0x3ff; 277 } 278 279 static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) 280 { 281 struct tun_flow_entry *e; 282 283 hlist_for_each_entry_rcu(e, head, hash_link) { 284 if (e->rxhash == rxhash) 285 return e; 286 } 287 return NULL; 288 } 289 290 static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, 291 struct hlist_head *head, 292 u32 rxhash, u16 queue_index) 293 { 294 struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); 295 296 if (e) { 297 tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", 298 rxhash, queue_index); 299 e->updated = jiffies; 300 e->rxhash = rxhash; 301 e->rps_rxhash = 0; 302 e->queue_index = queue_index; 303 e->tun = tun; 304 hlist_add_head_rcu(&e->hash_link, head); 305 ++tun->flow_count; 306 } 307 return e; 308 } 309 310 static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) 311 { 312 tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", 313 e->rxhash, e->queue_index); 314 hlist_del_rcu(&e->hash_link); 315 kfree_rcu(e, rcu); 316 --tun->flow_count; 317 } 318 319 static void tun_flow_flush(struct tun_struct *tun) 320 { 321 int i; 322 323 spin_lock_bh(&tun->lock); 324 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 325 struct tun_flow_entry *e; 326 struct hlist_node *n; 327 328 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) 329 tun_flow_delete(tun, e); 330 } 331 spin_unlock_bh(&tun->lock); 332 } 333 334 static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) 335 { 336 int i; 337 338 spin_lock_bh(&tun->lock); 339 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 340 struct tun_flow_entry *e; 341 struct hlist_node *n; 342 343 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { 344 if (e->queue_index == queue_index) 345 tun_flow_delete(tun, e); 346 } 347 } 348 spin_unlock_bh(&tun->lock); 349 } 350 351 static void tun_flow_cleanup(unsigned long data) 352 { 353 struct tun_struct *tun = (struct tun_struct *)data; 354 unsigned long delay = tun->ageing_time; 355 unsigned long next_timer = jiffies + delay; 356 unsigned long count = 0; 357 int i; 358 359 tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); 360 361 spin_lock_bh(&tun->lock); 362 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { 363 struct tun_flow_entry *e; 364 struct hlist_node *n; 365 366 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { 367 unsigned long this_timer; 368 count++; 369 this_timer = e->updated + delay; 370 if (time_before_eq(this_timer, jiffies)) 371 tun_flow_delete(tun, e); 372 else if (time_before(this_timer, next_timer)) 373 next_timer = this_timer; 374 } 375 } 376 377 if (count) 378 mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); 379 spin_unlock_bh(&tun->lock); 380 } 381 382 static void tun_flow_update(struct tun_struct *tun, u32 rxhash, 383 struct tun_file *tfile) 384 { 385 struct hlist_head *head; 386 struct tun_flow_entry *e; 387 unsigned long delay = tun->ageing_time; 388 u16 queue_index = tfile->queue_index; 389 390 if (!rxhash) 391 return; 392 else 393 head = &tun->flows[tun_hashfn(rxhash)]; 394 395 rcu_read_lock(); 396 397 /* We may get a very small possibility of OOO during switching, not 398 * worth to optimize.*/ 399 if (tun->numqueues == 1 || tfile->detached) 400 goto unlock; 401 402 e = tun_flow_find(head, rxhash); 403 if (likely(e)) { 404 /* TODO: keep queueing to old queue until it's empty? */ 405 e->queue_index = queue_index; 406 e->updated = jiffies; 407 sock_rps_record_flow_hash(e->rps_rxhash); 408 } else { 409 spin_lock_bh(&tun->lock); 410 if (!tun_flow_find(head, rxhash) && 411 tun->flow_count < MAX_TAP_FLOWS) 412 tun_flow_create(tun, head, rxhash, queue_index); 413 414 if (!timer_pending(&tun->flow_gc_timer)) 415 mod_timer(&tun->flow_gc_timer, 416 round_jiffies_up(jiffies + delay)); 417 spin_unlock_bh(&tun->lock); 418 } 419 420 unlock: 421 rcu_read_unlock(); 422 } 423 424 /** 425 * Save the hash received in the stack receive path and update the 426 * flow_hash table accordingly. 427 */ 428 static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) 429 { 430 if (unlikely(e->rps_rxhash != hash)) 431 e->rps_rxhash = hash; 432 } 433 434 /* We try to identify a flow through its rxhash first. The reason that 435 * we do not check rxq no. is because some cards(e.g 82599), chooses 436 * the rxq based on the txq where the last packet of the flow comes. As 437 * the userspace application move between processors, we may get a 438 * different rxq no. here. If we could not get rxhash, then we would 439 * hope the rxq no. may help here. 440 */ 441 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, 442 void *accel_priv, select_queue_fallback_t fallback) 443 { 444 struct tun_struct *tun = netdev_priv(dev); 445 struct tun_flow_entry *e; 446 u32 txq = 0; 447 u32 numqueues = 0; 448 449 rcu_read_lock(); 450 numqueues = ACCESS_ONCE(tun->numqueues); 451 452 txq = skb_get_hash(skb); 453 if (txq) { 454 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); 455 if (e) { 456 tun_flow_save_rps_rxhash(e, txq); 457 txq = e->queue_index; 458 } else 459 /* use multiply and shift instead of expensive divide */ 460 txq = ((u64)txq * numqueues) >> 32; 461 } else if (likely(skb_rx_queue_recorded(skb))) { 462 txq = skb_get_rx_queue(skb); 463 while (unlikely(txq >= numqueues)) 464 txq -= numqueues; 465 } 466 467 rcu_read_unlock(); 468 return txq; 469 } 470 471 static inline bool tun_not_capable(struct tun_struct *tun) 472 { 473 const struct cred *cred = current_cred(); 474 struct net *net = dev_net(tun->dev); 475 476 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || 477 (gid_valid(tun->group) && !in_egroup_p(tun->group))) && 478 !ns_capable(net->user_ns, CAP_NET_ADMIN); 479 } 480 481 static void tun_set_real_num_queues(struct tun_struct *tun) 482 { 483 netif_set_real_num_tx_queues(tun->dev, tun->numqueues); 484 netif_set_real_num_rx_queues(tun->dev, tun->numqueues); 485 } 486 487 static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) 488 { 489 tfile->detached = tun; 490 list_add_tail(&tfile->next, &tun->disabled); 491 ++tun->numdisabled; 492 } 493 494 static struct tun_struct *tun_enable_queue(struct tun_file *tfile) 495 { 496 struct tun_struct *tun = tfile->detached; 497 498 tfile->detached = NULL; 499 list_del_init(&tfile->next); 500 --tun->numdisabled; 501 return tun; 502 } 503 504 static void tun_queue_purge(struct tun_file *tfile) 505 { 506 skb_queue_purge(&tfile->sk.sk_receive_queue); 507 skb_queue_purge(&tfile->sk.sk_error_queue); 508 } 509 510 static void __tun_detach(struct tun_file *tfile, bool clean) 511 { 512 struct tun_file *ntfile; 513 struct tun_struct *tun; 514 515 tun = rtnl_dereference(tfile->tun); 516 517 if (tun && !tfile->detached) { 518 u16 index = tfile->queue_index; 519 BUG_ON(index >= tun->numqueues); 520 521 rcu_assign_pointer(tun->tfiles[index], 522 tun->tfiles[tun->numqueues - 1]); 523 ntfile = rtnl_dereference(tun->tfiles[index]); 524 ntfile->queue_index = index; 525 526 --tun->numqueues; 527 if (clean) { 528 RCU_INIT_POINTER(tfile->tun, NULL); 529 sock_put(&tfile->sk); 530 } else 531 tun_disable_queue(tun, tfile); 532 533 synchronize_net(); 534 tun_flow_delete_by_queue(tun, tun->numqueues + 1); 535 /* Drop read queue */ 536 tun_queue_purge(tfile); 537 tun_set_real_num_queues(tun); 538 } else if (tfile->detached && clean) { 539 tun = tun_enable_queue(tfile); 540 sock_put(&tfile->sk); 541 } 542 543 if (clean) { 544 if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { 545 netif_carrier_off(tun->dev); 546 547 if (!(tun->flags & IFF_PERSIST) && 548 tun->dev->reg_state == NETREG_REGISTERED) 549 unregister_netdevice(tun->dev); 550 } 551 sock_put(&tfile->sk); 552 } 553 } 554 555 static void tun_detach(struct tun_file *tfile, bool clean) 556 { 557 rtnl_lock(); 558 __tun_detach(tfile, clean); 559 rtnl_unlock(); 560 } 561 562 static void tun_detach_all(struct net_device *dev) 563 { 564 struct tun_struct *tun = netdev_priv(dev); 565 struct tun_file *tfile, *tmp; 566 int i, n = tun->numqueues; 567 568 for (i = 0; i < n; i++) { 569 tfile = rtnl_dereference(tun->tfiles[i]); 570 BUG_ON(!tfile); 571 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 572 RCU_INIT_POINTER(tfile->tun, NULL); 573 --tun->numqueues; 574 } 575 list_for_each_entry(tfile, &tun->disabled, next) { 576 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 577 RCU_INIT_POINTER(tfile->tun, NULL); 578 } 579 BUG_ON(tun->numqueues != 0); 580 581 synchronize_net(); 582 for (i = 0; i < n; i++) { 583 tfile = rtnl_dereference(tun->tfiles[i]); 584 /* Drop read queue */ 585 tun_queue_purge(tfile); 586 sock_put(&tfile->sk); 587 } 588 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { 589 tun_enable_queue(tfile); 590 tun_queue_purge(tfile); 591 sock_put(&tfile->sk); 592 } 593 BUG_ON(tun->numdisabled != 0); 594 595 if (tun->flags & IFF_PERSIST) 596 module_put(THIS_MODULE); 597 } 598 599 static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) 600 { 601 struct tun_file *tfile = file->private_data; 602 int err; 603 604 err = security_tun_dev_attach(tfile->socket.sk, tun->security); 605 if (err < 0) 606 goto out; 607 608 err = -EINVAL; 609 if (rtnl_dereference(tfile->tun) && !tfile->detached) 610 goto out; 611 612 err = -EBUSY; 613 if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1) 614 goto out; 615 616 err = -E2BIG; 617 if (!tfile->detached && 618 tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) 619 goto out; 620 621 err = 0; 622 623 /* Re-attach the filter to persist device */ 624 if (!skip_filter && (tun->filter_attached == true)) { 625 err = sk_attach_filter(&tun->fprog, tfile->socket.sk); 626 if (!err) 627 goto out; 628 } 629 tfile->queue_index = tun->numqueues; 630 rcu_assign_pointer(tfile->tun, tun); 631 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); 632 tun->numqueues++; 633 634 if (tfile->detached) 635 tun_enable_queue(tfile); 636 else 637 sock_hold(&tfile->sk); 638 639 tun_set_real_num_queues(tun); 640 641 /* device is allowed to go away first, so no need to hold extra 642 * refcnt. 643 */ 644 645 out: 646 return err; 647 } 648 649 static struct tun_struct *__tun_get(struct tun_file *tfile) 650 { 651 struct tun_struct *tun; 652 653 rcu_read_lock(); 654 tun = rcu_dereference(tfile->tun); 655 if (tun) 656 dev_hold(tun->dev); 657 rcu_read_unlock(); 658 659 return tun; 660 } 661 662 static struct tun_struct *tun_get(struct file *file) 663 { 664 return __tun_get(file->private_data); 665 } 666 667 static void tun_put(struct tun_struct *tun) 668 { 669 dev_put(tun->dev); 670 } 671 672 /* TAP filtering */ 673 static void addr_hash_set(u32 *mask, const u8 *addr) 674 { 675 int n = ether_crc(ETH_ALEN, addr) >> 26; 676 mask[n >> 5] |= (1 << (n & 31)); 677 } 678 679 static unsigned int addr_hash_test(const u32 *mask, const u8 *addr) 680 { 681 int n = ether_crc(ETH_ALEN, addr) >> 26; 682 return mask[n >> 5] & (1 << (n & 31)); 683 } 684 685 static int update_filter(struct tap_filter *filter, void __user *arg) 686 { 687 struct { u8 u[ETH_ALEN]; } *addr; 688 struct tun_filter uf; 689 int err, alen, n, nexact; 690 691 if (copy_from_user(&uf, arg, sizeof(uf))) 692 return -EFAULT; 693 694 if (!uf.count) { 695 /* Disabled */ 696 filter->count = 0; 697 return 0; 698 } 699 700 alen = ETH_ALEN * uf.count; 701 addr = kmalloc(alen, GFP_KERNEL); 702 if (!addr) 703 return -ENOMEM; 704 705 if (copy_from_user(addr, arg + sizeof(uf), alen)) { 706 err = -EFAULT; 707 goto done; 708 } 709 710 /* The filter is updated without holding any locks. Which is 711 * perfectly safe. We disable it first and in the worst 712 * case we'll accept a few undesired packets. */ 713 filter->count = 0; 714 wmb(); 715 716 /* Use first set of addresses as an exact filter */ 717 for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++) 718 memcpy(filter->addr[n], addr[n].u, ETH_ALEN); 719 720 nexact = n; 721 722 /* Remaining multicast addresses are hashed, 723 * unicast will leave the filter disabled. */ 724 memset(filter->mask, 0, sizeof(filter->mask)); 725 for (; n < uf.count; n++) { 726 if (!is_multicast_ether_addr(addr[n].u)) { 727 err = 0; /* no filter */ 728 goto done; 729 } 730 addr_hash_set(filter->mask, addr[n].u); 731 } 732 733 /* For ALLMULTI just set the mask to all ones. 734 * This overrides the mask populated above. */ 735 if ((uf.flags & TUN_FLT_ALLMULTI)) 736 memset(filter->mask, ~0, sizeof(filter->mask)); 737 738 /* Now enable the filter */ 739 wmb(); 740 filter->count = nexact; 741 742 /* Return the number of exact filters */ 743 err = nexact; 744 745 done: 746 kfree(addr); 747 return err; 748 } 749 750 /* Returns: 0 - drop, !=0 - accept */ 751 static int run_filter(struct tap_filter *filter, const struct sk_buff *skb) 752 { 753 /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect 754 * at this point. */ 755 struct ethhdr *eh = (struct ethhdr *) skb->data; 756 int i; 757 758 /* Exact match */ 759 for (i = 0; i < filter->count; i++) 760 if (ether_addr_equal(eh->h_dest, filter->addr[i])) 761 return 1; 762 763 /* Inexact match (multicast only) */ 764 if (is_multicast_ether_addr(eh->h_dest)) 765 return addr_hash_test(filter->mask, eh->h_dest); 766 767 return 0; 768 } 769 770 /* 771 * Checks whether the packet is accepted or not. 772 * Returns: 0 - drop, !=0 - accept 773 */ 774 static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) 775 { 776 if (!filter->count) 777 return 1; 778 779 return run_filter(filter, skb); 780 } 781 782 /* Network device part of the driver */ 783 784 static const struct ethtool_ops tun_ethtool_ops; 785 786 /* Net device detach from fd. */ 787 static void tun_net_uninit(struct net_device *dev) 788 { 789 tun_detach_all(dev); 790 } 791 792 /* Net device open. */ 793 static int tun_net_open(struct net_device *dev) 794 { 795 netif_tx_start_all_queues(dev); 796 return 0; 797 } 798 799 /* Net device close. */ 800 static int tun_net_close(struct net_device *dev) 801 { 802 netif_tx_stop_all_queues(dev); 803 return 0; 804 } 805 806 /* Net device start xmit */ 807 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) 808 { 809 struct tun_struct *tun = netdev_priv(dev); 810 int txq = skb->queue_mapping; 811 struct tun_file *tfile; 812 u32 numqueues = 0; 813 814 rcu_read_lock(); 815 tfile = rcu_dereference(tun->tfiles[txq]); 816 numqueues = ACCESS_ONCE(tun->numqueues); 817 818 /* Drop packet if interface is not attached */ 819 if (txq >= numqueues) 820 goto drop; 821 822 if (numqueues == 1) { 823 /* Select queue was not called for the skbuff, so we extract the 824 * RPS hash and save it into the flow_table here. 825 */ 826 __u32 rxhash; 827 828 rxhash = skb_get_hash(skb); 829 if (rxhash) { 830 struct tun_flow_entry *e; 831 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], 832 rxhash); 833 if (e) 834 tun_flow_save_rps_rxhash(e, rxhash); 835 } 836 } 837 838 tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); 839 840 BUG_ON(!tfile); 841 842 /* Drop if the filter does not like it. 843 * This is a noop if the filter is disabled. 844 * Filter can be enabled only for the TAP devices. */ 845 if (!check_filter(&tun->txflt, skb)) 846 goto drop; 847 848 if (tfile->socket.sk->sk_filter && 849 sk_filter(tfile->socket.sk, skb)) 850 goto drop; 851 852 /* Limit the number of packets queued by dividing txq length with the 853 * number of queues. 854 */ 855 if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues 856 >= dev->tx_queue_len) 857 goto drop; 858 859 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) 860 goto drop; 861 862 if (skb->sk && sk_fullsock(skb->sk)) { 863 sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags); 864 sw_tx_timestamp(skb); 865 } 866 867 /* Orphan the skb - required as we might hang on to it 868 * for indefinite time. 869 */ 870 skb_orphan(skb); 871 872 nf_reset(skb); 873 874 /* Enqueue packet */ 875 skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); 876 877 /* Notify and wake up reader process */ 878 if (tfile->flags & TUN_FASYNC) 879 kill_fasync(&tfile->fasync, SIGIO, POLL_IN); 880 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 881 882 rcu_read_unlock(); 883 return NETDEV_TX_OK; 884 885 drop: 886 dev->stats.tx_dropped++; 887 skb_tx_error(skb); 888 kfree_skb(skb); 889 rcu_read_unlock(); 890 return NET_XMIT_DROP; 891 } 892 893 static void tun_net_mclist(struct net_device *dev) 894 { 895 /* 896 * This callback is supposed to deal with mc filter in 897 * _rx_ path and has nothing to do with the _tx_ path. 898 * In rx path we always accept everything userspace gives us. 899 */ 900 } 901 902 #define MIN_MTU 68 903 #define MAX_MTU 65535 904 905 static int 906 tun_net_change_mtu(struct net_device *dev, int new_mtu) 907 { 908 if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU) 909 return -EINVAL; 910 dev->mtu = new_mtu; 911 return 0; 912 } 913 914 static netdev_features_t tun_net_fix_features(struct net_device *dev, 915 netdev_features_t features) 916 { 917 struct tun_struct *tun = netdev_priv(dev); 918 919 return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); 920 } 921 #ifdef CONFIG_NET_POLL_CONTROLLER 922 static void tun_poll_controller(struct net_device *dev) 923 { 924 /* 925 * Tun only receives frames when: 926 * 1) the char device endpoint gets data from user space 927 * 2) the tun socket gets a sendmsg call from user space 928 * Since both of those are synchronous operations, we are guaranteed 929 * never to have pending data when we poll for it 930 * so there is nothing to do here but return. 931 * We need this though so netpoll recognizes us as an interface that 932 * supports polling, which enables bridge devices in virt setups to 933 * still use netconsole 934 */ 935 return; 936 } 937 #endif 938 939 static void tun_set_headroom(struct net_device *dev, int new_hr) 940 { 941 struct tun_struct *tun = netdev_priv(dev); 942 943 if (new_hr < NET_SKB_PAD) 944 new_hr = NET_SKB_PAD; 945 946 tun->align = new_hr; 947 } 948 949 static const struct net_device_ops tun_netdev_ops = { 950 .ndo_uninit = tun_net_uninit, 951 .ndo_open = tun_net_open, 952 .ndo_stop = tun_net_close, 953 .ndo_start_xmit = tun_net_xmit, 954 .ndo_change_mtu = tun_net_change_mtu, 955 .ndo_fix_features = tun_net_fix_features, 956 .ndo_select_queue = tun_select_queue, 957 #ifdef CONFIG_NET_POLL_CONTROLLER 958 .ndo_poll_controller = tun_poll_controller, 959 #endif 960 .ndo_set_rx_headroom = tun_set_headroom, 961 }; 962 963 static const struct net_device_ops tap_netdev_ops = { 964 .ndo_uninit = tun_net_uninit, 965 .ndo_open = tun_net_open, 966 .ndo_stop = tun_net_close, 967 .ndo_start_xmit = tun_net_xmit, 968 .ndo_change_mtu = tun_net_change_mtu, 969 .ndo_fix_features = tun_net_fix_features, 970 .ndo_set_rx_mode = tun_net_mclist, 971 .ndo_set_mac_address = eth_mac_addr, 972 .ndo_validate_addr = eth_validate_addr, 973 .ndo_select_queue = tun_select_queue, 974 #ifdef CONFIG_NET_POLL_CONTROLLER 975 .ndo_poll_controller = tun_poll_controller, 976 #endif 977 .ndo_features_check = passthru_features_check, 978 .ndo_set_rx_headroom = tun_set_headroom, 979 }; 980 981 static void tun_flow_init(struct tun_struct *tun) 982 { 983 int i; 984 985 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) 986 INIT_HLIST_HEAD(&tun->flows[i]); 987 988 tun->ageing_time = TUN_FLOW_EXPIRE; 989 setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); 990 mod_timer(&tun->flow_gc_timer, 991 round_jiffies_up(jiffies + tun->ageing_time)); 992 } 993 994 static void tun_flow_uninit(struct tun_struct *tun) 995 { 996 del_timer_sync(&tun->flow_gc_timer); 997 tun_flow_flush(tun); 998 } 999 1000 /* Initialize net device. */ 1001 static void tun_net_init(struct net_device *dev) 1002 { 1003 struct tun_struct *tun = netdev_priv(dev); 1004 1005 switch (tun->flags & TUN_TYPE_MASK) { 1006 case IFF_TUN: 1007 dev->netdev_ops = &tun_netdev_ops; 1008 1009 /* Point-to-Point TUN Device */ 1010 dev->hard_header_len = 0; 1011 dev->addr_len = 0; 1012 dev->mtu = 1500; 1013 1014 /* Zero header length */ 1015 dev->type = ARPHRD_NONE; 1016 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; 1017 dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ 1018 break; 1019 1020 case IFF_TAP: 1021 dev->netdev_ops = &tap_netdev_ops; 1022 /* Ethernet TAP Device */ 1023 ether_setup(dev); 1024 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1025 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1026 1027 eth_hw_addr_random(dev); 1028 1029 dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ 1030 break; 1031 } 1032 } 1033 1034 /* Character device part */ 1035 1036 /* Poll */ 1037 static unsigned int tun_chr_poll(struct file *file, poll_table *wait) 1038 { 1039 struct tun_file *tfile = file->private_data; 1040 struct tun_struct *tun = __tun_get(tfile); 1041 struct sock *sk; 1042 unsigned int mask = 0; 1043 1044 if (!tun) 1045 return POLLERR; 1046 1047 sk = tfile->socket.sk; 1048 1049 tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); 1050 1051 poll_wait(file, sk_sleep(sk), wait); 1052 1053 if (!skb_queue_empty(&sk->sk_receive_queue)) 1054 mask |= POLLIN | POLLRDNORM; 1055 1056 if (sock_writeable(sk) || 1057 (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && 1058 sock_writeable(sk))) 1059 mask |= POLLOUT | POLLWRNORM; 1060 1061 if (tun->dev->reg_state != NETREG_REGISTERED) 1062 mask = POLLERR; 1063 1064 tun_put(tun); 1065 return mask; 1066 } 1067 1068 /* prepad is the amount to reserve at front. len is length after that. 1069 * linear is a hint as to how much to copy (usually headers). */ 1070 static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, 1071 size_t prepad, size_t len, 1072 size_t linear, int noblock) 1073 { 1074 struct sock *sk = tfile->socket.sk; 1075 struct sk_buff *skb; 1076 int err; 1077 1078 /* Under a page? Don't bother with paged skb. */ 1079 if (prepad + len < PAGE_SIZE || !linear) 1080 linear = len; 1081 1082 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 1083 &err, 0); 1084 if (!skb) 1085 return ERR_PTR(err); 1086 1087 skb_reserve(skb, prepad); 1088 skb_put(skb, linear); 1089 skb->data_len = len - linear; 1090 skb->len += len - linear; 1091 1092 return skb; 1093 } 1094 1095 /* Get packet from user space buffer */ 1096 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, 1097 void *msg_control, struct iov_iter *from, 1098 int noblock) 1099 { 1100 struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; 1101 struct sk_buff *skb; 1102 size_t total_len = iov_iter_count(from); 1103 size_t len = total_len, align = tun->align, linear; 1104 struct virtio_net_hdr gso = { 0 }; 1105 int good_linear; 1106 int copylen; 1107 bool zerocopy = false; 1108 int err; 1109 u32 rxhash; 1110 ssize_t n; 1111 1112 if (!(tun->dev->flags & IFF_UP)) 1113 return -EIO; 1114 1115 if (!(tun->flags & IFF_NO_PI)) { 1116 if (len < sizeof(pi)) 1117 return -EINVAL; 1118 len -= sizeof(pi); 1119 1120 n = copy_from_iter(&pi, sizeof(pi), from); 1121 if (n != sizeof(pi)) 1122 return -EFAULT; 1123 } 1124 1125 if (tun->flags & IFF_VNET_HDR) { 1126 if (len < tun->vnet_hdr_sz) 1127 return -EINVAL; 1128 len -= tun->vnet_hdr_sz; 1129 1130 n = copy_from_iter(&gso, sizeof(gso), from); 1131 if (n != sizeof(gso)) 1132 return -EFAULT; 1133 1134 if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 1135 tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) 1136 gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); 1137 1138 if (tun16_to_cpu(tun, gso.hdr_len) > len) 1139 return -EINVAL; 1140 iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso)); 1141 } 1142 1143 if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { 1144 align += NET_IP_ALIGN; 1145 if (unlikely(len < ETH_HLEN || 1146 (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) 1147 return -EINVAL; 1148 } 1149 1150 good_linear = SKB_MAX_HEAD(align); 1151 1152 if (msg_control) { 1153 struct iov_iter i = *from; 1154 1155 /* There are 256 bytes to be copied in skb, so there is 1156 * enough room for skb expand head in case it is used. 1157 * The rest of the buffer is mapped from userspace. 1158 */ 1159 copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; 1160 if (copylen > good_linear) 1161 copylen = good_linear; 1162 linear = copylen; 1163 iov_iter_advance(&i, copylen); 1164 if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) 1165 zerocopy = true; 1166 } 1167 1168 if (!zerocopy) { 1169 copylen = len; 1170 if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) 1171 linear = good_linear; 1172 else 1173 linear = tun16_to_cpu(tun, gso.hdr_len); 1174 } 1175 1176 skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); 1177 if (IS_ERR(skb)) { 1178 if (PTR_ERR(skb) != -EAGAIN) 1179 tun->dev->stats.rx_dropped++; 1180 return PTR_ERR(skb); 1181 } 1182 1183 if (zerocopy) 1184 err = zerocopy_sg_from_iter(skb, from); 1185 else { 1186 err = skb_copy_datagram_from_iter(skb, 0, from, len); 1187 if (!err && msg_control) { 1188 struct ubuf_info *uarg = msg_control; 1189 uarg->callback(uarg, false); 1190 } 1191 } 1192 1193 if (err) { 1194 tun->dev->stats.rx_dropped++; 1195 kfree_skb(skb); 1196 return -EFAULT; 1197 } 1198 1199 if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 1200 if (!skb_partial_csum_set(skb, tun16_to_cpu(tun, gso.csum_start), 1201 tun16_to_cpu(tun, gso.csum_offset))) { 1202 tun->dev->stats.rx_frame_errors++; 1203 kfree_skb(skb); 1204 return -EINVAL; 1205 } 1206 } 1207 1208 switch (tun->flags & TUN_TYPE_MASK) { 1209 case IFF_TUN: 1210 if (tun->flags & IFF_NO_PI) { 1211 switch (skb->data[0] & 0xf0) { 1212 case 0x40: 1213 pi.proto = htons(ETH_P_IP); 1214 break; 1215 case 0x60: 1216 pi.proto = htons(ETH_P_IPV6); 1217 break; 1218 default: 1219 tun->dev->stats.rx_dropped++; 1220 kfree_skb(skb); 1221 return -EINVAL; 1222 } 1223 } 1224 1225 skb_reset_mac_header(skb); 1226 skb->protocol = pi.proto; 1227 skb->dev = tun->dev; 1228 break; 1229 case IFF_TAP: 1230 skb->protocol = eth_type_trans(skb, tun->dev); 1231 break; 1232 } 1233 1234 if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { 1235 pr_debug("GSO!\n"); 1236 switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 1237 case VIRTIO_NET_HDR_GSO_TCPV4: 1238 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 1239 break; 1240 case VIRTIO_NET_HDR_GSO_TCPV6: 1241 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; 1242 break; 1243 case VIRTIO_NET_HDR_GSO_UDP: 1244 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1245 break; 1246 default: 1247 tun->dev->stats.rx_frame_errors++; 1248 kfree_skb(skb); 1249 return -EINVAL; 1250 } 1251 1252 if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) 1253 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; 1254 1255 skb_shinfo(skb)->gso_size = tun16_to_cpu(tun, gso.gso_size); 1256 if (skb_shinfo(skb)->gso_size == 0) { 1257 tun->dev->stats.rx_frame_errors++; 1258 kfree_skb(skb); 1259 return -EINVAL; 1260 } 1261 1262 /* Header must be checked, and gso_segs computed. */ 1263 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 1264 skb_shinfo(skb)->gso_segs = 0; 1265 } 1266 1267 /* copy skb_ubuf_info for callback when skb has no error */ 1268 if (zerocopy) { 1269 skb_shinfo(skb)->destructor_arg = msg_control; 1270 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1271 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; 1272 } 1273 1274 skb_reset_network_header(skb); 1275 skb_probe_transport_header(skb, 0); 1276 1277 rxhash = skb_get_hash(skb); 1278 netif_rx_ni(skb); 1279 1280 tun->dev->stats.rx_packets++; 1281 tun->dev->stats.rx_bytes += len; 1282 1283 tun_flow_update(tun, rxhash, tfile); 1284 return total_len; 1285 } 1286 1287 static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) 1288 { 1289 struct file *file = iocb->ki_filp; 1290 struct tun_struct *tun = tun_get(file); 1291 struct tun_file *tfile = file->private_data; 1292 ssize_t result; 1293 1294 if (!tun) 1295 return -EBADFD; 1296 1297 result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK); 1298 1299 tun_put(tun); 1300 return result; 1301 } 1302 1303 /* Put packet to the user space buffer */ 1304 static ssize_t tun_put_user(struct tun_struct *tun, 1305 struct tun_file *tfile, 1306 struct sk_buff *skb, 1307 struct iov_iter *iter) 1308 { 1309 struct tun_pi pi = { 0, skb->protocol }; 1310 ssize_t total; 1311 int vlan_offset = 0; 1312 int vlan_hlen = 0; 1313 int vnet_hdr_sz = 0; 1314 1315 if (skb_vlan_tag_present(skb)) 1316 vlan_hlen = VLAN_HLEN; 1317 1318 if (tun->flags & IFF_VNET_HDR) 1319 vnet_hdr_sz = tun->vnet_hdr_sz; 1320 1321 total = skb->len + vlan_hlen + vnet_hdr_sz; 1322 1323 if (!(tun->flags & IFF_NO_PI)) { 1324 if (iov_iter_count(iter) < sizeof(pi)) 1325 return -EINVAL; 1326 1327 total += sizeof(pi); 1328 if (iov_iter_count(iter) < total) { 1329 /* Packet will be striped */ 1330 pi.flags |= TUN_PKT_STRIP; 1331 } 1332 1333 if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi)) 1334 return -EFAULT; 1335 } 1336 1337 if (vnet_hdr_sz) { 1338 struct virtio_net_hdr gso = { 0 }; /* no info leak */ 1339 if (iov_iter_count(iter) < vnet_hdr_sz) 1340 return -EINVAL; 1341 1342 if (skb_is_gso(skb)) { 1343 struct skb_shared_info *sinfo = skb_shinfo(skb); 1344 1345 /* This is a hint as to how much should be linear. */ 1346 gso.hdr_len = cpu_to_tun16(tun, skb_headlen(skb)); 1347 gso.gso_size = cpu_to_tun16(tun, sinfo->gso_size); 1348 if (sinfo->gso_type & SKB_GSO_TCPV4) 1349 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 1350 else if (sinfo->gso_type & SKB_GSO_TCPV6) 1351 gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 1352 else if (sinfo->gso_type & SKB_GSO_UDP) 1353 gso.gso_type = VIRTIO_NET_HDR_GSO_UDP; 1354 else { 1355 pr_err("unexpected GSO type: " 1356 "0x%x, gso_size %d, hdr_len %d\n", 1357 sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), 1358 tun16_to_cpu(tun, gso.hdr_len)); 1359 print_hex_dump(KERN_ERR, "tun: ", 1360 DUMP_PREFIX_NONE, 1361 16, 1, skb->head, 1362 min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); 1363 WARN_ON_ONCE(1); 1364 return -EINVAL; 1365 } 1366 if (sinfo->gso_type & SKB_GSO_TCP_ECN) 1367 gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN; 1368 } else 1369 gso.gso_type = VIRTIO_NET_HDR_GSO_NONE; 1370 1371 if (skb->ip_summed == CHECKSUM_PARTIAL) { 1372 gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 1373 gso.csum_start = cpu_to_tun16(tun, skb_checksum_start_offset(skb) + 1374 vlan_hlen); 1375 gso.csum_offset = cpu_to_tun16(tun, skb->csum_offset); 1376 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { 1377 gso.flags = VIRTIO_NET_HDR_F_DATA_VALID; 1378 } /* else everything is zero */ 1379 1380 if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) 1381 return -EFAULT; 1382 1383 iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); 1384 } 1385 1386 if (vlan_hlen) { 1387 int ret; 1388 struct { 1389 __be16 h_vlan_proto; 1390 __be16 h_vlan_TCI; 1391 } veth; 1392 1393 veth.h_vlan_proto = skb->vlan_proto; 1394 veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); 1395 1396 vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); 1397 1398 ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); 1399 if (ret || !iov_iter_count(iter)) 1400 goto done; 1401 1402 ret = copy_to_iter(&veth, sizeof(veth), iter); 1403 if (ret != sizeof(veth) || !iov_iter_count(iter)) 1404 goto done; 1405 } 1406 1407 skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); 1408 1409 done: 1410 tun->dev->stats.tx_packets++; 1411 tun->dev->stats.tx_bytes += skb->len + vlan_hlen; 1412 1413 return total; 1414 } 1415 1416 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, 1417 struct iov_iter *to, 1418 int noblock) 1419 { 1420 struct sk_buff *skb; 1421 ssize_t ret; 1422 int peeked, err, off = 0; 1423 1424 tun_debug(KERN_INFO, tun, "tun_do_read\n"); 1425 1426 if (!iov_iter_count(to)) 1427 return 0; 1428 1429 if (tun->dev->reg_state != NETREG_REGISTERED) 1430 return -EIO; 1431 1432 /* Read frames from queue */ 1433 skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, 1434 &peeked, &off, &err); 1435 if (!skb) 1436 return err; 1437 1438 ret = tun_put_user(tun, tfile, skb, to); 1439 if (unlikely(ret < 0)) 1440 kfree_skb(skb); 1441 else 1442 consume_skb(skb); 1443 1444 return ret; 1445 } 1446 1447 static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) 1448 { 1449 struct file *file = iocb->ki_filp; 1450 struct tun_file *tfile = file->private_data; 1451 struct tun_struct *tun = __tun_get(tfile); 1452 ssize_t len = iov_iter_count(to), ret; 1453 1454 if (!tun) 1455 return -EBADFD; 1456 ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK); 1457 ret = min_t(ssize_t, ret, len); 1458 if (ret > 0) 1459 iocb->ki_pos = ret; 1460 tun_put(tun); 1461 return ret; 1462 } 1463 1464 static void tun_free_netdev(struct net_device *dev) 1465 { 1466 struct tun_struct *tun = netdev_priv(dev); 1467 1468 BUG_ON(!(list_empty(&tun->disabled))); 1469 tun_flow_uninit(tun); 1470 security_tun_dev_free_security(tun->security); 1471 free_netdev(dev); 1472 } 1473 1474 static void tun_setup(struct net_device *dev) 1475 { 1476 struct tun_struct *tun = netdev_priv(dev); 1477 1478 tun->owner = INVALID_UID; 1479 tun->group = INVALID_GID; 1480 1481 dev->ethtool_ops = &tun_ethtool_ops; 1482 dev->destructor = tun_free_netdev; 1483 } 1484 1485 /* Trivial set of netlink ops to allow deleting tun or tap 1486 * device with netlink. 1487 */ 1488 static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) 1489 { 1490 return -EINVAL; 1491 } 1492 1493 static struct rtnl_link_ops tun_link_ops __read_mostly = { 1494 .kind = DRV_NAME, 1495 .priv_size = sizeof(struct tun_struct), 1496 .setup = tun_setup, 1497 .validate = tun_validate, 1498 }; 1499 1500 static void tun_sock_write_space(struct sock *sk) 1501 { 1502 struct tun_file *tfile; 1503 wait_queue_head_t *wqueue; 1504 1505 if (!sock_writeable(sk)) 1506 return; 1507 1508 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) 1509 return; 1510 1511 wqueue = sk_sleep(sk); 1512 if (wqueue && waitqueue_active(wqueue)) 1513 wake_up_interruptible_sync_poll(wqueue, POLLOUT | 1514 POLLWRNORM | POLLWRBAND); 1515 1516 tfile = container_of(sk, struct tun_file, sk); 1517 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); 1518 } 1519 1520 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 1521 { 1522 int ret; 1523 struct tun_file *tfile = container_of(sock, struct tun_file, socket); 1524 struct tun_struct *tun = __tun_get(tfile); 1525 1526 if (!tun) 1527 return -EBADFD; 1528 1529 ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, 1530 m->msg_flags & MSG_DONTWAIT); 1531 tun_put(tun); 1532 return ret; 1533 } 1534 1535 static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, 1536 int flags) 1537 { 1538 struct tun_file *tfile = container_of(sock, struct tun_file, socket); 1539 struct tun_struct *tun = __tun_get(tfile); 1540 int ret; 1541 1542 if (!tun) 1543 return -EBADFD; 1544 1545 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { 1546 ret = -EINVAL; 1547 goto out; 1548 } 1549 if (flags & MSG_ERRQUEUE) { 1550 ret = sock_recv_errqueue(sock->sk, m, total_len, 1551 SOL_PACKET, TUN_TX_TIMESTAMP); 1552 goto out; 1553 } 1554 ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT); 1555 if (ret > (ssize_t)total_len) { 1556 m->msg_flags |= MSG_TRUNC; 1557 ret = flags & MSG_TRUNC ? ret : total_len; 1558 } 1559 out: 1560 tun_put(tun); 1561 return ret; 1562 } 1563 1564 /* Ops structure to mimic raw sockets with tun */ 1565 static const struct proto_ops tun_socket_ops = { 1566 .sendmsg = tun_sendmsg, 1567 .recvmsg = tun_recvmsg, 1568 }; 1569 1570 static struct proto tun_proto = { 1571 .name = "tun", 1572 .owner = THIS_MODULE, 1573 .obj_size = sizeof(struct tun_file), 1574 }; 1575 1576 static int tun_flags(struct tun_struct *tun) 1577 { 1578 return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); 1579 } 1580 1581 static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr, 1582 char *buf) 1583 { 1584 struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 1585 return sprintf(buf, "0x%x\n", tun_flags(tun)); 1586 } 1587 1588 static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr, 1589 char *buf) 1590 { 1591 struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 1592 return uid_valid(tun->owner)? 1593 sprintf(buf, "%u\n", 1594 from_kuid_munged(current_user_ns(), tun->owner)): 1595 sprintf(buf, "-1\n"); 1596 } 1597 1598 static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr, 1599 char *buf) 1600 { 1601 struct tun_struct *tun = netdev_priv(to_net_dev(dev)); 1602 return gid_valid(tun->group) ? 1603 sprintf(buf, "%u\n", 1604 from_kgid_munged(current_user_ns(), tun->group)): 1605 sprintf(buf, "-1\n"); 1606 } 1607 1608 static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL); 1609 static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL); 1610 static DEVICE_ATTR(group, 0444, tun_show_group, NULL); 1611 1612 static struct attribute *tun_dev_attrs[] = { 1613 &dev_attr_tun_flags.attr, 1614 &dev_attr_owner.attr, 1615 &dev_attr_group.attr, 1616 NULL 1617 }; 1618 1619 static const struct attribute_group tun_attr_group = { 1620 .attrs = tun_dev_attrs 1621 }; 1622 1623 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) 1624 { 1625 struct tun_struct *tun; 1626 struct tun_file *tfile = file->private_data; 1627 struct net_device *dev; 1628 int err; 1629 1630 if (tfile->detached) 1631 return -EINVAL; 1632 1633 dev = __dev_get_by_name(net, ifr->ifr_name); 1634 if (dev) { 1635 if (ifr->ifr_flags & IFF_TUN_EXCL) 1636 return -EBUSY; 1637 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) 1638 tun = netdev_priv(dev); 1639 else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) 1640 tun = netdev_priv(dev); 1641 else 1642 return -EINVAL; 1643 1644 if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != 1645 !!(tun->flags & IFF_MULTI_QUEUE)) 1646 return -EINVAL; 1647 1648 if (tun_not_capable(tun)) 1649 return -EPERM; 1650 err = security_tun_dev_open(tun->security); 1651 if (err < 0) 1652 return err; 1653 1654 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER); 1655 if (err < 0) 1656 return err; 1657 1658 if (tun->flags & IFF_MULTI_QUEUE && 1659 (tun->numqueues + tun->numdisabled > 1)) { 1660 /* One or more queue has already been attached, no need 1661 * to initialize the device again. 1662 */ 1663 return 0; 1664 } 1665 } 1666 else { 1667 char *name; 1668 unsigned long flags = 0; 1669 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? 1670 MAX_TAP_QUEUES : 1; 1671 1672 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1673 return -EPERM; 1674 err = security_tun_dev_create(); 1675 if (err < 0) 1676 return err; 1677 1678 /* Set dev type */ 1679 if (ifr->ifr_flags & IFF_TUN) { 1680 /* TUN device */ 1681 flags |= IFF_TUN; 1682 name = "tun%d"; 1683 } else if (ifr->ifr_flags & IFF_TAP) { 1684 /* TAP device */ 1685 flags |= IFF_TAP; 1686 name = "tap%d"; 1687 } else 1688 return -EINVAL; 1689 1690 if (*ifr->ifr_name) 1691 name = ifr->ifr_name; 1692 1693 dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, 1694 NET_NAME_UNKNOWN, tun_setup, queues, 1695 queues); 1696 1697 if (!dev) 1698 return -ENOMEM; 1699 1700 dev_net_set(dev, net); 1701 dev->rtnl_link_ops = &tun_link_ops; 1702 dev->ifindex = tfile->ifindex; 1703 dev->sysfs_groups[0] = &tun_attr_group; 1704 1705 tun = netdev_priv(dev); 1706 tun->dev = dev; 1707 tun->flags = flags; 1708 tun->txflt.count = 0; 1709 tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); 1710 1711 tun->align = NET_SKB_PAD; 1712 tun->filter_attached = false; 1713 tun->sndbuf = tfile->socket.sk->sk_sndbuf; 1714 1715 spin_lock_init(&tun->lock); 1716 1717 err = security_tun_dev_alloc_security(&tun->security); 1718 if (err < 0) 1719 goto err_free_dev; 1720 1721 tun_net_init(dev); 1722 tun_flow_init(tun); 1723 1724 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | 1725 TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | 1726 NETIF_F_HW_VLAN_STAG_TX; 1727 dev->features = dev->hw_features; 1728 dev->vlan_features = dev->features & 1729 ~(NETIF_F_HW_VLAN_CTAG_TX | 1730 NETIF_F_HW_VLAN_STAG_TX); 1731 1732 INIT_LIST_HEAD(&tun->disabled); 1733 err = tun_attach(tun, file, false); 1734 if (err < 0) 1735 goto err_free_flow; 1736 1737 err = register_netdevice(tun->dev); 1738 if (err < 0) 1739 goto err_detach; 1740 } 1741 1742 netif_carrier_on(tun->dev); 1743 1744 tun_debug(KERN_INFO, tun, "tun_set_iff\n"); 1745 1746 tun->flags = (tun->flags & ~TUN_FEATURES) | 1747 (ifr->ifr_flags & TUN_FEATURES); 1748 1749 /* Make sure persistent devices do not get stuck in 1750 * xoff state. 1751 */ 1752 if (netif_running(tun->dev)) 1753 netif_tx_wake_all_queues(tun->dev); 1754 1755 strcpy(ifr->ifr_name, tun->dev->name); 1756 return 0; 1757 1758 err_detach: 1759 tun_detach_all(dev); 1760 err_free_flow: 1761 tun_flow_uninit(tun); 1762 security_tun_dev_free_security(tun->security); 1763 err_free_dev: 1764 free_netdev(dev); 1765 return err; 1766 } 1767 1768 static void tun_get_iff(struct net *net, struct tun_struct *tun, 1769 struct ifreq *ifr) 1770 { 1771 tun_debug(KERN_INFO, tun, "tun_get_iff\n"); 1772 1773 strcpy(ifr->ifr_name, tun->dev->name); 1774 1775 ifr->ifr_flags = tun_flags(tun); 1776 1777 } 1778 1779 /* This is like a cut-down ethtool ops, except done via tun fd so no 1780 * privs required. */ 1781 static int set_offload(struct tun_struct *tun, unsigned long arg) 1782 { 1783 netdev_features_t features = 0; 1784 1785 if (arg & TUN_F_CSUM) { 1786 features |= NETIF_F_HW_CSUM; 1787 arg &= ~TUN_F_CSUM; 1788 1789 if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { 1790 if (arg & TUN_F_TSO_ECN) { 1791 features |= NETIF_F_TSO_ECN; 1792 arg &= ~TUN_F_TSO_ECN; 1793 } 1794 if (arg & TUN_F_TSO4) 1795 features |= NETIF_F_TSO; 1796 if (arg & TUN_F_TSO6) 1797 features |= NETIF_F_TSO6; 1798 arg &= ~(TUN_F_TSO4|TUN_F_TSO6); 1799 } 1800 1801 if (arg & TUN_F_UFO) { 1802 features |= NETIF_F_UFO; 1803 arg &= ~TUN_F_UFO; 1804 } 1805 } 1806 1807 /* This gives the user a way to test for new features in future by 1808 * trying to set them. */ 1809 if (arg) 1810 return -EINVAL; 1811 1812 tun->set_features = features; 1813 netdev_update_features(tun->dev); 1814 1815 return 0; 1816 } 1817 1818 static void tun_detach_filter(struct tun_struct *tun, int n) 1819 { 1820 int i; 1821 struct tun_file *tfile; 1822 1823 for (i = 0; i < n; i++) { 1824 tfile = rtnl_dereference(tun->tfiles[i]); 1825 sk_detach_filter(tfile->socket.sk); 1826 } 1827 1828 tun->filter_attached = false; 1829 } 1830 1831 static int tun_attach_filter(struct tun_struct *tun) 1832 { 1833 int i, ret = 0; 1834 struct tun_file *tfile; 1835 1836 for (i = 0; i < tun->numqueues; i++) { 1837 tfile = rtnl_dereference(tun->tfiles[i]); 1838 ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); 1839 if (ret) { 1840 tun_detach_filter(tun, i); 1841 return ret; 1842 } 1843 } 1844 1845 tun->filter_attached = true; 1846 return ret; 1847 } 1848 1849 static void tun_set_sndbuf(struct tun_struct *tun) 1850 { 1851 struct tun_file *tfile; 1852 int i; 1853 1854 for (i = 0; i < tun->numqueues; i++) { 1855 tfile = rtnl_dereference(tun->tfiles[i]); 1856 tfile->socket.sk->sk_sndbuf = tun->sndbuf; 1857 } 1858 } 1859 1860 static int tun_set_queue(struct file *file, struct ifreq *ifr) 1861 { 1862 struct tun_file *tfile = file->private_data; 1863 struct tun_struct *tun; 1864 int ret = 0; 1865 1866 rtnl_lock(); 1867 1868 if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { 1869 tun = tfile->detached; 1870 if (!tun) { 1871 ret = -EINVAL; 1872 goto unlock; 1873 } 1874 ret = security_tun_dev_attach_queue(tun->security); 1875 if (ret < 0) 1876 goto unlock; 1877 ret = tun_attach(tun, file, false); 1878 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { 1879 tun = rtnl_dereference(tfile->tun); 1880 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) 1881 ret = -EINVAL; 1882 else 1883 __tun_detach(tfile, false); 1884 } else 1885 ret = -EINVAL; 1886 1887 unlock: 1888 rtnl_unlock(); 1889 return ret; 1890 } 1891 1892 static long __tun_chr_ioctl(struct file *file, unsigned int cmd, 1893 unsigned long arg, int ifreq_len) 1894 { 1895 struct tun_file *tfile = file->private_data; 1896 struct tun_struct *tun; 1897 void __user* argp = (void __user*)arg; 1898 struct ifreq ifr; 1899 kuid_t owner; 1900 kgid_t group; 1901 int sndbuf; 1902 int vnet_hdr_sz; 1903 unsigned int ifindex; 1904 int le; 1905 int ret; 1906 1907 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { 1908 if (copy_from_user(&ifr, argp, ifreq_len)) 1909 return -EFAULT; 1910 } else { 1911 memset(&ifr, 0, sizeof(ifr)); 1912 } 1913 if (cmd == TUNGETFEATURES) { 1914 /* Currently this just means: "what IFF flags are valid?". 1915 * This is needed because we never checked for invalid flags on 1916 * TUNSETIFF. 1917 */ 1918 return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES, 1919 (unsigned int __user*)argp); 1920 } else if (cmd == TUNSETQUEUE) 1921 return tun_set_queue(file, &ifr); 1922 1923 ret = 0; 1924 rtnl_lock(); 1925 1926 tun = __tun_get(tfile); 1927 if (cmd == TUNSETIFF && !tun) { 1928 ifr.ifr_name[IFNAMSIZ-1] = '\0'; 1929 1930 ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); 1931 1932 if (ret) 1933 goto unlock; 1934 1935 if (copy_to_user(argp, &ifr, ifreq_len)) 1936 ret = -EFAULT; 1937 goto unlock; 1938 } 1939 if (cmd == TUNSETIFINDEX) { 1940 ret = -EPERM; 1941 if (tun) 1942 goto unlock; 1943 1944 ret = -EFAULT; 1945 if (copy_from_user(&ifindex, argp, sizeof(ifindex))) 1946 goto unlock; 1947 1948 ret = 0; 1949 tfile->ifindex = ifindex; 1950 goto unlock; 1951 } 1952 1953 ret = -EBADFD; 1954 if (!tun) 1955 goto unlock; 1956 1957 tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); 1958 1959 ret = 0; 1960 switch (cmd) { 1961 case TUNGETIFF: 1962 tun_get_iff(current->nsproxy->net_ns, tun, &ifr); 1963 1964 if (tfile->detached) 1965 ifr.ifr_flags |= IFF_DETACH_QUEUE; 1966 if (!tfile->socket.sk->sk_filter) 1967 ifr.ifr_flags |= IFF_NOFILTER; 1968 1969 if (copy_to_user(argp, &ifr, ifreq_len)) 1970 ret = -EFAULT; 1971 break; 1972 1973 case TUNSETNOCSUM: 1974 /* Disable/Enable checksum */ 1975 1976 /* [unimplemented] */ 1977 tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n", 1978 arg ? "disabled" : "enabled"); 1979 break; 1980 1981 case TUNSETPERSIST: 1982 /* Disable/Enable persist mode. Keep an extra reference to the 1983 * module to prevent the module being unprobed. 1984 */ 1985 if (arg && !(tun->flags & IFF_PERSIST)) { 1986 tun->flags |= IFF_PERSIST; 1987 __module_get(THIS_MODULE); 1988 } 1989 if (!arg && (tun->flags & IFF_PERSIST)) { 1990 tun->flags &= ~IFF_PERSIST; 1991 module_put(THIS_MODULE); 1992 } 1993 1994 tun_debug(KERN_INFO, tun, "persist %s\n", 1995 arg ? "enabled" : "disabled"); 1996 break; 1997 1998 case TUNSETOWNER: 1999 /* Set owner of the device */ 2000 owner = make_kuid(current_user_ns(), arg); 2001 if (!uid_valid(owner)) { 2002 ret = -EINVAL; 2003 break; 2004 } 2005 tun->owner = owner; 2006 tun_debug(KERN_INFO, tun, "owner set to %u\n", 2007 from_kuid(&init_user_ns, tun->owner)); 2008 break; 2009 2010 case TUNSETGROUP: 2011 /* Set group of the device */ 2012 group = make_kgid(current_user_ns(), arg); 2013 if (!gid_valid(group)) { 2014 ret = -EINVAL; 2015 break; 2016 } 2017 tun->group = group; 2018 tun_debug(KERN_INFO, tun, "group set to %u\n", 2019 from_kgid(&init_user_ns, tun->group)); 2020 break; 2021 2022 case TUNSETLINK: 2023 /* Only allow setting the type when the interface is down */ 2024 if (tun->dev->flags & IFF_UP) { 2025 tun_debug(KERN_INFO, tun, 2026 "Linktype set failed because interface is up\n"); 2027 ret = -EBUSY; 2028 } else { 2029 tun->dev->type = (int) arg; 2030 tun_debug(KERN_INFO, tun, "linktype set to %d\n", 2031 tun->dev->type); 2032 ret = 0; 2033 } 2034 break; 2035 2036 #ifdef TUN_DEBUG 2037 case TUNSETDEBUG: 2038 tun->debug = arg; 2039 break; 2040 #endif 2041 case TUNSETOFFLOAD: 2042 ret = set_offload(tun, arg); 2043 break; 2044 2045 case TUNSETTXFILTER: 2046 /* Can be set only for TAPs */ 2047 ret = -EINVAL; 2048 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 2049 break; 2050 ret = update_filter(&tun->txflt, (void __user *)arg); 2051 break; 2052 2053 case SIOCGIFHWADDR: 2054 /* Get hw address */ 2055 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); 2056 ifr.ifr_hwaddr.sa_family = tun->dev->type; 2057 if (copy_to_user(argp, &ifr, ifreq_len)) 2058 ret = -EFAULT; 2059 break; 2060 2061 case SIOCSIFHWADDR: 2062 /* Set hw address */ 2063 tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n", 2064 ifr.ifr_hwaddr.sa_data); 2065 2066 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); 2067 break; 2068 2069 case TUNGETSNDBUF: 2070 sndbuf = tfile->socket.sk->sk_sndbuf; 2071 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) 2072 ret = -EFAULT; 2073 break; 2074 2075 case TUNSETSNDBUF: 2076 if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) { 2077 ret = -EFAULT; 2078 break; 2079 } 2080 2081 tun->sndbuf = sndbuf; 2082 tun_set_sndbuf(tun); 2083 break; 2084 2085 case TUNGETVNETHDRSZ: 2086 vnet_hdr_sz = tun->vnet_hdr_sz; 2087 if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) 2088 ret = -EFAULT; 2089 break; 2090 2091 case TUNSETVNETHDRSZ: 2092 if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { 2093 ret = -EFAULT; 2094 break; 2095 } 2096 if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { 2097 ret = -EINVAL; 2098 break; 2099 } 2100 2101 tun->vnet_hdr_sz = vnet_hdr_sz; 2102 break; 2103 2104 case TUNGETVNETLE: 2105 le = !!(tun->flags & TUN_VNET_LE); 2106 if (put_user(le, (int __user *)argp)) 2107 ret = -EFAULT; 2108 break; 2109 2110 case TUNSETVNETLE: 2111 if (get_user(le, (int __user *)argp)) { 2112 ret = -EFAULT; 2113 break; 2114 } 2115 if (le) 2116 tun->flags |= TUN_VNET_LE; 2117 else 2118 tun->flags &= ~TUN_VNET_LE; 2119 break; 2120 2121 case TUNGETVNETBE: 2122 ret = tun_get_vnet_be(tun, argp); 2123 break; 2124 2125 case TUNSETVNETBE: 2126 ret = tun_set_vnet_be(tun, argp); 2127 break; 2128 2129 case TUNATTACHFILTER: 2130 /* Can be set only for TAPs */ 2131 ret = -EINVAL; 2132 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 2133 break; 2134 ret = -EFAULT; 2135 if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) 2136 break; 2137 2138 ret = tun_attach_filter(tun); 2139 break; 2140 2141 case TUNDETACHFILTER: 2142 /* Can be set only for TAPs */ 2143 ret = -EINVAL; 2144 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 2145 break; 2146 ret = 0; 2147 tun_detach_filter(tun, tun->numqueues); 2148 break; 2149 2150 case TUNGETFILTER: 2151 ret = -EINVAL; 2152 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) 2153 break; 2154 ret = -EFAULT; 2155 if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) 2156 break; 2157 ret = 0; 2158 break; 2159 2160 default: 2161 ret = -EINVAL; 2162 break; 2163 } 2164 2165 unlock: 2166 rtnl_unlock(); 2167 if (tun) 2168 tun_put(tun); 2169 return ret; 2170 } 2171 2172 static long tun_chr_ioctl(struct file *file, 2173 unsigned int cmd, unsigned long arg) 2174 { 2175 return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq)); 2176 } 2177 2178 #ifdef CONFIG_COMPAT 2179 static long tun_chr_compat_ioctl(struct file *file, 2180 unsigned int cmd, unsigned long arg) 2181 { 2182 switch (cmd) { 2183 case TUNSETIFF: 2184 case TUNGETIFF: 2185 case TUNSETTXFILTER: 2186 case TUNGETSNDBUF: 2187 case TUNSETSNDBUF: 2188 case SIOCGIFHWADDR: 2189 case SIOCSIFHWADDR: 2190 arg = (unsigned long)compat_ptr(arg); 2191 break; 2192 default: 2193 arg = (compat_ulong_t)arg; 2194 break; 2195 } 2196 2197 /* 2198 * compat_ifreq is shorter than ifreq, so we must not access beyond 2199 * the end of that structure. All fields that are used in this 2200 * driver are compatible though, we don't need to convert the 2201 * contents. 2202 */ 2203 return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq)); 2204 } 2205 #endif /* CONFIG_COMPAT */ 2206 2207 static int tun_chr_fasync(int fd, struct file *file, int on) 2208 { 2209 struct tun_file *tfile = file->private_data; 2210 int ret; 2211 2212 if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) 2213 goto out; 2214 2215 if (on) { 2216 __f_setown(file, task_pid(current), PIDTYPE_PID, 0); 2217 tfile->flags |= TUN_FASYNC; 2218 } else 2219 tfile->flags &= ~TUN_FASYNC; 2220 ret = 0; 2221 out: 2222 return ret; 2223 } 2224 2225 static int tun_chr_open(struct inode *inode, struct file * file) 2226 { 2227 struct net *net = current->nsproxy->net_ns; 2228 struct tun_file *tfile; 2229 2230 DBG1(KERN_INFO, "tunX: tun_chr_open\n"); 2231 2232 tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 2233 &tun_proto, 0); 2234 if (!tfile) 2235 return -ENOMEM; 2236 RCU_INIT_POINTER(tfile->tun, NULL); 2237 tfile->flags = 0; 2238 tfile->ifindex = 0; 2239 2240 init_waitqueue_head(&tfile->wq.wait); 2241 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); 2242 2243 tfile->socket.file = file; 2244 tfile->socket.ops = &tun_socket_ops; 2245 2246 sock_init_data(&tfile->socket, &tfile->sk); 2247 2248 tfile->sk.sk_write_space = tun_sock_write_space; 2249 tfile->sk.sk_sndbuf = INT_MAX; 2250 2251 file->private_data = tfile; 2252 INIT_LIST_HEAD(&tfile->next); 2253 2254 sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); 2255 2256 return 0; 2257 } 2258 2259 static int tun_chr_close(struct inode *inode, struct file *file) 2260 { 2261 struct tun_file *tfile = file->private_data; 2262 2263 tun_detach(tfile, true); 2264 2265 return 0; 2266 } 2267 2268 #ifdef CONFIG_PROC_FS 2269 static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f) 2270 { 2271 struct tun_struct *tun; 2272 struct ifreq ifr; 2273 2274 memset(&ifr, 0, sizeof(ifr)); 2275 2276 rtnl_lock(); 2277 tun = tun_get(f); 2278 if (tun) 2279 tun_get_iff(current->nsproxy->net_ns, tun, &ifr); 2280 rtnl_unlock(); 2281 2282 if (tun) 2283 tun_put(tun); 2284 2285 seq_printf(m, "iff:\t%s\n", ifr.ifr_name); 2286 } 2287 #endif 2288 2289 static const struct file_operations tun_fops = { 2290 .owner = THIS_MODULE, 2291 .llseek = no_llseek, 2292 .read_iter = tun_chr_read_iter, 2293 .write_iter = tun_chr_write_iter, 2294 .poll = tun_chr_poll, 2295 .unlocked_ioctl = tun_chr_ioctl, 2296 #ifdef CONFIG_COMPAT 2297 .compat_ioctl = tun_chr_compat_ioctl, 2298 #endif 2299 .open = tun_chr_open, 2300 .release = tun_chr_close, 2301 .fasync = tun_chr_fasync, 2302 #ifdef CONFIG_PROC_FS 2303 .show_fdinfo = tun_chr_show_fdinfo, 2304 #endif 2305 }; 2306 2307 static struct miscdevice tun_miscdev = { 2308 .minor = TUN_MINOR, 2309 .name = "tun", 2310 .nodename = "net/tun", 2311 .fops = &tun_fops, 2312 }; 2313 2314 /* ethtool interface */ 2315 2316 static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 2317 { 2318 cmd->supported = 0; 2319 cmd->advertising = 0; 2320 ethtool_cmd_speed_set(cmd, SPEED_10); 2321 cmd->duplex = DUPLEX_FULL; 2322 cmd->port = PORT_TP; 2323 cmd->phy_address = 0; 2324 cmd->transceiver = XCVR_INTERNAL; 2325 cmd->autoneg = AUTONEG_DISABLE; 2326 cmd->maxtxpkt = 0; 2327 cmd->maxrxpkt = 0; 2328 return 0; 2329 } 2330 2331 static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 2332 { 2333 struct tun_struct *tun = netdev_priv(dev); 2334 2335 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 2336 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 2337 2338 switch (tun->flags & TUN_TYPE_MASK) { 2339 case IFF_TUN: 2340 strlcpy(info->bus_info, "tun", sizeof(info->bus_info)); 2341 break; 2342 case IFF_TAP: 2343 strlcpy(info->bus_info, "tap", sizeof(info->bus_info)); 2344 break; 2345 } 2346 } 2347 2348 static u32 tun_get_msglevel(struct net_device *dev) 2349 { 2350 #ifdef TUN_DEBUG 2351 struct tun_struct *tun = netdev_priv(dev); 2352 return tun->debug; 2353 #else 2354 return -EOPNOTSUPP; 2355 #endif 2356 } 2357 2358 static void tun_set_msglevel(struct net_device *dev, u32 value) 2359 { 2360 #ifdef TUN_DEBUG 2361 struct tun_struct *tun = netdev_priv(dev); 2362 tun->debug = value; 2363 #endif 2364 } 2365 2366 static const struct ethtool_ops tun_ethtool_ops = { 2367 .get_settings = tun_get_settings, 2368 .get_drvinfo = tun_get_drvinfo, 2369 .get_msglevel = tun_get_msglevel, 2370 .set_msglevel = tun_set_msglevel, 2371 .get_link = ethtool_op_get_link, 2372 .get_ts_info = ethtool_op_get_ts_info, 2373 }; 2374 2375 2376 static int __init tun_init(void) 2377 { 2378 int ret = 0; 2379 2380 pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); 2381 pr_info("%s\n", DRV_COPYRIGHT); 2382 2383 ret = rtnl_link_register(&tun_link_ops); 2384 if (ret) { 2385 pr_err("Can't register link_ops\n"); 2386 goto err_linkops; 2387 } 2388 2389 ret = misc_register(&tun_miscdev); 2390 if (ret) { 2391 pr_err("Can't register misc device %d\n", TUN_MINOR); 2392 goto err_misc; 2393 } 2394 return 0; 2395 err_misc: 2396 rtnl_link_unregister(&tun_link_ops); 2397 err_linkops: 2398 return ret; 2399 } 2400 2401 static void tun_cleanup(void) 2402 { 2403 misc_deregister(&tun_miscdev); 2404 rtnl_link_unregister(&tun_link_ops); 2405 } 2406 2407 /* Get an underlying socket object from tun file. Returns error unless file is 2408 * attached to a device. The returned object works like a packet socket, it 2409 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 2410 * holding a reference to the file for as long as the socket is in use. */ 2411 struct socket *tun_get_socket(struct file *file) 2412 { 2413 struct tun_file *tfile; 2414 if (file->f_op != &tun_fops) 2415 return ERR_PTR(-EINVAL); 2416 tfile = file->private_data; 2417 if (!tfile) 2418 return ERR_PTR(-EBADFD); 2419 return &tfile->socket; 2420 } 2421 EXPORT_SYMBOL_GPL(tun_get_socket); 2422 2423 module_init(tun_init); 2424 module_exit(tun_cleanup); 2425 MODULE_DESCRIPTION(DRV_DESCRIPTION); 2426 MODULE_AUTHOR(DRV_COPYRIGHT); 2427 MODULE_LICENSE("GPL"); 2428 MODULE_ALIAS_MISCDEV(TUN_MINOR); 2429 MODULE_ALIAS("devname:net/tun"); 2430