#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/fs.h>

#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>

/*
 * A macvtap queue is the central object of this driver, it connects
 * an open character device to a macvlan interface. There can be
 * multiple queues on one interface, which map back to queues
 * implemented in hardware on the underlying device.
 *
 * macvtap_proto is used to allocate queues through the sock allocation
 * mechanism.
 *
 * TODO: multiqueue support is currently not implemented, even though
 * macvtap is basically prepared for that. We will need to add this
 * here as well as in virtio-net and qemu to get line rate on 10gbit
 * adapters from a guest.
 */
struct macvtap_queue {
        struct sock sk;
        struct socket sock;
        struct socket_wq wq;
        int vnet_hdr_sz;
        struct macvlan_dev __rcu *vlan;
        struct file *file;
        unsigned int flags;
};

static struct proto macvtap_proto = {
        .name = "macvtap",
        .owner = THIS_MODULE,
        .obj_size = sizeof (struct macvtap_queue),
};

/*
 * The minor number matches netdev->ifindex, so we need a potentially
 * large value. This also makes it possible to split the
 * tap functionality out again in the future by offering it
 * from other drivers besides macvtap. As long as every device
 * only has one tap, the interface numbers ensure that the
 * device nodes are unique.
 */
static dev_t macvtap_major;
#define MACVTAP_NUM_DEVS 65536
static struct class *macvtap_class;
static struct cdev macvtap_cdev;

static const struct proto_ops macvtap_socket_ops;
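
/*
 * Example (illustrative, not extra driver code): a macvtap created on top
 * of a netdev with ifindex 42 is reachable through the character device
 * node "tap42", whose minor number is 42, i.e.
 * devt == MKDEV(MAJOR(macvtap_major), 42), matching macvtap_newlink()
 * below.
 */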

/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or macvtap_lock is held.
 *
 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the file gets closed,
 * macvtap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
 */
static DEFINE_SPINLOCK(macvtap_lock);

/*
 * get_slot: return an [unused/occupied] slot in vlan->taps[]:
 *	- if 'q' is NULL, return the first empty slot;
 *	- otherwise, return the slot this pointer occupies.
 */
static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
{
        int i;

        for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
                if (rcu_dereference(vlan->taps[i]) == q)
                        return i;
        }

        /* Should never happen */
        BUG_ON(1);
}

static int macvtap_set_queue(struct net_device *dev, struct file *file,
                             struct macvtap_queue *q)
{
        struct macvlan_dev *vlan = netdev_priv(dev);
        int index;
        int err = -EBUSY;

        spin_lock(&macvtap_lock);
        if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
                goto out;

        err = 0;
        index = get_slot(vlan, NULL);
        rcu_assign_pointer(q->vlan, vlan);
        rcu_assign_pointer(vlan->taps[index], q);
        sock_hold(&q->sk);

        q->file = file;
        file->private_data = q;

        vlan->numvtaps++;

out:
        spin_unlock(&macvtap_lock);
        return err;
}

/*
 * The file owning the queue got closed, give up both
 * the reference that the file holds as well as the
 * one from the macvlan_dev if that still exists.
 *
 * Using the spinlock makes sure that we don't get
 * to the queue again after destroying it.
 */
static void macvtap_put_queue(struct macvtap_queue *q)
{
        struct macvlan_dev *vlan;

        spin_lock(&macvtap_lock);
        vlan = rcu_dereference_protected(q->vlan,
                                         lockdep_is_held(&macvtap_lock));
        if (vlan) {
                int index = get_slot(vlan, q);

                rcu_assign_pointer(vlan->taps[index], NULL);
                rcu_assign_pointer(q->vlan, NULL);
                sock_put(&q->sk);
                --vlan->numvtaps;
        }

        spin_unlock(&macvtap_lock);

        synchronize_rcu();
        sock_put(&q->sk);
}

/*
 * Select a queue based on the rxq of the device on which this packet
 * arrived. If the incoming device is not mq, calculate a flow hash
 * to select a queue. If all fails, find the first available queue.
 * Cache vlan->numvtaps since it can become zero during the execution
 * of this function.
 */
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
                                               struct sk_buff *skb)
{
        struct macvlan_dev *vlan = netdev_priv(dev);
        struct macvtap_queue *tap = NULL;
        int numvtaps = vlan->numvtaps;
        __u32 rxq;

        if (!numvtaps)
                goto out;

        if (likely(skb_rx_queue_recorded(skb))) {
                rxq = skb_get_rx_queue(skb);

                while (unlikely(rxq >= numvtaps))
                        rxq -= numvtaps;

                tap = rcu_dereference(vlan->taps[rxq]);
                if (tap)
                        goto out;
        }

        /* Check if we can use flow to select a queue */
        rxq = skb_get_rxhash(skb);
        if (rxq) {
                tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
                if (tap)
                        goto out;
        }

        /* Everything failed - find first available queue */
        for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) {
                tap = rcu_dereference(vlan->taps[rxq]);
                if (tap)
                        break;
        }

out:
        return tap;
}
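
/*
 * Worked example (illustrative): with numvtaps == 3 and a recorded rx
 * queue of 7, the subtraction loop above reduces rxq 7 -> 4 -> 1, so
 * taps[1] is tried first; if the skb carries no recorded queue, the flow
 * hash modulo numvtaps picks the slot, and the linear scan is the last
 * resort.
 */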

/*
 * The net_device is going away, give up the reference
 * that it holds on all queues and safely set the pointer
 * from the queues to NULL.
 */
static void macvtap_del_queues(struct net_device *dev)
{
        struct macvlan_dev *vlan = netdev_priv(dev);
        struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
        int i, j = 0;

        /* macvtap_put_queue can free some slots, so go through all slots */
        spin_lock(&macvtap_lock);
        for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
                q = rcu_dereference_protected(vlan->taps[i],
                                              lockdep_is_held(&macvtap_lock));
                if (q) {
                        qlist[j++] = q;
                        rcu_assign_pointer(vlan->taps[i], NULL);
                        rcu_assign_pointer(q->vlan, NULL);
                        vlan->numvtaps--;
                }
        }
        BUG_ON(vlan->numvtaps != 0);
        spin_unlock(&macvtap_lock);

        synchronize_rcu();

        for (--j; j >= 0; j--)
                sock_put(&qlist[j]->sk);
}

/*
 * Forward happens for data that gets sent from one macvlan
 * endpoint to another one in bridge mode. We just take
 * the skb and put it into the receive queue.
 */
static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
{
        struct macvtap_queue *q = macvtap_get_queue(dev, skb);
        if (!q)
                goto drop;

        if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
                goto drop;

        skb_queue_tail(&q->sk.sk_receive_queue, skb);
        wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
        return NET_RX_SUCCESS;

drop:
        kfree_skb(skb);
        return NET_RX_DROP;
}

/*
 * Receive is for data from the external interface (lowerdev);
 * in the case of macvtap we can treat it the same way as
 * forwarded data, which plain macvlan cannot.
 */
static int macvtap_receive(struct sk_buff *skb)
{
        skb_push(skb, ETH_HLEN);
        return macvtap_forward(skb->dev, skb);
}

static int macvtap_newlink(struct net *src_net,
                           struct net_device *dev,
                           struct nlattr *tb[],
                           struct nlattr *data[])
{
        struct device *classdev;
        dev_t devt;
        int err;

        err = macvlan_common_newlink(src_net, dev, tb, data,
                                     macvtap_receive, macvtap_forward);
        if (err)
                goto out;

        devt = MKDEV(MAJOR(macvtap_major), dev->ifindex);

        classdev = device_create(macvtap_class, &dev->dev, devt,
                                 dev, "tap%d", dev->ifindex);
        if (IS_ERR(classdev)) {
                err = PTR_ERR(classdev);
                macvtap_del_queues(dev);
        }

out:
        return err;
}

static void macvtap_dellink(struct net_device *dev,
                            struct list_head *head)
{
        device_destroy(macvtap_class,
                       MKDEV(MAJOR(macvtap_major), dev->ifindex));

        macvtap_del_queues(dev);
        macvlan_dellink(dev, head);
}

static void macvtap_setup(struct net_device *dev)
{
        macvlan_common_setup(dev);
        dev->tx_queue_len = TUN_READQ_SIZE;
}

static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
        .kind		= "macvtap",
        .setup		= macvtap_setup,
        .newlink	= macvtap_newlink,
        .dellink	= macvtap_dellink,
};
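
/*
 * Example (illustrative): with these link ops registered, a macvtap
 * device is created from userspace like any other rtnl link type, e.g.
 *
 *	ip link add link eth0 name macvtap0 type macvtap
 *
 * which ends up in macvtap_newlink() and creates the matching /dev/tapN
 * node, N being the new device's ifindex.
 */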

static void macvtap_sock_write_space(struct sock *sk)
{
        wait_queue_head_t *wqueue;

        if (!sock_writeable(sk) ||
            !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
                return;

        wqueue = sk_sleep(sk);
        if (wqueue && waitqueue_active(wqueue))
                wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}

static int macvtap_open(struct inode *inode, struct file *file)
{
        struct net *net = current->nsproxy->net_ns;
        struct net_device *dev = dev_get_by_index(net, iminor(inode));
        struct macvtap_queue *q;
        int err;

        err = -ENODEV;
        if (!dev)
                goto out;

        /* check if this is a macvtap device */
        err = -EINVAL;
        if (dev->rtnl_link_ops != &macvtap_link_ops)
                goto out;

        err = -ENOMEM;
        q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
                                             &macvtap_proto);
        if (!q)
                goto out;

        q->sock.wq = &q->wq;
        init_waitqueue_head(&q->wq.wait);
        q->sock.type = SOCK_RAW;
        q->sock.state = SS_CONNECTED;
        q->sock.file = file;
        q->sock.ops = &macvtap_socket_ops;
        sock_init_data(&q->sock, &q->sk);
        q->sk.sk_write_space = macvtap_sock_write_space;
        q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
        q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);

        err = macvtap_set_queue(dev, file, q);
        if (err)
                sock_put(&q->sk);

out:
        if (dev)
                dev_put(dev);

        return err;
}

static int macvtap_release(struct inode *inode, struct file *file)
{
        struct macvtap_queue *q = file->private_data;
        macvtap_put_queue(q);
        return 0;
}

static unsigned int macvtap_poll(struct file *file, poll_table *wait)
{
        struct macvtap_queue *q = file->private_data;
        unsigned int mask = POLLERR;

        if (!q)
                goto out;

        mask = 0;
        poll_wait(file, &q->wq.wait, wait);

        if (!skb_queue_empty(&q->sk.sk_receive_queue))
                mask |= POLLIN | POLLRDNORM;

        if (sock_writeable(&q->sk) ||
            (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
             sock_writeable(&q->sk)))
                mask |= POLLOUT | POLLWRNORM;

out:
        return mask;
}

static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
                                                size_t len, size_t linear,
                                                int noblock, int *err)
{
        struct sk_buff *skb;

        /* Under a page? Don't bother with paged skb. */
        if (prepad + len < PAGE_SIZE || !linear)
                linear = len;

        skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
                                   err);
        if (!skb)
                return NULL;

        skb_reserve(skb, prepad);
        skb_put(skb, linear);
        skb->data_len = len - linear;
        skb->len += len - linear;

        return skb;
}
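
/*
 * Worked example (illustrative): a 64 KiB GSO frame written with a vnet
 * header whose hdr_len is 66 gets 66 linear bytes (behind NET_IP_ALIGN
 * bytes of headroom) plus the remainder in paged fragments, while any
 * frame with prepad + len below PAGE_SIZE is allocated entirely linear.
 */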

/*
 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
 * be shared with the tun/tap driver.
 */
static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
                                     struct virtio_net_hdr *vnet_hdr)
{
        unsigned short gso_type = 0;
        if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                case VIRTIO_NET_HDR_GSO_TCPV4:
                        gso_type = SKB_GSO_TCPV4;
                        break;
                case VIRTIO_NET_HDR_GSO_TCPV6:
                        gso_type = SKB_GSO_TCPV6;
                        break;
                case VIRTIO_NET_HDR_GSO_UDP:
                        gso_type = SKB_GSO_UDP;
                        break;
                default:
                        return -EINVAL;
                }

                if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
                        gso_type |= SKB_GSO_TCP_ECN;

                if (vnet_hdr->gso_size == 0)
                        return -EINVAL;
        }

        if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                if (!skb_partial_csum_set(skb, vnet_hdr->csum_start,
                                          vnet_hdr->csum_offset))
                        return -EINVAL;
        }

        if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
                skb_shinfo(skb)->gso_type = gso_type;

                /* Header must be checked, and gso_segs computed. */
                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
                skb_shinfo(skb)->gso_segs = 0;
        }
        return 0;
}

static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
                                   struct virtio_net_hdr *vnet_hdr)
{
        memset(vnet_hdr, 0, sizeof(*vnet_hdr));

        if (skb_is_gso(skb)) {
                struct skb_shared_info *sinfo = skb_shinfo(skb);

                /* This is a hint as to how much should be linear. */
                vnet_hdr->hdr_len = skb_headlen(skb);
                vnet_hdr->gso_size = sinfo->gso_size;
                if (sinfo->gso_type & SKB_GSO_TCPV4)
                        vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
                else if (sinfo->gso_type & SKB_GSO_TCPV6)
                        vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
                else if (sinfo->gso_type & SKB_GSO_UDP)
                        vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
                else
                        BUG();
                if (sinfo->gso_type & SKB_GSO_TCP_ECN)
                        vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
        } else
                vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                vnet_hdr->csum_start = skb_checksum_start_offset(skb);
                vnet_hdr->csum_offset = skb->csum_offset;
        } /* else everything is zero */

        return 0;
}


/* Get packet from user space buffer */
static ssize_t macvtap_get_user(struct macvtap_queue *q,
                                const struct iovec *iv, size_t count,
                                int noblock)
{
        struct sk_buff *skb;
        struct macvlan_dev *vlan;
        size_t len = count;
        int err;
        struct virtio_net_hdr vnet_hdr = { 0 };
        int vnet_hdr_len = 0;

        if (q->flags & IFF_VNET_HDR) {
                vnet_hdr_len = q->vnet_hdr_sz;

                err = -EINVAL;
                if (len < vnet_hdr_len)
                        goto err;
                len -= vnet_hdr_len;

                err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
                                          sizeof(vnet_hdr));
                if (err < 0)
                        goto err;
                if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
                     vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
                                                        vnet_hdr.hdr_len)
                        vnet_hdr.hdr_len = vnet_hdr.csum_start +
                                                vnet_hdr.csum_offset + 2;
                err = -EINVAL;
                if (vnet_hdr.hdr_len > len)
                        goto err;
        }

        err = -EINVAL;
        if (unlikely(len < ETH_HLEN))
                goto err;

        skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
                                noblock, &err);
        if (!skb)
                goto err;

        err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
        if (err)
                goto err_kfree;

        skb_set_network_header(skb, ETH_HLEN);
        skb_reset_mac_header(skb);
        skb->protocol = eth_hdr(skb)->h_proto;

        if (vnet_hdr_len) {
                err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr);
                if (err)
                        goto err_kfree;
        }

        rcu_read_lock_bh();
        vlan = rcu_dereference_bh(q->vlan);
        if (vlan)
                macvlan_start_xmit(skb, vlan->dev);
        else
                kfree_skb(skb);
        rcu_read_unlock_bh();

        return count;

err_kfree:
        kfree_skb(skb);

err:
        rcu_read_lock_bh();
        vlan = rcu_dereference_bh(q->vlan);
        if (vlan)
                vlan->dev->stats.tx_dropped++;
        rcu_read_unlock_bh();

        return err;
}

static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
                                 unsigned long count, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        ssize_t result = -ENOLINK;
        struct macvtap_queue *q = file->private_data;

        result = macvtap_get_user(q, iv, iov_length(iv, count),
                                  file->f_flags & O_NONBLOCK);
        return result;
}
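
/*
 * Example (illustrative userspace sketch, assuming an open queue fd with
 * IFF_VNET_HDR set; "frame"/"frame_len" stand for the caller's buffer):
 * every write must be prefixed with a struct virtio_net_hdr, which
 * macvtap_get_user() strips off before injecting the frame:
 *
 *	struct virtio_net_hdr hdr = { .gso_type = VIRTIO_NET_HDR_GSO_NONE };
 *	struct iovec iov[2] = {
 *		{ .iov_base = &hdr,  .iov_len = sizeof(hdr) },
 *		{ .iov_base = frame, .iov_len = frame_len   },
 *	};
 *	writev(fd, iov, 2);
 */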

/* Put packet to the user space buffer */
static ssize_t macvtap_put_user(struct macvtap_queue *q,
                                const struct sk_buff *skb,
                                const struct iovec *iv, int len)
{
        struct macvlan_dev *vlan;
        int ret;
        int vnet_hdr_len = 0;

        if (q->flags & IFF_VNET_HDR) {
                struct virtio_net_hdr vnet_hdr;
                vnet_hdr_len = q->vnet_hdr_sz;
                if ((len -= vnet_hdr_len) < 0)
                        return -EINVAL;

                ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
                if (ret)
                        return ret;

                if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
                        return -EFAULT;
        }

        len = min_t(int, skb->len, len);

        ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len);

        rcu_read_lock_bh();
        vlan = rcu_dereference_bh(q->vlan);
        if (vlan)
                macvlan_count_rx(vlan, len, ret == 0, 0);
        rcu_read_unlock_bh();

        return ret ? ret : (len + vnet_hdr_len);
}

static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
                               const struct iovec *iv, unsigned long len,
                               int noblock)
{
        DECLARE_WAITQUEUE(wait, current);
        struct sk_buff *skb;
        ssize_t ret = 0;

        add_wait_queue(sk_sleep(&q->sk), &wait);
        while (len) {
                current->state = TASK_INTERRUPTIBLE;

                /* Read frames from the queue */
                skb = skb_dequeue(&q->sk.sk_receive_queue);
                if (!skb) {
                        if (noblock) {
                                ret = -EAGAIN;
                                break;
                        }
                        if (signal_pending(current)) {
                                ret = -ERESTARTSYS;
                                break;
                        }
                        /* Nothing to read, let's sleep */
                        schedule();
                        continue;
                }
                ret = macvtap_put_user(q, skb, iv, len);
                kfree_skb(skb);
                break;
        }

        current->state = TASK_RUNNING;
        remove_wait_queue(sk_sleep(&q->sk), &wait);
        return ret;
}
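
/*
 * Example (illustrative userspace sketch, assuming the default
 * vnet_hdr_sz): the read direction mirrors the write path above; with
 * IFF_VNET_HDR set, each frame read from the fd is preceded by
 * q->vnet_hdr_sz bytes of struct virtio_net_hdr:
 *
 *	char buf[sizeof(struct virtio_net_hdr) + 65536];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *	char *frame = buf + sizeof(struct virtio_net_hdr);
 */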

static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
                                unsigned long count, loff_t pos)
{
        struct file *file = iocb->ki_filp;
        struct macvtap_queue *q = file->private_data;
        ssize_t len, ret = 0;

        len = iov_length(iv, count);
        if (len < 0) {
                ret = -EINVAL;
                goto out;
        }

        ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
        ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
out:
        return ret;
}

/*
 * provide compatibility with generic tun/tap interface
 */
static long macvtap_ioctl(struct file *file, unsigned int cmd,
                          unsigned long arg)
{
        struct macvtap_queue *q = file->private_data;
        struct macvlan_dev *vlan;
        void __user *argp = (void __user *)arg;
        struct ifreq __user *ifr = argp;
        unsigned int __user *up = argp;
        unsigned int u;
        int __user *sp = argp;
        int s;
        int ret;

        switch (cmd) {
        case TUNSETIFF:
                /* ignore the name, just look at flags */
                if (get_user(u, &ifr->ifr_flags))
                        return -EFAULT;

                ret = 0;
                if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
                        ret = -EINVAL;
                else
                        q->flags = u;

                return ret;

        case TUNGETIFF:
                rcu_read_lock_bh();
                vlan = rcu_dereference_bh(q->vlan);
                if (vlan)
                        dev_hold(vlan->dev);
                rcu_read_unlock_bh();

                if (!vlan)
                        return -ENOLINK;

                ret = 0;
                if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
                    put_user(q->flags, &ifr->ifr_flags))
                        ret = -EFAULT;
                dev_put(vlan->dev);
                return ret;

        case TUNGETFEATURES:
                if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up))
                        return -EFAULT;
                return 0;

        case TUNSETSNDBUF:
                if (get_user(u, up))
                        return -EFAULT;

                q->sk.sk_sndbuf = u;
                return 0;

        case TUNGETVNETHDRSZ:
                s = q->vnet_hdr_sz;
                if (put_user(s, sp))
                        return -EFAULT;
                return 0;

        case TUNSETVNETHDRSZ:
                if (get_user(s, sp))
                        return -EFAULT;
                if (s < (int)sizeof(struct virtio_net_hdr))
                        return -EINVAL;

                q->vnet_hdr_sz = s;
                return 0;

        case TUNSETOFFLOAD:
                /* let the user check for future flags */
                if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
                            TUN_F_TSO_ECN | TUN_F_UFO))
                        return -EINVAL;

                /* TODO: only accept frames with the features that
                         got enabled for forwarded frames */
                if (!(q->flags & IFF_VNET_HDR))
                        return -EINVAL;
                return 0;

        default:
                return -EINVAL;
        }
}

#ifdef CONFIG_COMPAT
static long macvtap_compat_ioctl(struct file *file, unsigned int cmd,
                                 unsigned long arg)
{
        return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static const struct file_operations macvtap_fops = {
        .owner		= THIS_MODULE,
        .open		= macvtap_open,
        .release	= macvtap_release,
        .aio_read	= macvtap_aio_read,
        .aio_write	= macvtap_aio_write,
        .poll		= macvtap_poll,
        .llseek		= no_llseek,
        .unlocked_ioctl	= macvtap_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl	= macvtap_compat_ioctl,
#endif
};
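
/*
 * Example (illustrative userspace sketch): exercising the tun/tap
 * compatible ioctls above, e.g. confirming the flags and querying the
 * vnet header size (sizeof(struct virtio_net_hdr) by default):
 *
 *	struct ifreq ifr;
 *	int hdrsz;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 *	ioctl(fd, TUNSETIFF, &ifr);
 *	ioctl(fd, TUNGETVNETHDRSZ, &hdrsz);
 */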

static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
                           struct msghdr *m, size_t total_len)
{
        struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
        return macvtap_get_user(q, m->msg_iov, total_len,
                                m->msg_flags & MSG_DONTWAIT);
}

static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
                           struct msghdr *m, size_t total_len,
                           int flags)
{
        struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
        int ret;
        if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
                return -EINVAL;
        ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
                              flags & MSG_DONTWAIT);
        if (ret > total_len) {
                m->msg_flags |= MSG_TRUNC;
                ret = flags & MSG_TRUNC ? ret : total_len;
        }
        return ret;
}

/* Ops structure to mimic raw sockets with tun */
static const struct proto_ops macvtap_socket_ops = {
        .sendmsg = macvtap_sendmsg,
        .recvmsg = macvtap_recvmsg,
};

/* Get an underlying socket object from tun file. Returns error unless file is
 * attached to a device. The returned object works like a packet socket, it
 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
 * holding a reference to the file for as long as the socket is in use. */
struct socket *macvtap_get_socket(struct file *file)
{
        struct macvtap_queue *q;
        if (file->f_op != &macvtap_fops)
                return ERR_PTR(-EINVAL);
        q = file->private_data;
        if (!q)
                return ERR_PTR(-EBADFD);
        return &q->sock;
}
EXPORT_SYMBOL_GPL(macvtap_get_socket);

static int macvtap_init(void)
{
        int err;

        err = alloc_chrdev_region(&macvtap_major, 0,
                                  MACVTAP_NUM_DEVS, "macvtap");
        if (err)
                goto out1;

        cdev_init(&macvtap_cdev, &macvtap_fops);
        err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS);
        if (err)
                goto out2;

        macvtap_class = class_create(THIS_MODULE, "macvtap");
        if (IS_ERR(macvtap_class)) {
                err = PTR_ERR(macvtap_class);
                goto out3;
        }

        err = macvlan_link_register(&macvtap_link_ops);
        if (err)
                goto out4;

        return 0;

out4:
        class_unregister(macvtap_class);
out3:
        cdev_del(&macvtap_cdev);
out2:
        unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
out1:
        return err;
}
module_init(macvtap_init);

static void macvtap_exit(void)
{
        rtnl_link_unregister(&macvtap_link_ops);
        class_unregister(macvtap_class);
        cdev_del(&macvtap_cdev);
        unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
}
module_exit(macvtap_exit);

MODULE_ALIAS_RTNL_LINK("macvtap");
MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
MODULE_LICENSE("GPL");
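
/*
 * Example (illustrative sketch of the call shape, with msg/total_len
 * supplied by the caller): vhost-net is the intended in-kernel consumer
 * of macvtap_get_socket(); given a file reference for an open queue it
 * can move packets through the socket interface directly:
 *
 *	struct socket *sock = macvtap_get_socket(file);
 *	if (!IS_ERR(sock))
 *		err = sock->ops->sendmsg(NULL, sock, &msg, total_len);
 */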