#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/fs.h>

#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>

/*
 * A macvtap queue is the central object of this driver: it connects
 * an open character device to a macvlan interface. There can be
 * multiple queues on one interface, which map back to queues
 * implemented in hardware on the underlying device.
 *
 * macvtap_proto is used to allocate queues through the sock allocation
 * mechanism.
 *
 * TODO: multiqueue support is currently not implemented, even though
 * macvtap is basically prepared for that. We will need to add this
 * here as well as in virtio-net and qemu to get line rate on 10gbit
 * adapters from a guest.
 */
struct macvtap_queue {
	struct sock sk;
	struct socket sock;
	struct socket_wq wq;
	int vnet_hdr_sz;
	struct macvlan_dev *vlan;
	struct file *file;
	unsigned int flags;
};

static struct proto macvtap_proto = {
	.name = "macvtap",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct macvtap_queue),
};

/*
 * Minor number matches netdev->ifindex, so we need a potentially
 * large value. This also makes it possible to split the
 * tap functionality out again in the future by offering it
 * from other drivers besides macvtap. As long as every device
 * only has one tap, the interface numbers assure that the
 * device nodes are unique.
 */
static dev_t macvtap_major;
#define MACVTAP_NUM_DEVS 65536
static struct class *macvtap_class;
static struct cdev macvtap_cdev;

static const struct proto_ops macvtap_socket_ops;

/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or macvtap_lock is held.
 *
 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the file gets closed,
 * macvtap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
 */
static DEFINE_SPINLOCK(macvtap_lock);

/*
 * get_slot: return a [unused/occupied] slot in vlan->taps[]:
 *	- if 'q' is NULL, return the first empty slot;
 *	- otherwise, return the slot this pointer occupies.
 */
static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
{
	int i;

	for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
		if (rcu_dereference(vlan->taps[i]) == q)
			return i;
	}

	/* Should never happen */
	BUG_ON(1);
}
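/*
 * Bind an open file to the first free slot in vlan->taps[].
 * The macvlan side takes its own reference on the queue through
 * sock_hold(); fails with -EBUSY once all MAX_MACVTAP_QUEUES
 * slots are in use.
 */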
static int macvtap_set_queue(struct net_device *dev, struct file *file,
			     struct macvtap_queue *q)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	int index;
	int err = -EBUSY;

	spin_lock(&macvtap_lock);
	if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
		goto out;

	err = 0;
	index = get_slot(vlan, NULL);
	rcu_assign_pointer(q->vlan, vlan);
	rcu_assign_pointer(vlan->taps[index], q);
	sock_hold(&q->sk);

	q->file = file;
	file->private_data = q;

	vlan->numvtaps++;

out:
	spin_unlock(&macvtap_lock);
	return err;
}

/*
 * The file owning the queue got closed, give up both
 * the reference that the file holds as well as the
 * one from the macvlan_dev if that still exists.
 *
 * Using the spinlock makes sure that we don't get
 * to the queue again after destroying it.
 */
static void macvtap_put_queue(struct macvtap_queue *q)
{
	struct macvlan_dev *vlan;

	spin_lock(&macvtap_lock);
	vlan = rcu_dereference(q->vlan);
	if (vlan) {
		int index = get_slot(vlan, q);

		rcu_assign_pointer(vlan->taps[index], NULL);
		rcu_assign_pointer(q->vlan, NULL);
		sock_put(&q->sk);
		--vlan->numvtaps;
	}

	spin_unlock(&macvtap_lock);

	synchronize_rcu();
	sock_put(&q->sk);
}

/*
 * Select a queue based on the rxq of the device on which this packet
 * arrived. If the incoming device is not mq, calculate a flow hash
 * to select a queue. If all fails, find the first available queue.
 * Cache vlan->numvtaps since it can become zero during the execution
 * of this function.
 */
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
					       struct sk_buff *skb)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *tap = NULL;
	int numvtaps = vlan->numvtaps;
	__u32 rxq;

	if (!numvtaps)
		goto out;

	if (likely(skb_rx_queue_recorded(skb))) {
		rxq = skb_get_rx_queue(skb);

		while (unlikely(rxq >= numvtaps))
			rxq -= numvtaps;

		tap = rcu_dereference(vlan->taps[rxq]);
		if (tap)
			goto out;
	}

	/* Check if we can use flow to select a queue */
	rxq = skb_get_rxhash(skb);
	if (rxq) {
		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
		if (tap)
			goto out;
	}

	/* Everything failed - find first available queue */
	for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) {
		tap = rcu_dereference(vlan->taps[rxq]);
		if (tap)
			break;
	}

out:
	return tap;
}

/*
 * The net_device is going away, give up the reference
 * that it holds on all queues and safely set the pointer
 * from the queues to NULL.
 */
static void macvtap_del_queues(struct net_device *dev)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
	int i, j = 0;

	/* macvtap_put_queue can free some slots, so go through all slots */
	spin_lock(&macvtap_lock);
	for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
		q = rcu_dereference(vlan->taps[i]);
		if (q) {
			qlist[j++] = q;
			rcu_assign_pointer(vlan->taps[i], NULL);
			rcu_assign_pointer(q->vlan, NULL);
			vlan->numvtaps--;
		}
	}
	BUG_ON(vlan->numvtaps != 0);
	spin_unlock(&macvtap_lock);

	synchronize_rcu();

	for (--j; j >= 0; j--)
		sock_put(&qlist[j]->sk);
}

/*
 * Forward happens for data that gets sent from one macvlan
 * endpoint to another one in bridge mode. We just take
 * the skb and put it into the receive queue.
 */
static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
{
	struct macvtap_queue *q = macvtap_get_queue(dev, skb);
	if (!q)
		goto drop;

	if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
		goto drop;

	skb_queue_tail(&q->sk.sk_receive_queue, skb);
	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
	return NET_RX_SUCCESS;

drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

/*
 * Receive is for data from the external interface (lowerdev);
 * in the case of macvtap, we can treat that the same way as
 * forward, which macvlan cannot.
 */
static int macvtap_receive(struct sk_buff *skb)
{
	skb_push(skb, ETH_HLEN);
	return macvtap_forward(skb->dev, skb);
}
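/*
 * Creating a new link: set up the macvlan side first, then
 * register the matching character device. The minor number is
 * the new interface's ifindex, so the node is named tap<ifindex>.
 */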
static int macvtap_newlink(struct net *src_net,
			   struct net_device *dev,
			   struct nlattr *tb[],
			   struct nlattr *data[])
{
	struct device *classdev;
	dev_t devt;
	int err;

	err = macvlan_common_newlink(src_net, dev, tb, data,
				     macvtap_receive, macvtap_forward);
	if (err)
		goto out;

	devt = MKDEV(MAJOR(macvtap_major), dev->ifindex);

	classdev = device_create(macvtap_class, &dev->dev, devt,
				 dev, "tap%d", dev->ifindex);
	if (IS_ERR(classdev)) {
		err = PTR_ERR(classdev);
		macvtap_del_queues(dev);
	}

out:
	return err;
}

static void macvtap_dellink(struct net_device *dev,
			    struct list_head *head)
{
	device_destroy(macvtap_class,
		       MKDEV(MAJOR(macvtap_major), dev->ifindex));

	macvtap_del_queues(dev);
	macvlan_dellink(dev, head);
}

static void macvtap_setup(struct net_device *dev)
{
	macvlan_common_setup(dev);
	dev->tx_queue_len = TUN_READQ_SIZE;
}

static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
	.kind		= "macvtap",
	.setup		= macvtap_setup,
	.newlink	= macvtap_newlink,
	.dellink	= macvtap_dellink,
};


static void macvtap_sock_write_space(struct sock *sk)
{
	wait_queue_head_t *wqueue;

	if (!sock_writeable(sk) ||
	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
		return;

	wqueue = sk_sleep(sk);
	if (wqueue && waitqueue_active(wqueue))
		wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}
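/*
 * Opening the character device: the minor number selects the
 * interface by ifindex. Allocate a queue, initialize its embedded
 * socket and attach it to both the file and the device.
 */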
static int macvtap_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct net_device *dev = dev_get_by_index(net, iminor(inode));
	struct macvtap_queue *q;
	int err;

	err = -ENODEV;
	if (!dev)
		goto out;

	/* check if this is a macvtap device */
	err = -EINVAL;
	if (dev->rtnl_link_ops != &macvtap_link_ops)
		goto out;

	err = -ENOMEM;
	q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
					     &macvtap_proto);
	if (!q)
		goto out;

	q->sock.wq = &q->wq;
	init_waitqueue_head(&q->wq.wait);
	q->sock.type = SOCK_RAW;
	q->sock.state = SS_CONNECTED;
	q->sock.file = file;
	q->sock.ops = &macvtap_socket_ops;
	sock_init_data(&q->sock, &q->sk);
	q->sk.sk_write_space = macvtap_sock_write_space;
	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);

	err = macvtap_set_queue(dev, file, q);
	if (err)
		sock_put(&q->sk);

out:
	if (dev)
		dev_put(dev);

	return err;
}

static int macvtap_release(struct inode *inode, struct file *file)
{
	struct macvtap_queue *q = file->private_data;
	macvtap_put_queue(q);
	return 0;
}

static unsigned int macvtap_poll(struct file *file, poll_table *wait)
{
	struct macvtap_queue *q = file->private_data;
	unsigned int mask = POLLERR;

	if (!q)
		goto out;

	mask = 0;
	poll_wait(file, &q->wq.wait, wait);

	if (!skb_queue_empty(&q->sk.sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	if (sock_writeable(&q->sk) ||
	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
	     sock_writeable(&q->sk)))
		mask |= POLLOUT | POLLWRNORM;

out:
	return mask;
}
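/*
 * Allocate an skb for one frame coming from user space, keeping
 * up to 'linear' bytes in the linear part and the rest in paged
 * fragments.
 */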
static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
						size_t len, size_t linear,
						int noblock, int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, prepad);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

/*
 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
 * be shared with the tun/tap driver.
 */
static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
				     struct virtio_net_hdr *vnet_hdr)
{
	unsigned short gso_type = 0;

	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			gso_type = SKB_GSO_TCPV6;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			gso_type = SKB_GSO_UDP;
			break;
		default:
			return -EINVAL;
		}

		if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
			gso_type |= SKB_GSO_TCP_ECN;

		if (vnet_hdr->gso_size == 0)
			return -EINVAL;
	}

	if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		if (!skb_partial_csum_set(skb, vnet_hdr->csum_start,
					  vnet_hdr->csum_offset))
			return -EINVAL;
	}

	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}
	return 0;
}

static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
				   struct virtio_net_hdr *vnet_hdr)
{
	memset(vnet_hdr, 0, sizeof(*vnet_hdr));

	if (skb_is_gso(skb)) {
		struct skb_shared_info *sinfo = skb_shinfo(skb);

		/* This is a hint as to how much should be linear. */
		vnet_hdr->hdr_len = skb_headlen(skb);
		vnet_hdr->gso_size = sinfo->gso_size;
		if (sinfo->gso_type & SKB_GSO_TCPV4)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (sinfo->gso_type & SKB_GSO_TCPV6)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (sinfo->gso_type & SKB_GSO_UDP)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (sinfo->gso_type & SKB_GSO_TCP_ECN)
			vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else
		vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		vnet_hdr->csum_start = skb_checksum_start_offset(skb);
		vnet_hdr->csum_offset = skb->csum_offset;
	} /* else everything is zero */

	return 0;
}


/* Get packet from user space buffer */
static ssize_t macvtap_get_user(struct macvtap_queue *q,
				const struct iovec *iv, size_t count,
				int noblock)
{
	struct sk_buff *skb;
	struct macvlan_dev *vlan;
	size_t len = count;
	int err;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int vnet_hdr_len = 0;

	if (q->flags & IFF_VNET_HDR) {
		vnet_hdr_len = q->vnet_hdr_sz;

		/* len is size_t, so check before subtracting;
		 * "(len -= vnet_hdr_len) < 0" could never be true. */
		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto err;
		len -= vnet_hdr_len;

		err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
					  sizeof(vnet_hdr));
		if (err < 0)
			goto err;
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		     vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
							vnet_hdr.hdr_len)
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						vnet_hdr.csum_offset + 2;
		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto err;
	}

	err = -EINVAL;
	if (unlikely(len < ETH_HLEN))
		goto err;

	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
				noblock, &err);
	if (!skb)
		goto err;

	err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
	if (err)
		goto err_kfree;

	skb_set_network_header(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb->protocol = eth_hdr(skb)->h_proto;

	if (vnet_hdr_len) {
		err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr);
		if (err)
			goto err_kfree;
	}

	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		macvlan_start_xmit(skb, vlan->dev);
	else
		kfree_skb(skb);
	rcu_read_unlock_bh();

	return count;

err_kfree:
	kfree_skb(skb);

err:
	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		vlan->dev->stats.tx_dropped++;
	rcu_read_unlock_bh();

	return err;
}

static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
				 unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;

	return macvtap_get_user(q, iv, iov_length(iv, count),
				file->f_flags & O_NONBLOCK);
}

/* Put packet to the user space buffer */
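/*
 * In IFF_VNET_HDR mode a virtio_net_hdr describing offloads is
 * written in front of the frame data; the return value counts
 * both the header and the copied frame bytes.
 */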
static ssize_t macvtap_put_user(struct macvtap_queue *q,
				const struct sk_buff *skb,
				const struct iovec *iv, int len)
{
	struct macvlan_dev *vlan;
	int ret;
	int vnet_hdr_len = 0;

	if (q->flags & IFF_VNET_HDR) {
		struct virtio_net_hdr vnet_hdr;
		vnet_hdr_len = q->vnet_hdr_sz;
		if ((len -= vnet_hdr_len) < 0)
			return -EINVAL;

		ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
		if (ret)
			return ret;

		if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
			return -EFAULT;
	}

	len = min_t(int, skb->len, len);

	ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len);

	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		macvlan_count_rx(vlan, len, ret == 0, 0);
	rcu_read_unlock_bh();

	return ret ? ret : (len + vnet_hdr_len);
}
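/*
 * Blocking read: wait on the queue's socket wait queue until a
 * frame is available, honouring O_NONBLOCK and pending signals.
 */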
static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
			       const struct iovec *iv, unsigned long len,
			       int noblock)
{
	DECLARE_WAITQUEUE(wait, current);
	struct sk_buff *skb;
	ssize_t ret = 0;

	add_wait_queue(sk_sleep(&q->sk), &wait);
	while (len) {
		current->state = TASK_INTERRUPTIBLE;

		/* Read frames from the queue */
		skb = skb_dequeue(&q->sk.sk_receive_queue);
		if (!skb) {
			if (noblock) {
				ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}
			/* Nothing to read, let's sleep */
			schedule();
			continue;
		}
		ret = macvtap_put_user(q, skb, iv, len);
		kfree_skb(skb);
		break;
	}

	current->state = TASK_RUNNING;
	remove_wait_queue(sk_sleep(&q->sk), &wait);
	return ret;
}

static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
				unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;
	ssize_t len, ret = 0;

	len = iov_length(iv, count);
	if (len < 0) {
		ret = -EINVAL;
		goto out;
	}

	ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
	ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
out:
	return ret;
}

/*
 * provide compatibility with generic tun/tap interface
 */
static long macvtap_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct macvtap_queue *q = file->private_data;
	struct macvlan_dev *vlan;
	void __user *argp = (void __user *)arg;
	struct ifreq __user *ifr = argp;
	unsigned int __user *up = argp;
	unsigned int u;
	int __user *sp = argp;
	int s;
	int ret;

	switch (cmd) {
	case TUNSETIFF:
		/* ignore the name, just look at flags */
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;

		ret = 0;
		if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
			ret = -EINVAL;
		else
			q->flags = u;

		return ret;

	case TUNGETIFF:
		rcu_read_lock_bh();
		vlan = rcu_dereference(q->vlan);
		if (vlan)
			dev_hold(vlan->dev);
		rcu_read_unlock_bh();

		if (!vlan)
			return -ENOLINK;

		/* use the held vlan->dev; q->vlan may be cleared
		 * concurrently once the rcu read section is left */
		ret = 0;
		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
		    put_user(q->flags, &ifr->ifr_flags))
			ret = -EFAULT;
		dev_put(vlan->dev);
		return ret;

	case TUNGETFEATURES:
		if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up))
			return -EFAULT;
		return 0;

	case TUNSETSNDBUF:
		if (get_user(u, up))
			return -EFAULT;

		q->sk.sk_sndbuf = u;
		return 0;

	case TUNGETVNETHDRSZ:
		s = q->vnet_hdr_sz;
		if (put_user(s, sp))
			return -EFAULT;
		return 0;

	case TUNSETVNETHDRSZ:
		if (get_user(s, sp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;

		q->vnet_hdr_sz = s;
		return 0;

	case TUNSETOFFLOAD:
		/* let the user check for future flags */
		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
			    TUN_F_TSO_ECN | TUN_F_UFO))
			return -EINVAL;

		/* TODO: only accept frames with the features that
			 got enabled for forwarded frames */
		if (!(q->flags & IFF_VNET_HDR))
			return -EINVAL;
		return 0;

	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_COMPAT
static long macvtap_compat_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static const struct file_operations macvtap_fops = {
	.owner		= THIS_MODULE,
	.open		= macvtap_open,
	.release	= macvtap_release,
	.aio_read	= macvtap_aio_read,
	.aio_write	= macvtap_aio_write,
	.poll		= macvtap_poll,
	.llseek		= no_llseek,
	.unlocked_ioctl	= macvtap_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= macvtap_compat_ioctl,
#endif
};
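/*
 * Socket-style entry points; these mirror the file read/write
 * paths and are reachable for in-kernel users through
 * macvtap_get_socket() below.
 */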
static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
			   struct msghdr *m, size_t total_len)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	return macvtap_get_user(q, m->msg_iov, total_len,
				m->msg_flags & MSG_DONTWAIT);
}

static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
			   struct msghdr *m, size_t total_len,
			   int flags)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	int ret;
	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
		return -EINVAL;
	ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
			      flags & MSG_DONTWAIT);
	if (ret > total_len) {
		m->msg_flags |= MSG_TRUNC;
		ret = flags & MSG_TRUNC ? ret : total_len;
	}
	return ret;
}

/* Ops structure to mimic raw sockets with tun */
static const struct proto_ops macvtap_socket_ops = {
	.sendmsg = macvtap_sendmsg,
	.recvmsg = macvtap_recvmsg,
};

/* Get an underlying socket object from a macvtap file. Returns error unless
 * file is attached to a device. The returned object works like a packet
 * socket, it can be used for sock_sendmsg/sock_recvmsg. The caller is
 * responsible for holding a reference to the file for as long as the socket
 * is in use. */
struct socket *macvtap_get_socket(struct file *file)
{
	struct macvtap_queue *q;
	if (file->f_op != &macvtap_fops)
		return ERR_PTR(-EINVAL);
	q = file->private_data;
	if (!q)
		return ERR_PTR(-EBADFD);
	return &q->sock;
}
EXPORT_SYMBOL_GPL(macvtap_get_socket);

static int macvtap_init(void)
{
	int err;

	err = alloc_chrdev_region(&macvtap_major, 0,
				  MACVTAP_NUM_DEVS, "macvtap");
	if (err)
		goto out1;

	cdev_init(&macvtap_cdev, &macvtap_fops);
	err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS);
	if (err)
		goto out2;

	macvtap_class = class_create(THIS_MODULE, "macvtap");
	if (IS_ERR(macvtap_class)) {
		err = PTR_ERR(macvtap_class);
		goto out3;
	}

	err = macvlan_link_register(&macvtap_link_ops);
	if (err)
		goto out4;

	return 0;

out4:
	class_unregister(macvtap_class);
out3:
	cdev_del(&macvtap_cdev);
out2:
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
out1:
	return err;
}
module_init(macvtap_init);

static void macvtap_exit(void)
{
	rtnl_link_unregister(&macvtap_link_ops);
	class_unregister(macvtap_class);
	cdev_del(&macvtap_cdev);
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
}
module_exit(macvtap_exit);

MODULE_ALIAS_RTNL_LINK("macvtap");
MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
MODULE_LICENSE("GPL");