#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/fs.h>

#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>

/*
 * A macvtap queue is the central object of this driver, it connects
 * an open character device to a macvlan interface. There can be
 * multiple queues on one interface, which map back to queues
 * implemented in hardware on the underlying device.
 *
 * macvtap_proto is used to allocate queues through the sock allocation
 * mechanism.
 *
 * TODO: multiqueue support is currently not implemented, even though
 * macvtap is basically prepared for it. We will need to add this
 * here as well as in virtio-net and qemu to get line rate on 10gbit
 * adapters from a guest.
 */
struct macvtap_queue {
	struct sock sk;
	struct socket sock;
	struct socket_wq wq;
	int vnet_hdr_sz;
	struct macvlan_dev *vlan;
	struct file *file;
	unsigned int flags;
};

static struct proto macvtap_proto = {
	.name = "macvtap",
	.owner = THIS_MODULE,
	.obj_size = sizeof (struct macvtap_queue),
};

/*
 * The minor number matches netdev->ifindex, so we need a potentially
 * large value. This also makes it possible to split the
 * tap functionality out again in the future by offering it
 * from other drivers besides macvtap. As long as every device
 * only has one tap, the interface numbers assure that the
 * device nodes are unique.
 */
static dev_t macvtap_major;
#define MACVTAP_NUM_DEVS 65536
static struct class *macvtap_class;
static struct cdev macvtap_cdev;

static const struct proto_ops macvtap_socket_ops;

/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or macvtap_lock is held.
 *
 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the file gets closed,
 * macvtap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
 */
static DEFINE_SPINLOCK(macvtap_lock);
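/*
 * Illustrative reader-side pattern for the scheme above (a sketch
 * only, not a function in this file): dereference the pointer under
 * rcu_read_lock(), and take a sock reference before dropping the
 * lock if the queue is used beyond the critical section:
 *
 *	rcu_read_lock();
 *	q = rcu_dereference(vlan->taps[i]);
 *	if (q)
 *		sock_hold(&q->sk);
 *	rcu_read_unlock();
 */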
/*
 * get_slot: return a [unused/occupied] slot in vlan->taps[]:
 *	- if 'q' is NULL, return the first empty slot;
 *	- otherwise, return the slot this pointer occupies.
 */
static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
{
	int i;

	for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
		if (rcu_dereference(vlan->taps[i]) == q)
			return i;
	}

	/* Should never happen */
	BUG_ON(1);
}

static int macvtap_set_queue(struct net_device *dev, struct file *file,
			     struct macvtap_queue *q)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	int index;
	int err = -EBUSY;

	spin_lock(&macvtap_lock);
	if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
		goto out;

	err = 0;
	index = get_slot(vlan, NULL);
	rcu_assign_pointer(q->vlan, vlan);
	rcu_assign_pointer(vlan->taps[index], q);
	sock_hold(&q->sk);

	q->file = file;
	file->private_data = q;

	vlan->numvtaps++;

out:
	spin_unlock(&macvtap_lock);
	return err;
}

/*
 * The file owning the queue got closed, give up both
 * the reference that the file holds as well as the
 * one from the macvlan_dev if that still exists.
 *
 * Using the spinlock makes sure that we don't get
 * to the queue again after destroying it.
 */
static void macvtap_put_queue(struct macvtap_queue *q)
{
	struct macvlan_dev *vlan;

	spin_lock(&macvtap_lock);
	vlan = rcu_dereference(q->vlan);
	if (vlan) {
		int index = get_slot(vlan, q);

		rcu_assign_pointer(vlan->taps[index], NULL);
		rcu_assign_pointer(q->vlan, NULL);
		sock_put(&q->sk);
		--vlan->numvtaps;
	}

	spin_unlock(&macvtap_lock);

	synchronize_rcu();
	sock_put(&q->sk);
}

/*
 * Select a queue based on the rxq of the device on which this packet
 * arrived. If the incoming device is not mq, calculate a flow hash
 * to select a queue. If all fails, find the first available queue.
 * Cache vlan->numvtaps since it can become zero during the execution
 * of this function.
 */
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
					       struct sk_buff *skb)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *tap = NULL;
	int numvtaps = vlan->numvtaps;
	__u32 rxq;

	if (!numvtaps)
		goto out;

	if (likely(skb_rx_queue_recorded(skb))) {
		rxq = skb_get_rx_queue(skb);

		while (unlikely(rxq >= numvtaps))
			rxq -= numvtaps;

		tap = rcu_dereference(vlan->taps[rxq]);
		if (tap)
			goto out;
	}

	/* Check if we can use flow to select a queue */
	rxq = skb_get_rxhash(skb);
	if (rxq) {
		tap = rcu_dereference(vlan->taps[rxq % numvtaps]);
		if (tap)
			goto out;
	}

	/* Everything failed - find first available queue */
	for (rxq = 0; rxq < MAX_MACVTAP_QUEUES; rxq++) {
		tap = rcu_dereference(vlan->taps[rxq]);
		if (tap)
			break;
	}

out:
	return tap;
}
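/*
 * Worked example for the selection above (numbers made up): with
 * numvtaps == 3, a packet recorded on hardware rx queue 7 wraps to
 * tap slot 1 (7 - 3 - 3); a packet without a recorded queue falls
 * back to rxhash % 3, and only when that slot is empty do we scan
 * for the first occupied one.
 */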
/*
 * The net_device is going away, give up the reference
 * that it holds on all queues and safely set the pointer
 * from the queues to NULL.
 */
static void macvtap_del_queues(struct net_device *dev)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
	int i, j = 0;

	/* macvtap_put_queue can free some slots, so go through all slots */
	spin_lock(&macvtap_lock);
	for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
		q = rcu_dereference(vlan->taps[i]);
		if (q) {
			qlist[j++] = q;
			rcu_assign_pointer(vlan->taps[i], NULL);
			rcu_assign_pointer(q->vlan, NULL);
			vlan->numvtaps--;
		}
	}
	BUG_ON(vlan->numvtaps != 0);
	spin_unlock(&macvtap_lock);

	synchronize_rcu();

	for (--j; j >= 0; j--)
		sock_put(&qlist[j]->sk);
}

/*
 * Forward happens for data that gets sent from one macvlan
 * endpoint to another one in bridge mode. We just take
 * the skb and put it into the receive queue.
 */
static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
{
	struct macvtap_queue *q = macvtap_get_queue(dev, skb);
	if (!q)
		goto drop;

	if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
		goto drop;

	skb_queue_tail(&q->sk.sk_receive_queue, skb);
	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
	return NET_RX_SUCCESS;

drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

/*
 * Receive is for data from the external interface (lowerdev).
 * In the case of macvtap we can treat it the same way as
 * forward, which macvlan cannot.
 */
static int macvtap_receive(struct sk_buff *skb)
{
	skb_push(skb, ETH_HLEN);
	return macvtap_forward(skb->dev, skb);
}

static int macvtap_newlink(struct net *src_net,
			   struct net_device *dev,
			   struct nlattr *tb[],
			   struct nlattr *data[])
{
	struct device *classdev;
	dev_t devt;
	int err;

	err = macvlan_common_newlink(src_net, dev, tb, data,
				     macvtap_receive, macvtap_forward);
	if (err)
		goto out;

	devt = MKDEV(MAJOR(macvtap_major), dev->ifindex);

	classdev = device_create(macvtap_class, &dev->dev, devt,
				 dev, "tap%d", dev->ifindex);
	if (IS_ERR(classdev)) {
		err = PTR_ERR(classdev);
		macvtap_del_queues(dev);
	}

out:
	return err;
}

static void macvtap_dellink(struct net_device *dev,
			    struct list_head *head)
{
	device_destroy(macvtap_class,
		       MKDEV(MAJOR(macvtap_major), dev->ifindex));

	macvtap_del_queues(dev);
	macvlan_dellink(dev, head);
}

static void macvtap_setup(struct net_device *dev)
{
	macvlan_common_setup(dev);
	dev->tx_queue_len = TUN_READQ_SIZE;
}

static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
	.kind		= "macvtap",
	.setup		= macvtap_setup,
	.newlink	= macvtap_newlink,
	.dellink	= macvtap_dellink,
};


static void macvtap_sock_write_space(struct sock *sk)
{
	wait_queue_head_t *wqueue;

	if (!sock_writeable(sk) ||
	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
		return;

	wqueue = sk_sleep(sk);
	if (wqueue && waitqueue_active(wqueue))
		wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}

static int macvtap_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct net_device *dev = dev_get_by_index(net, iminor(inode));
	struct macvtap_queue *q;
	int err;

	err = -ENODEV;
	if (!dev)
		goto out;

	/* check if this is a macvtap device */
	err = -EINVAL;
	if (dev->rtnl_link_ops != &macvtap_link_ops)
		goto out;

	err = -ENOMEM;
	q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
					     &macvtap_proto);
	if (!q)
		goto out;

	q->sock.wq = &q->wq;
	init_waitqueue_head(&q->wq.wait);
	q->sock.type = SOCK_RAW;
	q->sock.state = SS_CONNECTED;
	q->sock.file = file;
	q->sock.ops = &macvtap_socket_ops;
	sock_init_data(&q->sock, &q->sk);
	q->sk.sk_write_space = macvtap_sock_write_space;
	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);

	err = macvtap_set_queue(dev, file, q);
	if (err)
		sock_put(&q->sk);

out:
	if (dev)
		dev_put(dev);

	return err;
}
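/*
 * Userspace reaches macvtap_open() through the character device node
 * that macvtap_newlink() registered, e.g. (a sketch; the interface
 * index 5 is made up, error handling omitted):
 *
 *	int fd = open("/dev/tap5", O_RDWR);
 *
 * The minor number encodes the ifindex, which is how iminor(inode)
 * finds the right net_device.
 */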
static int macvtap_release(struct inode *inode, struct file *file)
{
	struct macvtap_queue *q = file->private_data;
	macvtap_put_queue(q);
	return 0;
}

static unsigned int macvtap_poll(struct file *file, poll_table * wait)
{
	struct macvtap_queue *q = file->private_data;
	unsigned int mask = POLLERR;

	if (!q)
		goto out;

	mask = 0;
	poll_wait(file, &q->wq.wait, wait);

	if (!skb_queue_empty(&q->sk.sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	if (sock_writeable(&q->sk) ||
	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
	     sock_writeable(&q->sk)))
		mask |= POLLOUT | POLLWRNORM;

out:
	return mask;
}

static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
						size_t len, size_t linear,
						int noblock, int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, prepad);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

/*
 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
 * be shared with the tun/tap driver.
 */
static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
				     struct virtio_net_hdr *vnet_hdr)
{
	unsigned short gso_type = 0;

	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			gso_type = SKB_GSO_TCPV6;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			gso_type = SKB_GSO_UDP;
			break;
		default:
			return -EINVAL;
		}

		if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
			gso_type |= SKB_GSO_TCP_ECN;

		if (vnet_hdr->gso_size == 0)
			return -EINVAL;
	}

	if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		if (!skb_partial_csum_set(skb, vnet_hdr->csum_start,
					  vnet_hdr->csum_offset))
			return -EINVAL;
	}

	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}
	return 0;
}
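/*
 * Example (values made up): a guest sending a TSO IPv4 frame passes a
 * header with gso_type == VIRTIO_NET_HDR_GSO_TCPV4, gso_size set to
 * the TCP MSS, flags == VIRTIO_NET_HDR_F_NEEDS_CSUM, and
 * csum_start/csum_offset locating the TCP checksum field, which
 * skb_partial_csum_set() above translates into CHECKSUM_PARTIAL.
 */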
static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
				   struct virtio_net_hdr *vnet_hdr)
{
	memset(vnet_hdr, 0, sizeof(*vnet_hdr));

	if (skb_is_gso(skb)) {
		struct skb_shared_info *sinfo = skb_shinfo(skb);

		/* This is a hint as to how much should be linear. */
		vnet_hdr->hdr_len = skb_headlen(skb);
		vnet_hdr->gso_size = sinfo->gso_size;
		if (sinfo->gso_type & SKB_GSO_TCPV4)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (sinfo->gso_type & SKB_GSO_TCPV6)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (sinfo->gso_type & SKB_GSO_UDP)
			vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (sinfo->gso_type & SKB_GSO_TCP_ECN)
			vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else
		vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		vnet_hdr->csum_start = skb_checksum_start_offset(skb);
		vnet_hdr->csum_offset = skb->csum_offset;
	} /* else everything is zero */

	return 0;
}


/* Get packet from user space buffer */
static ssize_t macvtap_get_user(struct macvtap_queue *q,
				const struct iovec *iv, size_t count,
				int noblock)
{
	struct sk_buff *skb;
	struct macvlan_dev *vlan;
	size_t len = count;
	int err;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int vnet_hdr_len = 0;

	if (q->flags & IFF_VNET_HDR) {
		vnet_hdr_len = q->vnet_hdr_sz;

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto err;
		len -= vnet_hdr_len;

		err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
					  sizeof(vnet_hdr));
		if (err < 0)
			goto err;
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		     vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
							vnet_hdr.hdr_len)
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						vnet_hdr.csum_offset + 2;
		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto err;
	}

	err = -EINVAL;
	if (unlikely(len < ETH_HLEN))
		goto err;

	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
				noblock, &err);
	if (!skb)
		goto err;

	err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
	if (err)
		goto err_kfree;

	skb_set_network_header(skb, ETH_HLEN);
	skb_reset_mac_header(skb);
	skb->protocol = eth_hdr(skb)->h_proto;

	if (vnet_hdr_len) {
		err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr);
		if (err)
			goto err_kfree;
	}

	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		macvlan_start_xmit(skb, vlan->dev);
	else
		kfree_skb(skb);
	rcu_read_unlock_bh();

	return count;

err_kfree:
	kfree_skb(skb);

err:
	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		vlan->dev->stats.tx_dropped++;
	rcu_read_unlock_bh();

	return err;
}

static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
				 unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;

	return macvtap_get_user(q, iv, iov_length(iv, count),
				file->f_flags & O_NONBLOCK);
}
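/*
 * Note on the write() semantics above: a frame is consumed whole or
 * not at all, so macvtap_get_user() returns either the full 'count'
 * or a negative error, never a short write.
 */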
/* Put packet to the user space buffer */
static ssize_t macvtap_put_user(struct macvtap_queue *q,
				const struct sk_buff *skb,
				const struct iovec *iv, int len)
{
	struct macvlan_dev *vlan;
	int ret;
	int vnet_hdr_len = 0;

	if (q->flags & IFF_VNET_HDR) {
		struct virtio_net_hdr vnet_hdr;
		vnet_hdr_len = q->vnet_hdr_sz;
		if ((len -= vnet_hdr_len) < 0)
			return -EINVAL;

		ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
		if (ret)
			return ret;

		if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
			return -EFAULT;
	}

	len = min_t(int, skb->len, len);

	ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len);

	rcu_read_lock_bh();
	vlan = rcu_dereference(q->vlan);
	if (vlan)
		macvlan_count_rx(vlan, len, ret == 0, 0);
	rcu_read_unlock_bh();

	return ret ? ret : (len + vnet_hdr_len);
}

static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
			       const struct iovec *iv, unsigned long len,
			       int noblock)
{
	DECLARE_WAITQUEUE(wait, current);
	struct sk_buff *skb;
	ssize_t ret = 0;

	add_wait_queue(sk_sleep(&q->sk), &wait);
	while (len) {
		current->state = TASK_INTERRUPTIBLE;

		/* Read frames from the queue */
		skb = skb_dequeue(&q->sk.sk_receive_queue);
		if (!skb) {
			if (noblock) {
				ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}
			/* Nothing to read, let's sleep */
			schedule();
			continue;
		}
		ret = macvtap_put_user(q, skb, iv, len);
		kfree_skb(skb);
		break;
	}

	current->state = TASK_RUNNING;
	remove_wait_queue(sk_sleep(&q->sk), &wait);
	return ret;
}

static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
				unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct macvtap_queue *q = file->private_data;
	ssize_t len, ret = 0;

	len = iov_length(iv, count);
	if (len < 0) {
		ret = -EINVAL;
		goto out;
	}

	ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
	ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
out:
	return ret;
}
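/*
 * Typical userspace setup against the tun-compatible ioctl interface
 * below (a sketch only; the fd comes from open() as shown earlier,
 * error handling is omitted, and the header size shown is just the
 * default):
 *
 *	struct ifreq ifr = {
 *		.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR,
 *	};
 *	int hdr_sz = sizeof(struct virtio_net_hdr);
 *
 *	ioctl(fd, TUNSETIFF, &ifr);
 *	ioctl(fd, TUNSETVNETHDRSZ, &hdr_sz);
 */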
/*
 * provide compatibility with generic tun/tap interface
 */
static long macvtap_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct macvtap_queue *q = file->private_data;
	struct macvlan_dev *vlan;
	void __user *argp = (void __user *)arg;
	struct ifreq __user *ifr = argp;
	unsigned int __user *up = argp;
	unsigned int u;
	int __user *sp = argp;
	int s;
	int ret;

	switch (cmd) {
	case TUNSETIFF:
		/* ignore the name, just look at flags */
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;

		ret = 0;
		if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
			ret = -EINVAL;
		else
			q->flags = u;

		return ret;

	case TUNGETIFF:
		rcu_read_lock_bh();
		vlan = rcu_dereference(q->vlan);
		if (vlan)
			dev_hold(vlan->dev);
		rcu_read_unlock_bh();

		if (!vlan)
			return -ENOLINK;

		ret = 0;
		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
		    put_user(q->flags, &ifr->ifr_flags))
			ret = -EFAULT;
		dev_put(vlan->dev);
		return ret;

	case TUNGETFEATURES:
		if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up))
			return -EFAULT;
		return 0;

	case TUNSETSNDBUF:
		if (get_user(u, up))
			return -EFAULT;

		q->sk.sk_sndbuf = u;
		return 0;

	case TUNGETVNETHDRSZ:
		s = q->vnet_hdr_sz;
		if (put_user(s, sp))
			return -EFAULT;
		return 0;

	case TUNSETVNETHDRSZ:
		if (get_user(s, sp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;

		q->vnet_hdr_sz = s;
		return 0;

	case TUNSETOFFLOAD:
		/* let the user check for future flags */
		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
			    TUN_F_TSO_ECN | TUN_F_UFO))
			return -EINVAL;

		/* TODO: only accept frames with the features that
		 * got enabled for forwarded frames */
		if (!(q->flags & IFF_VNET_HDR))
			return -EINVAL;
		return 0;

	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_COMPAT
static long macvtap_compat_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static const struct file_operations macvtap_fops = {
	.owner		= THIS_MODULE,
	.open		= macvtap_open,
	.release	= macvtap_release,
	.aio_read	= macvtap_aio_read,
	.aio_write	= macvtap_aio_write,
	.poll		= macvtap_poll,
	.llseek		= no_llseek,
	.unlocked_ioctl	= macvtap_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= macvtap_compat_ioctl,
#endif
};

static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
			   struct msghdr *m, size_t total_len)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	return macvtap_get_user(q, m->msg_iov, total_len,
				m->msg_flags & MSG_DONTWAIT);
}

static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
			   struct msghdr *m, size_t total_len,
			   int flags)
{
	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
	int ret;
	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
		return -EINVAL;
	ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
			      flags & MSG_DONTWAIT);
	if (ret > total_len) {
		m->msg_flags |= MSG_TRUNC;
		ret = flags & MSG_TRUNC ? ret : total_len;
	}
	return ret;
}

/* Ops structure to mimic raw sockets with tun */
static const struct proto_ops macvtap_socket_ops = {
	.sendmsg = macvtap_sendmsg,
	.recvmsg = macvtap_recvmsg,
};

/* Get an underlying socket object from tun file. Returns error unless file is
 * attached to a device. The returned object works like a packet socket, it
 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
 * holding a reference to the file for as long as the socket is in use. */
struct socket *macvtap_get_socket(struct file *file)
{
	struct macvtap_queue *q;
	if (file->f_op != &macvtap_fops)
		return ERR_PTR(-EINVAL);
	q = file->private_data;
	if (!q)
		return ERR_PTR(-EBADFD);
	return &q->sock;
}
EXPORT_SYMBOL_GPL(macvtap_get_socket);
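/*
 * Example caller pattern for macvtap_get_socket() (a sketch in the
 * style of an in-kernel user such as vhost-net; the msghdr setup is
 * omitted):
 *
 *	struct socket *sock = macvtap_get_socket(file);
 *	if (IS_ERR(sock))
 *		return PTR_ERR(sock);
 *	err = sock->ops->sendmsg(NULL, sock, &msg, len);
 */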
static int macvtap_init(void)
{
	int err;

	err = alloc_chrdev_region(&macvtap_major, 0,
				  MACVTAP_NUM_DEVS, "macvtap");
	if (err)
		goto out1;

	cdev_init(&macvtap_cdev, &macvtap_fops);
	err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS);
	if (err)
		goto out2;

	macvtap_class = class_create(THIS_MODULE, "macvtap");
	if (IS_ERR(macvtap_class)) {
		err = PTR_ERR(macvtap_class);
		goto out3;
	}

	err = macvlan_link_register(&macvtap_link_ops);
	if (err)
		goto out4;

	return 0;

out4:
	class_unregister(macvtap_class);
out3:
	cdev_del(&macvtap_cdev);
out2:
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
out1:
	return err;
}
module_init(macvtap_init);

static void macvtap_exit(void)
{
	rtnl_link_unregister(&macvtap_link_ops);
	class_unregister(macvtap_class);
	cdev_del(&macvtap_cdev);
	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
}
module_exit(macvtap_exit);

MODULE_ALIAS_RTNL_LINK("macvtap");
MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
MODULE_LICENSE("GPL");