1 #include <linux/etherdevice.h> 2 #include <linux/if_macvlan.h> 3 #include <linux/interrupt.h> 4 #include <linux/nsproxy.h> 5 #include <linux/compat.h> 6 #include <linux/if_tun.h> 7 #include <linux/module.h> 8 #include <linux/skbuff.h> 9 #include <linux/cache.h> 10 #include <linux/sched.h> 11 #include <linux/types.h> 12 #include <linux/slab.h> 13 #include <linux/init.h> 14 #include <linux/wait.h> 15 #include <linux/cdev.h> 16 #include <linux/fs.h> 17 18 #include <net/net_namespace.h> 19 #include <net/rtnetlink.h> 20 #include <net/sock.h> 21 #include <linux/virtio_net.h> 22 23 /* 24 * A macvtap queue is the central object of this driver, it connects 25 * an open character device to a macvlan interface. There can be 26 * multiple queues on one interface, which map back to queues 27 * implemented in hardware on the underlying device. 28 * 29 * macvtap_proto is used to allocate queues through the sock allocation 30 * mechanism. 31 * 32 * TODO: multiqueue support is currently not implemented, even though 33 * macvtap is basically prepared for that. We will need to add this 34 * here as well as in virtio-net and qemu to get line rate on 10gbit 35 * adapters from a guest. 36 */ 37 struct macvtap_queue { 38 struct sock sk; 39 struct socket sock; 40 struct macvlan_dev *vlan; 41 struct file *file; 42 unsigned int flags; 43 }; 44 45 static struct proto macvtap_proto = { 46 .name = "macvtap", 47 .owner = THIS_MODULE, 48 .obj_size = sizeof (struct macvtap_queue), 49 }; 50 51 /* 52 * Minor number matches netdev->ifindex, so need a potentially 53 * large value. This also makes it possible to split the 54 * tap functionality out again in the future by offering it 55 * from other drivers besides macvtap. As long as every device 56 * only has one tap, the interface numbers assure that the 57 * device nodes are unique. 58 */ 59 static unsigned int macvtap_major; 60 #define MACVTAP_NUM_DEVS 65536 61 static struct class *macvtap_class; 62 static struct cdev macvtap_cdev; 63 64 static const struct proto_ops macvtap_socket_ops; 65 66 /* 67 * RCU usage: 68 * The macvtap_queue and the macvlan_dev are loosely coupled, the 69 * pointers from one to the other can only be read while rcu_read_lock 70 * or macvtap_lock is held. 71 * 72 * Both the file and the macvlan_dev hold a reference on the macvtap_queue 73 * through sock_hold(&q->sk). When the macvlan_dev goes away first, 74 * q->vlan becomes inaccessible. When the files gets closed, 75 * macvtap_get_queue() fails. 76 * 77 * There may still be references to the struct sock inside of the 78 * queue from outbound SKBs, but these never reference back to the 79 * file or the dev. The data structure is freed through __sk_free 80 * when both our references and any pending SKBs are gone. 81 */ 82 static DEFINE_SPINLOCK(macvtap_lock); 83 84 /* 85 * Choose the next free queue, for now there is only one 86 */ 87 static int macvtap_set_queue(struct net_device *dev, struct file *file, 88 struct macvtap_queue *q) 89 { 90 struct macvlan_dev *vlan = netdev_priv(dev); 91 int err = -EBUSY; 92 93 spin_lock(&macvtap_lock); 94 if (rcu_dereference(vlan->tap)) 95 goto out; 96 97 err = 0; 98 rcu_assign_pointer(q->vlan, vlan); 99 rcu_assign_pointer(vlan->tap, q); 100 sock_hold(&q->sk); 101 102 q->file = file; 103 file->private_data = q; 104 105 out: 106 spin_unlock(&macvtap_lock); 107 return err; 108 } 109 110 /* 111 * The file owning the queue got closed, give up both 112 * the reference that the files holds as well as the 113 * one from the macvlan_dev if that still exists. 114 * 115 * Using the spinlock makes sure that we don't get 116 * to the queue again after destroying it. 117 */ 118 static void macvtap_put_queue(struct macvtap_queue *q) 119 { 120 struct macvlan_dev *vlan; 121 122 spin_lock(&macvtap_lock); 123 vlan = rcu_dereference(q->vlan); 124 if (vlan) { 125 rcu_assign_pointer(vlan->tap, NULL); 126 rcu_assign_pointer(q->vlan, NULL); 127 sock_put(&q->sk); 128 } 129 130 spin_unlock(&macvtap_lock); 131 132 synchronize_rcu(); 133 sock_put(&q->sk); 134 } 135 136 /* 137 * Since we only support one queue, just dereference the pointer. 138 */ 139 static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, 140 struct sk_buff *skb) 141 { 142 struct macvlan_dev *vlan = netdev_priv(dev); 143 144 return rcu_dereference(vlan->tap); 145 } 146 147 /* 148 * The net_device is going away, give up the reference 149 * that it holds on the queue (all the queues one day) 150 * and safely set the pointer from the queues to NULL. 151 */ 152 static void macvtap_del_queues(struct net_device *dev) 153 { 154 struct macvlan_dev *vlan = netdev_priv(dev); 155 struct macvtap_queue *q; 156 157 spin_lock(&macvtap_lock); 158 q = rcu_dereference(vlan->tap); 159 if (!q) { 160 spin_unlock(&macvtap_lock); 161 return; 162 } 163 164 rcu_assign_pointer(vlan->tap, NULL); 165 rcu_assign_pointer(q->vlan, NULL); 166 spin_unlock(&macvtap_lock); 167 168 synchronize_rcu(); 169 sock_put(&q->sk); 170 } 171 172 /* 173 * Forward happens for data that gets sent from one macvlan 174 * endpoint to another one in bridge mode. We just take 175 * the skb and put it into the receive queue. 176 */ 177 static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) 178 { 179 struct macvtap_queue *q = macvtap_get_queue(dev, skb); 180 if (!q) 181 return -ENOLINK; 182 183 skb_queue_tail(&q->sk.sk_receive_queue, skb); 184 wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); 185 return 0; 186 } 187 188 /* 189 * Receive is for data from the external interface (lowerdev), 190 * in case of macvtap, we can treat that the same way as 191 * forward, which macvlan cannot. 192 */ 193 static int macvtap_receive(struct sk_buff *skb) 194 { 195 skb_push(skb, ETH_HLEN); 196 return macvtap_forward(skb->dev, skb); 197 } 198 199 static int macvtap_newlink(struct net *src_net, 200 struct net_device *dev, 201 struct nlattr *tb[], 202 struct nlattr *data[]) 203 { 204 struct device *classdev; 205 dev_t devt; 206 int err; 207 208 err = macvlan_common_newlink(src_net, dev, tb, data, 209 macvtap_receive, macvtap_forward); 210 if (err) 211 goto out; 212 213 devt = MKDEV(MAJOR(macvtap_major), dev->ifindex); 214 215 classdev = device_create(macvtap_class, &dev->dev, devt, 216 dev, "tap%d", dev->ifindex); 217 if (IS_ERR(classdev)) { 218 err = PTR_ERR(classdev); 219 macvtap_del_queues(dev); 220 } 221 222 out: 223 return err; 224 } 225 226 static void macvtap_dellink(struct net_device *dev, 227 struct list_head *head) 228 { 229 device_destroy(macvtap_class, 230 MKDEV(MAJOR(macvtap_major), dev->ifindex)); 231 232 macvtap_del_queues(dev); 233 macvlan_dellink(dev, head); 234 } 235 236 static struct rtnl_link_ops macvtap_link_ops __read_mostly = { 237 .kind = "macvtap", 238 .newlink = macvtap_newlink, 239 .dellink = macvtap_dellink, 240 }; 241 242 243 static void macvtap_sock_write_space(struct sock *sk) 244 { 245 if (!sock_writeable(sk) || 246 !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) 247 return; 248 249 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 250 wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); 251 } 252 253 static int macvtap_open(struct inode *inode, struct file *file) 254 { 255 struct net *net = current->nsproxy->net_ns; 256 struct net_device *dev = dev_get_by_index(net, iminor(inode)); 257 struct macvtap_queue *q; 258 int err; 259 260 err = -ENODEV; 261 if (!dev) 262 goto out; 263 264 /* check if this is a macvtap device */ 265 err = -EINVAL; 266 if (dev->rtnl_link_ops != &macvtap_link_ops) 267 goto out; 268 269 err = -ENOMEM; 270 q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 271 &macvtap_proto); 272 if (!q) 273 goto out; 274 275 init_waitqueue_head(&q->sock.wait); 276 q->sock.type = SOCK_RAW; 277 q->sock.state = SS_CONNECTED; 278 q->sock.file = file; 279 q->sock.ops = &macvtap_socket_ops; 280 sock_init_data(&q->sock, &q->sk); 281 q->sk.sk_write_space = macvtap_sock_write_space; 282 q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; 283 284 err = macvtap_set_queue(dev, file, q); 285 if (err) 286 sock_put(&q->sk); 287 288 out: 289 if (dev) 290 dev_put(dev); 291 292 return err; 293 } 294 295 static int macvtap_release(struct inode *inode, struct file *file) 296 { 297 struct macvtap_queue *q = file->private_data; 298 macvtap_put_queue(q); 299 return 0; 300 } 301 302 static unsigned int macvtap_poll(struct file *file, poll_table * wait) 303 { 304 struct macvtap_queue *q = file->private_data; 305 unsigned int mask = POLLERR; 306 307 if (!q) 308 goto out; 309 310 mask = 0; 311 poll_wait(file, &q->sock.wait, wait); 312 313 if (!skb_queue_empty(&q->sk.sk_receive_queue)) 314 mask |= POLLIN | POLLRDNORM; 315 316 if (sock_writeable(&q->sk) || 317 (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && 318 sock_writeable(&q->sk))) 319 mask |= POLLOUT | POLLWRNORM; 320 321 out: 322 return mask; 323 } 324 325 static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, 326 size_t len, size_t linear, 327 int noblock, int *err) 328 { 329 struct sk_buff *skb; 330 331 /* Under a page? Don't bother with paged skb. */ 332 if (prepad + len < PAGE_SIZE || !linear) 333 linear = len; 334 335 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 336 err); 337 if (!skb) 338 return NULL; 339 340 skb_reserve(skb, prepad); 341 skb_put(skb, linear); 342 skb->data_len = len - linear; 343 skb->len += len - linear; 344 345 return skb; 346 } 347 348 /* 349 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should 350 * be shared with the tun/tap driver. 351 */ 352 static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, 353 struct virtio_net_hdr *vnet_hdr) 354 { 355 unsigned short gso_type = 0; 356 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 357 switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 358 case VIRTIO_NET_HDR_GSO_TCPV4: 359 gso_type = SKB_GSO_TCPV4; 360 break; 361 case VIRTIO_NET_HDR_GSO_TCPV6: 362 gso_type = SKB_GSO_TCPV6; 363 break; 364 case VIRTIO_NET_HDR_GSO_UDP: 365 gso_type = SKB_GSO_UDP; 366 break; 367 default: 368 return -EINVAL; 369 } 370 371 if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) 372 gso_type |= SKB_GSO_TCP_ECN; 373 374 if (vnet_hdr->gso_size == 0) 375 return -EINVAL; 376 } 377 378 if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 379 if (!skb_partial_csum_set(skb, vnet_hdr->csum_start, 380 vnet_hdr->csum_offset)) 381 return -EINVAL; 382 } 383 384 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 385 skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; 386 skb_shinfo(skb)->gso_type = gso_type; 387 388 /* Header must be checked, and gso_segs computed. */ 389 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; 390 skb_shinfo(skb)->gso_segs = 0; 391 } 392 return 0; 393 } 394 395 static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, 396 struct virtio_net_hdr *vnet_hdr) 397 { 398 memset(vnet_hdr, 0, sizeof(*vnet_hdr)); 399 400 if (skb_is_gso(skb)) { 401 struct skb_shared_info *sinfo = skb_shinfo(skb); 402 403 /* This is a hint as to how much should be linear. */ 404 vnet_hdr->hdr_len = skb_headlen(skb); 405 vnet_hdr->gso_size = sinfo->gso_size; 406 if (sinfo->gso_type & SKB_GSO_TCPV4) 407 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; 408 else if (sinfo->gso_type & SKB_GSO_TCPV6) 409 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; 410 else if (sinfo->gso_type & SKB_GSO_UDP) 411 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; 412 else 413 BUG(); 414 if (sinfo->gso_type & SKB_GSO_TCP_ECN) 415 vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 416 } else 417 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; 418 419 if (skb->ip_summed == CHECKSUM_PARTIAL) { 420 vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; 421 vnet_hdr->csum_start = skb->csum_start - 422 skb_headroom(skb); 423 vnet_hdr->csum_offset = skb->csum_offset; 424 } /* else everything is zero */ 425 426 return 0; 427 } 428 429 430 /* Get packet from user space buffer */ 431 static ssize_t macvtap_get_user(struct macvtap_queue *q, 432 const struct iovec *iv, size_t count, 433 int noblock) 434 { 435 struct sk_buff *skb; 436 struct macvlan_dev *vlan; 437 size_t len = count; 438 int err; 439 struct virtio_net_hdr vnet_hdr = { 0 }; 440 int vnet_hdr_len = 0; 441 442 if (q->flags & IFF_VNET_HDR) { 443 vnet_hdr_len = sizeof(vnet_hdr); 444 445 err = -EINVAL; 446 if ((len -= vnet_hdr_len) < 0) 447 goto err; 448 449 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, 450 vnet_hdr_len); 451 if (err < 0) 452 goto err; 453 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 454 vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > 455 vnet_hdr.hdr_len) 456 vnet_hdr.hdr_len = vnet_hdr.csum_start + 457 vnet_hdr.csum_offset + 2; 458 err = -EINVAL; 459 if (vnet_hdr.hdr_len > len) 460 goto err; 461 } 462 463 err = -EINVAL; 464 if (unlikely(len < ETH_HLEN)) 465 goto err; 466 467 skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len, 468 noblock, &err); 469 if (!skb) 470 goto err; 471 472 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); 473 if (err) 474 goto err_kfree; 475 476 skb_set_network_header(skb, ETH_HLEN); 477 skb_reset_mac_header(skb); 478 skb->protocol = eth_hdr(skb)->h_proto; 479 480 if (vnet_hdr_len) { 481 err = macvtap_skb_from_vnet_hdr(skb, &vnet_hdr); 482 if (err) 483 goto err_kfree; 484 } 485 486 rcu_read_lock_bh(); 487 vlan = rcu_dereference(q->vlan); 488 if (vlan) 489 macvlan_start_xmit(skb, vlan->dev); 490 else 491 kfree_skb(skb); 492 rcu_read_unlock_bh(); 493 494 return count; 495 496 err_kfree: 497 kfree_skb(skb); 498 499 err: 500 rcu_read_lock_bh(); 501 vlan = rcu_dereference(q->vlan); 502 if (vlan) 503 netdev_get_tx_queue(vlan->dev, 0)->tx_dropped++; 504 rcu_read_unlock_bh(); 505 506 return err; 507 } 508 509 static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, 510 unsigned long count, loff_t pos) 511 { 512 struct file *file = iocb->ki_filp; 513 ssize_t result = -ENOLINK; 514 struct macvtap_queue *q = file->private_data; 515 516 result = macvtap_get_user(q, iv, iov_length(iv, count), 517 file->f_flags & O_NONBLOCK); 518 return result; 519 } 520 521 /* Put packet to the user space buffer */ 522 static ssize_t macvtap_put_user(struct macvtap_queue *q, 523 const struct sk_buff *skb, 524 const struct iovec *iv, int len) 525 { 526 struct macvlan_dev *vlan; 527 int ret; 528 int vnet_hdr_len = 0; 529 530 if (q->flags & IFF_VNET_HDR) { 531 struct virtio_net_hdr vnet_hdr; 532 vnet_hdr_len = sizeof (vnet_hdr); 533 if ((len -= vnet_hdr_len) < 0) 534 return -EINVAL; 535 536 ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr); 537 if (ret) 538 return ret; 539 540 if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, vnet_hdr_len)) 541 return -EFAULT; 542 } 543 544 len = min_t(int, skb->len, len); 545 546 ret = skb_copy_datagram_const_iovec(skb, 0, iv, vnet_hdr_len, len); 547 548 rcu_read_lock_bh(); 549 vlan = rcu_dereference(q->vlan); 550 if (vlan) 551 macvlan_count_rx(vlan, len, ret == 0, 0); 552 rcu_read_unlock_bh(); 553 554 return ret ? ret : (len + vnet_hdr_len); 555 } 556 557 static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, 558 const struct iovec *iv, unsigned long len, 559 int noblock) 560 { 561 DECLARE_WAITQUEUE(wait, current); 562 struct sk_buff *skb; 563 ssize_t ret = 0; 564 565 add_wait_queue(q->sk.sk_sleep, &wait); 566 while (len) { 567 current->state = TASK_INTERRUPTIBLE; 568 569 /* Read frames from the queue */ 570 skb = skb_dequeue(&q->sk.sk_receive_queue); 571 if (!skb) { 572 if (noblock) { 573 ret = -EAGAIN; 574 break; 575 } 576 if (signal_pending(current)) { 577 ret = -ERESTARTSYS; 578 break; 579 } 580 /* Nothing to read, let's sleep */ 581 schedule(); 582 continue; 583 } 584 ret = macvtap_put_user(q, skb, iv, len); 585 kfree_skb(skb); 586 break; 587 } 588 589 current->state = TASK_RUNNING; 590 remove_wait_queue(q->sk.sk_sleep, &wait); 591 return ret; 592 } 593 594 static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, 595 unsigned long count, loff_t pos) 596 { 597 struct file *file = iocb->ki_filp; 598 struct macvtap_queue *q = file->private_data; 599 ssize_t len, ret = 0; 600 601 len = iov_length(iv, count); 602 if (len < 0) { 603 ret = -EINVAL; 604 goto out; 605 } 606 607 ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); 608 ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */ 609 out: 610 return ret; 611 } 612 613 /* 614 * provide compatibility with generic tun/tap interface 615 */ 616 static long macvtap_ioctl(struct file *file, unsigned int cmd, 617 unsigned long arg) 618 { 619 struct macvtap_queue *q = file->private_data; 620 struct macvlan_dev *vlan; 621 void __user *argp = (void __user *)arg; 622 struct ifreq __user *ifr = argp; 623 unsigned int __user *up = argp; 624 unsigned int u; 625 int ret; 626 627 switch (cmd) { 628 case TUNSETIFF: 629 /* ignore the name, just look at flags */ 630 if (get_user(u, &ifr->ifr_flags)) 631 return -EFAULT; 632 633 ret = 0; 634 if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) 635 ret = -EINVAL; 636 else 637 q->flags = u; 638 639 return ret; 640 641 case TUNGETIFF: 642 rcu_read_lock_bh(); 643 vlan = rcu_dereference(q->vlan); 644 if (vlan) 645 dev_hold(vlan->dev); 646 rcu_read_unlock_bh(); 647 648 if (!vlan) 649 return -ENOLINK; 650 651 ret = 0; 652 if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || 653 put_user(q->flags, &ifr->ifr_flags)) 654 ret = -EFAULT; 655 dev_put(vlan->dev); 656 return ret; 657 658 case TUNGETFEATURES: 659 if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) 660 return -EFAULT; 661 return 0; 662 663 case TUNSETSNDBUF: 664 if (get_user(u, up)) 665 return -EFAULT; 666 667 q->sk.sk_sndbuf = u; 668 return 0; 669 670 case TUNSETOFFLOAD: 671 /* let the user check for future flags */ 672 if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 673 TUN_F_TSO_ECN | TUN_F_UFO)) 674 return -EINVAL; 675 676 /* TODO: only accept frames with the features that 677 got enabled for forwarded frames */ 678 if (!(q->flags & IFF_VNET_HDR)) 679 return -EINVAL; 680 return 0; 681 682 default: 683 return -EINVAL; 684 } 685 } 686 687 #ifdef CONFIG_COMPAT 688 static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, 689 unsigned long arg) 690 { 691 return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); 692 } 693 #endif 694 695 static const struct file_operations macvtap_fops = { 696 .owner = THIS_MODULE, 697 .open = macvtap_open, 698 .release = macvtap_release, 699 .aio_read = macvtap_aio_read, 700 .aio_write = macvtap_aio_write, 701 .poll = macvtap_poll, 702 .llseek = no_llseek, 703 .unlocked_ioctl = macvtap_ioctl, 704 #ifdef CONFIG_COMPAT 705 .compat_ioctl = macvtap_compat_ioctl, 706 #endif 707 }; 708 709 static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, 710 struct msghdr *m, size_t total_len) 711 { 712 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 713 return macvtap_get_user(q, m->msg_iov, total_len, 714 m->msg_flags & MSG_DONTWAIT); 715 } 716 717 static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, 718 struct msghdr *m, size_t total_len, 719 int flags) 720 { 721 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 722 int ret; 723 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) 724 return -EINVAL; 725 ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, 726 flags & MSG_DONTWAIT); 727 if (ret > total_len) { 728 m->msg_flags |= MSG_TRUNC; 729 ret = flags & MSG_TRUNC ? ret : total_len; 730 } 731 return ret; 732 } 733 734 /* Ops structure to mimic raw sockets with tun */ 735 static const struct proto_ops macvtap_socket_ops = { 736 .sendmsg = macvtap_sendmsg, 737 .recvmsg = macvtap_recvmsg, 738 }; 739 740 /* Get an underlying socket object from tun file. Returns error unless file is 741 * attached to a device. The returned object works like a packet socket, it 742 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 743 * holding a reference to the file for as long as the socket is in use. */ 744 struct socket *macvtap_get_socket(struct file *file) 745 { 746 struct macvtap_queue *q; 747 if (file->f_op != &macvtap_fops) 748 return ERR_PTR(-EINVAL); 749 q = file->private_data; 750 if (!q) 751 return ERR_PTR(-EBADFD); 752 return &q->sock; 753 } 754 EXPORT_SYMBOL_GPL(macvtap_get_socket); 755 756 static int macvtap_init(void) 757 { 758 int err; 759 760 err = alloc_chrdev_region(&macvtap_major, 0, 761 MACVTAP_NUM_DEVS, "macvtap"); 762 if (err) 763 goto out1; 764 765 cdev_init(&macvtap_cdev, &macvtap_fops); 766 err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); 767 if (err) 768 goto out2; 769 770 macvtap_class = class_create(THIS_MODULE, "macvtap"); 771 if (IS_ERR(macvtap_class)) { 772 err = PTR_ERR(macvtap_class); 773 goto out3; 774 } 775 776 err = macvlan_link_register(&macvtap_link_ops); 777 if (err) 778 goto out4; 779 780 return 0; 781 782 out4: 783 class_unregister(macvtap_class); 784 out3: 785 cdev_del(&macvtap_cdev); 786 out2: 787 unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); 788 out1: 789 return err; 790 } 791 module_init(macvtap_init); 792 793 static void macvtap_exit(void) 794 { 795 rtnl_link_unregister(&macvtap_link_ops); 796 class_unregister(macvtap_class); 797 cdev_del(&macvtap_cdev); 798 unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); 799 } 800 module_exit(macvtap_exit); 801 802 MODULE_ALIAS_RTNL_LINK("macvtap"); 803 MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); 804 MODULE_LICENSE("GPL"); 805