tun.c (7a085c3aad94cce7e11031c6800e41668418ae4c) tun.c (f29eb2a96c56ebff6b4d9b530d5ccd61b9f538d7)
1/*
2 * TUN - Universal TUN/TAP device driver.
3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 99 unchanged lines hidden (view full) ---

108} while (0)
109#define DBG1(level, fmt, args...) \
110do { \
111 if (0) \
112 printk(level fmt, ##args); \
113} while (0)
114#endif
115
1/*
2 * TUN - Universal TUN/TAP device driver.
3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.

--- 99 unchanged lines hidden (view full) ---

108} while (0)
109#define DBG1(level, fmt, args...) \
110do { \
111 if (0) \
112 printk(level fmt, ##args); \
113} while (0)
114#endif
115
116#define TUN_HEADROOM 256
117#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
118
119/* TUN device flags */
120
121/* IFF_ATTACH_QUEUE is never stored in device flags,
122 * overload it to mean fasync when stored there.
123 */
124#define TUN_FASYNC IFF_ATTACH_QUEUE

--- 51 unchanged lines hidden (view full) ---

176 /* only used for fasnyc */
177 unsigned int flags;
178 union {
179 u16 queue_index;
180 unsigned int ifindex;
181 };
182 struct napi_struct napi;
183 bool napi_enabled;
116#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
117
118/* TUN device flags */
119
120/* IFF_ATTACH_QUEUE is never stored in device flags,
121 * overload it to mean fasync when stored there.
122 */
123#define TUN_FASYNC IFF_ATTACH_QUEUE

--- 51 unchanged lines hidden (view full) ---

175 /* only used for fasnyc */
176 unsigned int flags;
177 union {
178 u16 queue_index;
179 unsigned int ifindex;
180 };
181 struct napi_struct napi;
182 bool napi_enabled;
183 bool napi_frags_enabled;
184 struct mutex napi_mutex; /* Protects access to the above napi */
185 struct list_head next;
186 struct tun_struct *detached;
187 struct ptr_ring tx_ring;
188 struct xdp_rxq_info xdp_rxq;
189};
190
191struct tun_flow_entry {

--- 116 unchanged lines hidden (view full) ---

308
309 if (received < budget)
310 napi_complete_done(napi, received);
311
312 return received;
313}
314
315static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
184 struct mutex napi_mutex; /* Protects access to the above napi */
185 struct list_head next;
186 struct tun_struct *detached;
187 struct ptr_ring tx_ring;
188 struct xdp_rxq_info xdp_rxq;
189};
190
191struct tun_flow_entry {

--- 116 unchanged lines hidden (view full) ---

308
309 if (received < budget)
310 napi_complete_done(napi, received);
311
312 return received;
313}
314
315static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
316 bool napi_en)
316 bool napi_en, bool napi_frags)
317{
318 tfile->napi_enabled = napi_en;
317{
318 tfile->napi_enabled = napi_en;
319 tfile->napi_frags_enabled = napi_en && napi_frags;
319 if (napi_en) {
320 netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
321 NAPI_POLL_WEIGHT);
322 napi_enable(&tfile->napi);
320 if (napi_en) {
321 netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
322 NAPI_POLL_WEIGHT);
323 napi_enable(&tfile->napi);
323 mutex_init(&tfile->napi_mutex);
324 }
325}
326
324 }
325}
326
327static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
327static void tun_napi_disable(struct tun_file *tfile)
328{
329 if (tfile->napi_enabled)
330 napi_disable(&tfile->napi);
331}
332
328{
329 if (tfile->napi_enabled)
330 napi_disable(&tfile->napi);
331}
332
333static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
333static void tun_napi_del(struct tun_file *tfile)
334{
335 if (tfile->napi_enabled)
336 netif_napi_del(&tfile->napi);
337}
338
334{
335 if (tfile->napi_enabled)
336 netif_napi_del(&tfile->napi);
337}
338
339static bool tun_napi_frags_enabled(const struct tun_struct *tun)
339static bool tun_napi_frags_enabled(const struct tun_file *tfile)
340{
340{
341 return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
341 return tfile->napi_frags_enabled;
342}
343
344#ifdef CONFIG_TUN_VNET_CROSS_LE
345static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
346{
347 return tun->flags & TUN_VNET_BE ? false :
348 virtio_legacy_is_little_endian();
349}

--- 207 unchanged lines hidden (view full) ---

557 * flow_hash table accordingly.
558 */
559static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
560{
561 if (unlikely(e->rps_rxhash != hash))
562 e->rps_rxhash = hash;
563}
564
342}
343
344#ifdef CONFIG_TUN_VNET_CROSS_LE
345static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
346{
347 return tun->flags & TUN_VNET_BE ? false :
348 virtio_legacy_is_little_endian();
349}

--- 207 unchanged lines hidden (view full) ---

557 * flow_hash table accordingly.
558 */
559static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
560{
561 if (unlikely(e->rps_rxhash != hash))
562 e->rps_rxhash = hash;
563}
564
565/* We try to identify a flow through its rxhash first. The reason that
565/* We try to identify a flow through its rxhash. The reason that
566 * we do not check rxq no. is because some cards(e.g 82599), chooses
567 * the rxq based on the txq where the last packet of the flow comes. As
568 * the userspace application move between processors, we may get a
566 * we do not check rxq no. is because some cards(e.g 82599), chooses
567 * the rxq based on the txq where the last packet of the flow comes. As
568 * the userspace application move between processors, we may get a
569 * different rxq no. here. If we could not get rxhash, then we would
570 * hope the rxq no. may help here.
569 * different rxq no. here.
571 */
572static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
573{
574 struct tun_flow_entry *e;
575 u32 txq = 0;
576 u32 numqueues = 0;
577
578 numqueues = READ_ONCE(tun->numqueues);
579
580 txq = __skb_get_hash_symmetric(skb);
570 */
571static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
572{
573 struct tun_flow_entry *e;
574 u32 txq = 0;
575 u32 numqueues = 0;
576
577 numqueues = READ_ONCE(tun->numqueues);
578
579 txq = __skb_get_hash_symmetric(skb);
581 if (txq) {
582 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
583 if (e) {
584 tun_flow_save_rps_rxhash(e, txq);
585 txq = e->queue_index;
586 } else
587 /* use multiply and shift instead of expensive divide */
588 txq = ((u64)txq * numqueues) >> 32;
589 } else if (likely(skb_rx_queue_recorded(skb))) {
590 txq = skb_get_rx_queue(skb);
591 while (unlikely(txq >= numqueues))
592 txq -= numqueues;
580 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
581 if (e) {
582 tun_flow_save_rps_rxhash(e, txq);
583 txq = e->queue_index;
584 } else {
585 /* use multiply and shift instead of expensive divide */
586 txq = ((u64)txq * numqueues) >> 32;
593 }
594
595 return txq;
596}
597
598static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
599{
600 struct tun_prog *prog;

--- 84 unchanged lines hidden (view full) ---

685static void __tun_detach(struct tun_file *tfile, bool clean)
686{
687 struct tun_file *ntfile;
688 struct tun_struct *tun;
689
690 tun = rtnl_dereference(tfile->tun);
691
692 if (tun && clean) {
587 }
588
589 return txq;
590}
591
592static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
593{
594 struct tun_prog *prog;

--- 84 unchanged lines hidden (view full) ---

679static void __tun_detach(struct tun_file *tfile, bool clean)
680{
681 struct tun_file *ntfile;
682 struct tun_struct *tun;
683
684 tun = rtnl_dereference(tfile->tun);
685
686 if (tun && clean) {
693 tun_napi_disable(tun, tfile);
694 tun_napi_del(tun, tfile);
687 tun_napi_disable(tfile);
688 tun_napi_del(tfile);
695 }
696
697 if (tun && !tfile->detached) {
698 u16 index = tfile->queue_index;
699 BUG_ON(index >= tun->numqueues);
700
701 rcu_assign_pointer(tun->tfiles[index],
702 tun->tfiles[tun->numqueues - 1]);

--- 50 unchanged lines hidden (view full) ---

753{
754 struct tun_struct *tun = netdev_priv(dev);
755 struct tun_file *tfile, *tmp;
756 int i, n = tun->numqueues;
757
758 for (i = 0; i < n; i++) {
759 tfile = rtnl_dereference(tun->tfiles[i]);
760 BUG_ON(!tfile);
689 }
690
691 if (tun && !tfile->detached) {
692 u16 index = tfile->queue_index;
693 BUG_ON(index >= tun->numqueues);
694
695 rcu_assign_pointer(tun->tfiles[index],
696 tun->tfiles[tun->numqueues - 1]);

--- 50 unchanged lines hidden (view full) ---

747{
748 struct tun_struct *tun = netdev_priv(dev);
749 struct tun_file *tfile, *tmp;
750 int i, n = tun->numqueues;
751
752 for (i = 0; i < n; i++) {
753 tfile = rtnl_dereference(tun->tfiles[i]);
754 BUG_ON(!tfile);
761 tun_napi_disable(tun, tfile);
755 tun_napi_disable(tfile);
762 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
763 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
764 RCU_INIT_POINTER(tfile->tun, NULL);
765 --tun->numqueues;
766 }
767 list_for_each_entry(tfile, &tun->disabled, next) {
768 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
769 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
770 RCU_INIT_POINTER(tfile->tun, NULL);
771 }
772 BUG_ON(tun->numqueues != 0);
773
774 synchronize_net();
775 for (i = 0; i < n; i++) {
776 tfile = rtnl_dereference(tun->tfiles[i]);
756 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
757 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
758 RCU_INIT_POINTER(tfile->tun, NULL);
759 --tun->numqueues;
760 }
761 list_for_each_entry(tfile, &tun->disabled, next) {
762 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
763 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
764 RCU_INIT_POINTER(tfile->tun, NULL);
765 }
766 BUG_ON(tun->numqueues != 0);
767
768 synchronize_net();
769 for (i = 0; i < n; i++) {
770 tfile = rtnl_dereference(tun->tfiles[i]);
777 tun_napi_del(tun, tfile);
771 tun_napi_del(tfile);
778 /* Drop read queue */
779 tun_queue_purge(tfile);
780 xdp_rxq_info_unreg(&tfile->xdp_rxq);
781 sock_put(&tfile->sk);
782 }
783 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
784 tun_enable_queue(tfile);
785 tun_queue_purge(tfile);
786 xdp_rxq_info_unreg(&tfile->xdp_rxq);
787 sock_put(&tfile->sk);
788 }
789 BUG_ON(tun->numdisabled != 0);
790
791 if (tun->flags & IFF_PERSIST)
792 module_put(THIS_MODULE);
793}
794
795static int tun_attach(struct tun_struct *tun, struct file *file,
772 /* Drop read queue */
773 tun_queue_purge(tfile);
774 xdp_rxq_info_unreg(&tfile->xdp_rxq);
775 sock_put(&tfile->sk);
776 }
777 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
778 tun_enable_queue(tfile);
779 tun_queue_purge(tfile);
780 xdp_rxq_info_unreg(&tfile->xdp_rxq);
781 sock_put(&tfile->sk);
782 }
783 BUG_ON(tun->numdisabled != 0);
784
785 if (tun->flags & IFF_PERSIST)
786 module_put(THIS_MODULE);
787}
788
789static int tun_attach(struct tun_struct *tun, struct file *file,
796 bool skip_filter, bool napi)
790 bool skip_filter, bool napi, bool napi_frags)
797{
798 struct tun_file *tfile = file->private_data;
799 struct net_device *dev = tun->dev;
800 int err;
801
802 err = security_tun_dev_attach(tfile->socket.sk, tun->security);
803 if (err < 0)
804 goto out;

--- 56 unchanged lines hidden (view full) ---

861 rcu_assign_pointer(tfile->tun, tun);
862 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
863 tun->numqueues++;
864
865 if (tfile->detached) {
866 tun_enable_queue(tfile);
867 } else {
868 sock_hold(&tfile->sk);
791{
792 struct tun_file *tfile = file->private_data;
793 struct net_device *dev = tun->dev;
794 int err;
795
796 err = security_tun_dev_attach(tfile->socket.sk, tun->security);
797 if (err < 0)
798 goto out;

--- 56 unchanged lines hidden (view full) ---

855 rcu_assign_pointer(tfile->tun, tun);
856 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
857 tun->numqueues++;
858
859 if (tfile->detached) {
860 tun_enable_queue(tfile);
861 } else {
862 sock_hold(&tfile->sk);
869 tun_napi_init(tun, tfile, napi);
863 tun_napi_init(tun, tfile, napi, napi_frags);
870 }
871
864 }
865
866 if (rtnl_dereference(tun->xdp_prog))
867 sock_set_flag(&tfile->sk, SOCK_XDP);
868
872 tun_set_real_num_queues(tun);
873
874 /* device is allowed to go away first, so no need to hold extra
875 * refcnt.
876 */
877
878out:
879 return err;

--- 159 unchanged lines hidden (view full) ---

1039/* Net device start xmit */
1040static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
1041{
1042#ifdef CONFIG_RPS
1043 if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
1044 /* Select queue was not called for the skbuff, so we extract the
1045 * RPS hash and save it into the flow_table here.
1046 */
869 tun_set_real_num_queues(tun);
870
871 /* device is allowed to go away first, so no need to hold extra
872 * refcnt.
873 */
874
875out:
876 return err;

--- 159 unchanged lines hidden (view full) ---

1036/* Net device start xmit */
1037static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
1038{
1039#ifdef CONFIG_RPS
1040 if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
1041 /* Select queue was not called for the skbuff, so we extract the
1042 * RPS hash and save it into the flow_table here.
1043 */
1044 struct tun_flow_entry *e;
1047 __u32 rxhash;
1048
1049 rxhash = __skb_get_hash_symmetric(skb);
1045 __u32 rxhash;
1046
1047 rxhash = __skb_get_hash_symmetric(skb);
1050 if (rxhash) {
1051 struct tun_flow_entry *e;
1052 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
1053 rxhash);
1054 if (e)
1055 tun_flow_save_rps_rxhash(e, rxhash);
1056 }
1048 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash);
1049 if (e)
1050 tun_flow_save_rps_rxhash(e, rxhash);
1057 }
1058#endif
1059}
1060
1061static unsigned int run_ebpf_filter(struct tun_struct *tun,
1062 struct sk_buff *skb,
1063 int len)
1064{

--- 134 unchanged lines hidden (view full) ---

1199 stats->rx_frame_errors = rx_frame_errors;
1200 stats->tx_dropped = tx_dropped;
1201}
1202
1203static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1204 struct netlink_ext_ack *extack)
1205{
1206 struct tun_struct *tun = netdev_priv(dev);
1051 }
1052#endif
1053}
1054
1055static unsigned int run_ebpf_filter(struct tun_struct *tun,
1056 struct sk_buff *skb,
1057 int len)
1058{

--- 134 unchanged lines hidden (view full) ---

1193 stats->rx_frame_errors = rx_frame_errors;
1194 stats->tx_dropped = tx_dropped;
1195}
1196
1197static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1198 struct netlink_ext_ack *extack)
1199{
1200 struct tun_struct *tun = netdev_priv(dev);
1201 struct tun_file *tfile;
1207 struct bpf_prog *old_prog;
1202 struct bpf_prog *old_prog;
1203 int i;
1208
1209 old_prog = rtnl_dereference(tun->xdp_prog);
1210 rcu_assign_pointer(tun->xdp_prog, prog);
1211 if (old_prog)
1212 bpf_prog_put(old_prog);
1213
1204
1205 old_prog = rtnl_dereference(tun->xdp_prog);
1206 rcu_assign_pointer(tun->xdp_prog, prog);
1207 if (old_prog)
1208 bpf_prog_put(old_prog);
1209
1210 for (i = 0; i < tun->numqueues; i++) {
1211 tfile = rtnl_dereference(tun->tfiles[i]);
1212 if (prog)
1213 sock_set_flag(&tfile->sk, SOCK_XDP);
1214 else
1215 sock_reset_flag(&tfile->sk, SOCK_XDP);
1216 }
1217 list_for_each_entry(tfile, &tun->disabled, next) {
1218 if (prog)
1219 sock_set_flag(&tfile->sk, SOCK_XDP);
1220 else
1221 sock_reset_flag(&tfile->sk, SOCK_XDP);
1222 }
1223
1214 return 0;
1215}
1216
1217static u32 tun_xdp_query(struct net_device *dev)
1218{
1219 struct tun_struct *tun = netdev_priv(dev);
1220 const struct bpf_prog *xdp_prog;
1221

--- 347 unchanged lines hidden (view full) ---

1569
1570 if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1571 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1572 return false;
1573
1574 return true;
1575}
1576
1224 return 0;
1225}
1226
1227static u32 tun_xdp_query(struct net_device *dev)
1228{
1229 struct tun_struct *tun = netdev_priv(dev);
1230 const struct bpf_prog *xdp_prog;
1231

--- 347 unchanged lines hidden (view full) ---

1579
1580 if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1581 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1582 return false;
1583
1584 return true;
1585}
1586
/* Wrap an already-filled page-fragment buffer in an sk_buff.
 *
 * @alloc_frag: fragment allocator the buffer came from
 * @buf:        start of the buffer (becomes skb->head)
 * @buflen:     total buffer size handed to build_skb()
 * @len:        payload length to expose via skb_put()
 * @pad:        headroom to reserve in front of the payload
 *
 * On success the fragment page's refcount is bumped and the fragment
 * offset advanced, i.e. the skb takes its own reference on the page.
 * Returns ERR_PTR(-ENOMEM) if skb allocation fails; in that case the
 * caller still owns the buffer.
 */
1587static struct sk_buff *__tun_build_skb(struct page_frag *alloc_frag, char *buf,
1588 int buflen, int len, int pad)
1589{
1590 struct sk_buff *skb = build_skb(buf, buflen);
1591
1592 if (!skb)
1593 return ERR_PTR(-ENOMEM);
1594
1595 skb_reserve(skb, pad);
1596 skb_put(skb, len);
1597
/* The skb now points into alloc_frag->page: pin the page and consume the
 * fragment so it is not handed out again.
 */
1598 get_page(alloc_frag->page);
1599 alloc_frag->offset += buflen;
1600
1601 return skb;
1602}
1603
/* Act on the verdict returned by an XDP program for a tun device.
 *
 * XDP_REDIRECT / XDP_TX forward the buffer out of the driver and return
 * a negative errno if forwarding fails; XDP_PASS leaves the buffer for
 * the normal receive path.  Unknown verdicts are warned about and then,
 * like XDP_ABORTED (which also emits a tracepoint) and XDP_DROP, counted
 * in rx_dropped.  On success the original action value is returned so
 * the caller can switch on it again.
 */
1604static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
1605 struct xdp_buff *xdp, u32 act)
1606{
1607 int err;
1608
1609 switch (act) {
1610 case XDP_REDIRECT:
1611 err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
1612 if (err)
1613 return err;
1614 break;
1615 case XDP_TX:
1616 err = tun_xdp_tx(tun->dev, xdp);
1617 if (err < 0)
1618 return err;
1619 break;
1620 case XDP_PASS:
1621 break;
1622 default:
1623 bpf_warn_invalid_xdp_action(act);
1624 /* fall through */
1625 case XDP_ABORTED:
1626 trace_xdp_exception(tun->dev, xdp_prog, act);
1627 /* fall through */
1628 case XDP_DROP:
1629 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1630 break;
1631 }
1632
/* Note: for ABORTED/DROP (and unknown verdicts) the buffer itself is NOT
 * freed here -- the caller is responsible for releasing it based on the
 * returned action.
 */
1633 return act;
1634}
1635
1577static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1578 struct tun_file *tfile,
1579 struct iov_iter *from,
1580 struct virtio_net_hdr *hdr,
1581 int len, int *skb_xdp)
1582{
1583 struct page_frag *alloc_frag = &current->task_frag;
1636static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1637 struct tun_file *tfile,
1638 struct iov_iter *from,
1639 struct virtio_net_hdr *hdr,
1640 int len, int *skb_xdp)
1641{
1642 struct page_frag *alloc_frag = &current->task_frag;
1584 struct sk_buff *skb;
1585 struct bpf_prog *xdp_prog;
1586 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1643 struct bpf_prog *xdp_prog;
1644 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1587 unsigned int delta = 0;
1588 char *buf;
1589 size_t copied;
1645 char *buf;
1646 size_t copied;
1590 int err, pad = TUN_RX_PAD;
1647 int pad = TUN_RX_PAD;
1648 int err = 0;
1591
1592 rcu_read_lock();
1593 xdp_prog = rcu_dereference(tun->xdp_prog);
1594 if (xdp_prog)
1649
1650 rcu_read_lock();
1651 xdp_prog = rcu_dereference(tun->xdp_prog);
1652 if (xdp_prog)
1595 pad += TUN_HEADROOM;
1653 pad += XDP_PACKET_HEADROOM;
1596 buflen += SKB_DATA_ALIGN(len + pad);
1597 rcu_read_unlock();
1598
1599 alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
1600 if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1601 return ERR_PTR(-ENOMEM);
1602
1603 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1604 copied = copy_page_from_iter(alloc_frag->page,
1605 alloc_frag->offset + pad,
1606 len, from);
1607 if (copied != len)
1608 return ERR_PTR(-EFAULT);
1609
1610 /* There's a small window that XDP may be set after the check
1611 * of xdp_prog above, this should be rare and for simplicity
1612 * we do XDP on skb in case the headroom is not enough.
1613 */
1654 buflen += SKB_DATA_ALIGN(len + pad);
1655 rcu_read_unlock();
1656
1657 alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
1658 if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1659 return ERR_PTR(-ENOMEM);
1660
1661 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1662 copied = copy_page_from_iter(alloc_frag->page,
1663 alloc_frag->offset + pad,
1664 len, from);
1665 if (copied != len)
1666 return ERR_PTR(-EFAULT);
1667
1668 /* There's a small window that XDP may be set after the check
1669 * of xdp_prog above, this should be rare and for simplicity
1670 * we do XDP on skb in case the headroom is not enough.
1671 */
1614 if (hdr->gso_type || !xdp_prog)
1672 if (hdr->gso_type || !xdp_prog) {
1615 *skb_xdp = 1;
1673 *skb_xdp = 1;
1616 else
1617 *skb_xdp = 0;
1674 return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
1675 }
1618
1676
1677 *skb_xdp = 0;
1678
1619 local_bh_disable();
1620 rcu_read_lock();
1621 xdp_prog = rcu_dereference(tun->xdp_prog);
1679 local_bh_disable();
1680 rcu_read_lock();
1681 xdp_prog = rcu_dereference(tun->xdp_prog);
1622 if (xdp_prog && !*skb_xdp) {
1682 if (xdp_prog) {
1623 struct xdp_buff xdp;
1683 struct xdp_buff xdp;
1624 void *orig_data;
1625 u32 act;
1626
1627 xdp.data_hard_start = buf;
1628 xdp.data = buf + pad;
1629 xdp_set_data_meta_invalid(&xdp);
1630 xdp.data_end = xdp.data + len;
1631 xdp.rxq = &tfile->xdp_rxq;
1684 u32 act;
1685
1686 xdp.data_hard_start = buf;
1687 xdp.data = buf + pad;
1688 xdp_set_data_meta_invalid(&xdp);
1689 xdp.data_end = xdp.data + len;
1690 xdp.rxq = &tfile->xdp_rxq;
1632 orig_data = xdp.data;
1633 act = bpf_prog_run_xdp(xdp_prog, &xdp);
1634
1691
1635 switch (act) {
1636 case XDP_REDIRECT:
1692 act = bpf_prog_run_xdp(xdp_prog, &xdp);
1693 if (act == XDP_REDIRECT || act == XDP_TX) {
1637 get_page(alloc_frag->page);
1638 alloc_frag->offset += buflen;
1694 get_page(alloc_frag->page);
1695 alloc_frag->offset += buflen;
1639 err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
1640 xdp_do_flush_map();
1641 if (err)
1642 goto err_redirect;
1643 rcu_read_unlock();
1644 local_bh_enable();
1645 return NULL;
1646 case XDP_TX:
1647 get_page(alloc_frag->page);
1648 alloc_frag->offset += buflen;
1649 if (tun_xdp_tx(tun->dev, &xdp) < 0)
1650 goto err_redirect;
1651 rcu_read_unlock();
1652 local_bh_enable();
1653 return NULL;
1654 case XDP_PASS:
1655 delta = orig_data - xdp.data;
1656 len = xdp.data_end - xdp.data;
1657 break;
1658 default:
1659 bpf_warn_invalid_xdp_action(act);
1660 /* fall through */
1661 case XDP_ABORTED:
1662 trace_xdp_exception(tun->dev, xdp_prog, act);
1663 /* fall through */
1664 case XDP_DROP:
1665 goto err_xdp;
1666 }
1696 }
1667 }
1697 err = tun_xdp_act(tun, xdp_prog, &xdp, act);
1698 if (err < 0)
1699 goto err_xdp;
1700 if (err == XDP_REDIRECT)
1701 xdp_do_flush_map();
1702 if (err != XDP_PASS)
1703 goto out;
1668
1704
1669 skb = build_skb(buf, buflen);
1670 if (!skb) {
1671 rcu_read_unlock();
1672 local_bh_enable();
1673 return ERR_PTR(-ENOMEM);
1705 pad = xdp.data - xdp.data_hard_start;
1706 len = xdp.data_end - xdp.data;
1674 }
1707 }
1675
1676 skb_reserve(skb, pad - delta);
1677 skb_put(skb, len);
1678 get_page(alloc_frag->page);
1679 alloc_frag->offset += buflen;
1680
1681 rcu_read_unlock();
1682 local_bh_enable();
1683
1708 rcu_read_unlock();
1709 local_bh_enable();
1710
1684 return skb;
1711 return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
1685
1712
1686err_redirect:
1687 put_page(alloc_frag->page);
1688err_xdp:
1713err_xdp:
1714 put_page(alloc_frag->page);
1715out:
1689 rcu_read_unlock();
1690 local_bh_enable();
1716 rcu_read_unlock();
1717 local_bh_enable();
1691 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1692 return NULL;
1693}
1694
1695/* Get packet from user space buffer */
1696static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1697 void *msg_control, struct iov_iter *from,
1698 int noblock, bool more)
1699{

--- 4 unchanged lines hidden (view full) ---

1704 struct virtio_net_hdr gso = { 0 };
1705 struct tun_pcpu_stats *stats;
1706 int good_linear;
1707 int copylen;
1708 bool zerocopy = false;
1709 int err;
1710 u32 rxhash = 0;
1711 int skb_xdp = 1;
1718 return NULL;
1719}
1720
1721/* Get packet from user space buffer */
1722static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1723 void *msg_control, struct iov_iter *from,
1724 int noblock, bool more)
1725{

--- 4 unchanged lines hidden (view full) ---

1730 struct virtio_net_hdr gso = { 0 };
1731 struct tun_pcpu_stats *stats;
1732 int good_linear;
1733 int copylen;
1734 bool zerocopy = false;
1735 int err;
1736 u32 rxhash = 0;
1737 int skb_xdp = 1;
1712 bool frags = tun_napi_frags_enabled(tun);
1738 bool frags = tun_napi_frags_enabled(tfile);
1713
1714 if (!(tun->dev->flags & IFF_UP))
1715 return -EIO;
1716
1717 if (!(tun->flags & IFF_NO_PI)) {
1718 if (len < sizeof(pi))
1719 return -EINVAL;
1720 len -= sizeof(pi);

--- 537 unchanged lines hidden (view full) ---

2258}
2259
2260/* Trivial set of netlink ops to allow deleting tun or tap
2261 * device with netlink.
2262 */
2263static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
2264 struct netlink_ext_ack *extack)
2265{
1739
1740 if (!(tun->dev->flags & IFF_UP))
1741 return -EIO;
1742
1743 if (!(tun->flags & IFF_NO_PI)) {
1744 if (len < sizeof(pi))
1745 return -EINVAL;
1746 len -= sizeof(pi);

--- 537 unchanged lines hidden (view full) ---

2284}
2285
2286/* Trivial set of netlink ops to allow deleting tun or tap
2287 * device with netlink.
2288 */
/* rtnl_link_ops->validate: allow requests that carry no IFLA_INFO_DATA
 * payload (plain netlink create/delete of the link) and reject any
 * attempt to configure a tun/tap device via netlink attributes, which
 * this driver does not support.
 */
2289static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
2290 struct netlink_ext_ack *extack)
2291{
2292 if (!data)
2293 return 0;
2266 return -EINVAL;
2267}
2268
2269static size_t tun_get_size(const struct net_device *dev)
2270{
2271 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
2272 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
2273

--- 70 unchanged lines hidden (view full) ---

2344 if (wqueue && waitqueue_active(wqueue))
2345 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2346 EPOLLWRNORM | EPOLLWRBAND);
2347
2348 tfile = container_of(sk, struct tun_file, sk);
2349 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2350}
2351
2294 return -EINVAL;
2295}
2296
2297static size_t tun_get_size(const struct net_device *dev)
2298{
2299 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
2300 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
2301

--- 70 unchanged lines hidden (view full) ---

2372 if (wqueue && waitqueue_active(wqueue))
2373 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2374 EPOLLWRNORM | EPOLLWRBAND);
2375
2376 tfile = container_of(sk, struct tun_file, sk);
2377 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2378}
2379
/* Receive one XDP buffer handed to the driver via sendmsg(TUN_MSG_PTR).
 *
 * Runs the device's XDP program (if any) on the raw buffer first:
 * REDIRECT/TX consume the buffer (REDIRECT also requests a map flush via
 * *flush), DROP and invalid verdicts release the backing page.  A PASS
 * verdict -- or a buffer carrying GSO metadata, which native XDP cannot
 * handle -- is converted to an skb and injected with netif_receive_skb();
 * in the GSO case the XDP program is run afterwards in generic (skb)
 * mode instead.  Runs under the caller's rcu_read_lock (see tun_sendmsg).
 * Returns 0 on success or a negative errno.
 */
2380static int tun_xdp_one(struct tun_struct *tun,
2381 struct tun_file *tfile,
2382 struct xdp_buff *xdp, int *flush)
2383{
2384 struct tun_xdp_hdr *hdr = xdp->data_hard_start;
2385 struct virtio_net_hdr *gso = &hdr->gso;
2386 struct tun_pcpu_stats *stats;
2387 struct bpf_prog *xdp_prog;
2388 struct sk_buff *skb = NULL;
2389 u32 rxhash = 0, act;
2390 int buflen = hdr->buflen;
2391 int err = 0;
2392 bool skb_xdp = false;
2393
2394 xdp_prog = rcu_dereference(tun->xdp_prog);
2395 if (xdp_prog) {
/* Native XDP can't deal with GSO packets: build an skb and run the
 * program in generic mode after the skb is set up.
 */
2396 if (gso->gso_type) {
2397 skb_xdp = true;
2398 goto build;
2399 }
2400 xdp_set_data_meta_invalid(xdp);
2401 xdp->rxq = &tfile->xdp_rxq;
2402
2403 act = bpf_prog_run_xdp(xdp_prog, xdp);
2404 err = tun_xdp_act(tun, xdp_prog, xdp, act);
2405 if (err < 0) {
2406 put_page(virt_to_head_page(xdp->data));
2407 return err;
2408 }
2409
/* err now holds the non-negative XDP verdict from tun_xdp_act(). */
2410 switch (err) {
2411 case XDP_REDIRECT:
2412 *flush = true;
2413 /* fall through */
2414 case XDP_TX:
/* Buffer ownership was transferred by redirect/tx; nothing to free. */
2415 return 0;
2416 case XDP_PASS:
2417 break;
2418 default:
/* DROP/ABORTED (already counted in tun_xdp_act): free the page. */
2419 put_page(virt_to_head_page(xdp->data));
2420 return 0;
2421 }
2422 }
2423
2424build:
2425 skb = build_skb(xdp->data_hard_start, buflen);
2426 if (!skb) {
2427 err = -ENOMEM;
2428 goto out;
2429 }
2430
2431 skb_reserve(skb, xdp->data - xdp->data_hard_start);
2432 skb_put(skb, xdp->data_end - xdp->data);
2433
2434 if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
2435 this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
2436 kfree_skb(skb);
2437 err = -EINVAL;
2438 goto out;
2439 }
2440
2441 skb->protocol = eth_type_trans(skb, tun->dev);
2442 skb_reset_network_header(skb);
2443 skb_probe_transport_header(skb, 0);
2444
/* GSO path deferred from above: run the XDP program in generic mode. */
2445 if (skb_xdp) {
2446 err = do_xdp_generic(xdp_prog, skb);
2447 if (err != XDP_PASS)
2448 goto out;
2449 }
2450
/* Only compute the flow hash when automq steering will use it: no eBPF
 * steering program, multiple queues, and an attached queue file.
 */
2451 if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
2452 !tfile->detached)
2453 rxhash = __skb_get_hash_symmetric(skb);
2454
2455 netif_receive_skb(skb);
2456
2457 stats = get_cpu_ptr(tun->pcpu_stats);
2458 u64_stats_update_begin(&stats->syncp);
2459 stats->rx_packets++;
2460 stats->rx_bytes += skb->len;
2461 u64_stats_update_end(&stats->syncp);
2462 put_cpu_ptr(stats);
2463
2464 if (rxhash)
2465 tun_flow_update(tun, rxhash, tfile);
2466
2467out:
2468 return err;
2469}
2470
2352static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
2353{
2471static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
2472{
2354 int ret;
2473 int ret, i;
2355 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2356 struct tun_struct *tun = tun_get(tfile);
2474 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2475 struct tun_struct *tun = tun_get(tfile);
2476 struct tun_msg_ctl *ctl = m->msg_control;
2477 struct xdp_buff *xdp;
2357
2358 if (!tun)
2359 return -EBADFD;
2360
2478
2479 if (!tun)
2480 return -EBADFD;
2481
2361 ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
2482 if (ctl && (ctl->type == TUN_MSG_PTR)) {
2483 int n = ctl->num;
2484 int flush = 0;
2485
2486 local_bh_disable();
2487 rcu_read_lock();
2488
2489 for (i = 0; i < n; i++) {
2490 xdp = &((struct xdp_buff *)ctl->ptr)[i];
2491 tun_xdp_one(tun, tfile, xdp, &flush);
2492 }
2493
2494 if (flush)
2495 xdp_do_flush_map();
2496
2497 rcu_read_unlock();
2498 local_bh_enable();
2499
2500 ret = total_len;
2501 goto out;
2502 }
2503
2504 ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
2362 m->msg_flags & MSG_DONTWAIT,
2363 m->msg_flags & MSG_MORE);
2505 m->msg_flags & MSG_DONTWAIT,
2506 m->msg_flags & MSG_MORE);
2507out:
2364 tun_put(tun);
2365 return ret;
2366}
2367
2368static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
2369 int flags)
2370{
2371 struct tun_file *tfile = container_of(sock, struct tun_file, socket);

--- 157 unchanged lines hidden (view full) ---

2529
2530 if (tun_not_capable(tun))
2531 return -EPERM;
2532 err = security_tun_dev_open(tun->security);
2533 if (err < 0)
2534 return err;
2535
2536 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
2508 tun_put(tun);
2509 return ret;
2510}
2511
2512static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
2513 int flags)
2514{
2515 struct tun_file *tfile = container_of(sock, struct tun_file, socket);

--- 157 unchanged lines hidden (view full) ---

2673
2674 if (tun_not_capable(tun))
2675 return -EPERM;
2676 err = security_tun_dev_open(tun->security);
2677 if (err < 0)
2678 return err;
2679
2680 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
2537 ifr->ifr_flags & IFF_NAPI);
2681 ifr->ifr_flags & IFF_NAPI,
2682 ifr->ifr_flags & IFF_NAPI_FRAGS);
2538 if (err < 0)
2539 return err;
2540
2541 if (tun->flags & IFF_MULTI_QUEUE &&
2542 (tun->numqueues + tun->numdisabled > 1)) {
2543 /* One or more queue has already been attached, no need
2544 * to initialize the device again.
2545 */

--- 81 unchanged lines hidden (view full) ---

2627 dev->vlan_features = dev->features &
2628 ~(NETIF_F_HW_VLAN_CTAG_TX |
2629 NETIF_F_HW_VLAN_STAG_TX);
2630
2631 tun->flags = (tun->flags & ~TUN_FEATURES) |
2632 (ifr->ifr_flags & TUN_FEATURES);
2633
2634 INIT_LIST_HEAD(&tun->disabled);
2683 if (err < 0)
2684 return err;
2685
2686 if (tun->flags & IFF_MULTI_QUEUE &&
2687 (tun->numqueues + tun->numdisabled > 1)) {
2688 /* One or more queue has already been attached, no need
2689 * to initialize the device again.
2690 */

--- 81 unchanged lines hidden (view full) ---

2772 dev->vlan_features = dev->features &
2773 ~(NETIF_F_HW_VLAN_CTAG_TX |
2774 NETIF_F_HW_VLAN_STAG_TX);
2775
2776 tun->flags = (tun->flags & ~TUN_FEATURES) |
2777 (ifr->ifr_flags & TUN_FEATURES);
2778
2779 INIT_LIST_HEAD(&tun->disabled);
2635 err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
2780 err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
2781 ifr->ifr_flags & IFF_NAPI_FRAGS);
2636 if (err < 0)
2637 goto err_free_flow;
2638
2639 err = register_netdevice(tun->dev);
2640 if (err < 0)
2641 goto err_detach;
2642 }
2643

--- 132 unchanged lines hidden (view full) ---

2776 tun = tfile->detached;
2777 if (!tun) {
2778 ret = -EINVAL;
2779 goto unlock;
2780 }
2781 ret = security_tun_dev_attach_queue(tun->security);
2782 if (ret < 0)
2783 goto unlock;
2782 if (err < 0)
2783 goto err_free_flow;
2784
2785 err = register_netdevice(tun->dev);
2786 if (err < 0)
2787 goto err_detach;
2788 }
2789

--- 132 unchanged lines hidden (view full) ---

2922 tun = tfile->detached;
2923 if (!tun) {
2924 ret = -EINVAL;
2925 goto unlock;
2926 }
2927 ret = security_tun_dev_attach_queue(tun->security);
2928 if (ret < 0)
2929 goto unlock;
2784 ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
2930 ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
2931 tun->flags & IFF_NAPI_FRAGS);
2785 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
2786 tun = rtnl_dereference(tfile->tun);
2787 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
2788 ret = -EINVAL;
2789 else
2790 __tun_detach(tfile, false);
2791 } else
2792 ret = -EINVAL;

--- 401 unchanged lines hidden (view full) ---

3194 &tun_proto, 0);
3195 if (!tfile)
3196 return -ENOMEM;
3197 if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
3198 sk_free(&tfile->sk);
3199 return -ENOMEM;
3200 }
3201
2932 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
2933 tun = rtnl_dereference(tfile->tun);
2934 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
2935 ret = -EINVAL;
2936 else
2937 __tun_detach(tfile, false);
2938 } else
2939 ret = -EINVAL;

--- 401 unchanged lines hidden (view full) ---

3341 &tun_proto, 0);
3342 if (!tfile)
3343 return -ENOMEM;
3344 if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
3345 sk_free(&tfile->sk);
3346 return -ENOMEM;
3347 }
3348
3349 mutex_init(&tfile->napi_mutex);
3202 RCU_INIT_POINTER(tfile->tun, NULL);
3203 tfile->flags = 0;
3204 tfile->ifindex = 0;
3205
3206 init_waitqueue_head(&tfile->wq.wait);
3207 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
3208
3209 tfile->socket.file = file;

--- 302 unchanged lines hidden ---
3350 RCU_INIT_POINTER(tfile->tun, NULL);
3351 tfile->flags = 0;
3352 tfile->ifindex = 0;
3353
3354 init_waitqueue_head(&tfile->wq.wait);
3355 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
3356
3357 tfile->socket.file = file;

--- 302 unchanged lines hidden ---