tun.c (7a085c3aad94cce7e11031c6800e41668418ae4c) | tun.c (f29eb2a96c56ebff6b4d9b530d5ccd61b9f538d7) |
---|---|
1/* 2 * TUN - Universal TUN/TAP device driver. 3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. --- 99 unchanged lines hidden (view full) --- 108} while (0) 109#define DBG1(level, fmt, args...) \ 110do { \ 111 if (0) \ 112 printk(level fmt, ##args); \ 113} while (0) 114#endif 115 | 1/* 2 * TUN - Universal TUN/TAP device driver. 3 * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. --- 99 unchanged lines hidden (view full) --- 108} while (0) 109#define DBG1(level, fmt, args...) \ 110do { \ 111 if (0) \ 112 printk(level fmt, ##args); \ 113} while (0) 114#endif 115 |
116#define TUN_HEADROOM 256 | |
117#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 118 119/* TUN device flags */ 120 121/* IFF_ATTACH_QUEUE is never stored in device flags, 122 * overload it to mean fasync when stored there. 123 */ 124#define TUN_FASYNC IFF_ATTACH_QUEUE --- 51 unchanged lines hidden (view full) --- 176 /* only used for fasnyc */ 177 unsigned int flags; 178 union { 179 u16 queue_index; 180 unsigned int ifindex; 181 }; 182 struct napi_struct napi; 183 bool napi_enabled; | 116#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 117 118/* TUN device flags */ 119 120/* IFF_ATTACH_QUEUE is never stored in device flags, 121 * overload it to mean fasync when stored there. 122 */ 123#define TUN_FASYNC IFF_ATTACH_QUEUE --- 51 unchanged lines hidden (view full) --- 175 /* only used for fasnyc */ 176 unsigned int flags; 177 union { 178 u16 queue_index; 179 unsigned int ifindex; 180 }; 181 struct napi_struct napi; 182 bool napi_enabled; |
| 183 bool napi_frags_enabled; |
184 struct mutex napi_mutex; /* Protects access to the above napi */ 185 struct list_head next; 186 struct tun_struct *detached; 187 struct ptr_ring tx_ring; 188 struct xdp_rxq_info xdp_rxq; 189}; 190 191struct tun_flow_entry { --- 116 unchanged lines hidden (view full) --- 308 309 if (received < budget) 310 napi_complete_done(napi, received); 311 312 return received; 313} 314 315static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, | 184 struct mutex napi_mutex; /* Protects access to the above napi */ 185 struct list_head next; 186 struct tun_struct *detached; 187 struct ptr_ring tx_ring; 188 struct xdp_rxq_info xdp_rxq; 189}; 190 191struct tun_flow_entry { --- 116 unchanged lines hidden (view full) --- 308 309 if (received < budget) 310 napi_complete_done(napi, received); 311 312 return received; 313} 314 315static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, |
316 bool napi_en) | 316 bool napi_en, bool napi_frags) |
317{ 318 tfile->napi_enabled = napi_en; | 317{ 318 tfile->napi_enabled = napi_en; |
| 319 tfile->napi_frags_enabled = napi_en && napi_frags; |
319 if (napi_en) { 320 netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, 321 NAPI_POLL_WEIGHT); 322 napi_enable(&tfile->napi); | 320 if (napi_en) { 321 netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, 322 NAPI_POLL_WEIGHT); 323 napi_enable(&tfile->napi); |
323 mutex_init(&tfile->napi_mutex); | |
324 } 325} 326 | 324 } 325} 326 |
327static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) | 327static void tun_napi_disable(struct tun_file *tfile) |
328{ 329 if (tfile->napi_enabled) 330 napi_disable(&tfile->napi); 331} 332 | 328{ 329 if (tfile->napi_enabled) 330 napi_disable(&tfile->napi); 331} 332 |
333static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) | 333static void tun_napi_del(struct tun_file *tfile) |
334{ 335 if (tfile->napi_enabled) 336 netif_napi_del(&tfile->napi); 337} 338 | 334{ 335 if (tfile->napi_enabled) 336 netif_napi_del(&tfile->napi); 337} 338 |
339static bool tun_napi_frags_enabled(const struct tun_struct *tun) | 339static bool tun_napi_frags_enabled(const struct tun_file *tfile) |
340{ | 340{ |
341 return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; | 341 return tfile->napi_frags_enabled; |
342} 343 344#ifdef CONFIG_TUN_VNET_CROSS_LE 345static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 346{ 347 return tun->flags & TUN_VNET_BE ? false : 348 virtio_legacy_is_little_endian(); 349} --- 207 unchanged lines hidden (view full) --- 557 * flow_hash table accordingly. 558 */ 559static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) 560{ 561 if (unlikely(e->rps_rxhash != hash)) 562 e->rps_rxhash = hash; 563} 564 | 342} 343 344#ifdef CONFIG_TUN_VNET_CROSS_LE 345static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) 346{ 347 return tun->flags & TUN_VNET_BE ? false : 348 virtio_legacy_is_little_endian(); 349} --- 207 unchanged lines hidden (view full) --- 557 * flow_hash table accordingly. 558 */ 559static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) 560{ 561 if (unlikely(e->rps_rxhash != hash)) 562 e->rps_rxhash = hash; 563} 564 |
565/* We try to identify a flow through its rxhash first. The reason that | 565/* We try to identify a flow through its rxhash. The reason that |
566 * we do not check rxq no. is because some cards(e.g 82599), chooses 567 * the rxq based on the txq where the last packet of the flow comes. As 568 * the userspace application move between processors, we may get a | 566 * we do not check rxq no. is because some cards(e.g 82599), chooses 567 * the rxq based on the txq where the last packet of the flow comes. As 568 * the userspace application move between processors, we may get a |
569 * different rxq no. here. If we could not get rxhash, then we would 570 * hope the rxq no. may help here. | 569 * different rxq no. here. |
571 */ 572static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) 573{ 574 struct tun_flow_entry *e; 575 u32 txq = 0; 576 u32 numqueues = 0; 577 578 numqueues = READ_ONCE(tun->numqueues); 579 580 txq = __skb_get_hash_symmetric(skb); | 570 */ 571static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) 572{ 573 struct tun_flow_entry *e; 574 u32 txq = 0; 575 u32 numqueues = 0; 576 577 numqueues = READ_ONCE(tun->numqueues); 578 579 txq = __skb_get_hash_symmetric(skb); |
581 if (txq) { 582 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); 583 if (e) { 584 tun_flow_save_rps_rxhash(e, txq); 585 txq = e->queue_index; 586 } else 587 /* use multiply and shift instead of expensive divide */ 588 txq = ((u64)txq * numqueues) >> 32; 589 } else if (likely(skb_rx_queue_recorded(skb))) { 590 txq = skb_get_rx_queue(skb); 591 while (unlikely(txq >= numqueues)) 592 txq -= numqueues; | 580 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); 581 if (e) { 582 tun_flow_save_rps_rxhash(e, txq); 583 txq = e->queue_index; 584 } else { 585 /* use multiply and shift instead of expensive divide */ 586 txq = ((u64)txq * numqueues) >> 32; |
593 } 594 595 return txq; 596} 597 598static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) 599{ 600 struct tun_prog *prog; --- 84 unchanged lines hidden (view full) --- 685static void __tun_detach(struct tun_file *tfile, bool clean) 686{ 687 struct tun_file *ntfile; 688 struct tun_struct *tun; 689 690 tun = rtnl_dereference(tfile->tun); 691 692 if (tun && clean) { | 587 } 588 589 return txq; 590} 591 592static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) 593{ 594 struct tun_prog *prog; --- 84 unchanged lines hidden (view full) --- 679static void __tun_detach(struct tun_file *tfile, bool clean) 680{ 681 struct tun_file *ntfile; 682 struct tun_struct *tun; 683 684 tun = rtnl_dereference(tfile->tun); 685 686 if (tun && clean) { |
693 tun_napi_disable(tun, tfile); 694 tun_napi_del(tun, tfile); | 687 tun_napi_disable(tfile); 688 tun_napi_del(tfile); |
695 } 696 697 if (tun && !tfile->detached) { 698 u16 index = tfile->queue_index; 699 BUG_ON(index >= tun->numqueues); 700 701 rcu_assign_pointer(tun->tfiles[index], 702 tun->tfiles[tun->numqueues - 1]); --- 50 unchanged lines hidden (view full) --- 753{ 754 struct tun_struct *tun = netdev_priv(dev); 755 struct tun_file *tfile, *tmp; 756 int i, n = tun->numqueues; 757 758 for (i = 0; i < n; i++) { 759 tfile = rtnl_dereference(tun->tfiles[i]); 760 BUG_ON(!tfile); | 689 } 690 691 if (tun && !tfile->detached) { 692 u16 index = tfile->queue_index; 693 BUG_ON(index >= tun->numqueues); 694 695 rcu_assign_pointer(tun->tfiles[index], 696 tun->tfiles[tun->numqueues - 1]); --- 50 unchanged lines hidden (view full) --- 747{ 748 struct tun_struct *tun = netdev_priv(dev); 749 struct tun_file *tfile, *tmp; 750 int i, n = tun->numqueues; 751 752 for (i = 0; i < n; i++) { 753 tfile = rtnl_dereference(tun->tfiles[i]); 754 BUG_ON(!tfile); |
761 tun_napi_disable(tun, tfile); | 755 tun_napi_disable(tfile); |
762 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 763 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 764 RCU_INIT_POINTER(tfile->tun, NULL); 765 --tun->numqueues; 766 } 767 list_for_each_entry(tfile, &tun->disabled, next) { 768 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 769 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 770 RCU_INIT_POINTER(tfile->tun, NULL); 771 } 772 BUG_ON(tun->numqueues != 0); 773 774 synchronize_net(); 775 for (i = 0; i < n; i++) { 776 tfile = rtnl_dereference(tun->tfiles[i]); | 756 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 757 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 758 RCU_INIT_POINTER(tfile->tun, NULL); 759 --tun->numqueues; 760 } 761 list_for_each_entry(tfile, &tun->disabled, next) { 762 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; 763 tfile->socket.sk->sk_data_ready(tfile->socket.sk); 764 RCU_INIT_POINTER(tfile->tun, NULL); 765 } 766 BUG_ON(tun->numqueues != 0); 767 768 synchronize_net(); 769 for (i = 0; i < n; i++) { 770 tfile = rtnl_dereference(tun->tfiles[i]); |
777 tun_napi_del(tun, tfile); | 771 tun_napi_del(tfile); |
778 /* Drop read queue */ 779 tun_queue_purge(tfile); 780 xdp_rxq_info_unreg(&tfile->xdp_rxq); 781 sock_put(&tfile->sk); 782 } 783 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { 784 tun_enable_queue(tfile); 785 tun_queue_purge(tfile); 786 xdp_rxq_info_unreg(&tfile->xdp_rxq); 787 sock_put(&tfile->sk); 788 } 789 BUG_ON(tun->numdisabled != 0); 790 791 if (tun->flags & IFF_PERSIST) 792 module_put(THIS_MODULE); 793} 794 795static int tun_attach(struct tun_struct *tun, struct file *file, | 772 /* Drop read queue */ 773 tun_queue_purge(tfile); 774 xdp_rxq_info_unreg(&tfile->xdp_rxq); 775 sock_put(&tfile->sk); 776 } 777 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { 778 tun_enable_queue(tfile); 779 tun_queue_purge(tfile); 780 xdp_rxq_info_unreg(&tfile->xdp_rxq); 781 sock_put(&tfile->sk); 782 } 783 BUG_ON(tun->numdisabled != 0); 784 785 if (tun->flags & IFF_PERSIST) 786 module_put(THIS_MODULE); 787} 788 789static int tun_attach(struct tun_struct *tun, struct file *file, |
796 bool skip_filter, bool napi) | 790 bool skip_filter, bool napi, bool napi_frags) |
797{ 798 struct tun_file *tfile = file->private_data; 799 struct net_device *dev = tun->dev; 800 int err; 801 802 err = security_tun_dev_attach(tfile->socket.sk, tun->security); 803 if (err < 0) 804 goto out; --- 56 unchanged lines hidden (view full) --- 861 rcu_assign_pointer(tfile->tun, tun); 862 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); 863 tun->numqueues++; 864 865 if (tfile->detached) { 866 tun_enable_queue(tfile); 867 } else { 868 sock_hold(&tfile->sk); | 791{ 792 struct tun_file *tfile = file->private_data; 793 struct net_device *dev = tun->dev; 794 int err; 795 796 err = security_tun_dev_attach(tfile->socket.sk, tun->security); 797 if (err < 0) 798 goto out; --- 56 unchanged lines hidden (view full) --- 855 rcu_assign_pointer(tfile->tun, tun); 856 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); 857 tun->numqueues++; 858 859 if (tfile->detached) { 860 tun_enable_queue(tfile); 861 } else { 862 sock_hold(&tfile->sk); |
869 tun_napi_init(tun, tfile, napi); | 863 tun_napi_init(tun, tfile, napi, napi_frags); |
870 } 871 | 864 } 865 |
| 866 if (rtnl_dereference(tun->xdp_prog)) 867 sock_set_flag(&tfile->sk, SOCK_XDP); 868 |
872 tun_set_real_num_queues(tun); 873 874 /* device is allowed to go away first, so no need to hold extra 875 * refcnt. 876 */ 877 878out: 879 return err; --- 159 unchanged lines hidden (view full) --- 1039/* Net device start xmit */ 1040static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) 1041{ 1042#ifdef CONFIG_RPS 1043 if (tun->numqueues == 1 && static_key_false(&rps_needed)) { 1044 /* Select queue was not called for the skbuff, so we extract the 1045 * RPS hash and save it into the flow_table here. 1046 */ | 869 tun_set_real_num_queues(tun); 870 871 /* device is allowed to go away first, so no need to hold extra 872 * refcnt. 873 */ 874 875out: 876 return err; --- 159 unchanged lines hidden (view full) --- 1036/* Net device start xmit */ 1037static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) 1038{ 1039#ifdef CONFIG_RPS 1040 if (tun->numqueues == 1 && static_key_false(&rps_needed)) { 1041 /* Select queue was not called for the skbuff, so we extract the 1042 * RPS hash and save it into the flow_table here. 1043 */ |
| 1044 struct tun_flow_entry *e; |
1047 __u32 rxhash; 1048 1049 rxhash = __skb_get_hash_symmetric(skb); | 1045 __u32 rxhash; 1046 1047 rxhash = __skb_get_hash_symmetric(skb); |
1050 if (rxhash) { 1051 struct tun_flow_entry *e; 1052 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], 1053 rxhash); 1054 if (e) 1055 tun_flow_save_rps_rxhash(e, rxhash); 1056 } | 1048 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash); 1049 if (e) 1050 tun_flow_save_rps_rxhash(e, rxhash); |
1057 } 1058#endif 1059} 1060 1061static unsigned int run_ebpf_filter(struct tun_struct *tun, 1062 struct sk_buff *skb, 1063 int len) 1064{ --- 134 unchanged lines hidden (view full) --- 1199 stats->rx_frame_errors = rx_frame_errors; 1200 stats->tx_dropped = tx_dropped; 1201} 1202 1203static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1204 struct netlink_ext_ack *extack) 1205{ 1206 struct tun_struct *tun = netdev_priv(dev); | 1051 } 1052#endif 1053} 1054 1055static unsigned int run_ebpf_filter(struct tun_struct *tun, 1056 struct sk_buff *skb, 1057 int len) 1058{ --- 134 unchanged lines hidden (view full) --- 1193 stats->rx_frame_errors = rx_frame_errors; 1194 stats->tx_dropped = tx_dropped; 1195} 1196 1197static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1198 struct netlink_ext_ack *extack) 1199{ 1200 struct tun_struct *tun = netdev_priv(dev); |
| 1201 struct tun_file *tfile; |
1207 struct bpf_prog *old_prog; | 1202 struct bpf_prog *old_prog; |
| 1203 int i; |
1208 1209 old_prog = rtnl_dereference(tun->xdp_prog); 1210 rcu_assign_pointer(tun->xdp_prog, prog); 1211 if (old_prog) 1212 bpf_prog_put(old_prog); 1213 | 1204 1205 old_prog = rtnl_dereference(tun->xdp_prog); 1206 rcu_assign_pointer(tun->xdp_prog, prog); 1207 if (old_prog) 1208 bpf_prog_put(old_prog); 1209 |
| 1210 for (i = 0; i < tun->numqueues; i++) { 1211 tfile = rtnl_dereference(tun->tfiles[i]); 1212 if (prog) 1213 sock_set_flag(&tfile->sk, SOCK_XDP); 1214 else 1215 sock_reset_flag(&tfile->sk, SOCK_XDP); 1216 } 1217 list_for_each_entry(tfile, &tun->disabled, next) { 1218 if (prog) 1219 sock_set_flag(&tfile->sk, SOCK_XDP); 1220 else 1221 sock_reset_flag(&tfile->sk, SOCK_XDP); 1222 } 1223 |
1214 return 0; 1215} 1216 1217static u32 tun_xdp_query(struct net_device *dev) 1218{ 1219 struct tun_struct *tun = netdev_priv(dev); 1220 const struct bpf_prog *xdp_prog; 1221 --- 347 unchanged lines hidden (view full) --- 1569 1570 if (SKB_DATA_ALIGN(len + TUN_RX_PAD) + 1571 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) 1572 return false; 1573 1574 return true; 1575} 1576 | 1224 return 0; 1225} 1226 1227static u32 tun_xdp_query(struct net_device *dev) 1228{ 1229 struct tun_struct *tun = netdev_priv(dev); 1230 const struct bpf_prog *xdp_prog; 1231 --- 347 unchanged lines hidden (view full) --- 1579 1580 if (SKB_DATA_ALIGN(len + TUN_RX_PAD) + 1581 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) 1582 return false; 1583 1584 return true; 1585} 1586 |
| 1587static struct sk_buff *__tun_build_skb(struct page_frag *alloc_frag, char *buf, 1588 int buflen, int len, int pad) 1589{ 1590 struct sk_buff *skb = build_skb(buf, buflen); 1591 1592 if (!skb) 1593 return ERR_PTR(-ENOMEM); 1594 1595 skb_reserve(skb, pad); 1596 skb_put(skb, len); 1597 1598 get_page(alloc_frag->page); 1599 alloc_frag->offset += buflen; 1600 1601 return skb; 1602} 1603 1604static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, 1605 struct xdp_buff *xdp, u32 act) 1606{ 1607 int err; 1608 1609 switch (act) { 1610 case XDP_REDIRECT: 1611 err = xdp_do_redirect(tun->dev, xdp, xdp_prog); 1612 if (err) 1613 return err; 1614 break; 1615 case XDP_TX: 1616 err = tun_xdp_tx(tun->dev, xdp); 1617 if (err < 0) 1618 return err; 1619 break; 1620 case XDP_PASS: 1621 break; 1622 default: 1623 bpf_warn_invalid_xdp_action(act); 1624 /* fall through */ 1625 case XDP_ABORTED: 1626 trace_xdp_exception(tun->dev, xdp_prog, act); 1627 /* fall through */ 1628 case XDP_DROP: 1629 this_cpu_inc(tun->pcpu_stats->rx_dropped); 1630 break; 1631 } 1632 1633 return act; 1634} 1635 |
1577static struct sk_buff *tun_build_skb(struct tun_struct *tun, 1578 struct tun_file *tfile, 1579 struct iov_iter *from, 1580 struct virtio_net_hdr *hdr, 1581 int len, int *skb_xdp) 1582{ 1583 struct page_frag *alloc_frag = &current->task_frag; | 1636static struct sk_buff *tun_build_skb(struct tun_struct *tun, 1637 struct tun_file *tfile, 1638 struct iov_iter *from, 1639 struct virtio_net_hdr *hdr, 1640 int len, int *skb_xdp) 1641{ 1642 struct page_frag *alloc_frag = &current->task_frag; |
1584 struct sk_buff *skb; | |
1585 struct bpf_prog *xdp_prog; 1586 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | 1643 struct bpf_prog *xdp_prog; 1644 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
1587 unsigned int delta = 0; | |
1588 char *buf; 1589 size_t copied; | 1645 char *buf; 1646 size_t copied; |
1590 int err, pad = TUN_RX_PAD; | 1647 int pad = TUN_RX_PAD; 1648 int err = 0; |
1591 1592 rcu_read_lock(); 1593 xdp_prog = rcu_dereference(tun->xdp_prog); 1594 if (xdp_prog) | 1649 1650 rcu_read_lock(); 1651 xdp_prog = rcu_dereference(tun->xdp_prog); 1652 if (xdp_prog) |
1595 pad += TUN_HEADROOM; | 1653 pad += XDP_PACKET_HEADROOM; |
1596 buflen += SKB_DATA_ALIGN(len + pad); 1597 rcu_read_unlock(); 1598 1599 alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); 1600 if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) 1601 return ERR_PTR(-ENOMEM); 1602 1603 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1604 copied = copy_page_from_iter(alloc_frag->page, 1605 alloc_frag->offset + pad, 1606 len, from); 1607 if (copied != len) 1608 return ERR_PTR(-EFAULT); 1609 1610 /* There's a small window that XDP may be set after the check 1611 * of xdp_prog above, this should be rare and for simplicity 1612 * we do XDP on skb in case the headroom is not enough. 1613 */ | 1654 buflen += SKB_DATA_ALIGN(len + pad); 1655 rcu_read_unlock(); 1656 1657 alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); 1658 if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) 1659 return ERR_PTR(-ENOMEM); 1660 1661 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 1662 copied = copy_page_from_iter(alloc_frag->page, 1663 alloc_frag->offset + pad, 1664 len, from); 1665 if (copied != len) 1666 return ERR_PTR(-EFAULT); 1667 1668 /* There's a small window that XDP may be set after the check 1669 * of xdp_prog above, this should be rare and for simplicity 1670 * we do XDP on skb in case the headroom is not enough. 1671 */ |
1614 if (hdr->gso_type || !xdp_prog) | 1672 if (hdr->gso_type || !xdp_prog) { |
1615 *skb_xdp = 1; | 1673 *skb_xdp = 1; |
1616 else 1617 *skb_xdp = 0; | 1674 return __tun_build_skb(alloc_frag, buf, buflen, len, pad); 1675 } |
1618 | 1676 |
| 1677 *skb_xdp = 0; 1678 |
1619 local_bh_disable(); 1620 rcu_read_lock(); 1621 xdp_prog = rcu_dereference(tun->xdp_prog); | 1679 local_bh_disable(); 1680 rcu_read_lock(); 1681 xdp_prog = rcu_dereference(tun->xdp_prog); |
1622 if (xdp_prog && !*skb_xdp) { | 1682 if (xdp_prog) { |
1623 struct xdp_buff xdp; | 1683 struct xdp_buff xdp; |
1624 void *orig_data; | |
1625 u32 act; 1626 1627 xdp.data_hard_start = buf; 1628 xdp.data = buf + pad; 1629 xdp_set_data_meta_invalid(&xdp); 1630 xdp.data_end = xdp.data + len; 1631 xdp.rxq = &tfile->xdp_rxq; | 1684 u32 act; 1685 1686 xdp.data_hard_start = buf; 1687 xdp.data = buf + pad; 1688 xdp_set_data_meta_invalid(&xdp); 1689 xdp.data_end = xdp.data + len; 1690 xdp.rxq = &tfile->xdp_rxq; |
1632 orig_data = xdp.data; 1633 act = bpf_prog_run_xdp(xdp_prog, &xdp); | |
1634 | 1691 |
1635 switch (act) { 1636 case XDP_REDIRECT: | 1692 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1693 if (act == XDP_REDIRECT || act == XDP_TX) { |
1637 get_page(alloc_frag->page); 1638 alloc_frag->offset += buflen; | 1694 get_page(alloc_frag->page); 1695 alloc_frag->offset += buflen; |
1639 err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); 1640 xdp_do_flush_map(); 1641 if (err) 1642 goto err_redirect; 1643 rcu_read_unlock(); 1644 local_bh_enable(); 1645 return NULL; 1646 case XDP_TX: 1647 get_page(alloc_frag->page); 1648 alloc_frag->offset += buflen; 1649 if (tun_xdp_tx(tun->dev, &xdp) < 0) 1650 goto err_redirect; 1651 rcu_read_unlock(); 1652 local_bh_enable(); 1653 return NULL; 1654 case XDP_PASS: 1655 delta = orig_data - xdp.data; 1656 len = xdp.data_end - xdp.data; 1657 break; 1658 default: 1659 bpf_warn_invalid_xdp_action(act); 1660 /* fall through */ 1661 case XDP_ABORTED: 1662 trace_xdp_exception(tun->dev, xdp_prog, act); 1663 /* fall through */ 1664 case XDP_DROP: 1665 goto err_xdp; | |
1666 } | 1696 } |
1667 } | 1697 err = tun_xdp_act(tun, xdp_prog, &xdp, act); 1698 if (err < 0) 1699 goto err_xdp; 1700 if (err == XDP_REDIRECT) 1701 xdp_do_flush_map(); 1702 if (err != XDP_PASS) 1703 goto out; |
1668 | 1704 |
1669 skb = build_skb(buf, buflen); 1670 if (!skb) { 1671 rcu_read_unlock(); 1672 local_bh_enable(); 1673 return ERR_PTR(-ENOMEM); | 1705 pad = xdp.data - xdp.data_hard_start; 1706 len = xdp.data_end - xdp.data; |
1674 } | 1707 } |
1675 1676 skb_reserve(skb, pad - delta); 1677 skb_put(skb, len); 1678 get_page(alloc_frag->page); 1679 alloc_frag->offset += buflen; 1680 | |
1681 rcu_read_unlock(); 1682 local_bh_enable(); 1683 | 1708 rcu_read_unlock(); 1709 local_bh_enable(); 1710 |
1684 return skb; | 1711 return __tun_build_skb(alloc_frag, buf, buflen, len, pad); |
1685 | 1712 |
1686err_redirect: 1687 put_page(alloc_frag->page); | |
1688err_xdp: | 1713err_xdp: |
| 1714 put_page(alloc_frag->page); 1715out: |
1689 rcu_read_unlock(); 1690 local_bh_enable(); | 1716 rcu_read_unlock(); 1717 local_bh_enable(); |
1691 this_cpu_inc(tun->pcpu_stats->rx_dropped); | |
1692 return NULL; 1693} 1694 1695/* Get packet from user space buffer */ 1696static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, 1697 void *msg_control, struct iov_iter *from, 1698 int noblock, bool more) 1699{ --- 4 unchanged lines hidden (view full) --- 1704 struct virtio_net_hdr gso = { 0 }; 1705 struct tun_pcpu_stats *stats; 1706 int good_linear; 1707 int copylen; 1708 bool zerocopy = false; 1709 int err; 1710 u32 rxhash = 0; 1711 int skb_xdp = 1; | 1718 return NULL; 1719} 1720 1721/* Get packet from user space buffer */ 1722static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, 1723 void *msg_control, struct iov_iter *from, 1724 int noblock, bool more) 1725{ --- 4 unchanged lines hidden (view full) --- 1730 struct virtio_net_hdr gso = { 0 }; 1731 struct tun_pcpu_stats *stats; 1732 int good_linear; 1733 int copylen; 1734 bool zerocopy = false; 1735 int err; 1736 u32 rxhash = 0; 1737 int skb_xdp = 1; |
1712 bool frags = tun_napi_frags_enabled(tun); | 1738 bool frags = tun_napi_frags_enabled(tfile); |
1713 1714 if (!(tun->dev->flags & IFF_UP)) 1715 return -EIO; 1716 1717 if (!(tun->flags & IFF_NO_PI)) { 1718 if (len < sizeof(pi)) 1719 return -EINVAL; 1720 len -= sizeof(pi); --- 537 unchanged lines hidden (view full) --- 2258} 2259 2260/* Trivial set of netlink ops to allow deleting tun or tap 2261 * device with netlink. 2262 */ 2263static int tun_validate(struct nlattr *tb[], struct nlattr *data[], 2264 struct netlink_ext_ack *extack) 2265{ | 1739 1740 if (!(tun->dev->flags & IFF_UP)) 1741 return -EIO; 1742 1743 if (!(tun->flags & IFF_NO_PI)) { 1744 if (len < sizeof(pi)) 1745 return -EINVAL; 1746 len -= sizeof(pi); --- 537 unchanged lines hidden (view full) --- 2284} 2285 2286/* Trivial set of netlink ops to allow deleting tun or tap 2287 * device with netlink. 2288 */ 2289static int tun_validate(struct nlattr *tb[], struct nlattr *data[], 2290 struct netlink_ext_ack *extack) 2291{ |
| 2292 if (!data) 2293 return 0; |
2266 return -EINVAL; 2267} 2268 2269static size_t tun_get_size(const struct net_device *dev) 2270{ 2271 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); 2272 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); 2273 --- 70 unchanged lines hidden (view full) --- 2344 if (wqueue && waitqueue_active(wqueue)) 2345 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT | 2346 EPOLLWRNORM | EPOLLWRBAND); 2347 2348 tfile = container_of(sk, struct tun_file, sk); 2349 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); 2350} 2351 | 2294 return -EINVAL; 2295} 2296 2297static size_t tun_get_size(const struct net_device *dev) 2298{ 2299 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); 2300 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); 2301 --- 70 unchanged lines hidden (view full) --- 2372 if (wqueue && waitqueue_active(wqueue)) 2373 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT | 2374 EPOLLWRNORM | EPOLLWRBAND); 2375 2376 tfile = container_of(sk, struct tun_file, sk); 2377 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); 2378} 2379 |
| 2380static int tun_xdp_one(struct tun_struct *tun, 2381 struct tun_file *tfile, 2382 struct xdp_buff *xdp, int *flush) 2383{ 2384 struct tun_xdp_hdr *hdr = xdp->data_hard_start; 2385 struct virtio_net_hdr *gso = &hdr->gso; 2386 struct tun_pcpu_stats *stats; 2387 struct bpf_prog *xdp_prog; 2388 struct sk_buff *skb = NULL; 2389 u32 rxhash = 0, act; 2390 int buflen = hdr->buflen; 2391 int err = 0; 2392 bool skb_xdp = false; 2393 2394 xdp_prog = rcu_dereference(tun->xdp_prog); 2395 if (xdp_prog) { 2396 if (gso->gso_type) { 2397 skb_xdp = true; 2398 goto build; 2399 } 2400 xdp_set_data_meta_invalid(xdp); 2401 xdp->rxq = &tfile->xdp_rxq; 2402 2403 act = bpf_prog_run_xdp(xdp_prog, xdp); 2404 err = tun_xdp_act(tun, xdp_prog, xdp, act); 2405 if (err < 0) { 2406 put_page(virt_to_head_page(xdp->data)); 2407 return err; 2408 } 2409 2410 switch (err) { 2411 case XDP_REDIRECT: 2412 *flush = true; 2413 /* fall through */ 2414 case XDP_TX: 2415 return 0; 2416 case XDP_PASS: 2417 break; 2418 default: 2419 put_page(virt_to_head_page(xdp->data)); 2420 return 0; 2421 } 2422 } 2423 2424build: 2425 skb = build_skb(xdp->data_hard_start, buflen); 2426 if (!skb) { 2427 err = -ENOMEM; 2428 goto out; 2429 } 2430 2431 skb_reserve(skb, xdp->data - xdp->data_hard_start); 2432 skb_put(skb, xdp->data_end - xdp->data); 2433 2434 if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { 2435 this_cpu_inc(tun->pcpu_stats->rx_frame_errors); 2436 kfree_skb(skb); 2437 err = -EINVAL; 2438 goto out; 2439 } 2440 2441 skb->protocol = eth_type_trans(skb, tun->dev); 2442 skb_reset_network_header(skb); 2443 skb_probe_transport_header(skb, 0); 2444 2445 if (skb_xdp) { 2446 err = do_xdp_generic(xdp_prog, skb); 2447 if (err != XDP_PASS) 2448 goto out; 2449 } 2450 2451 if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 && 2452 !tfile->detached) 2453 rxhash = __skb_get_hash_symmetric(skb); 2454 2455 netif_receive_skb(skb); 2456 2457 stats = get_cpu_ptr(tun->pcpu_stats); 2458 u64_stats_update_begin(&stats->syncp); 2459 stats->rx_packets++; 2460 stats->rx_bytes += skb->len; 2461 u64_stats_update_end(&stats->syncp); 2462 put_cpu_ptr(stats); 2463 2464 if (rxhash) 2465 tun_flow_update(tun, rxhash, tfile); 2466 2467out: 2468 return err; 2469} 2470 |
2352static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 2353{ | 2471static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) 2472{ |
2354 int ret; | 2473 int ret, i; |
2355 struct tun_file *tfile = container_of(sock, struct tun_file, socket); 2356 struct tun_struct *tun = tun_get(tfile); | 2474 struct tun_file *tfile = container_of(sock, struct tun_file, socket); 2475 struct tun_struct *tun = tun_get(tfile); |
| 2476 struct tun_msg_ctl *ctl = m->msg_control; 2477 struct xdp_buff *xdp; |
2357 2358 if (!tun) 2359 return -EBADFD; 2360 | 2478 2479 if (!tun) 2480 return -EBADFD; 2481 |
2361 ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, | 2482 if (ctl && (ctl->type == TUN_MSG_PTR)) { 2483 int n = ctl->num; 2484 int flush = 0; 2485 2486 local_bh_disable(); 2487 rcu_read_lock(); 2488 2489 for (i = 0; i < n; i++) { 2490 xdp = &((struct xdp_buff *)ctl->ptr)[i]; 2491 tun_xdp_one(tun, tfile, xdp, &flush); 2492 } 2493 2494 if (flush) 2495 xdp_do_flush_map(); 2496 2497 rcu_read_unlock(); 2498 local_bh_enable(); 2499 2500 ret = total_len; 2501 goto out; 2502 } 2503 2504 ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter, |
2362 m->msg_flags & MSG_DONTWAIT, 2363 m->msg_flags & MSG_MORE); | 2505 m->msg_flags & MSG_DONTWAIT, 2506 m->msg_flags & MSG_MORE); |
| 2507out: |
2364 tun_put(tun); 2365 return ret; 2366} 2367 2368static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, 2369 int flags) 2370{ 2371 struct tun_file *tfile = container_of(sock, struct tun_file, socket); --- 157 unchanged lines hidden (view full) --- 2529 2530 if (tun_not_capable(tun)) 2531 return -EPERM; 2532 err = security_tun_dev_open(tun->security); 2533 if (err < 0) 2534 return err; 2535 2536 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, | 2508 tun_put(tun); 2509 return ret; 2510} 2511 2512static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, 2513 int flags) 2514{ 2515 struct tun_file *tfile = container_of(sock, struct tun_file, socket); --- 157 unchanged lines hidden (view full) --- 2673 2674 if (tun_not_capable(tun)) 2675 return -EPERM; 2676 err = security_tun_dev_open(tun->security); 2677 if (err < 0) 2678 return err; 2679 2680 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, |
2537 ifr->ifr_flags & IFF_NAPI); | 2681 ifr->ifr_flags & IFF_NAPI, 2682 ifr->ifr_flags & IFF_NAPI_FRAGS); |
2538 if (err < 0) 2539 return err; 2540 2541 if (tun->flags & IFF_MULTI_QUEUE && 2542 (tun->numqueues + tun->numdisabled > 1)) { 2543 /* One or more queue has already been attached, no need 2544 * to initialize the device again. 2545 */ --- 81 unchanged lines hidden (view full) --- 2627 dev->vlan_features = dev->features & 2628 ~(NETIF_F_HW_VLAN_CTAG_TX | 2629 NETIF_F_HW_VLAN_STAG_TX); 2630 2631 tun->flags = (tun->flags & ~TUN_FEATURES) | 2632 (ifr->ifr_flags & TUN_FEATURES); 2633 2634 INIT_LIST_HEAD(&tun->disabled); | 2683 if (err < 0) 2684 return err; 2685 2686 if (tun->flags & IFF_MULTI_QUEUE && 2687 (tun->numqueues + tun->numdisabled > 1)) { 2688 /* One or more queue has already been attached, no need 2689 * to initialize the device again. 2690 */ --- 81 unchanged lines hidden (view full) --- 2772 dev->vlan_features = dev->features & 2773 ~(NETIF_F_HW_VLAN_CTAG_TX | 2774 NETIF_F_HW_VLAN_STAG_TX); 2775 2776 tun->flags = (tun->flags & ~TUN_FEATURES) | 2777 (ifr->ifr_flags & TUN_FEATURES); 2778 2779 INIT_LIST_HEAD(&tun->disabled); |
2635 err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); | 2780 err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, 2781 ifr->ifr_flags & IFF_NAPI_FRAGS); |
2636 if (err < 0) 2637 goto err_free_flow; 2638 2639 err = register_netdevice(tun->dev); 2640 if (err < 0) 2641 goto err_detach; 2642 } 2643 --- 132 unchanged lines hidden (view full) --- 2776 tun = tfile->detached; 2777 if (!tun) { 2778 ret = -EINVAL; 2779 goto unlock; 2780 } 2781 ret = security_tun_dev_attach_queue(tun->security); 2782 if (ret < 0) 2783 goto unlock; | 2782 if (err < 0) 2783 goto err_free_flow; 2784 2785 err = register_netdevice(tun->dev); 2786 if (err < 0) 2787 goto err_detach; 2788 } 2789 --- 132 unchanged lines hidden (view full) --- 2922 tun = tfile->detached; 2923 if (!tun) { 2924 ret = -EINVAL; 2925 goto unlock; 2926 } 2927 ret = security_tun_dev_attach_queue(tun->security); 2928 if (ret < 0) 2929 goto unlock; |
2784 ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); | 2930 ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, 2931 tun->flags & IFF_NAPI_FRAGS); |
2785 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { 2786 tun = rtnl_dereference(tfile->tun); 2787 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) 2788 ret = -EINVAL; 2789 else 2790 __tun_detach(tfile, false); 2791 } else 2792 ret = -EINVAL; --- 401 unchanged lines hidden (view full) --- 3194 &tun_proto, 0); 3195 if (!tfile) 3196 return -ENOMEM; 3197 if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { 3198 sk_free(&tfile->sk); 3199 return -ENOMEM; 3200 } 3201 | 2932 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { 2933 tun = rtnl_dereference(tfile->tun); 2934 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) 2935 ret = -EINVAL; 2936 else 2937 __tun_detach(tfile, false); 2938 } else 2939 ret = -EINVAL; --- 401 unchanged lines hidden (view full) --- 3341 &tun_proto, 0); 3342 if (!tfile) 3343 return -ENOMEM; 3344 if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { 3345 sk_free(&tfile->sk); 3346 return -ENOMEM; 3347 } 3348 |
| 3349 mutex_init(&tfile->napi_mutex); |
3202 RCU_INIT_POINTER(tfile->tun, NULL); 3203 tfile->flags = 0; 3204 tfile->ifindex = 0; 3205 3206 init_waitqueue_head(&tfile->wq.wait); 3207 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); 3208 3209 tfile->socket.file = file; --- 302 unchanged lines hidden --- | 3350 RCU_INIT_POINTER(tfile->tun, NULL); 3351 tfile->flags = 0; 3352 tfile->ifindex = 0; 3353 3354 init_waitqueue_head(&tfile->wq.wait); 3355 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq); 3356 3357 tfile->socket.file = file; --- 302 unchanged lines hidden --- |
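
One behavioural change in this diff worth calling out: IFF_NAPI_FRAGS is now latched per queue at attach time (tun_attach() passes it into tun_napi_init(), which records it in tfile->napi_frags_enabled), instead of being re-read from tun->flags on every tun_get_user() call. A minimal userspace sketch of requesting that mode follows; the device name, flag combination, and error handling are illustrative assumptions, not code from this diff.

```c
/* Illustrative sketch: open a tap queue with per-queue NAPI and frag
 * support. After the right-hand revision, the frags choice made here is
 * latched into tfile->napi_frags_enabled for this queue at attach time.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

int open_napi_frags_queue(const char *name)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	/* IFF_NAPI_FRAGS is only meaningful together with IFF_NAPI on a tap */
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
```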
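The other substantive addition on the right-hand side is a batched transmit path: tun_sendmsg() now recognizes a struct tun_msg_ctl of type TUN_MSG_PTR whose ptr field carries an array of struct xdp_buff, loops each buffer through the new tun_xdp_one(), and flushes redirect maps once per batch. Below is a minimal in-kernel caller sketch of that contract, modeled on how a user such as vhost-net might drive it; the helper name, batch preparation, and zero total_len are illustrative assumptions rather than code from this diff.

```c
/* Hypothetical helper (not from this diff): submit a batch of already
 * prepared struct xdp_buff to a tun queue with a single sendmsg() call,
 * using the TUN_MSG_PTR control introduced on the right-hand side.
 */
#include <linux/if_tun.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <net/xdp.h>

static int tun_send_xdp_batch(struct socket *sock,
			      struct xdp_buff *xdp, unsigned short n)
{
	struct tun_msg_ctl ctl = {
		.type = TUN_MSG_PTR,	/* ctl.ptr points at an xdp_buff array */
		.num  = n,
		.ptr  = xdp,
	};
	struct msghdr msg = {
		.msg_control = &ctl,	/* tun_sendmsg() reads this directly */
	};

	/* tun_sendmsg() walks ctl.num buffers through tun_xdp_one() and
	 * calls xdp_do_flush_map() once if any buffer hit XDP_REDIRECT.
	 */
	return sock->ops->sendmsg(sock, &msg, 0);
}
```

In-tree, a caller like this would typically obtain the struct socket for an attached queue via tun_get_socket() on the tun file.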