109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2e314dbdcSPavel Emelyanov /* 3e314dbdcSPavel Emelyanov * drivers/net/veth.c 4e314dbdcSPavel Emelyanov * 5e314dbdcSPavel Emelyanov * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 6e314dbdcSPavel Emelyanov * 7e314dbdcSPavel Emelyanov * Author: Pavel Emelianov <xemul@openvz.org> 8e314dbdcSPavel Emelyanov * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 9e314dbdcSPavel Emelyanov * 10e314dbdcSPavel Emelyanov */ 11e314dbdcSPavel Emelyanov 12e314dbdcSPavel Emelyanov #include <linux/netdevice.h> 135a0e3ad6STejun Heo #include <linux/slab.h> 14e314dbdcSPavel Emelyanov #include <linux/ethtool.h> 15e314dbdcSPavel Emelyanov #include <linux/etherdevice.h> 16cf05c700SEric Dumazet #include <linux/u64_stats_sync.h> 17e314dbdcSPavel Emelyanov 18f7b12606SJiri Pirko #include <net/rtnetlink.h> 19e314dbdcSPavel Emelyanov #include <net/dst.h> 20e314dbdcSPavel Emelyanov #include <net/xfrm.h> 21af87a3aaSToshiaki Makita #include <net/xdp.h> 22ecef969eSStephen Hemminger #include <linux/veth.h> 239d9779e7SPaul Gortmaker #include <linux/module.h> 24948d4f21SToshiaki Makita #include <linux/bpf.h> 25948d4f21SToshiaki Makita #include <linux/filter.h> 26948d4f21SToshiaki Makita #include <linux/ptr_ring.h> 27948d4f21SToshiaki Makita #include <linux/bpf_trace.h> 28aa4e689eSMichael Walle #include <linux/net_tstamp.h> 29e314dbdcSPavel Emelyanov 30e314dbdcSPavel Emelyanov #define DRV_NAME "veth" 31e314dbdcSPavel Emelyanov #define DRV_VERSION "1.0" 32e314dbdcSPavel Emelyanov 339fc8d518SToshiaki Makita #define VETH_XDP_FLAG BIT(0) 34948d4f21SToshiaki Makita #define VETH_RING_SIZE 256 35948d4f21SToshiaki Makita #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 36948d4f21SToshiaki Makita 379cda7807SToshiaki Makita #define VETH_XDP_TX_BULK_SIZE 16 3865e6dcf7SLorenzo Bianconi #define VETH_XDP_BATCH 16 399cda7807SToshiaki Makita 4065780c56SLorenzo Bianconi struct veth_stats { 411c5b82e5SLorenzo 
Bianconi u64 rx_drops; 421c5b82e5SLorenzo Bianconi /* xdp */ 434195e54aSToshiaki Makita u64 xdp_packets; 444195e54aSToshiaki Makita u64 xdp_bytes; 451c5b82e5SLorenzo Bianconi u64 xdp_redirect; 464195e54aSToshiaki Makita u64 xdp_drops; 471c5b82e5SLorenzo Bianconi u64 xdp_tx; 489152cff0SLorenzo Bianconi u64 xdp_tx_err; 495fe6e567SLorenzo Bianconi u64 peer_tq_xdp_xmit; 505fe6e567SLorenzo Bianconi u64 peer_tq_xdp_xmit_err; 5165780c56SLorenzo Bianconi }; 5265780c56SLorenzo Bianconi 5365780c56SLorenzo Bianconi struct veth_rq_stats { 5465780c56SLorenzo Bianconi struct veth_stats vs; 554195e54aSToshiaki Makita struct u64_stats_sync syncp; 564195e54aSToshiaki Makita }; 574195e54aSToshiaki Makita 58638264dcSToshiaki Makita struct veth_rq { 59948d4f21SToshiaki Makita struct napi_struct xdp_napi; 60d3256efdSPaolo Abeni struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ 61948d4f21SToshiaki Makita struct net_device *dev; 62948d4f21SToshiaki Makita struct bpf_prog __rcu *xdp_prog; 63d1396004SToshiaki Makita struct xdp_mem_info xdp_mem; 644195e54aSToshiaki Makita struct veth_rq_stats stats; 65948d4f21SToshiaki Makita bool rx_notify_masked; 66948d4f21SToshiaki Makita struct ptr_ring xdp_ring; 67948d4f21SToshiaki Makita struct xdp_rxq_info xdp_rxq; 68e314dbdcSPavel Emelyanov }; 69e314dbdcSPavel Emelyanov 70638264dcSToshiaki Makita struct veth_priv { 71638264dcSToshiaki Makita struct net_device __rcu *peer; 72638264dcSToshiaki Makita atomic64_t dropped; 73638264dcSToshiaki Makita struct bpf_prog *_xdp_prog; 74638264dcSToshiaki Makita struct veth_rq *rq; 75638264dcSToshiaki Makita unsigned int requested_headroom; 76638264dcSToshiaki Makita }; 77638264dcSToshiaki Makita 789cda7807SToshiaki Makita struct veth_xdp_tx_bq { 799cda7807SToshiaki Makita struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 809cda7807SToshiaki Makita unsigned int count; 819cda7807SToshiaki Makita }; 829cda7807SToshiaki Makita 83e314dbdcSPavel Emelyanov /* 84e314dbdcSPavel Emelyanov * 
ethtool interface 85e314dbdcSPavel Emelyanov */ 86e314dbdcSPavel Emelyanov 87d397b968SToshiaki Makita struct veth_q_stat_desc { 88d397b968SToshiaki Makita char desc[ETH_GSTRING_LEN]; 89d397b968SToshiaki Makita size_t offset; 90d397b968SToshiaki Makita }; 91d397b968SToshiaki Makita 9265780c56SLorenzo Bianconi #define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 93d397b968SToshiaki Makita 94d397b968SToshiaki Makita static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 95d397b968SToshiaki Makita { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 96d397b968SToshiaki Makita { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 975fe6e567SLorenzo Bianconi { "drops", VETH_RQ_STAT(rx_drops) }, 985fe6e567SLorenzo Bianconi { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 995fe6e567SLorenzo Bianconi { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 1005fe6e567SLorenzo Bianconi { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 1015fe6e567SLorenzo Bianconi { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 102d397b968SToshiaki Makita }; 103d397b968SToshiaki Makita 104d397b968SToshiaki Makita #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 105d397b968SToshiaki Makita 1065fe6e567SLorenzo Bianconi static const struct veth_q_stat_desc veth_tq_stats_desc[] = { 1075fe6e567SLorenzo Bianconi { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 1085fe6e567SLorenzo Bianconi { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 1095fe6e567SLorenzo Bianconi }; 1105fe6e567SLorenzo Bianconi 1115fe6e567SLorenzo Bianconi #define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 1125fe6e567SLorenzo Bianconi 113e314dbdcSPavel Emelyanov static struct { 114e314dbdcSPavel Emelyanov const char string[ETH_GSTRING_LEN]; 115e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = { 116e314dbdcSPavel Emelyanov { "peer_ifindex" }, 117e314dbdcSPavel Emelyanov }; 118e314dbdcSPavel Emelyanov 119fefb695aSStanislav Fomichev struct veth_xdp_buff { 120fefb695aSStanislav Fomichev struct xdp_buff xdp; 121306531f0SStanislav Fomichev struct 
sk_buff *skb; 122fefb695aSStanislav Fomichev }; 123fefb695aSStanislav Fomichev 12456607b98SPhilippe Reynes static int veth_get_link_ksettings(struct net_device *dev, 12556607b98SPhilippe Reynes struct ethtool_link_ksettings *cmd) 126e314dbdcSPavel Emelyanov { 12756607b98SPhilippe Reynes cmd->base.speed = SPEED_10000; 12856607b98SPhilippe Reynes cmd->base.duplex = DUPLEX_FULL; 12956607b98SPhilippe Reynes cmd->base.port = PORT_TP; 13056607b98SPhilippe Reynes cmd->base.autoneg = AUTONEG_DISABLE; 131e314dbdcSPavel Emelyanov return 0; 132e314dbdcSPavel Emelyanov } 133e314dbdcSPavel Emelyanov 134e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 135e314dbdcSPavel Emelyanov { 136fb3ceec1SWolfram Sang strscpy(info->driver, DRV_NAME, sizeof(info->driver)); 137fb3ceec1SWolfram Sang strscpy(info->version, DRV_VERSION, sizeof(info->version)); 138e314dbdcSPavel Emelyanov } 139e314dbdcSPavel Emelyanov 140e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 141e314dbdcSPavel Emelyanov { 142a0341b73STonghao Zhang u8 *p = buf; 143d397b968SToshiaki Makita int i, j; 144d397b968SToshiaki Makita 145e314dbdcSPavel Emelyanov switch(stringset) { 146e314dbdcSPavel Emelyanov case ETH_SS_STATS: 147d397b968SToshiaki Makita memcpy(p, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 148d397b968SToshiaki Makita p += sizeof(ethtool_stats_keys); 149a0341b73STonghao Zhang for (i = 0; i < dev->real_num_rx_queues; i++) 150a0341b73STonghao Zhang for (j = 0; j < VETH_RQ_STATS_LEN; j++) 151a0341b73STonghao Zhang ethtool_sprintf(&p, "rx_queue_%u_%.18s", 152d397b968SToshiaki Makita i, veth_rq_stats_desc[j].desc); 153a0341b73STonghao Zhang 154a0341b73STonghao Zhang for (i = 0; i < dev->real_num_tx_queues; i++) 155a0341b73STonghao Zhang for (j = 0; j < VETH_TQ_STATS_LEN; j++) 156a0341b73STonghao Zhang ethtool_sprintf(&p, "tx_queue_%u_%.18s", 1575fe6e567SLorenzo Bianconi i, veth_tq_stats_desc[j].desc); 
158e314dbdcSPavel Emelyanov break; 159e314dbdcSPavel Emelyanov } 160e314dbdcSPavel Emelyanov } 161e314dbdcSPavel Emelyanov 162b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset) 163e314dbdcSPavel Emelyanov { 164b9f2c044SJeff Garzik switch (sset) { 165b9f2c044SJeff Garzik case ETH_SS_STATS: 166d397b968SToshiaki Makita return ARRAY_SIZE(ethtool_stats_keys) + 1675fe6e567SLorenzo Bianconi VETH_RQ_STATS_LEN * dev->real_num_rx_queues + 1685fe6e567SLorenzo Bianconi VETH_TQ_STATS_LEN * dev->real_num_tx_queues; 169b9f2c044SJeff Garzik default: 170b9f2c044SJeff Garzik return -EOPNOTSUPP; 171b9f2c044SJeff Garzik } 172e314dbdcSPavel Emelyanov } 173e314dbdcSPavel Emelyanov 174e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev, 175e314dbdcSPavel Emelyanov struct ethtool_stats *stats, u64 *data) 176e314dbdcSPavel Emelyanov { 1775fe6e567SLorenzo Bianconi struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 178d0e2c55eSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 179d397b968SToshiaki Makita int i, j, idx; 180e314dbdcSPavel Emelyanov 181d0e2c55eSEric Dumazet data[0] = peer ? 
peer->ifindex : 0; 182d397b968SToshiaki Makita idx = 1; 183d397b968SToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 184d397b968SToshiaki Makita const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 18565780c56SLorenzo Bianconi const void *stats_base = (void *)&rq_stats->vs; 186d397b968SToshiaki Makita unsigned int start; 187d397b968SToshiaki Makita size_t offset; 188d397b968SToshiaki Makita 189d397b968SToshiaki Makita do { 190068c38adSThomas Gleixner start = u64_stats_fetch_begin(&rq_stats->syncp); 191d397b968SToshiaki Makita for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 192d397b968SToshiaki Makita offset = veth_rq_stats_desc[j].offset; 193d397b968SToshiaki Makita data[idx + j] = *(u64 *)(stats_base + offset); 194d397b968SToshiaki Makita } 195068c38adSThomas Gleixner } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 196d397b968SToshiaki Makita idx += VETH_RQ_STATS_LEN; 197d397b968SToshiaki Makita } 1985fe6e567SLorenzo Bianconi 1995fe6e567SLorenzo Bianconi if (!peer) 2005fe6e567SLorenzo Bianconi return; 2015fe6e567SLorenzo Bianconi 2025fe6e567SLorenzo Bianconi rcv_priv = netdev_priv(peer); 2035fe6e567SLorenzo Bianconi for (i = 0; i < peer->real_num_rx_queues; i++) { 2045fe6e567SLorenzo Bianconi const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 2055fe6e567SLorenzo Bianconi const void *base = (void *)&rq_stats->vs; 2065fe6e567SLorenzo Bianconi unsigned int start, tx_idx = idx; 2075fe6e567SLorenzo Bianconi size_t offset; 2085fe6e567SLorenzo Bianconi 2095fe6e567SLorenzo Bianconi tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 2105fe6e567SLorenzo Bianconi do { 211068c38adSThomas Gleixner start = u64_stats_fetch_begin(&rq_stats->syncp); 2125fe6e567SLorenzo Bianconi for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 2135fe6e567SLorenzo Bianconi offset = veth_tq_stats_desc[j].offset; 2145fe6e567SLorenzo Bianconi data[tx_idx + j] += *(u64 *)(base + offset); 2155fe6e567SLorenzo Bianconi } 216068c38adSThomas Gleixner } while 
(u64_stats_fetch_retry(&rq_stats->syncp, start)); 2175fe6e567SLorenzo Bianconi } 218e314dbdcSPavel Emelyanov } 219e314dbdcSPavel Emelyanov 22034829eecSMaciej Fijalkowski static void veth_get_channels(struct net_device *dev, 22134829eecSMaciej Fijalkowski struct ethtool_channels *channels) 22234829eecSMaciej Fijalkowski { 22334829eecSMaciej Fijalkowski channels->tx_count = dev->real_num_tx_queues; 22434829eecSMaciej Fijalkowski channels->rx_count = dev->real_num_rx_queues; 2254752eeb3SPaolo Abeni channels->max_tx = dev->num_tx_queues; 2264752eeb3SPaolo Abeni channels->max_rx = dev->num_rx_queues; 22734829eecSMaciej Fijalkowski } 22834829eecSMaciej Fijalkowski 2294752eeb3SPaolo Abeni static int veth_set_channels(struct net_device *dev, 2304752eeb3SPaolo Abeni struct ethtool_channels *ch); 2314752eeb3SPaolo Abeni 2320fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = { 233e314dbdcSPavel Emelyanov .get_drvinfo = veth_get_drvinfo, 234e314dbdcSPavel Emelyanov .get_link = ethtool_op_get_link, 235e314dbdcSPavel Emelyanov .get_strings = veth_get_strings, 236b9f2c044SJeff Garzik .get_sset_count = veth_get_sset_count, 237e314dbdcSPavel Emelyanov .get_ethtool_stats = veth_get_ethtool_stats, 23856607b98SPhilippe Reynes .get_link_ksettings = veth_get_link_ksettings, 239056b21fbSJulian Wiedmann .get_ts_info = ethtool_op_get_ts_info, 24034829eecSMaciej Fijalkowski .get_channels = veth_get_channels, 2414752eeb3SPaolo Abeni .set_channels = veth_set_channels, 242e314dbdcSPavel Emelyanov }; 243e314dbdcSPavel Emelyanov 244948d4f21SToshiaki Makita /* general routines */ 245948d4f21SToshiaki Makita 2469fc8d518SToshiaki Makita static bool veth_is_xdp_frame(void *ptr) 2479fc8d518SToshiaki Makita { 2489fc8d518SToshiaki Makita return (unsigned long)ptr & VETH_XDP_FLAG; 2499fc8d518SToshiaki Makita } 2509fc8d518SToshiaki Makita 251defcffebSMaciej Żenczykowski static struct xdp_frame *veth_ptr_to_xdp(void *ptr) 2529fc8d518SToshiaki Makita { 2539fc8d518SToshiaki Makita 
return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 2549fc8d518SToshiaki Makita } 2559fc8d518SToshiaki Makita 256defcffebSMaciej Żenczykowski static void *veth_xdp_to_ptr(struct xdp_frame *xdp) 257af87a3aaSToshiaki Makita { 258defcffebSMaciej Żenczykowski return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 259af87a3aaSToshiaki Makita } 260af87a3aaSToshiaki Makita 2619fc8d518SToshiaki Makita static void veth_ptr_free(void *ptr) 2629fc8d518SToshiaki Makita { 2639fc8d518SToshiaki Makita if (veth_is_xdp_frame(ptr)) 2649fc8d518SToshiaki Makita xdp_return_frame(veth_ptr_to_xdp(ptr)); 2659fc8d518SToshiaki Makita else 2669fc8d518SToshiaki Makita kfree_skb(ptr); 2679fc8d518SToshiaki Makita } 2689fc8d518SToshiaki Makita 269638264dcSToshiaki Makita static void __veth_xdp_flush(struct veth_rq *rq) 270948d4f21SToshiaki Makita { 271948d4f21SToshiaki Makita /* Write ptr_ring before reading rx_notify_masked */ 272948d4f21SToshiaki Makita smp_mb(); 27368468d8cSEric Dumazet if (!READ_ONCE(rq->rx_notify_masked) && 27468468d8cSEric Dumazet napi_schedule_prep(&rq->xdp_napi)) { 27568468d8cSEric Dumazet WRITE_ONCE(rq->rx_notify_masked, true); 27668468d8cSEric Dumazet __napi_schedule(&rq->xdp_napi); 277948d4f21SToshiaki Makita } 278948d4f21SToshiaki Makita } 279948d4f21SToshiaki Makita 280638264dcSToshiaki Makita static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 281948d4f21SToshiaki Makita { 282638264dcSToshiaki Makita if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 283948d4f21SToshiaki Makita dev_kfree_skb_any(skb); 284948d4f21SToshiaki Makita return NET_RX_DROP; 285948d4f21SToshiaki Makita } 286948d4f21SToshiaki Makita 287948d4f21SToshiaki Makita return NET_RX_SUCCESS; 288948d4f21SToshiaki Makita } 289948d4f21SToshiaki Makita 290638264dcSToshiaki Makita static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 291638264dcSToshiaki Makita struct veth_rq *rq, bool xdp) 292e314dbdcSPavel Emelyanov { 293948d4f21SToshiaki Makita return 
__dev_forward_skb(dev, skb) ?: xdp ? 294638264dcSToshiaki Makita veth_xdp_rx(rq, skb) : 295baebdf48SSebastian Andrzej Siewior __netif_rx(skb); 296948d4f21SToshiaki Makita } 297948d4f21SToshiaki Makita 29847e550e0SPaolo Abeni /* return true if the specified skb has chances of GRO aggregation 29947e550e0SPaolo Abeni * Don't strive for accuracy, but try to avoid GRO overhead in the most 30047e550e0SPaolo Abeni * common scenarios. 30147e550e0SPaolo Abeni * When XDP is enabled, all traffic is considered eligible, as the xmit 30247e550e0SPaolo Abeni * device has TSO off. 30347e550e0SPaolo Abeni * When TSO is enabled on the xmit device, we are likely interested only 30447e550e0SPaolo Abeni * in UDP aggregation, explicitly check for that if the skb is suspected 30547e550e0SPaolo Abeni * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - 30647e550e0SPaolo Abeni * to belong to locally generated UDP traffic. 30747e550e0SPaolo Abeni */ 30847e550e0SPaolo Abeni static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, 30947e550e0SPaolo Abeni const struct net_device *rcv, 31047e550e0SPaolo Abeni const struct sk_buff *skb) 31147e550e0SPaolo Abeni { 31247e550e0SPaolo Abeni return !(dev->features & NETIF_F_ALL_TSO) || 31347e550e0SPaolo Abeni (skb->destructor == sock_wfree && 31447e550e0SPaolo Abeni rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); 31547e550e0SPaolo Abeni } 31647e550e0SPaolo Abeni 317948d4f21SToshiaki Makita static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 318948d4f21SToshiaki Makita { 319948d4f21SToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 320638264dcSToshiaki Makita struct veth_rq *rq = NULL; 321d0e2c55eSEric Dumazet struct net_device *rcv; 3222681128fSEric Dumazet int length = skb->len; 323d3256efdSPaolo Abeni bool use_napi = false; 324638264dcSToshiaki Makita int rxq; 325e314dbdcSPavel Emelyanov 326d0e2c55eSEric Dumazet rcu_read_lock(); 327d0e2c55eSEric Dumazet rcv = 
rcu_dereference(priv->peer); 328726e2c59SGuillaume Nault if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 329d0e2c55eSEric Dumazet kfree_skb(skb); 330d0e2c55eSEric Dumazet goto drop; 331d0e2c55eSEric Dumazet } 332e314dbdcSPavel Emelyanov 333948d4f21SToshiaki Makita rcv_priv = netdev_priv(rcv); 334638264dcSToshiaki Makita rxq = skb_get_queue_mapping(skb); 335638264dcSToshiaki Makita if (rxq < rcv->real_num_rx_queues) { 336638264dcSToshiaki Makita rq = &rcv_priv->rq[rxq]; 337d3256efdSPaolo Abeni 338d3256efdSPaolo Abeni /* The napi pointer is available when an XDP program is 339d3256efdSPaolo Abeni * attached or when GRO is enabled 34047e550e0SPaolo Abeni * Don't bother with napi/GRO if the skb can't be aggregated 341d3256efdSPaolo Abeni */ 34247e550e0SPaolo Abeni use_napi = rcu_access_pointer(rq->napi) && 34347e550e0SPaolo Abeni veth_skb_is_eligible_for_gro(dev, rcv, skb); 344638264dcSToshiaki Makita } 345948d4f21SToshiaki Makita 346aa4e689eSMichael Walle skb_tx_timestamp(skb); 347d3256efdSPaolo Abeni if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { 348d3256efdSPaolo Abeni if (!use_napi) 349b4fba476SEric Dumazet dev_lstats_add(dev, length); 3502681128fSEric Dumazet } else { 351d0e2c55eSEric Dumazet drop: 3522681128fSEric Dumazet atomic64_inc(&priv->dropped); 3532681128fSEric Dumazet } 354948d4f21SToshiaki Makita 355d3256efdSPaolo Abeni if (use_napi) 356638264dcSToshiaki Makita __veth_xdp_flush(rq); 357948d4f21SToshiaki Makita 358d0e2c55eSEric Dumazet rcu_read_unlock(); 359948d4f21SToshiaki Makita 3606ed10654SPatrick McHardy return NETDEV_TX_OK; 361e314dbdcSPavel Emelyanov } 362e314dbdcSPavel Emelyanov 363b4fba476SEric Dumazet static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) 364e314dbdcSPavel Emelyanov { 365cf05c700SEric Dumazet struct veth_priv *priv = netdev_priv(dev); 36611687a10SDavid S. 
Miller 367b4fba476SEric Dumazet dev_lstats_read(dev, packets, bytes); 3682681128fSEric Dumazet return atomic64_read(&priv->dropped); 3692681128fSEric Dumazet } 3702681128fSEric Dumazet 37165780c56SLorenzo Bianconi static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 3724195e54aSToshiaki Makita { 3734195e54aSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 3744195e54aSToshiaki Makita int i; 3754195e54aSToshiaki Makita 3765fe6e567SLorenzo Bianconi result->peer_tq_xdp_xmit_err = 0; 3774195e54aSToshiaki Makita result->xdp_packets = 0; 378d99a7c2fSLorenzo Bianconi result->xdp_tx_err = 0; 3794195e54aSToshiaki Makita result->xdp_bytes = 0; 38066fe4a07SLorenzo Bianconi result->rx_drops = 0; 3814195e54aSToshiaki Makita for (i = 0; i < dev->num_rx_queues; i++) { 3825fe6e567SLorenzo Bianconi u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 3834195e54aSToshiaki Makita struct veth_rq_stats *stats = &priv->rq[i].stats; 3844195e54aSToshiaki Makita unsigned int start; 3854195e54aSToshiaki Makita 3864195e54aSToshiaki Makita do { 387068c38adSThomas Gleixner start = u64_stats_fetch_begin(&stats->syncp); 3885fe6e567SLorenzo Bianconi peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 389d99a7c2fSLorenzo Bianconi xdp_tx_err = stats->vs.xdp_tx_err; 39065780c56SLorenzo Bianconi packets = stats->vs.xdp_packets; 39165780c56SLorenzo Bianconi bytes = stats->vs.xdp_bytes; 39266fe4a07SLorenzo Bianconi drops = stats->vs.rx_drops; 393068c38adSThomas Gleixner } while (u64_stats_fetch_retry(&stats->syncp, start)); 3945fe6e567SLorenzo Bianconi result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 395d99a7c2fSLorenzo Bianconi result->xdp_tx_err += xdp_tx_err; 3964195e54aSToshiaki Makita result->xdp_packets += packets; 3974195e54aSToshiaki Makita result->xdp_bytes += bytes; 39866fe4a07SLorenzo Bianconi result->rx_drops += drops; 3994195e54aSToshiaki Makita } 4004195e54aSToshiaki Makita } 4014195e54aSToshiaki Makita 402bc1f4470Sstephen hemminger static 
void veth_get_stats64(struct net_device *dev, 4032681128fSEric Dumazet struct rtnl_link_stats64 *tot) 4042681128fSEric Dumazet { 4052681128fSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 406d0e2c55eSEric Dumazet struct net_device *peer; 40765780c56SLorenzo Bianconi struct veth_stats rx; 408b4fba476SEric Dumazet u64 packets, bytes; 4092681128fSEric Dumazet 410b4fba476SEric Dumazet tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); 411b4fba476SEric Dumazet tot->tx_bytes = bytes; 412b4fba476SEric Dumazet tot->tx_packets = packets; 4134195e54aSToshiaki Makita 4144195e54aSToshiaki Makita veth_stats_rx(&rx, dev); 4155fe6e567SLorenzo Bianconi tot->tx_dropped += rx.xdp_tx_err; 4165fe6e567SLorenzo Bianconi tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 4174195e54aSToshiaki Makita tot->rx_bytes = rx.xdp_bytes; 4184195e54aSToshiaki Makita tot->rx_packets = rx.xdp_packets; 4192681128fSEric Dumazet 420d0e2c55eSEric Dumazet rcu_read_lock(); 421d0e2c55eSEric Dumazet peer = rcu_dereference(priv->peer); 422d0e2c55eSEric Dumazet if (peer) { 423e25d5dbcSJiang Lidong veth_stats_tx(peer, &packets, &bytes); 424b4fba476SEric Dumazet tot->rx_bytes += bytes; 425b4fba476SEric Dumazet tot->rx_packets += packets; 4264195e54aSToshiaki Makita 4274195e54aSToshiaki Makita veth_stats_rx(&rx, peer); 4285fe6e567SLorenzo Bianconi tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 4295fe6e567SLorenzo Bianconi tot->rx_dropped += rx.xdp_tx_err; 4304195e54aSToshiaki Makita tot->tx_bytes += rx.xdp_bytes; 4314195e54aSToshiaki Makita tot->tx_packets += rx.xdp_packets; 432d0e2c55eSEric Dumazet } 433d0e2c55eSEric Dumazet rcu_read_unlock(); 434e314dbdcSPavel Emelyanov } 435e314dbdcSPavel Emelyanov 4365c70ef85SGao feng /* fake multicast ability */ 4375c70ef85SGao feng static void veth_set_multicast_list(struct net_device *dev) 4385c70ef85SGao feng { 4395c70ef85SGao feng } 4405c70ef85SGao feng 441638264dcSToshiaki Makita static int veth_select_rxq(struct net_device *dev) 442638264dcSToshiaki 
Makita { 443638264dcSToshiaki Makita return smp_processor_id() % dev->real_num_rx_queues; 444638264dcSToshiaki Makita } 445638264dcSToshiaki Makita 4469aa1206eSDaniel Borkmann static struct net_device *veth_peer_dev(struct net_device *dev) 4479aa1206eSDaniel Borkmann { 4489aa1206eSDaniel Borkmann struct veth_priv *priv = netdev_priv(dev); 4499aa1206eSDaniel Borkmann 4509aa1206eSDaniel Borkmann /* Callers must be under RCU read side. */ 4519aa1206eSDaniel Borkmann return rcu_dereference(priv->peer); 4529aa1206eSDaniel Borkmann } 4539aa1206eSDaniel Borkmann 454af87a3aaSToshiaki Makita static int veth_xdp_xmit(struct net_device *dev, int n, 4559152cff0SLorenzo Bianconi struct xdp_frame **frames, 4569152cff0SLorenzo Bianconi u32 flags, bool ndo_xmit) 457af87a3aaSToshiaki Makita { 458af87a3aaSToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 459fdc13979SLorenzo Bianconi int i, ret = -ENXIO, nxmit = 0; 460af87a3aaSToshiaki Makita struct net_device *rcv; 4615fe6e567SLorenzo Bianconi unsigned int max_len; 462638264dcSToshiaki Makita struct veth_rq *rq; 463af87a3aaSToshiaki Makita 4645fe6e567SLorenzo Bianconi if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 465d99a7c2fSLorenzo Bianconi return -EINVAL; 466af87a3aaSToshiaki Makita 4675fe6e567SLorenzo Bianconi rcu_read_lock(); 468af87a3aaSToshiaki Makita rcv = rcu_dereference(priv->peer); 4695fe6e567SLorenzo Bianconi if (unlikely(!rcv)) 4705fe6e567SLorenzo Bianconi goto out; 471af87a3aaSToshiaki Makita 472af87a3aaSToshiaki Makita rcv_priv = netdev_priv(rcv); 4735fe6e567SLorenzo Bianconi rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 4740e672f30SToke Høiland-Jørgensen /* The napi pointer is set if NAPI is enabled, which ensures that 4750e672f30SToke Høiland-Jørgensen * xdp_ring is initialized on receive side and the peer device is up. 
476af87a3aaSToshiaki Makita */ 4770e672f30SToke Høiland-Jørgensen if (!rcu_access_pointer(rq->napi)) 4785fe6e567SLorenzo Bianconi goto out; 479af87a3aaSToshiaki Makita 480af87a3aaSToshiaki Makita max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 481af87a3aaSToshiaki Makita 482638264dcSToshiaki Makita spin_lock(&rq->xdp_ring.producer_lock); 483af87a3aaSToshiaki Makita for (i = 0; i < n; i++) { 484af87a3aaSToshiaki Makita struct xdp_frame *frame = frames[i]; 485af87a3aaSToshiaki Makita void *ptr = veth_xdp_to_ptr(frame); 486af87a3aaSToshiaki Makita 4875142239aSLorenzo Bianconi if (unlikely(xdp_get_frame_len(frame) > max_len || 488fdc13979SLorenzo Bianconi __ptr_ring_produce(&rq->xdp_ring, ptr))) 489fdc13979SLorenzo Bianconi break; 490fdc13979SLorenzo Bianconi nxmit++; 491af87a3aaSToshiaki Makita } 492638264dcSToshiaki Makita spin_unlock(&rq->xdp_ring.producer_lock); 493af87a3aaSToshiaki Makita 494af87a3aaSToshiaki Makita if (flags & XDP_XMIT_FLUSH) 495638264dcSToshiaki Makita __veth_xdp_flush(rq); 496af87a3aaSToshiaki Makita 497fdc13979SLorenzo Bianconi ret = nxmit; 4989152cff0SLorenzo Bianconi if (ndo_xmit) { 4995fe6e567SLorenzo Bianconi u64_stats_update_begin(&rq->stats.syncp); 500fdc13979SLorenzo Bianconi rq->stats.vs.peer_tq_xdp_xmit += nxmit; 501fdc13979SLorenzo Bianconi rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit; 5029152cff0SLorenzo Bianconi u64_stats_update_end(&rq->stats.syncp); 5035fe6e567SLorenzo Bianconi } 5049152cff0SLorenzo Bianconi 5055fe6e567SLorenzo Bianconi out: 506b23bfa56SJohn Fastabend rcu_read_unlock(); 5072131479dSToshiaki Makita 5082131479dSToshiaki Makita return ret; 509af87a3aaSToshiaki Makita } 510af87a3aaSToshiaki Makita 5119152cff0SLorenzo Bianconi static int veth_ndo_xdp_xmit(struct net_device *dev, int n, 5129152cff0SLorenzo Bianconi struct xdp_frame **frames, u32 flags) 5139152cff0SLorenzo Bianconi { 5145fe6e567SLorenzo Bianconi int err; 5155fe6e567SLorenzo Bianconi 5165fe6e567SLorenzo Bianconi err = veth_xdp_xmit(dev, n, 
frames, flags, true); 5175fe6e567SLorenzo Bianconi if (err < 0) { 5185fe6e567SLorenzo Bianconi struct veth_priv *priv = netdev_priv(dev); 5195fe6e567SLorenzo Bianconi 5205fe6e567SLorenzo Bianconi atomic64_add(n, &priv->dropped); 5215fe6e567SLorenzo Bianconi } 5225fe6e567SLorenzo Bianconi 5235fe6e567SLorenzo Bianconi return err; 5249152cff0SLorenzo Bianconi } 5259152cff0SLorenzo Bianconi 526bd32aa1fSLorenzo Bianconi static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 5279cda7807SToshiaki Makita { 528fdc13979SLorenzo Bianconi int sent, i, err = 0, drops; 5299cda7807SToshiaki Makita 530bd32aa1fSLorenzo Bianconi sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 5319cda7807SToshiaki Makita if (sent < 0) { 5329cda7807SToshiaki Makita err = sent; 5339cda7807SToshiaki Makita sent = 0; 5349cda7807SToshiaki Makita } 535fdc13979SLorenzo Bianconi 536fdc13979SLorenzo Bianconi for (i = sent; unlikely(i < bq->count); i++) 537fdc13979SLorenzo Bianconi xdp_return_frame(bq->q[i]); 538fdc13979SLorenzo Bianconi 539fdc13979SLorenzo Bianconi drops = bq->count - sent; 540fdc13979SLorenzo Bianconi trace_xdp_bulk_tx(rq->dev, sent, drops, err); 5419cda7807SToshiaki Makita 5425fe6e567SLorenzo Bianconi u64_stats_update_begin(&rq->stats.syncp); 5435fe6e567SLorenzo Bianconi rq->stats.vs.xdp_tx += sent; 544fdc13979SLorenzo Bianconi rq->stats.vs.xdp_tx_err += drops; 5455fe6e567SLorenzo Bianconi u64_stats_update_end(&rq->stats.syncp); 5465fe6e567SLorenzo Bianconi 5479cda7807SToshiaki Makita bq->count = 0; 5489cda7807SToshiaki Makita } 5499cda7807SToshiaki Makita 550bd32aa1fSLorenzo Bianconi static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 551d1396004SToshiaki Makita { 552bd32aa1fSLorenzo Bianconi struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 553d1396004SToshiaki Makita struct net_device *rcv; 554bd32aa1fSLorenzo Bianconi struct veth_rq *rcv_rq; 555d1396004SToshiaki Makita 556d1396004SToshiaki Makita rcu_read_lock(); 
	veth_xdp_flush_bq(rq, bq);
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv))
		goto out;

	rcv_priv = netdev_priv(rcv);
	rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)];
	/* The peer's xdp_ring is only initialized when an XDP program is
	 * attached on its side; without one there is nothing to flush.
	 */
	if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog)))
		goto out;

	__veth_xdp_flush(rcv_rq);
out:
	rcu_read_unlock();
}

/* Queue one xdp_buff on the per-NAPI TX bulk queue, flushing the queue
 * first when it is already full (VETH_XDP_TX_BULK_SIZE entries).
 * Returns 0 on success, -EOVERFLOW if the buff cannot be converted to
 * an xdp_frame.
 */
static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
		       struct veth_xdp_tx_bq *bq)
{
	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);

	if (unlikely(!frame))
		return -EOVERFLOW;

	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
		veth_xdp_flush_bq(rq, bq);

	bq->q[bq->count++] = frame;

	return 0;
}

/* Run the attached XDP program (if any) on a single xdp_frame that was
 * queued via ndo_xdp_xmit.  Returns the frame on XDP_PASS (caller turns
 * it into an skb), or NULL when the frame was consumed by XDP_TX,
 * XDP_REDIRECT or a drop.  Per-action counters are accumulated in @stats.
 */
static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
					  struct xdp_frame *frame,
					  struct veth_xdp_tx_bq *bq,
					  struct veth_stats *stats)
{
	struct xdp_frame orig_frame;
	struct bpf_prog *xdp_prog;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (likely(xdp_prog)) {
		struct veth_xdp_buff vxbuf;
		struct xdp_buff *xdp = &vxbuf.xdp;
		u32 act;

		xdp_convert_frame_to_buff(frame, xdp);
		xdp->rxq = &rq->xdp_rxq;
		vxbuf.skb = NULL;

		act = bpf_prog_run_xdp(xdp_prog, xdp);

		switch (act) {
		case XDP_PASS:
			if (xdp_update_frame_from_buff(xdp, frame))
				goto err_xdp;
			break;
		case XDP_TX:
			/* keep a copy so the original frame can still be
			 * returned if queueing on the bulk queue fails
			 */
			orig_frame = *frame;
			xdp->rxq->mem = frame->mem;
			if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
				trace_xdp_exception(rq->dev, xdp_prog, act);
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_tx++;
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			orig_frame = *frame;
			xdp->rxq->mem = frame->mem;
			if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_redirect++;
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
			fallthrough;
		case XDP_ABORTED:
			trace_xdp_exception(rq->dev, xdp_prog, act);
			fallthrough;
		case XDP_DROP:
			stats->xdp_drops++;
			goto err_xdp;
		}
	}
	rcu_read_unlock();

	return frame;
err_xdp:
	rcu_read_unlock();
	xdp_return_frame(frame);
xdp_xmit:
	return NULL;
}

/* frames array contains VETH_XDP_BATCH at most.
 * Bulk-allocate skbs for the XDP_PASS frames, attach each frame's data
 * and hand the skbs to GRO; frames that cannot get an skb are returned
 * to their memory pool and counted as rx_drops.
 */
static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
				  int n_xdpf, struct veth_xdp_tx_bq *bq,
				  struct veth_stats *stats)
{
	void *skbs[VETH_XDP_BATCH];
	int i;

	if (xdp_alloc_skb_bulk(skbs, n_xdpf,
			       GFP_ATOMIC | __GFP_ZERO) < 0) {
		for (i = 0; i < n_xdpf; i++)
			xdp_return_frame(frames[i]);
		stats->rx_drops += n_xdpf;

		return;
	}

	for (i = 0; i < n_xdpf; i++) {
		struct sk_buff *skb = skbs[i];

		skb = __xdp_build_skb_from_frame(frames[i], skb,
						 rq->dev);
		if (!skb) {
			xdp_return_frame(frames[i]);
			stats->rx_drops++;
			continue;
		}
		napi_gro_receive(&rq->xdp_napi, skb);
	}
}

/* Take an extra reference on the head page and on every fragment page
 * of an xdp_buff, so the buffer stays alive after the originating skb
 * is consumed (used before XDP_TX/XDP_REDIRECT on skb-backed buffs).
 */
static void veth_xdp_get(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	int i;

	get_page(virt_to_page(xdp->data));
	if (likely(!xdp_buff_has_frags(xdp)))
		return;

	for (i = 0; i < sinfo->nr_frags; i++)
		__skb_frag_ref(&sinfo->frags[i]);
}

/* Prepare an skb so an XDP program may run on it: when the skb is
 * shared/cloned or already has fragments it is copied into private
 * order-0 pages; otherwise it only gets enough headroom.  On success
 * *pskb points to the (possibly replaced) skb and *xdp is initialized
 * over it; on failure the skb is freed, *pskb is NULL and -ENOMEM is
 * returned.
 */
static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
					struct xdp_buff *xdp,
					struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	u32 frame_sz;

	if (skb_shared(skb) || skb_head_is_locked(skb) ||
	    skb_shinfo(skb)->nr_frags) {
		u32 size, len, max_head_size, off;
		struct sk_buff *nskb;
		struct page *page;
		int i, head_off;

		/* We need a private copy of the skb and data buffers since
		 * the ebpf program can modify it.
		 */
		/* We segment the original skb into order-0 pages without
		 * linearizing it.
		 *
		 * Make sure we have enough space for linear and paged area.
		 */
		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
						  VETH_XDP_HEADROOM);
		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
			goto drop;

		/* Allocate skb head */
		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page)
			goto drop;

		nskb = build_skb(page_address(page), PAGE_SIZE);
		if (!nskb) {
			put_page(page);
			goto drop;
		}

		skb_reserve(nskb, VETH_XDP_HEADROOM);
		size = min_t(u32, skb->len, max_head_size);
		if (skb_copy_bits(skb, 0, nskb->data, size)) {
			consume_skb(nskb);
			goto drop;
		}
		skb_put(nskb, size);

		skb_copy_header(nskb, skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);

		/* Allocate paged area of new skb */
		off = size;
		len = skb->len - off;

		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
			if (!page) {
				consume_skb(nskb);
				goto drop;
			}

			size = min_t(u32, len, PAGE_SIZE);
			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
			if (skb_copy_bits(skb, off, page_address(page),
					  size)) {
				consume_skb(nskb);
				goto drop;
			}

			len -= size;
			off += size;
		}

		/* The private copy replaces the original skb from here on */
		consume_skb(skb);
		skb = nskb;
	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
		goto drop;
	}

	/* SKB "head" area always has tailroom for skb_shared_info */
	frame_sz = skb_end_pointer(skb) - skb->head;
	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
			 skb_headlen(skb), true);

	if (skb_is_nonlinear(skb)) {
		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
		xdp_buff_set_frags_flag(xdp);
	} else {
		xdp_buff_clear_frags_flag(xdp);
	}
	*pskb = skb;

	return 0;
drop:
	consume_skb(skb);
	*pskb = NULL;

	return -ENOMEM;
}

/* Run the attached XDP program (if any) on an skb received from the
 * peer's ndo_start_xmit path.  Returns the (possibly modified) skb on
 * XDP_PASS or when no program is attached, or NULL when the packet was
 * consumed (TX, redirect or drop).  Counters go into @stats.
 */
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
					struct sk_buff *skb,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	void *orig_data, *orig_data_end;
	struct bpf_prog *xdp_prog;
	struct veth_xdp_buff vxbuf;
	struct xdp_buff *xdp = &vxbuf.xdp;
	u32 act, metalen;
	int off;

	skb_prepare_for_gro(skb);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog)) {
		rcu_read_unlock();
		goto out;
	}

	/* expose the MAC header to the XDP program */
	__skb_push(skb, skb->data - skb_mac_header(skb));
	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
		goto drop;
	vxbuf.skb = skb;

	/* remember head/tail so adjust_head/adjust_tail can be detected */
	orig_data = xdp->data;
	orig_data_end = xdp->data_end;

	act = bpf_prog_run_xdp(xdp_prog, xdp);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		/* grab page refs before consume_skb() so the buffer data
		 * outlives the skb that carried it
		 */
		veth_xdp_get(xdp);
		consume_skb(skb);
		xdp->rxq->mem = rq->xdp_mem;
		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
			trace_xdp_exception(rq->dev, xdp_prog, act);
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_tx++;
		rcu_read_unlock();
		goto xdp_xmit;
	case XDP_REDIRECT:
		veth_xdp_get(xdp);
		consume_skb(skb);
		xdp->rxq->mem = rq->xdp_mem;
		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_redirect++;
		rcu_read_unlock();
		goto xdp_xmit;
	default:
		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(rq->dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		stats->xdp_drops++;
		goto xdp_drop;
	}
	rcu_read_unlock();

	/* check if bpf_xdp_adjust_head was used */
	off = orig_data - xdp->data;
	if (off > 0)
		__skb_push(skb, off);
	else if (off < 0)
		__skb_pull(skb, -off);

	skb_reset_mac_header(skb);

	/* check if bpf_xdp_adjust_tail was used */
	off = xdp->data_end - orig_data_end;
	if (off != 0)
		__skb_put(skb, off); /* positive on grow, negative on shrink */

	/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
	 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
	 */
	if (xdp_buff_has_frags(xdp))
		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
	else
		skb->data_len = 0;

	skb->protocol = eth_type_trans(skb, rq->dev);

	metalen = xdp->data - xdp->data_meta;
	if (metalen)
		skb_metadata_set(skb, metalen);
out:
	return skb;
drop:
	stats->rx_drops++;
xdp_drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
err_xdp:
	rcu_read_unlock();
	xdp_return_buff(xdp);
xdp_xmit:
	return NULL;
}

/* NAPI receive loop: drain up to @budget entries from the rq's ptr_ring.
 * Entries are either xdp_frames (queued by the peer's ndo_xdp_xmit) or
 * skbs (queued by the peer's ndo_start_xmit); XDP_PASS frames are
 * batched (VETH_XDP_BATCH) for bulk skb allocation.  Returns the number
 * of entries processed and folds @stats into the rq's counters.
 */
static int veth_xdp_rcv(struct veth_rq *rq, int budget,
			struct veth_xdp_tx_bq *bq,
			struct veth_stats *stats)
{
	int i, done = 0, n_xdpf = 0;
	void *xdpf[VETH_XDP_BATCH];

	for (i = 0; i < budget; i++) {
		void *ptr = __ptr_ring_consume(&rq->xdp_ring);

		if (!ptr)
			break;

		if (veth_is_xdp_frame(ptr)) {
			/* ndo_xdp_xmit */
			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);

			stats->xdp_bytes += xdp_get_frame_len(frame);
			frame = veth_xdp_rcv_one(rq, frame, bq, stats);
			if (frame) {
				/* XDP_PASS */
				xdpf[n_xdpf++] = frame;
				if (n_xdpf == VETH_XDP_BATCH) {
					veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf,
							      bq, stats);
					n_xdpf = 0;
				}
			}
		} else {
			/* ndo_start_xmit */
			struct sk_buff *skb = ptr;

			stats->xdp_bytes += skb->len;
			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
			if (skb) {
				/* shared/cloned skbs must bypass GRO, which
				 * may modify the skb in place
				 */
				if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC))
					netif_receive_skb(skb);
				else
					napi_gro_receive(&rq->xdp_napi, skb);
			}
		}
		done++;
	}

	/* flush any leftover partial batch of XDP_PASS frames */
	if (n_xdpf)
		veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats);

	u64_stats_update_begin(&rq->stats.syncp);
	rq->stats.vs.xdp_redirect += stats->xdp_redirect;
	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
	rq->stats.vs.xdp_drops += stats->xdp_drops;
	rq->stats.vs.rx_drops += stats->rx_drops;
	rq->stats.vs.xdp_packets += done;
	u64_stats_update_end(&rq->stats.syncp);

	return done;
}

/* NAPI poll callback for a veth_rq: drains the ring via veth_xdp_rcv(),
 * flushes any pending redirects/TX bulk queue, and re-arms or reschedules
 * NAPI depending on whether the budget was exhausted.
 */
static int veth_poll(struct napi_struct *napi, int budget)
{
	struct veth_rq *rq =
		container_of(napi, struct veth_rq, xdp_napi);
	struct veth_stats stats = {};
	struct veth_xdp_tx_bq bq;
	int done;

	bq.count = 0;

	xdp_set_return_frame_no_direct();
	done = veth_xdp_rcv(rq, budget, &bq, &stats);

	if (stats.xdp_redirect > 0)
		xdp_do_flush();

	if (done < budget && napi_complete_done(napi, done)) {
		/* Write rx_notify_masked before reading ptr_ring */
		smp_store_mb(rq->rx_notify_masked, false);
		/* re-check the ring: the producer may have queued entries
		 * between the last consume and clearing the mask above
		 */
		if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
			if (napi_schedule_prep(&rq->xdp_napi)) {
				WRITE_ONCE(rq->rx_notify_masked, true);
				__napi_schedule(&rq->xdp_napi);
			}
		}
	}

	if (stats.xdp_tx > 0)
		veth_xdp_flush(rq, &bq);
	xdp_clear_return_frame_no_direct();

	return done;
}

/* Initialize the xdp_ring and enable NAPI for rx queues [start, end).
 * On ptr_ring allocation failure, already-initialized rings are cleaned
 * up and the error is returned.
 */
static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
		if (err)
			goto err_xdp_ring;
	}

	/* only enable NAPI once every ring allocated successfully */
	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		napi_enable(&rq->xdp_napi);
		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
	}

	return 0;

err_xdp_ring:
	for (i--; i >= start; i--)
		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);

	return err;
}

/* Enable NAPI and rings on every real rx queue of @dev. */
static int __veth_napi_enable(struct net_device *dev)
{
	return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
}

/* Disable and delete NAPI for rx queues [start, end) and release their
 * rings.  The NULL napi pointer is published and synchronize_net() runs
 * before the rings are torn down, so concurrent readers are done first.
 */
static void veth_napi_del_range(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		rcu_assign_pointer(priv->rq[i].napi, NULL);
		napi_disable(&rq->xdp_napi);
		__netif_napi_del(&rq->xdp_napi);
	}
	synchronize_net();

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		rq->rx_notify_masked = false;
		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
	}
}

/* Tear down NAPI on every real rx queue of @dev. */
static void veth_napi_del(struct net_device *dev)
{
	veth_napi_del_range(dev, 0, dev->real_num_rx_queues);
}

/* True when user-space asked for GRO (wanted_features), regardless of
 * whether XDP forced NETIF_F_GRO on behind its back.
 */
static bool veth_gro_requested(const struct net_device *dev)
{
	return !!(dev->wanted_features & NETIF_F_GRO);
}

/* Register xdp_rxq_info (and NAPI, unless already on) for rx queues
 * [start, end).  On failure everything registered so far in the range
 * is unwound and the error is returned.
 */
static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
				 bool napi_already_on)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		if (!napi_already_on)
			netif_napi_add(dev, &rq->xdp_napi, veth_poll);
		err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
		if (err < 0)
			goto err_rxq_reg;

		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED,
						 NULL);
		if (err < 0)
			goto err_reg_mem;

		/* Save original mem info as it can be overwritten */
		rq->xdp_mem = rq->xdp_rxq.mem;
	}
	return 0;

err_reg_mem:
	xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
err_rxq_reg:
	for (i--; i >= start; i--) {
		struct veth_rq *rq = &priv->rq[i];

		xdp_rxq_info_unreg(&rq->xdp_rxq);
		if (!napi_already_on)
			netif_napi_del(&rq->xdp_napi);
	}

	return err;
}

/* Unregister xdp_rxq_info for rx queues [start, end), restoring the
 * saved mem info first; optionally delete the NAPI instances too.
 */
static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
				   bool delete_napi)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		rq->xdp_rxq.mem = rq->xdp_mem;
		xdp_rxq_info_unreg(&rq->xdp_rxq);

		if (delete_napi)
			netif_napi_del(&rq->xdp_napi);
	}
}

/* Attach priv->_xdp_prog to every rx queue, setting up rxq info, rings
 * and NAPI first when they are not already active (NAPI is already on
 * when GRO was requested and the device is up).  Enabling XDP turns GRO
 * on as a side effect when user-space had not requested it.
 */
static int veth_enable_xdp(struct net_device *dev)
{
	bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
		err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
		if (err)
			return err;

		if (!napi_already_on) {
			err = __veth_napi_enable(dev);
			if (err) {
				veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
				return err;
			}

			if (!veth_gro_requested(dev)) {
				/* user-space did not require GRO, but adding XDP
				 * is supposed to get GRO working
				 */
				dev->features |= NETIF_F_GRO;
				netdev_features_change(dev);
			}
		}
	}

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
	}

	return 0;
}

/* Detach the XDP program from every rx queue and tear down NAPI/rxq
 * state when it is no longer needed (device down or GRO not requested).
 */
static void veth_disable_xdp(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++)
		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);

	if (!netif_running(dev) || !veth_gro_requested(dev)) {
		veth_napi_del(dev);

		/* if user-space did not require GRO, since adding XDP
		 * enabled it, clear it now
		 */
		if (!veth_gro_requested(dev) && netif_running(dev)) {
			dev->features &= ~NETIF_F_GRO;
			netdev_features_change(dev);
		}
	}

	veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}

/* Add and enable NAPI (plus rings) for rx queues [start, end); on
 * failure the freshly added NAPI instances are deleted again.
 */
static int veth_napi_enable_range(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	for (i = start; i < end; i++) {
		struct veth_rq *rq = &priv->rq[i];

		netif_napi_add(dev, &rq->xdp_napi, veth_poll);
	}

	err = __veth_napi_enable_range(dev, start, end);
	if (err) {
		for (i = start; i < end; i++) {
			struct veth_rq *rq = &priv->rq[i];

			netif_napi_del(&rq->xdp_napi);
		}
		return err;
	}
	return err;
}

/* Enable NAPI on every real rx queue of @dev. */
static int veth_napi_enable(struct net_device *dev)
{
	return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
}

/* Disable rx queues [start, end) according to the current mode: with an
 * XDP program attached, tear down both NAPI and the XDP rxq state; with
 * GRO only, tear down NAPI.  No-op when the range is empty.
 */
static void veth_disable_range_safe(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);

	if (start >= end)
		return;

	if (priv->_xdp_prog) {
		veth_napi_del_range(dev, start, end);
		veth_disable_xdp_range(dev, start, end, false);
	} else if (veth_gro_requested(dev)) {
		veth_napi_del_range(dev, start, end);
	}
}

/* Enable rx queues [start, end) according to the current mode (XDP
 * attached, GRO requested, or neither).  Returns 0 on success or when
 * the range is empty; unwinds partially enabled state on failure.
 */
static int veth_enable_range_safe(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err;

	if (start >= end)
		return 0;

	if (priv->_xdp_prog) {
		/* these channels are freshly initialized, napi is not on there even
		 * when GRO is requested
		 */
		err = veth_enable_xdp_range(dev, start, end, false);
		if (err)
			return err;

		err = __veth_napi_enable_range(dev, start, end);
		if (err) {
			/* on error always delete the newly added napis */
			veth_disable_xdp_range(dev, start, end, true);
			return err;
		}
	} else if (veth_gro_requested(dev)) {
		return veth_napi_enable_range(dev, start, end);
	}
	return 0;
}

/* Advertise (or clear) the device's supported XDP feature flags based on
 * the peer's queue layout and whether NAPI/XDP is active.
 */
static void veth_set_xdp_features(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	/* NOTE(review): called under RTNL (e.g. from veth_set_channels)
	 * without rcu_read_lock(); presumably RTNL protects the peer
	 * pointer here — confirm (rtnl_dereference would make it explicit).
	 */
	peer = rcu_dereference(priv->peer);
	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
				     NETDEV_XDP_ACT_REDIRECT |
				     NETDEV_XDP_ACT_RX_SG;

		/* ndo_xdp_xmit needs the receive side's NAPI/rings active */
		if (priv->_xdp_prog || veth_gro_requested(dev))
			val |= NETDEV_XDP_ACT_NDO_XMIT |
			       NETDEV_XDP_ACT_NDO_XMIT_SG;
		xdp_set_features_flag(dev, val);
	} else {
		xdp_clear_features_flag(dev);
	}
}

/* ethtool set_channels handler: resize the real rx/tx queue counts,
 * enabling freshly added rx queues before the resize and disabling the
 * removed ones afterwards, with the carrier kept off for the duration.
 * Rejects counts that would break an attached XDP program on either
 * side of the pair.
 */
static int veth_set_channels(struct net_device *dev,
			     struct ethtool_channels *ch)
{
	struct veth_priv *priv = netdev_priv(dev);
	unsigned int old_rx_count, new_rx_count;
	struct veth_priv *peer_priv;
	struct net_device *peer;
	int err;

	/* sanity check. Upper bounds are already enforced by the caller */
	if (!ch->rx_count || !ch->tx_count)
		return -EINVAL;

	/* avoid breaking XDP, if that is enabled */
	peer = rtnl_dereference(priv->peer);
	peer_priv = peer ? netdev_priv(peer) : NULL;
	if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
		return -EINVAL;

	if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
		return -EINVAL;

	old_rx_count = dev->real_num_rx_queues;
	new_rx_count = ch->rx_count;
	if (netif_running(dev)) {
		/* turn device off */
		netif_carrier_off(dev);
		if (peer)
			netif_carrier_off(peer);

		/* try to allocate new resources, as needed */
		err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
		if (err)
			goto out;
	}

	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
	if (err)
		goto revert;

	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
	if (err) {
		int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);

		/* this error condition could happen only if rx and tx change
		 * in opposite directions (e.g. tx nr raises, rx nr decreases)
		 * and we can't do anything to fully restore the original
		 * status
		 */
		if (err2)
			pr_warn("Can't restore rx queues config %d -> %d %d",
				new_rx_count, old_rx_count, err2);
		else
			goto revert;
	}

out:
	if (netif_running(dev)) {
		/* note that we need to swap the arguments WRT the enable part
		 * to identify the range we have to disable
		 */
		veth_disable_range_safe(dev, new_rx_count, old_rx_count);
		netif_carrier_on(dev);
		if (peer)
			netif_carrier_on(peer);
	}

	/* update XDP supported features */
	veth_set_xdp_features(dev);
	if (peer)
		veth_set_xdp_features(peer);

	return err;

revert:
	/* swap so the out: path disables the queues added above */
	new_rx_count = old_rx_count;
	old_rx_count = ch->rx_count;
	goto out;
}

static int veth_open(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int err;

	/* a veth device cannot come up without its peer */
	if (!peer)
		return -ENOTCONN;

1369948d4f21SToshiaki Makita if (priv->_xdp_prog) { 1370948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 1371948d4f21SToshiaki Makita if (err) 1372948d4f21SToshiaki Makita return err; 13735e8d3dc7SHeng Qi } else if (veth_gro_requested(dev)) { 1374d3256efdSPaolo Abeni err = veth_napi_enable(dev); 1375d3256efdSPaolo Abeni if (err) 1376d3256efdSPaolo Abeni return err; 1377948d4f21SToshiaki Makita } 1378948d4f21SToshiaki Makita 1379d0e2c55eSEric Dumazet if (peer->flags & IFF_UP) { 1380e314dbdcSPavel Emelyanov netif_carrier_on(dev); 1381d0e2c55eSEric Dumazet netif_carrier_on(peer); 1382e314dbdcSPavel Emelyanov } 1383948d4f21SToshiaki Makita 1384e314dbdcSPavel Emelyanov return 0; 1385e314dbdcSPavel Emelyanov } 1386e314dbdcSPavel Emelyanov 13872cf48a10SEric W. Biederman static int veth_close(struct net_device *dev) 13882cf48a10SEric W. Biederman { 13895e8d3dc7SHeng Qi struct veth_priv *priv = netdev_priv(dev); 13902efd32eeSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 13912cf48a10SEric W. Biederman 13922cf48a10SEric W. Biederman netif_carrier_off(dev); 13932efd32eeSEric Dumazet if (peer) 13942efd32eeSEric Dumazet netif_carrier_off(peer); 13952cf48a10SEric W. Biederman 13965e8d3dc7SHeng Qi if (priv->_xdp_prog) 13975e8d3dc7SHeng Qi veth_disable_xdp(dev); 13985e8d3dc7SHeng Qi else if (veth_gro_requested(dev)) 13995e8d3dc7SHeng Qi veth_napi_del(dev); 14005e8d3dc7SHeng Qi 14012cf48a10SEric W. Biederman return 0; 14022cf48a10SEric W. Biederman } 14032cf48a10SEric W. 
Biederman 140491572088SJarod Wilson static int is_valid_veth_mtu(int mtu) 140538d40815SEric Biederman { 140691572088SJarod Wilson return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 140738d40815SEric Biederman } 140838d40815SEric Biederman 14097797b93bSToshiaki Makita static int veth_alloc_queues(struct net_device *dev) 14107797b93bSToshiaki Makita { 14117797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 14127797b93bSToshiaki Makita int i; 14137797b93bSToshiaki Makita 1414961c6136SVasily Averin priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); 14157797b93bSToshiaki Makita if (!priv->rq) 14167797b93bSToshiaki Makita return -ENOMEM; 14177797b93bSToshiaki Makita 14184195e54aSToshiaki Makita for (i = 0; i < dev->num_rx_queues; i++) { 14197797b93bSToshiaki Makita priv->rq[i].dev = dev; 14204195e54aSToshiaki Makita u64_stats_init(&priv->rq[i].stats.syncp); 14214195e54aSToshiaki Makita } 14227797b93bSToshiaki Makita 14237797b93bSToshiaki Makita return 0; 14247797b93bSToshiaki Makita } 14257797b93bSToshiaki Makita 14267797b93bSToshiaki Makita static void veth_free_queues(struct net_device *dev) 14277797b93bSToshiaki Makita { 14287797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 14297797b93bSToshiaki Makita 14307797b93bSToshiaki Makita kfree(priv->rq); 14317797b93bSToshiaki Makita } 14327797b93bSToshiaki Makita 1433e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev) 1434e314dbdcSPavel Emelyanov { 14357797b93bSToshiaki Makita int err; 14367797b93bSToshiaki Makita 143714d73416SLi RongQing dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 143814d73416SLi RongQing if (!dev->lstats) 1439e314dbdcSPavel Emelyanov return -ENOMEM; 14407797b93bSToshiaki Makita 14417797b93bSToshiaki Makita err = veth_alloc_queues(dev); 14427797b93bSToshiaki Makita if (err) { 144314d73416SLi RongQing free_percpu(dev->lstats); 14447797b93bSToshiaki Makita return err; 14457797b93bSToshiaki Makita } 
14467797b93bSToshiaki Makita 1447e314dbdcSPavel Emelyanov return 0; 1448e314dbdcSPavel Emelyanov } 1449e314dbdcSPavel Emelyanov 145011687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev) 145111687a10SDavid S. Miller { 14527797b93bSToshiaki Makita veth_free_queues(dev); 145314d73416SLi RongQing free_percpu(dev->lstats); 145411687a10SDavid S. Miller } 145511687a10SDavid S. Miller 1456bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 1457bb446c19SWANG Cong static void veth_poll_controller(struct net_device *dev) 1458bb446c19SWANG Cong { 1459bb446c19SWANG Cong /* veth only receives frames when its peer sends one 1460948d4f21SToshiaki Makita * Since it has nothing to do with disabling irqs, we are guaranteed 1461bb446c19SWANG Cong * never to have pending data when we poll for it so 1462bb446c19SWANG Cong * there is nothing to do here. 1463bb446c19SWANG Cong * 1464bb446c19SWANG Cong * We need this though so netpoll recognizes us as an interface that 1465bb446c19SWANG Cong * supports polling, which enables bridge devices in virt setups to 1466bb446c19SWANG Cong * still use netconsole 1467bb446c19SWANG Cong */ 1468bb446c19SWANG Cong } 1469bb446c19SWANG Cong #endif /* CONFIG_NET_POLL_CONTROLLER */ 1470bb446c19SWANG Cong 1471a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev) 1472a45253bfSNicolas Dichtel { 1473a45253bfSNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 1474a45253bfSNicolas Dichtel struct net_device *peer; 1475a45253bfSNicolas Dichtel int iflink; 1476a45253bfSNicolas Dichtel 1477a45253bfSNicolas Dichtel rcu_read_lock(); 1478a45253bfSNicolas Dichtel peer = rcu_dereference(priv->peer); 1479a45253bfSNicolas Dichtel iflink = peer ? 
peer->ifindex : 0; 1480a45253bfSNicolas Dichtel rcu_read_unlock(); 1481a45253bfSNicolas Dichtel 1482a45253bfSNicolas Dichtel return iflink; 1483a45253bfSNicolas Dichtel } 1484a45253bfSNicolas Dichtel 1485dc224822SToshiaki Makita static netdev_features_t veth_fix_features(struct net_device *dev, 1486dc224822SToshiaki Makita netdev_features_t features) 1487dc224822SToshiaki Makita { 1488dc224822SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 1489dc224822SToshiaki Makita struct net_device *peer; 1490dc224822SToshiaki Makita 1491dc224822SToshiaki Makita peer = rtnl_dereference(priv->peer); 1492dc224822SToshiaki Makita if (peer) { 1493dc224822SToshiaki Makita struct veth_priv *peer_priv = netdev_priv(peer); 1494dc224822SToshiaki Makita 1495dc224822SToshiaki Makita if (peer_priv->_xdp_prog) 1496dc224822SToshiaki Makita features &= ~NETIF_F_GSO_SOFTWARE; 1497dc224822SToshiaki Makita } 1498d3256efdSPaolo Abeni if (priv->_xdp_prog) 1499d3256efdSPaolo Abeni features |= NETIF_F_GRO; 1500dc224822SToshiaki Makita 1501dc224822SToshiaki Makita return features; 1502dc224822SToshiaki Makita } 1503dc224822SToshiaki Makita 1504d3256efdSPaolo Abeni static int veth_set_features(struct net_device *dev, 1505d3256efdSPaolo Abeni netdev_features_t features) 1506d3256efdSPaolo Abeni { 1507d3256efdSPaolo Abeni netdev_features_t changed = features ^ dev->features; 1508d3256efdSPaolo Abeni struct veth_priv *priv = netdev_priv(dev); 1509d3256efdSPaolo Abeni int err; 1510d3256efdSPaolo Abeni 1511d3256efdSPaolo Abeni if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) 1512d3256efdSPaolo Abeni return 0; 1513d3256efdSPaolo Abeni 1514d3256efdSPaolo Abeni if (features & NETIF_F_GRO) { 1515d3256efdSPaolo Abeni err = veth_napi_enable(dev); 1516d3256efdSPaolo Abeni if (err) 1517d3256efdSPaolo Abeni return err; 1518*fccca038SLorenzo Bianconi 1519*fccca038SLorenzo Bianconi xdp_features_set_redirect_target(dev, true); 1520d3256efdSPaolo Abeni } else { 
1521*fccca038SLorenzo Bianconi xdp_features_clear_redirect_target(dev); 1522d3256efdSPaolo Abeni veth_napi_del(dev); 1523d3256efdSPaolo Abeni } 1524d3256efdSPaolo Abeni return 0; 1525d3256efdSPaolo Abeni } 1526d3256efdSPaolo Abeni 1527163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1528163e5292SPaolo Abeni { 1529163e5292SPaolo Abeni struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1530163e5292SPaolo Abeni struct net_device *peer; 1531163e5292SPaolo Abeni 1532163e5292SPaolo Abeni if (new_hr < 0) 1533163e5292SPaolo Abeni new_hr = 0; 1534163e5292SPaolo Abeni 1535163e5292SPaolo Abeni rcu_read_lock(); 1536163e5292SPaolo Abeni peer = rcu_dereference(priv->peer); 1537163e5292SPaolo Abeni if (unlikely(!peer)) 1538163e5292SPaolo Abeni goto out; 1539163e5292SPaolo Abeni 1540163e5292SPaolo Abeni peer_priv = netdev_priv(peer); 1541163e5292SPaolo Abeni priv->requested_headroom = new_hr; 1542163e5292SPaolo Abeni new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1543163e5292SPaolo Abeni dev->needed_headroom = new_hr; 1544163e5292SPaolo Abeni peer->needed_headroom = new_hr; 1545163e5292SPaolo Abeni 1546163e5292SPaolo Abeni out: 1547163e5292SPaolo Abeni rcu_read_unlock(); 1548163e5292SPaolo Abeni } 1549163e5292SPaolo Abeni 1550948d4f21SToshiaki Makita static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1551948d4f21SToshiaki Makita struct netlink_ext_ack *extack) 1552948d4f21SToshiaki Makita { 1553948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 1554948d4f21SToshiaki Makita struct bpf_prog *old_prog; 1555948d4f21SToshiaki Makita struct net_device *peer; 1556dc224822SToshiaki Makita unsigned int max_mtu; 1557948d4f21SToshiaki Makita int err; 1558948d4f21SToshiaki Makita 1559948d4f21SToshiaki Makita old_prog = priv->_xdp_prog; 1560948d4f21SToshiaki Makita priv->_xdp_prog = prog; 1561948d4f21SToshiaki Makita peer = rtnl_dereference(priv->peer); 1562948d4f21SToshiaki Makita 
1563948d4f21SToshiaki Makita if (prog) { 1564948d4f21SToshiaki Makita if (!peer) { 1565948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1566948d4f21SToshiaki Makita err = -ENOTCONN; 1567948d4f21SToshiaki Makita goto err; 1568948d4f21SToshiaki Makita } 1569948d4f21SToshiaki Makita 15707cda76d8SLorenzo Bianconi max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) - 15717cda76d8SLorenzo Bianconi peer->hard_header_len; 15727cda76d8SLorenzo Bianconi /* Allow increasing the max_mtu if the program supports 15737cda76d8SLorenzo Bianconi * XDP fragments. 15747cda76d8SLorenzo Bianconi */ 15757cda76d8SLorenzo Bianconi if (prog->aux->xdp_has_frags) 15767cda76d8SLorenzo Bianconi max_mtu += PAGE_SIZE * MAX_SKB_FRAGS; 15777cda76d8SLorenzo Bianconi 1578dc224822SToshiaki Makita if (peer->mtu > max_mtu) { 1579dc224822SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1580dc224822SToshiaki Makita err = -ERANGE; 1581dc224822SToshiaki Makita goto err; 1582dc224822SToshiaki Makita } 1583dc224822SToshiaki Makita 1584638264dcSToshiaki Makita if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1585638264dcSToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1586638264dcSToshiaki Makita err = -ENOSPC; 1587638264dcSToshiaki Makita goto err; 1588638264dcSToshiaki Makita } 1589638264dcSToshiaki Makita 1590948d4f21SToshiaki Makita if (dev->flags & IFF_UP) { 1591948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 1592948d4f21SToshiaki Makita if (err) { 1593948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1594948d4f21SToshiaki Makita goto err; 1595948d4f21SToshiaki Makita } 1596948d4f21SToshiaki Makita } 1597dc224822SToshiaki Makita 1598dc224822SToshiaki Makita if (!old_prog) { 1599dc224822SToshiaki Makita peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1600dc224822SToshiaki Makita peer->max_mtu = max_mtu; 1601dc224822SToshiaki 
Makita } 1602*fccca038SLorenzo Bianconi 1603*fccca038SLorenzo Bianconi xdp_features_set_redirect_target(dev, true); 1604948d4f21SToshiaki Makita } 1605948d4f21SToshiaki Makita 1606948d4f21SToshiaki Makita if (old_prog) { 1607dc224822SToshiaki Makita if (!prog) { 1608*fccca038SLorenzo Bianconi if (!veth_gro_requested(dev)) 1609*fccca038SLorenzo Bianconi xdp_features_clear_redirect_target(dev); 1610*fccca038SLorenzo Bianconi 1611dc224822SToshiaki Makita if (dev->flags & IFF_UP) 1612948d4f21SToshiaki Makita veth_disable_xdp(dev); 1613dc224822SToshiaki Makita 1614dc224822SToshiaki Makita if (peer) { 1615dc224822SToshiaki Makita peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1616dc224822SToshiaki Makita peer->max_mtu = ETH_MAX_MTU; 1617dc224822SToshiaki Makita } 1618dc224822SToshiaki Makita } 1619948d4f21SToshiaki Makita bpf_prog_put(old_prog); 1620948d4f21SToshiaki Makita } 1621948d4f21SToshiaki Makita 1622dc224822SToshiaki Makita if ((!!old_prog ^ !!prog) && peer) 1623dc224822SToshiaki Makita netdev_update_features(peer); 1624dc224822SToshiaki Makita 1625948d4f21SToshiaki Makita return 0; 1626948d4f21SToshiaki Makita err: 1627948d4f21SToshiaki Makita priv->_xdp_prog = old_prog; 1628948d4f21SToshiaki Makita 1629948d4f21SToshiaki Makita return err; 1630948d4f21SToshiaki Makita } 1631948d4f21SToshiaki Makita 1632948d4f21SToshiaki Makita static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1633948d4f21SToshiaki Makita { 1634948d4f21SToshiaki Makita switch (xdp->command) { 1635948d4f21SToshiaki Makita case XDP_SETUP_PROG: 1636948d4f21SToshiaki Makita return veth_xdp_set(dev, xdp->prog, xdp->extack); 1637948d4f21SToshiaki Makita default: 1638948d4f21SToshiaki Makita return -EINVAL; 1639948d4f21SToshiaki Makita } 1640948d4f21SToshiaki Makita } 1641948d4f21SToshiaki Makita 1642306531f0SStanislav Fomichev static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 1643306531f0SStanislav Fomichev { 1644306531f0SStanislav Fomichev struct veth_xdp_buff 
*_ctx = (void *)ctx; 1645306531f0SStanislav Fomichev 1646306531f0SStanislav Fomichev if (!_ctx->skb) 1647306531f0SStanislav Fomichev return -EOPNOTSUPP; 1648306531f0SStanislav Fomichev 1649306531f0SStanislav Fomichev *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; 1650306531f0SStanislav Fomichev return 0; 1651306531f0SStanislav Fomichev } 1652306531f0SStanislav Fomichev 1653306531f0SStanislav Fomichev static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) 1654306531f0SStanislav Fomichev { 1655306531f0SStanislav Fomichev struct veth_xdp_buff *_ctx = (void *)ctx; 1656306531f0SStanislav Fomichev 1657306531f0SStanislav Fomichev if (!_ctx->skb) 1658306531f0SStanislav Fomichev return -EOPNOTSUPP; 1659306531f0SStanislav Fomichev 1660306531f0SStanislav Fomichev *hash = skb_get_hash(_ctx->skb); 1661306531f0SStanislav Fomichev return 0; 1662306531f0SStanislav Fomichev } 1663306531f0SStanislav Fomichev 16644456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = { 16654456e7bdSStephen Hemminger .ndo_init = veth_dev_init, 16664456e7bdSStephen Hemminger .ndo_open = veth_open, 16672cf48a10SEric W. 
Biederman .ndo_stop = veth_close, 166800829823SStephen Hemminger .ndo_start_xmit = veth_xmit, 16696311cc44Sstephen hemminger .ndo_get_stats64 = veth_get_stats64, 16705c70ef85SGao feng .ndo_set_rx_mode = veth_set_multicast_list, 1671ee923623SDaniel Lezcano .ndo_set_mac_address = eth_mac_addr, 1672bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 1673bb446c19SWANG Cong .ndo_poll_controller = veth_poll_controller, 1674bb446c19SWANG Cong #endif 1675a45253bfSNicolas Dichtel .ndo_get_iflink = veth_get_iflink, 1676dc224822SToshiaki Makita .ndo_fix_features = veth_fix_features, 1677d3256efdSPaolo Abeni .ndo_set_features = veth_set_features, 16781a04a821SToshiaki Makita .ndo_features_check = passthru_features_check, 1679163e5292SPaolo Abeni .ndo_set_rx_headroom = veth_set_rx_headroom, 1680948d4f21SToshiaki Makita .ndo_bpf = veth_xdp, 16819152cff0SLorenzo Bianconi .ndo_xdp_xmit = veth_ndo_xdp_xmit, 16829aa1206eSDaniel Borkmann .ndo_get_peer_dev = veth_peer_dev, 16834456e7bdSStephen Hemminger }; 16844456e7bdSStephen Hemminger 1685306531f0SStanislav Fomichev static const struct xdp_metadata_ops veth_xdp_metadata_ops = { 1686306531f0SStanislav Fomichev .xmo_rx_timestamp = veth_xdp_rx_timestamp, 1687306531f0SStanislav Fomichev .xmo_rx_hash = veth_xdp_rx_hash, 1688306531f0SStanislav Fomichev }; 1689306531f0SStanislav Fomichev 1690732912d7SAlexander Duyck #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1691c80fafbbSXin Long NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1692732912d7SAlexander Duyck NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 169328d2b136SPatrick McHardy NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 169428d2b136SPatrick McHardy NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 16958093315aSEric Dumazet 1696e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev) 1697e314dbdcSPavel Emelyanov { 1698e314dbdcSPavel Emelyanov ether_setup(dev); 1699e314dbdcSPavel Emelyanov 1700550fd08cSNeil Horman 
dev->priv_flags &= ~IFF_TX_SKB_SHARING; 170123ea5a96SHannes Frederic Sowa dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 170202f01ec1SPhil Sutter dev->priv_flags |= IFF_NO_QUEUE; 1703163e5292SPaolo Abeni dev->priv_flags |= IFF_PHONY_HEADROOM; 1704550fd08cSNeil Horman 17054456e7bdSStephen Hemminger dev->netdev_ops = &veth_netdev_ops; 1706306531f0SStanislav Fomichev dev->xdp_metadata_ops = &veth_xdp_metadata_ops; 1707e314dbdcSPavel Emelyanov dev->ethtool_ops = &veth_ethtool_ops; 1708e314dbdcSPavel Emelyanov dev->features |= NETIF_F_LLTX; 17098093315aSEric Dumazet dev->features |= VETH_FEATURES; 17108d0d21f4SToshiaki Makita dev->vlan_features = dev->features & 17113f8c707bSVlad Yasevich ~(NETIF_F_HW_VLAN_CTAG_TX | 17123f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_TX | 17133f8c707bSVlad Yasevich NETIF_F_HW_VLAN_CTAG_RX | 17143f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_RX); 1715cf124db5SDavid S. Miller dev->needs_free_netdev = true; 1716cf124db5SDavid S. Miller dev->priv_destructor = veth_dev_free; 171791572088SJarod Wilson dev->max_mtu = ETH_MAX_MTU; 1718a2c725faSMichał Mirosław 17198093315aSEric Dumazet dev->hw_features = VETH_FEATURES; 172082d81898SEric Dumazet dev->hw_enc_features = VETH_FEATURES; 1721607fca9aSDavid Ahern dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1722d406099dSEric Dumazet netif_set_tso_max_size(dev, GSO_MAX_SIZE); 1723e314dbdcSPavel Emelyanov } 1724e314dbdcSPavel Emelyanov 1725e314dbdcSPavel Emelyanov /* 1726e314dbdcSPavel Emelyanov * netlink interface 1727e314dbdcSPavel Emelyanov */ 1728e314dbdcSPavel Emelyanov 1729a8b8a889SMatthias Schiffer static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1730a8b8a889SMatthias Schiffer struct netlink_ext_ack *extack) 1731e314dbdcSPavel Emelyanov { 1732e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS]) { 1733e314dbdcSPavel Emelyanov if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1734e314dbdcSPavel Emelyanov return -EINVAL; 1735e314dbdcSPavel Emelyanov if 
(!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1736e314dbdcSPavel Emelyanov return -EADDRNOTAVAIL; 1737e314dbdcSPavel Emelyanov } 173838d40815SEric Biederman if (tb[IFLA_MTU]) { 173938d40815SEric Biederman if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 174038d40815SEric Biederman return -EINVAL; 174138d40815SEric Biederman } 1742e314dbdcSPavel Emelyanov return 0; 1743e314dbdcSPavel Emelyanov } 1744e314dbdcSPavel Emelyanov 1745e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops; 1746e314dbdcSPavel Emelyanov 1747d3256efdSPaolo Abeni static void veth_disable_gro(struct net_device *dev) 1748d3256efdSPaolo Abeni { 1749d3256efdSPaolo Abeni dev->features &= ~NETIF_F_GRO; 1750d3256efdSPaolo Abeni dev->wanted_features &= ~NETIF_F_GRO; 1751d3256efdSPaolo Abeni netdev_update_features(dev); 1752d3256efdSPaolo Abeni } 1753d3256efdSPaolo Abeni 17549d3684c2SPaolo Abeni static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) 17559d3684c2SPaolo Abeni { 17569d3684c2SPaolo Abeni int err; 17579d3684c2SPaolo Abeni 17589d3684c2SPaolo Abeni if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { 17599d3684c2SPaolo Abeni err = netif_set_real_num_tx_queues(dev, 1); 17609d3684c2SPaolo Abeni if (err) 17619d3684c2SPaolo Abeni return err; 17629d3684c2SPaolo Abeni } 17639d3684c2SPaolo Abeni if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { 17649d3684c2SPaolo Abeni err = netif_set_real_num_rx_queues(dev, 1); 17659d3684c2SPaolo Abeni if (err) 17669d3684c2SPaolo Abeni return err; 17679d3684c2SPaolo Abeni } 17689d3684c2SPaolo Abeni return 0; 17699d3684c2SPaolo Abeni } 17709d3684c2SPaolo Abeni 177181adee47SEric W. 
Biederman static int veth_newlink(struct net *src_net, struct net_device *dev, 17727a3f4a18SMatthias Schiffer struct nlattr *tb[], struct nlattr *data[], 17737a3f4a18SMatthias Schiffer struct netlink_ext_ack *extack) 1774e314dbdcSPavel Emelyanov { 17757797b93bSToshiaki Makita int err; 1776e314dbdcSPavel Emelyanov struct net_device *peer; 1777e314dbdcSPavel Emelyanov struct veth_priv *priv; 1778e314dbdcSPavel Emelyanov char ifname[IFNAMSIZ]; 1779e314dbdcSPavel Emelyanov struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 17805517750fSTom Gundersen unsigned char name_assign_type; 17813729d502SPatrick McHardy struct ifinfomsg *ifmp; 178281adee47SEric W. Biederman struct net *net; 1783e314dbdcSPavel Emelyanov 1784e314dbdcSPavel Emelyanov /* 1785e314dbdcSPavel Emelyanov * create and register peer first 1786e314dbdcSPavel Emelyanov */ 1787e314dbdcSPavel Emelyanov if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1788e314dbdcSPavel Emelyanov struct nlattr *nla_peer; 1789e314dbdcSPavel Emelyanov 1790e314dbdcSPavel Emelyanov nla_peer = data[VETH_INFO_PEER]; 17913729d502SPatrick McHardy ifmp = nla_data(nla_peer); 1792f7b12606SJiri Pirko err = rtnl_nla_parse_ifla(peer_tb, 1793e314dbdcSPavel Emelyanov nla_data(nla_peer) + sizeof(struct ifinfomsg), 1794fceb6435SJohannes Berg nla_len(nla_peer) - sizeof(struct ifinfomsg), 1795fceb6435SJohannes Berg NULL); 1796e314dbdcSPavel Emelyanov if (err < 0) 1797e314dbdcSPavel Emelyanov return err; 1798e314dbdcSPavel Emelyanov 1799a8b8a889SMatthias Schiffer err = veth_validate(peer_tb, NULL, extack); 1800e314dbdcSPavel Emelyanov if (err < 0) 1801e314dbdcSPavel Emelyanov return err; 1802e314dbdcSPavel Emelyanov 1803e314dbdcSPavel Emelyanov tbp = peer_tb; 18043729d502SPatrick McHardy } else { 18053729d502SPatrick McHardy ifmp = NULL; 1806e314dbdcSPavel Emelyanov tbp = tb; 18073729d502SPatrick McHardy } 1808e314dbdcSPavel Emelyanov 1809191cdb38SSerhey Popovych if (ifmp && tbp[IFLA_IFNAME]) { 1810872f6903SFrancis Laniel nla_strscpy(ifname, 
tbp[IFLA_IFNAME], IFNAMSIZ); 18115517750fSTom Gundersen name_assign_type = NET_NAME_USER; 18125517750fSTom Gundersen } else { 1813e314dbdcSPavel Emelyanov snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 18145517750fSTom Gundersen name_assign_type = NET_NAME_ENUM; 18155517750fSTom Gundersen } 1816e314dbdcSPavel Emelyanov 181781adee47SEric W. Biederman net = rtnl_link_get_net(src_net, tbp); 181881adee47SEric W. Biederman if (IS_ERR(net)) 181981adee47SEric W. Biederman return PTR_ERR(net); 182081adee47SEric W. Biederman 18215517750fSTom Gundersen peer = rtnl_create_link(net, ifname, name_assign_type, 1822d0522f1cSDavid Ahern &veth_link_ops, tbp, extack); 182381adee47SEric W. Biederman if (IS_ERR(peer)) { 182481adee47SEric W. Biederman put_net(net); 1825e314dbdcSPavel Emelyanov return PTR_ERR(peer); 182681adee47SEric W. Biederman } 1827e314dbdcSPavel Emelyanov 1828191cdb38SSerhey Popovych if (!ifmp || !tbp[IFLA_ADDRESS]) 1829f2cedb63SDanny Kukawka eth_hw_addr_random(peer); 1830e314dbdcSPavel Emelyanov 1831e6f8f1a7SPavel Emelyanov if (ifmp && (dev->ifindex != 0)) 1832e6f8f1a7SPavel Emelyanov peer->ifindex = ifmp->ifi_index; 1833e6f8f1a7SPavel Emelyanov 18346df6398fSJakub Kicinski netif_inherit_tso_max(peer, dev); 183572d24955SStephen Hemminger 1836e314dbdcSPavel Emelyanov err = register_netdevice(peer); 183781adee47SEric W. Biederman put_net(net); 183881adee47SEric W. 
Biederman net = NULL; 1839e314dbdcSPavel Emelyanov if (err < 0) 1840e314dbdcSPavel Emelyanov goto err_register_peer; 1841e314dbdcSPavel Emelyanov 1842d3256efdSPaolo Abeni /* keep GRO disabled by default to be consistent with the established 1843d3256efdSPaolo Abeni * veth behavior 1844d3256efdSPaolo Abeni */ 1845d3256efdSPaolo Abeni veth_disable_gro(peer); 1846e314dbdcSPavel Emelyanov netif_carrier_off(peer); 1847e314dbdcSPavel Emelyanov 18481d997f10SHangbin Liu err = rtnl_configure_link(peer, ifmp, 0, NULL); 18493729d502SPatrick McHardy if (err < 0) 18503729d502SPatrick McHardy goto err_configure_peer; 18513729d502SPatrick McHardy 1852e314dbdcSPavel Emelyanov /* 1853e314dbdcSPavel Emelyanov * register dev last 1854e314dbdcSPavel Emelyanov * 1855e314dbdcSPavel Emelyanov * note, that since we've registered new device the dev's name 1856e314dbdcSPavel Emelyanov * should be re-allocated 1857e314dbdcSPavel Emelyanov */ 1858e314dbdcSPavel Emelyanov 1859e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS] == NULL) 1860f2cedb63SDanny Kukawka eth_hw_addr_random(dev); 1861e314dbdcSPavel Emelyanov 18626c8c4446SJiri Pirko if (tb[IFLA_IFNAME]) 1863872f6903SFrancis Laniel nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 18646c8c4446SJiri Pirko else 18656c8c4446SJiri Pirko snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 18666c8c4446SJiri Pirko 1867e314dbdcSPavel Emelyanov err = register_netdevice(dev); 1868e314dbdcSPavel Emelyanov if (err < 0) 1869e314dbdcSPavel Emelyanov goto err_register_dev; 1870e314dbdcSPavel Emelyanov 1871e314dbdcSPavel Emelyanov netif_carrier_off(dev); 1872e314dbdcSPavel Emelyanov 1873e314dbdcSPavel Emelyanov /* 1874e314dbdcSPavel Emelyanov * tie the deviced together 1875e314dbdcSPavel Emelyanov */ 1876e314dbdcSPavel Emelyanov 1877e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1878d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, peer); 18799d3684c2SPaolo Abeni err = veth_init_queues(dev, tb); 18809d3684c2SPaolo Abeni if (err) 18819d3684c2SPaolo Abeni goto 
err_queues; 1882e314dbdcSPavel Emelyanov 1883e314dbdcSPavel Emelyanov priv = netdev_priv(peer); 1884d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, dev); 18859d3684c2SPaolo Abeni err = veth_init_queues(peer, tb); 18869d3684c2SPaolo Abeni if (err) 18879d3684c2SPaolo Abeni goto err_queues; 1888948d4f21SToshiaki Makita 1889d3256efdSPaolo Abeni veth_disable_gro(dev); 1890*fccca038SLorenzo Bianconi /* update XDP supported features */ 1891*fccca038SLorenzo Bianconi veth_set_xdp_features(dev); 1892*fccca038SLorenzo Bianconi veth_set_xdp_features(peer); 1893*fccca038SLorenzo Bianconi 1894e314dbdcSPavel Emelyanov return 0; 1895e314dbdcSPavel Emelyanov 18969d3684c2SPaolo Abeni err_queues: 18979d3684c2SPaolo Abeni unregister_netdevice(dev); 1898e314dbdcSPavel Emelyanov err_register_dev: 1899e314dbdcSPavel Emelyanov /* nothing to do */ 19003729d502SPatrick McHardy err_configure_peer: 1901e314dbdcSPavel Emelyanov unregister_netdevice(peer); 1902e314dbdcSPavel Emelyanov return err; 1903e314dbdcSPavel Emelyanov 1904e314dbdcSPavel Emelyanov err_register_peer: 1905e314dbdcSPavel Emelyanov free_netdev(peer); 1906e314dbdcSPavel Emelyanov return err; 1907e314dbdcSPavel Emelyanov } 1908e314dbdcSPavel Emelyanov 190923289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head) 1910e314dbdcSPavel Emelyanov { 1911e314dbdcSPavel Emelyanov struct veth_priv *priv; 1912e314dbdcSPavel Emelyanov struct net_device *peer; 1913e314dbdcSPavel Emelyanov 1914e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1915d0e2c55eSEric Dumazet peer = rtnl_dereference(priv->peer); 1916d0e2c55eSEric Dumazet 1917d0e2c55eSEric Dumazet /* Note : dellink() is called from default_device_exit_batch(), 1918d0e2c55eSEric Dumazet * before a rcu_synchronize() point. The devices are guaranteed 1919d0e2c55eSEric Dumazet * not being freed before one RCU grace period. 
1920d0e2c55eSEric Dumazet */ 1921d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 1922f45a5c26SEric Dumazet unregister_netdevice_queue(dev, head); 1923d0e2c55eSEric Dumazet 1924f45a5c26SEric Dumazet if (peer) { 1925d0e2c55eSEric Dumazet priv = netdev_priv(peer); 1926d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 192724540535SEric Dumazet unregister_netdevice_queue(peer, head); 1928e314dbdcSPavel Emelyanov } 1929f45a5c26SEric Dumazet } 1930e314dbdcSPavel Emelyanov 193123711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 193223711438SThomas Graf [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 193323711438SThomas Graf }; 1934e314dbdcSPavel Emelyanov 1935e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev) 1936e5f4e7b9SNicolas Dichtel { 1937e5f4e7b9SNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 1938e5f4e7b9SNicolas Dichtel struct net_device *peer = rtnl_dereference(priv->peer); 1939e5f4e7b9SNicolas Dichtel 1940e5f4e7b9SNicolas Dichtel return peer ? 
dev_net(peer) : dev_net(dev); 1941e5f4e7b9SNicolas Dichtel } 1942e5f4e7b9SNicolas Dichtel 19439d3684c2SPaolo Abeni static unsigned int veth_get_num_queues(void) 19449d3684c2SPaolo Abeni { 19459d3684c2SPaolo Abeni /* enforce the same queue limit as rtnl_create_link */ 19469d3684c2SPaolo Abeni int queues = num_possible_cpus(); 19479d3684c2SPaolo Abeni 19489d3684c2SPaolo Abeni if (queues > 4096) 19499d3684c2SPaolo Abeni queues = 4096; 19509d3684c2SPaolo Abeni return queues; 19519d3684c2SPaolo Abeni } 19529d3684c2SPaolo Abeni 1953e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = { 1954e314dbdcSPavel Emelyanov .kind = DRV_NAME, 1955e314dbdcSPavel Emelyanov .priv_size = sizeof(struct veth_priv), 1956e314dbdcSPavel Emelyanov .setup = veth_setup, 1957e314dbdcSPavel Emelyanov .validate = veth_validate, 1958e314dbdcSPavel Emelyanov .newlink = veth_newlink, 1959e314dbdcSPavel Emelyanov .dellink = veth_dellink, 1960e314dbdcSPavel Emelyanov .policy = veth_policy, 1961e314dbdcSPavel Emelyanov .maxtype = VETH_INFO_MAX, 1962e5f4e7b9SNicolas Dichtel .get_link_net = veth_get_link_net, 19639d3684c2SPaolo Abeni .get_num_tx_queues = veth_get_num_queues, 19649d3684c2SPaolo Abeni .get_num_rx_queues = veth_get_num_queues, 1965e314dbdcSPavel Emelyanov }; 1966e314dbdcSPavel Emelyanov 1967e314dbdcSPavel Emelyanov /* 1968e314dbdcSPavel Emelyanov * init/fini 1969e314dbdcSPavel Emelyanov */ 1970e314dbdcSPavel Emelyanov 1971e314dbdcSPavel Emelyanov static __init int veth_init(void) 1972e314dbdcSPavel Emelyanov { 1973e314dbdcSPavel Emelyanov return rtnl_link_register(&veth_link_ops); 1974e314dbdcSPavel Emelyanov } 1975e314dbdcSPavel Emelyanov 1976e314dbdcSPavel Emelyanov static __exit void veth_exit(void) 1977e314dbdcSPavel Emelyanov { 197868365458SPatrick McHardy rtnl_link_unregister(&veth_link_ops); 1979e314dbdcSPavel Emelyanov } 1980e314dbdcSPavel Emelyanov 1981e314dbdcSPavel Emelyanov module_init(veth_init); 1982e314dbdcSPavel Emelyanov module_exit(veth_exit); 
1983e314dbdcSPavel Emelyanov 1984e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 1985e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2"); 1986e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME); 1987