1e314dbdcSPavel Emelyanov /* 2e314dbdcSPavel Emelyanov * drivers/net/veth.c 3e314dbdcSPavel Emelyanov * 4e314dbdcSPavel Emelyanov * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5e314dbdcSPavel Emelyanov * 6e314dbdcSPavel Emelyanov * Author: Pavel Emelianov <xemul@openvz.org> 7e314dbdcSPavel Emelyanov * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8e314dbdcSPavel Emelyanov * 9e314dbdcSPavel Emelyanov */ 10e314dbdcSPavel Emelyanov 11e314dbdcSPavel Emelyanov #include <linux/netdevice.h> 125a0e3ad6STejun Heo #include <linux/slab.h> 13e314dbdcSPavel Emelyanov #include <linux/ethtool.h> 14e314dbdcSPavel Emelyanov #include <linux/etherdevice.h> 15cf05c700SEric Dumazet #include <linux/u64_stats_sync.h> 16e314dbdcSPavel Emelyanov 17f7b12606SJiri Pirko #include <net/rtnetlink.h> 18e314dbdcSPavel Emelyanov #include <net/dst.h> 19e314dbdcSPavel Emelyanov #include <net/xfrm.h> 20af87a3aaSToshiaki Makita #include <net/xdp.h> 21ecef969eSStephen Hemminger #include <linux/veth.h> 229d9779e7SPaul Gortmaker #include <linux/module.h> 23948d4f21SToshiaki Makita #include <linux/bpf.h> 24948d4f21SToshiaki Makita #include <linux/filter.h> 25948d4f21SToshiaki Makita #include <linux/ptr_ring.h> 26948d4f21SToshiaki Makita #include <linux/bpf_trace.h> 27aa4e689eSMichael Walle #include <linux/net_tstamp.h> 28e314dbdcSPavel Emelyanov 29e314dbdcSPavel Emelyanov #define DRV_NAME "veth" 30e314dbdcSPavel Emelyanov #define DRV_VERSION "1.0" 31e314dbdcSPavel Emelyanov 329fc8d518SToshiaki Makita #define VETH_XDP_FLAG BIT(0) 33948d4f21SToshiaki Makita #define VETH_RING_SIZE 256 34948d4f21SToshiaki Makita #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 35948d4f21SToshiaki Makita 36d1396004SToshiaki Makita /* Separating two types of XDP xmit */ 37d1396004SToshiaki Makita #define VETH_XDP_TX BIT(0) 38d1396004SToshiaki Makita #define VETH_XDP_REDIR BIT(1) 39d1396004SToshiaki Makita 404195e54aSToshiaki Makita struct veth_rq_stats { 414195e54aSToshiaki Makita u64 xdp_packets; 424195e54aSToshiaki Makita u64 xdp_bytes; 434195e54aSToshiaki Makita u64 xdp_drops; 444195e54aSToshiaki Makita struct u64_stats_sync syncp; 454195e54aSToshiaki Makita }; 464195e54aSToshiaki Makita 47638264dcSToshiaki Makita struct veth_rq { 48948d4f21SToshiaki Makita struct napi_struct xdp_napi; 49948d4f21SToshiaki Makita struct net_device *dev; 50948d4f21SToshiaki Makita struct bpf_prog __rcu *xdp_prog; 51d1396004SToshiaki Makita struct xdp_mem_info xdp_mem; 524195e54aSToshiaki Makita struct veth_rq_stats stats; 53948d4f21SToshiaki Makita bool rx_notify_masked; 54948d4f21SToshiaki Makita struct ptr_ring xdp_ring; 55948d4f21SToshiaki Makita struct xdp_rxq_info xdp_rxq; 56e314dbdcSPavel Emelyanov }; 57e314dbdcSPavel Emelyanov 58638264dcSToshiaki Makita struct veth_priv { 59638264dcSToshiaki Makita struct net_device __rcu *peer; 60638264dcSToshiaki Makita atomic64_t dropped; 61638264dcSToshiaki Makita struct bpf_prog *_xdp_prog; 62638264dcSToshiaki Makita struct veth_rq *rq; 63638264dcSToshiaki Makita unsigned int requested_headroom; 64638264dcSToshiaki Makita }; 65638264dcSToshiaki Makita 66e314dbdcSPavel Emelyanov /* 67e314dbdcSPavel Emelyanov * ethtool interface 68e314dbdcSPavel Emelyanov */ 69e314dbdcSPavel Emelyanov 70d397b968SToshiaki Makita struct veth_q_stat_desc { 71d397b968SToshiaki Makita char desc[ETH_GSTRING_LEN]; 72d397b968SToshiaki Makita size_t offset; 73d397b968SToshiaki Makita }; 74d397b968SToshiaki Makita 75d397b968SToshiaki Makita #define VETH_RQ_STAT(m) offsetof(struct veth_rq_stats, m) 76d397b968SToshiaki Makita 77d397b968SToshiaki Makita static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 78d397b968SToshiaki Makita { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 79d397b968SToshiaki Makita { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 80d397b968SToshiaki Makita { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 81d397b968SToshiaki Makita }; 82d397b968SToshiaki Makita 83d397b968SToshiaki Makita #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 84d397b968SToshiaki Makita 85e314dbdcSPavel Emelyanov static struct { 86e314dbdcSPavel Emelyanov const char string[ETH_GSTRING_LEN]; 87e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = { 88e314dbdcSPavel Emelyanov { "peer_ifindex" }, 89e314dbdcSPavel Emelyanov }; 90e314dbdcSPavel Emelyanov 9156607b98SPhilippe Reynes static int veth_get_link_ksettings(struct net_device *dev, 9256607b98SPhilippe Reynes struct ethtool_link_ksettings *cmd) 93e314dbdcSPavel Emelyanov { 9456607b98SPhilippe Reynes cmd->base.speed = SPEED_10000; 9556607b98SPhilippe Reynes cmd->base.duplex = DUPLEX_FULL; 9656607b98SPhilippe Reynes cmd->base.port = PORT_TP; 9756607b98SPhilippe Reynes cmd->base.autoneg = AUTONEG_DISABLE; 98e314dbdcSPavel Emelyanov return 0; 99e314dbdcSPavel Emelyanov } 100e314dbdcSPavel Emelyanov 101e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 102e314dbdcSPavel Emelyanov { 10333a5ba14SRick Jones strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 10433a5ba14SRick Jones strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 105e314dbdcSPavel Emelyanov } 106e314dbdcSPavel Emelyanov 107e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 108e314dbdcSPavel Emelyanov { 109d397b968SToshiaki Makita char *p = (char *)buf; 110d397b968SToshiaki Makita int i, j; 111d397b968SToshiaki Makita 112e314dbdcSPavel Emelyanov switch(stringset) { 113e314dbdcSPavel Emelyanov case ETH_SS_STATS: 114d397b968SToshiaki Makita memcpy(p, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 115d397b968SToshiaki Makita p += sizeof(ethtool_stats_keys); 116d397b968SToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 117d397b968SToshiaki Makita for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 118d397b968SToshiaki Makita snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s", 119d397b968SToshiaki Makita i, veth_rq_stats_desc[j].desc); 120d397b968SToshiaki Makita p += ETH_GSTRING_LEN; 121d397b968SToshiaki Makita } 122d397b968SToshiaki Makita } 123e314dbdcSPavel Emelyanov break; 124e314dbdcSPavel Emelyanov } 125e314dbdcSPavel Emelyanov } 126e314dbdcSPavel Emelyanov 127b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset) 128e314dbdcSPavel Emelyanov { 129b9f2c044SJeff Garzik switch (sset) { 130b9f2c044SJeff Garzik case ETH_SS_STATS: 131d397b968SToshiaki Makita return ARRAY_SIZE(ethtool_stats_keys) + 132d397b968SToshiaki Makita VETH_RQ_STATS_LEN * dev->real_num_rx_queues; 133b9f2c044SJeff Garzik default: 134b9f2c044SJeff Garzik return -EOPNOTSUPP; 135b9f2c044SJeff Garzik } 136e314dbdcSPavel Emelyanov } 137e314dbdcSPavel Emelyanov 138e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev, 139e314dbdcSPavel Emelyanov struct ethtool_stats *stats, u64 *data) 140e314dbdcSPavel Emelyanov { 141d0e2c55eSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 142d0e2c55eSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 143d397b968SToshiaki Makita int i, j, idx; 144e314dbdcSPavel Emelyanov 145d0e2c55eSEric Dumazet data[0] = peer ? peer->ifindex : 0; 146d397b968SToshiaki Makita idx = 1; 147d397b968SToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 148d397b968SToshiaki Makita const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 149d397b968SToshiaki Makita const void *stats_base = (void *)rq_stats; 150d397b968SToshiaki Makita unsigned int start; 151d397b968SToshiaki Makita size_t offset; 152d397b968SToshiaki Makita 153d397b968SToshiaki Makita do { 154d397b968SToshiaki Makita start = u64_stats_fetch_begin_irq(&rq_stats->syncp); 155d397b968SToshiaki Makita for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 156d397b968SToshiaki Makita offset = veth_rq_stats_desc[j].offset; 157d397b968SToshiaki Makita data[idx + j] = *(u64 *)(stats_base + offset); 158d397b968SToshiaki Makita } 159d397b968SToshiaki Makita } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); 160d397b968SToshiaki Makita idx += VETH_RQ_STATS_LEN; 161d397b968SToshiaki Makita } 162e314dbdcSPavel Emelyanov } 163e314dbdcSPavel Emelyanov 164aa4e689eSMichael Walle static int veth_get_ts_info(struct net_device *dev, 165aa4e689eSMichael Walle struct ethtool_ts_info *info) 166aa4e689eSMichael Walle { 167aa4e689eSMichael Walle info->so_timestamping = 168aa4e689eSMichael Walle SOF_TIMESTAMPING_TX_SOFTWARE | 169aa4e689eSMichael Walle SOF_TIMESTAMPING_RX_SOFTWARE | 170aa4e689eSMichael Walle SOF_TIMESTAMPING_SOFTWARE; 171aa4e689eSMichael Walle info->phc_index = -1; 172aa4e689eSMichael Walle 173aa4e689eSMichael Walle return 0; 174aa4e689eSMichael Walle } 175aa4e689eSMichael Walle 1760fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = { 177e314dbdcSPavel Emelyanov .get_drvinfo = veth_get_drvinfo, 178e314dbdcSPavel Emelyanov .get_link = ethtool_op_get_link, 179e314dbdcSPavel Emelyanov .get_strings = veth_get_strings, 180b9f2c044SJeff Garzik .get_sset_count = veth_get_sset_count, 181e314dbdcSPavel Emelyanov .get_ethtool_stats = veth_get_ethtool_stats, 18256607b98SPhilippe Reynes .get_link_ksettings = veth_get_link_ksettings, 183aa4e689eSMichael Walle .get_ts_info = veth_get_ts_info, 184e314dbdcSPavel Emelyanov }; 185e314dbdcSPavel Emelyanov 186948d4f21SToshiaki Makita /* general routines */ 187948d4f21SToshiaki Makita 1889fc8d518SToshiaki Makita static bool veth_is_xdp_frame(void *ptr) 1899fc8d518SToshiaki Makita { 1909fc8d518SToshiaki Makita return (unsigned long)ptr & VETH_XDP_FLAG; 1919fc8d518SToshiaki Makita } 1929fc8d518SToshiaki Makita 1939fc8d518SToshiaki Makita static void *veth_ptr_to_xdp(void *ptr) 1949fc8d518SToshiaki Makita { 1959fc8d518SToshiaki Makita return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 1969fc8d518SToshiaki Makita } 1979fc8d518SToshiaki Makita 198af87a3aaSToshiaki Makita static void *veth_xdp_to_ptr(void *ptr) 199af87a3aaSToshiaki Makita { 200af87a3aaSToshiaki Makita return (void *)((unsigned long)ptr | VETH_XDP_FLAG); 201af87a3aaSToshiaki Makita } 202af87a3aaSToshiaki Makita 2039fc8d518SToshiaki Makita static void veth_ptr_free(void *ptr) 2049fc8d518SToshiaki Makita { 2059fc8d518SToshiaki Makita if (veth_is_xdp_frame(ptr)) 2069fc8d518SToshiaki Makita xdp_return_frame(veth_ptr_to_xdp(ptr)); 2079fc8d518SToshiaki Makita else 2089fc8d518SToshiaki Makita kfree_skb(ptr); 2099fc8d518SToshiaki Makita } 2109fc8d518SToshiaki Makita 211638264dcSToshiaki Makita static void __veth_xdp_flush(struct veth_rq *rq) 212948d4f21SToshiaki Makita { 213948d4f21SToshiaki Makita /* Write ptr_ring before reading rx_notify_masked */ 214948d4f21SToshiaki Makita smp_mb(); 215638264dcSToshiaki Makita if (!rq->rx_notify_masked) { 216638264dcSToshiaki Makita rq->rx_notify_masked = true; 217638264dcSToshiaki Makita napi_schedule(&rq->xdp_napi); 218948d4f21SToshiaki Makita } 219948d4f21SToshiaki Makita } 220948d4f21SToshiaki Makita 221638264dcSToshiaki Makita static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 222948d4f21SToshiaki Makita { 223638264dcSToshiaki Makita if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 224948d4f21SToshiaki Makita dev_kfree_skb_any(skb); 225948d4f21SToshiaki Makita return NET_RX_DROP; 226948d4f21SToshiaki Makita } 227948d4f21SToshiaki Makita 228948d4f21SToshiaki Makita return NET_RX_SUCCESS; 229948d4f21SToshiaki Makita } 230948d4f21SToshiaki Makita 231638264dcSToshiaki Makita static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 232638264dcSToshiaki Makita struct veth_rq *rq, bool xdp) 233e314dbdcSPavel Emelyanov { 234948d4f21SToshiaki Makita return __dev_forward_skb(dev, skb) ?: xdp ? 235638264dcSToshiaki Makita veth_xdp_rx(rq, skb) : 236948d4f21SToshiaki Makita netif_rx(skb); 237948d4f21SToshiaki Makita } 238948d4f21SToshiaki Makita 239948d4f21SToshiaki Makita static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 240948d4f21SToshiaki Makita { 241948d4f21SToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 242638264dcSToshiaki Makita struct veth_rq *rq = NULL; 243d0e2c55eSEric Dumazet struct net_device *rcv; 2442681128fSEric Dumazet int length = skb->len; 245948d4f21SToshiaki Makita bool rcv_xdp = false; 246638264dcSToshiaki Makita int rxq; 247e314dbdcSPavel Emelyanov 248d0e2c55eSEric Dumazet rcu_read_lock(); 249d0e2c55eSEric Dumazet rcv = rcu_dereference(priv->peer); 250d0e2c55eSEric Dumazet if (unlikely(!rcv)) { 251d0e2c55eSEric Dumazet kfree_skb(skb); 252d0e2c55eSEric Dumazet goto drop; 253d0e2c55eSEric Dumazet } 254e314dbdcSPavel Emelyanov 255948d4f21SToshiaki Makita rcv_priv = netdev_priv(rcv); 256638264dcSToshiaki Makita rxq = skb_get_queue_mapping(skb); 257638264dcSToshiaki Makita if (rxq < rcv->real_num_rx_queues) { 258638264dcSToshiaki Makita rq = &rcv_priv->rq[rxq]; 259638264dcSToshiaki Makita rcv_xdp = rcu_access_pointer(rq->xdp_prog); 260638264dcSToshiaki Makita if (rcv_xdp) 261638264dcSToshiaki Makita skb_record_rx_queue(skb, rxq); 262638264dcSToshiaki Makita } 263948d4f21SToshiaki Makita 264aa4e689eSMichael Walle skb_tx_timestamp(skb); 265638264dcSToshiaki Makita if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { 2664195e54aSToshiaki Makita if (!rcv_xdp) { 26714d73416SLi RongQing struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats); 268e314dbdcSPavel Emelyanov 269cf05c700SEric Dumazet u64_stats_update_begin(&stats->syncp); 2702681128fSEric Dumazet stats->bytes += length; 2712681128fSEric Dumazet stats->packets++; 272cf05c700SEric Dumazet u64_stats_update_end(&stats->syncp); 2734195e54aSToshiaki Makita } 2742681128fSEric Dumazet } else { 275d0e2c55eSEric Dumazet drop: 2762681128fSEric Dumazet atomic64_inc(&priv->dropped); 2772681128fSEric Dumazet } 278948d4f21SToshiaki Makita 279948d4f21SToshiaki Makita if (rcv_xdp) 280638264dcSToshiaki Makita __veth_xdp_flush(rq); 281948d4f21SToshiaki Makita 282d0e2c55eSEric Dumazet rcu_read_unlock(); 283948d4f21SToshiaki Makita 2846ed10654SPatrick McHardy return NETDEV_TX_OK; 285e314dbdcSPavel Emelyanov } 286e314dbdcSPavel Emelyanov 2874195e54aSToshiaki Makita static u64 veth_stats_tx(struct pcpu_lstats *result, struct net_device *dev) 288e314dbdcSPavel Emelyanov { 289cf05c700SEric Dumazet struct veth_priv *priv = netdev_priv(dev); 29011687a10SDavid S. Miller int cpu; 29111687a10SDavid S. Miller 2922681128fSEric Dumazet result->packets = 0; 2932681128fSEric Dumazet result->bytes = 0; 2942b1c8b0fSEric Dumazet for_each_possible_cpu(cpu) { 29514d73416SLi RongQing struct pcpu_lstats *stats = per_cpu_ptr(dev->lstats, cpu); 2962681128fSEric Dumazet u64 packets, bytes; 297cf05c700SEric Dumazet unsigned int start; 298e314dbdcSPavel Emelyanov 299cf05c700SEric Dumazet do { 30057a7744eSEric W. Biederman start = u64_stats_fetch_begin_irq(&stats->syncp); 3012681128fSEric Dumazet packets = stats->packets; 3022681128fSEric Dumazet bytes = stats->bytes; 30357a7744eSEric W. Biederman } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 3042681128fSEric Dumazet result->packets += packets; 3052681128fSEric Dumazet result->bytes += bytes; 306e314dbdcSPavel Emelyanov } 3072681128fSEric Dumazet return atomic64_read(&priv->dropped); 3082681128fSEric Dumazet } 3092681128fSEric Dumazet 3104195e54aSToshiaki Makita static void veth_stats_rx(struct veth_rq_stats *result, struct net_device *dev) 3114195e54aSToshiaki Makita { 3124195e54aSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 3134195e54aSToshiaki Makita int i; 3144195e54aSToshiaki Makita 3154195e54aSToshiaki Makita result->xdp_packets = 0; 3164195e54aSToshiaki Makita result->xdp_bytes = 0; 3174195e54aSToshiaki Makita result->xdp_drops = 0; 3184195e54aSToshiaki Makita for (i = 0; i < dev->num_rx_queues; i++) { 3194195e54aSToshiaki Makita struct veth_rq_stats *stats = &priv->rq[i].stats; 3204195e54aSToshiaki Makita u64 packets, bytes, drops; 3214195e54aSToshiaki Makita unsigned int start; 3224195e54aSToshiaki Makita 3234195e54aSToshiaki Makita do { 3244195e54aSToshiaki Makita start = u64_stats_fetch_begin_irq(&stats->syncp); 3254195e54aSToshiaki Makita packets = stats->xdp_packets; 3264195e54aSToshiaki Makita bytes = stats->xdp_bytes; 3274195e54aSToshiaki Makita drops = stats->xdp_drops; 3284195e54aSToshiaki Makita } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 3294195e54aSToshiaki Makita result->xdp_packets += packets; 3304195e54aSToshiaki Makita result->xdp_bytes += bytes; 3314195e54aSToshiaki Makita result->xdp_drops += drops; 3324195e54aSToshiaki Makita } 3334195e54aSToshiaki Makita } 3344195e54aSToshiaki Makita 335bc1f4470Sstephen hemminger static void veth_get_stats64(struct net_device *dev, 3362681128fSEric Dumazet struct rtnl_link_stats64 *tot) 3372681128fSEric Dumazet { 3382681128fSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 339d0e2c55eSEric Dumazet struct net_device *peer; 3404195e54aSToshiaki Makita struct veth_rq_stats rx; 3414195e54aSToshiaki Makita struct pcpu_lstats tx; 3422681128fSEric Dumazet 3434195e54aSToshiaki Makita tot->tx_dropped = veth_stats_tx(&tx, dev); 3444195e54aSToshiaki Makita tot->tx_bytes = tx.bytes; 3454195e54aSToshiaki Makita tot->tx_packets = tx.packets; 3464195e54aSToshiaki Makita 3474195e54aSToshiaki Makita veth_stats_rx(&rx, dev); 3484195e54aSToshiaki Makita tot->rx_dropped = rx.xdp_drops; 3494195e54aSToshiaki Makita tot->rx_bytes = rx.xdp_bytes; 3504195e54aSToshiaki Makita tot->rx_packets = rx.xdp_packets; 3512681128fSEric Dumazet 352d0e2c55eSEric Dumazet rcu_read_lock(); 353d0e2c55eSEric Dumazet peer = rcu_dereference(priv->peer); 354d0e2c55eSEric Dumazet if (peer) { 3554195e54aSToshiaki Makita tot->rx_dropped += veth_stats_tx(&tx, peer); 3564195e54aSToshiaki Makita tot->rx_bytes += tx.bytes; 3574195e54aSToshiaki Makita tot->rx_packets += tx.packets; 3584195e54aSToshiaki Makita 3594195e54aSToshiaki Makita veth_stats_rx(&rx, peer); 3604195e54aSToshiaki Makita tot->tx_bytes += rx.xdp_bytes; 3614195e54aSToshiaki Makita tot->tx_packets += rx.xdp_packets; 362d0e2c55eSEric Dumazet } 363d0e2c55eSEric Dumazet rcu_read_unlock(); 364e314dbdcSPavel Emelyanov } 365e314dbdcSPavel Emelyanov 3665c70ef85SGao feng /* fake multicast ability */ 3675c70ef85SGao feng static void veth_set_multicast_list(struct net_device *dev) 3685c70ef85SGao feng { 3695c70ef85SGao feng } 3705c70ef85SGao feng 371948d4f21SToshiaki Makita static struct sk_buff *veth_build_skb(void *head, int headroom, int len, 372948d4f21SToshiaki Makita int buflen) 373948d4f21SToshiaki Makita { 374948d4f21SToshiaki Makita struct sk_buff *skb; 375948d4f21SToshiaki Makita 376948d4f21SToshiaki Makita if (!buflen) { 377948d4f21SToshiaki Makita buflen = SKB_DATA_ALIGN(headroom + len) + 378948d4f21SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 379948d4f21SToshiaki Makita } 380948d4f21SToshiaki Makita skb = build_skb(head, buflen); 381948d4f21SToshiaki Makita if (!skb) 382948d4f21SToshiaki Makita return NULL; 383948d4f21SToshiaki Makita 384948d4f21SToshiaki Makita skb_reserve(skb, headroom); 385948d4f21SToshiaki Makita skb_put(skb, len); 386948d4f21SToshiaki Makita 387948d4f21SToshiaki Makita return skb; 388948d4f21SToshiaki Makita } 389948d4f21SToshiaki Makita 390638264dcSToshiaki Makita static int veth_select_rxq(struct net_device *dev) 391638264dcSToshiaki Makita { 392638264dcSToshiaki Makita return smp_processor_id() % dev->real_num_rx_queues; 393638264dcSToshiaki Makita } 394638264dcSToshiaki Makita 395af87a3aaSToshiaki Makita static int veth_xdp_xmit(struct net_device *dev, int n, 396af87a3aaSToshiaki Makita struct xdp_frame **frames, u32 flags) 397af87a3aaSToshiaki Makita { 398af87a3aaSToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 399af87a3aaSToshiaki Makita struct net_device *rcv; 4002131479dSToshiaki Makita int i, ret, drops = n; 401af87a3aaSToshiaki Makita unsigned int max_len; 402638264dcSToshiaki Makita struct veth_rq *rq; 403af87a3aaSToshiaki Makita 4042131479dSToshiaki Makita if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { 4052131479dSToshiaki Makita ret = -EINVAL; 4062131479dSToshiaki Makita goto drop; 4072131479dSToshiaki Makita } 408af87a3aaSToshiaki Makita 409af87a3aaSToshiaki Makita rcv = rcu_dereference(priv->peer); 4102131479dSToshiaki Makita if (unlikely(!rcv)) { 4112131479dSToshiaki Makita ret = -ENXIO; 4122131479dSToshiaki Makita goto drop; 4132131479dSToshiaki Makita } 414af87a3aaSToshiaki Makita 415af87a3aaSToshiaki Makita rcv_priv = netdev_priv(rcv); 416638264dcSToshiaki Makita rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 417af87a3aaSToshiaki Makita /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive 418af87a3aaSToshiaki Makita * side. This means an XDP program is loaded on the peer and the peer 419af87a3aaSToshiaki Makita * device is up. 420af87a3aaSToshiaki Makita */ 4212131479dSToshiaki Makita if (!rcu_access_pointer(rq->xdp_prog)) { 4222131479dSToshiaki Makita ret = -ENXIO; 4232131479dSToshiaki Makita goto drop; 4242131479dSToshiaki Makita } 425af87a3aaSToshiaki Makita 4262131479dSToshiaki Makita drops = 0; 427af87a3aaSToshiaki Makita max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 428af87a3aaSToshiaki Makita 429638264dcSToshiaki Makita spin_lock(&rq->xdp_ring.producer_lock); 430af87a3aaSToshiaki Makita for (i = 0; i < n; i++) { 431af87a3aaSToshiaki Makita struct xdp_frame *frame = frames[i]; 432af87a3aaSToshiaki Makita void *ptr = veth_xdp_to_ptr(frame); 433af87a3aaSToshiaki Makita 434af87a3aaSToshiaki Makita if (unlikely(frame->len > max_len || 435638264dcSToshiaki Makita __ptr_ring_produce(&rq->xdp_ring, ptr))) { 436af87a3aaSToshiaki Makita xdp_return_frame_rx_napi(frame); 437af87a3aaSToshiaki Makita drops++; 438af87a3aaSToshiaki Makita } 439af87a3aaSToshiaki Makita } 440638264dcSToshiaki Makita spin_unlock(&rq->xdp_ring.producer_lock); 441af87a3aaSToshiaki Makita 442af87a3aaSToshiaki Makita if (flags & XDP_XMIT_FLUSH) 443638264dcSToshiaki Makita __veth_xdp_flush(rq); 444af87a3aaSToshiaki Makita 4452131479dSToshiaki Makita if (likely(!drops)) 4462131479dSToshiaki Makita return n; 4472131479dSToshiaki Makita 4482131479dSToshiaki Makita ret = n - drops; 4492131479dSToshiaki Makita drop: 4502131479dSToshiaki Makita atomic64_add(drops, &priv->dropped); 4512131479dSToshiaki Makita 4522131479dSToshiaki Makita return ret; 453af87a3aaSToshiaki Makita } 454af87a3aaSToshiaki Makita 455d1396004SToshiaki Makita static void veth_xdp_flush(struct net_device *dev) 456d1396004SToshiaki Makita { 457d1396004SToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 458d1396004SToshiaki Makita struct net_device *rcv; 459638264dcSToshiaki Makita struct veth_rq *rq; 460d1396004SToshiaki Makita 461d1396004SToshiaki Makita rcu_read_lock(); 462d1396004SToshiaki Makita rcv = rcu_dereference(priv->peer); 463d1396004SToshiaki Makita if (unlikely(!rcv)) 464d1396004SToshiaki Makita goto out; 465d1396004SToshiaki Makita 466d1396004SToshiaki Makita rcv_priv = netdev_priv(rcv); 467638264dcSToshiaki Makita rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 468d1396004SToshiaki Makita /* xdp_ring is initialized on receive side? */ 469638264dcSToshiaki Makita if (unlikely(!rcu_access_pointer(rq->xdp_prog))) 470d1396004SToshiaki Makita goto out; 471d1396004SToshiaki Makita 472638264dcSToshiaki Makita __veth_xdp_flush(rq); 473d1396004SToshiaki Makita out: 474d1396004SToshiaki Makita rcu_read_unlock(); 475d1396004SToshiaki Makita } 476d1396004SToshiaki Makita 477d1396004SToshiaki Makita static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) 478d1396004SToshiaki Makita { 479d1396004SToshiaki Makita struct xdp_frame *frame = convert_to_xdp_frame(xdp); 480d1396004SToshiaki Makita 481d1396004SToshiaki Makita if (unlikely(!frame)) 482d1396004SToshiaki Makita return -EOVERFLOW; 483d1396004SToshiaki Makita 484d1396004SToshiaki Makita return veth_xdp_xmit(dev, 1, &frame, 0); 485d1396004SToshiaki Makita } 486d1396004SToshiaki Makita 487638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, 488d1396004SToshiaki Makita struct xdp_frame *frame, 489d1396004SToshiaki Makita unsigned int *xdp_xmit) 4909fc8d518SToshiaki Makita { 4919fc8d518SToshiaki Makita void *hard_start = frame->data - frame->headroom; 4929fc8d518SToshiaki Makita void *head = hard_start - sizeof(struct xdp_frame); 4939fc8d518SToshiaki Makita int len = frame->len, delta = 0; 494d1396004SToshiaki Makita struct xdp_frame orig_frame; 4959fc8d518SToshiaki Makita struct bpf_prog *xdp_prog; 4969fc8d518SToshiaki Makita unsigned int headroom; 4979fc8d518SToshiaki Makita struct sk_buff *skb; 4989fc8d518SToshiaki Makita 4999fc8d518SToshiaki Makita rcu_read_lock(); 500638264dcSToshiaki Makita xdp_prog = rcu_dereference(rq->xdp_prog); 5019fc8d518SToshiaki Makita if (likely(xdp_prog)) { 5029fc8d518SToshiaki Makita struct xdp_buff xdp; 5039fc8d518SToshiaki Makita u32 act; 5049fc8d518SToshiaki Makita 5059fc8d518SToshiaki Makita xdp.data_hard_start = hard_start; 5069fc8d518SToshiaki Makita xdp.data = frame->data; 5079fc8d518SToshiaki Makita xdp.data_end = frame->data + frame->len; 5089fc8d518SToshiaki Makita xdp.data_meta = frame->data - frame->metasize; 509638264dcSToshiaki Makita xdp.rxq = &rq->xdp_rxq; 5109fc8d518SToshiaki Makita 5119fc8d518SToshiaki Makita act = bpf_prog_run_xdp(xdp_prog, &xdp); 5129fc8d518SToshiaki Makita 5139fc8d518SToshiaki Makita switch (act) { 5149fc8d518SToshiaki Makita case XDP_PASS: 5159fc8d518SToshiaki Makita delta = frame->data - xdp.data; 5169fc8d518SToshiaki Makita len = xdp.data_end - xdp.data; 5179fc8d518SToshiaki Makita break; 518d1396004SToshiaki Makita case XDP_TX: 519d1396004SToshiaki Makita orig_frame = *frame; 520d1396004SToshiaki Makita xdp.data_hard_start = head; 521d1396004SToshiaki Makita xdp.rxq->mem = frame->mem; 522638264dcSToshiaki Makita if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) { 523638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 524d1396004SToshiaki Makita frame = &orig_frame; 525d1396004SToshiaki Makita goto err_xdp; 526d1396004SToshiaki Makita } 527d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_TX; 528d1396004SToshiaki Makita rcu_read_unlock(); 529d1396004SToshiaki Makita goto xdp_xmit; 530d1396004SToshiaki Makita case XDP_REDIRECT: 531d1396004SToshiaki Makita orig_frame = *frame; 532d1396004SToshiaki Makita xdp.data_hard_start = head; 533d1396004SToshiaki Makita xdp.rxq->mem = frame->mem; 534638264dcSToshiaki Makita if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { 535d1396004SToshiaki Makita frame = &orig_frame; 536d1396004SToshiaki Makita goto err_xdp; 537d1396004SToshiaki Makita } 538d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_REDIR; 539d1396004SToshiaki Makita rcu_read_unlock(); 540d1396004SToshiaki Makita goto xdp_xmit; 5419fc8d518SToshiaki Makita default: 5429fc8d518SToshiaki Makita bpf_warn_invalid_xdp_action(act); 543a9b6d9efSGustavo A. R. Silva /* fall through */ 5449fc8d518SToshiaki Makita case XDP_ABORTED: 545638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 546a9b6d9efSGustavo A. R. Silva /* fall through */ 5479fc8d518SToshiaki Makita case XDP_DROP: 5489fc8d518SToshiaki Makita goto err_xdp; 5499fc8d518SToshiaki Makita } 5509fc8d518SToshiaki Makita } 5519fc8d518SToshiaki Makita rcu_read_unlock(); 5529fc8d518SToshiaki Makita 5539fc8d518SToshiaki Makita headroom = sizeof(struct xdp_frame) + frame->headroom - delta; 5549fc8d518SToshiaki Makita skb = veth_build_skb(head, headroom, len, 0); 5559fc8d518SToshiaki Makita if (!skb) { 5569fc8d518SToshiaki Makita xdp_return_frame(frame); 5579fc8d518SToshiaki Makita goto err; 5589fc8d518SToshiaki Makita } 5599fc8d518SToshiaki Makita 5609fc8d518SToshiaki Makita xdp_scrub_frame(frame); 561638264dcSToshiaki Makita skb->protocol = eth_type_trans(skb, rq->dev); 5629fc8d518SToshiaki Makita err: 5639fc8d518SToshiaki Makita return skb; 5649fc8d518SToshiaki Makita err_xdp: 5659fc8d518SToshiaki Makita rcu_read_unlock(); 5669fc8d518SToshiaki Makita xdp_return_frame(frame); 567d1396004SToshiaki Makita xdp_xmit: 5689fc8d518SToshiaki Makita return NULL; 5699fc8d518SToshiaki Makita } 5709fc8d518SToshiaki Makita 571638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, 572d1396004SToshiaki Makita unsigned int *xdp_xmit) 573948d4f21SToshiaki Makita { 574948d4f21SToshiaki Makita u32 pktlen, headroom, act, metalen; 575948d4f21SToshiaki Makita void *orig_data, *orig_data_end; 576948d4f21SToshiaki Makita struct bpf_prog *xdp_prog; 577948d4f21SToshiaki Makita int mac_len, delta, off; 578948d4f21SToshiaki Makita struct xdp_buff xdp; 579948d4f21SToshiaki Makita 5804bf9ffa0SToshiaki Makita skb_orphan(skb); 5814bf9ffa0SToshiaki Makita 582948d4f21SToshiaki Makita rcu_read_lock(); 583638264dcSToshiaki Makita xdp_prog = rcu_dereference(rq->xdp_prog); 584948d4f21SToshiaki Makita if (unlikely(!xdp_prog)) { 585948d4f21SToshiaki Makita rcu_read_unlock(); 586948d4f21SToshiaki Makita goto out; 587948d4f21SToshiaki Makita } 588948d4f21SToshiaki Makita 589948d4f21SToshiaki Makita mac_len = skb->data - skb_mac_header(skb); 590948d4f21SToshiaki Makita pktlen = skb->len + mac_len; 591948d4f21SToshiaki Makita headroom = skb_headroom(skb) - mac_len; 592948d4f21SToshiaki Makita 593948d4f21SToshiaki Makita if (skb_shared(skb) || skb_head_is_locked(skb) || 594948d4f21SToshiaki Makita skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { 595948d4f21SToshiaki Makita struct sk_buff *nskb; 596948d4f21SToshiaki Makita int size, head_off; 597948d4f21SToshiaki Makita void *head, *start; 598948d4f21SToshiaki Makita struct page *page; 599948d4f21SToshiaki Makita 600948d4f21SToshiaki Makita size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) + 601948d4f21SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 602948d4f21SToshiaki Makita if (size > PAGE_SIZE) 603948d4f21SToshiaki Makita goto drop; 604948d4f21SToshiaki Makita 605948d4f21SToshiaki Makita page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 606948d4f21SToshiaki Makita if (!page) 607948d4f21SToshiaki Makita goto drop; 608948d4f21SToshiaki Makita 609948d4f21SToshiaki Makita head = page_address(page); 610948d4f21SToshiaki Makita start = head + VETH_XDP_HEADROOM; 611948d4f21SToshiaki Makita if (skb_copy_bits(skb, -mac_len, start, pktlen)) { 612948d4f21SToshiaki Makita page_frag_free(head); 613948d4f21SToshiaki Makita goto drop; 614948d4f21SToshiaki Makita } 615948d4f21SToshiaki Makita 616948d4f21SToshiaki Makita nskb = veth_build_skb(head, 617948d4f21SToshiaki Makita VETH_XDP_HEADROOM + mac_len, skb->len, 618948d4f21SToshiaki Makita PAGE_SIZE); 619948d4f21SToshiaki Makita if (!nskb) { 620948d4f21SToshiaki Makita page_frag_free(head); 621948d4f21SToshiaki Makita goto drop; 622948d4f21SToshiaki Makita } 623948d4f21SToshiaki Makita 624948d4f21SToshiaki Makita skb_copy_header(nskb, skb); 625948d4f21SToshiaki Makita head_off = skb_headroom(nskb) - skb_headroom(skb); 626948d4f21SToshiaki Makita skb_headers_offset_update(nskb, head_off); 627948d4f21SToshiaki Makita consume_skb(skb); 628948d4f21SToshiaki Makita skb = nskb; 629948d4f21SToshiaki Makita } 630948d4f21SToshiaki Makita 631948d4f21SToshiaki Makita xdp.data_hard_start = skb->head; 632948d4f21SToshiaki Makita xdp.data = skb_mac_header(skb); 633948d4f21SToshiaki Makita xdp.data_end = xdp.data + pktlen; 634948d4f21SToshiaki Makita xdp.data_meta = xdp.data; 635638264dcSToshiaki Makita xdp.rxq = &rq->xdp_rxq; 636948d4f21SToshiaki Makita orig_data = xdp.data; 637948d4f21SToshiaki Makita orig_data_end = xdp.data_end; 638948d4f21SToshiaki Makita 639948d4f21SToshiaki Makita act = bpf_prog_run_xdp(xdp_prog, &xdp); 640948d4f21SToshiaki Makita 641948d4f21SToshiaki Makita switch (act) { 642948d4f21SToshiaki Makita case XDP_PASS: 643948d4f21SToshiaki Makita break; 644d1396004SToshiaki Makita case XDP_TX: 645d1396004SToshiaki Makita get_page(virt_to_page(xdp.data)); 646d1396004SToshiaki Makita consume_skb(skb); 647638264dcSToshiaki Makita xdp.rxq->mem = rq->xdp_mem; 648638264dcSToshiaki Makita if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) { 649638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 650d1396004SToshiaki Makita goto err_xdp; 651d1396004SToshiaki Makita } 652d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_TX; 653d1396004SToshiaki Makita rcu_read_unlock(); 654d1396004SToshiaki Makita goto xdp_xmit; 655d1396004SToshiaki Makita case XDP_REDIRECT: 656d1396004SToshiaki Makita get_page(virt_to_page(xdp.data)); 657d1396004SToshiaki Makita consume_skb(skb); 658638264dcSToshiaki Makita xdp.rxq->mem = rq->xdp_mem; 659638264dcSToshiaki Makita if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) 660d1396004SToshiaki Makita goto err_xdp; 661d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_REDIR; 662d1396004SToshiaki Makita rcu_read_unlock(); 663d1396004SToshiaki Makita goto xdp_xmit; 664948d4f21SToshiaki Makita default: 665948d4f21SToshiaki Makita bpf_warn_invalid_xdp_action(act); 666a9b6d9efSGustavo A. R. Silva /* fall through */ 667948d4f21SToshiaki Makita case XDP_ABORTED: 668638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 669a9b6d9efSGustavo A. R. Silva /* fall through */ 670948d4f21SToshiaki Makita case XDP_DROP: 671948d4f21SToshiaki Makita goto drop; 672948d4f21SToshiaki Makita } 673948d4f21SToshiaki Makita rcu_read_unlock(); 674948d4f21SToshiaki Makita 675948d4f21SToshiaki Makita delta = orig_data - xdp.data; 676948d4f21SToshiaki Makita off = mac_len + delta; 677948d4f21SToshiaki Makita if (off > 0) 678948d4f21SToshiaki Makita __skb_push(skb, off); 679948d4f21SToshiaki Makita else if (off < 0) 680948d4f21SToshiaki Makita __skb_pull(skb, -off); 681948d4f21SToshiaki Makita skb->mac_header -= delta; 682948d4f21SToshiaki Makita off = xdp.data_end - orig_data_end; 683948d4f21SToshiaki Makita if (off != 0) 684948d4f21SToshiaki Makita __skb_put(skb, off); 685638264dcSToshiaki Makita skb->protocol = eth_type_trans(skb, rq->dev); 686948d4f21SToshiaki Makita 687948d4f21SToshiaki Makita metalen = xdp.data - xdp.data_meta; 688948d4f21SToshiaki Makita if (metalen) 689948d4f21SToshiaki Makita skb_metadata_set(skb, metalen); 690948d4f21SToshiaki Makita out: 691948d4f21SToshiaki Makita return skb; 692948d4f21SToshiaki Makita drop: 693948d4f21SToshiaki Makita rcu_read_unlock(); 694948d4f21SToshiaki Makita kfree_skb(skb); 695948d4f21SToshiaki Makita return NULL; 696d1396004SToshiaki Makita err_xdp: 697d1396004SToshiaki Makita rcu_read_unlock(); 698d1396004SToshiaki Makita page_frag_free(xdp.data); 699d1396004SToshiaki Makita xdp_xmit: 700d1396004SToshiaki Makita return NULL; 701948d4f21SToshiaki Makita } 702948d4f21SToshiaki Makita 703638264dcSToshiaki Makita static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit) 704948d4f21SToshiaki Makita { 7054195e54aSToshiaki Makita int i, done = 0, drops = 0, bytes = 0; 706948d4f21SToshiaki Makita 707948d4f21SToshiaki Makita for (i = 0; i < budget; i++) { 708638264dcSToshiaki Makita void *ptr = __ptr_ring_consume(&rq->xdp_ring); 7094195e54aSToshiaki Makita unsigned int xdp_xmit_one = 0; 7109fc8d518SToshiaki Makita struct sk_buff *skb; 711948d4f21SToshiaki Makita 7129fc8d518SToshiaki Makita if (!ptr) 713948d4f21SToshiaki Makita break; 714948d4f21SToshiaki Makita 715d1396004SToshiaki Makita if (veth_is_xdp_frame(ptr)) { 7164195e54aSToshiaki Makita struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 7174195e54aSToshiaki Makita 7184195e54aSToshiaki Makita bytes += frame->len; 7194195e54aSToshiaki Makita skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one); 720d1396004SToshiaki Makita } else { 7214195e54aSToshiaki Makita skb = ptr; 7224195e54aSToshiaki Makita bytes += skb->len; 7234195e54aSToshiaki Makita skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one); 724d1396004SToshiaki Makita } 7254195e54aSToshiaki Makita *xdp_xmit |= xdp_xmit_one; 726948d4f21SToshiaki Makita 727948d4f21SToshiaki Makita if (skb) 728638264dcSToshiaki Makita napi_gro_receive(&rq->xdp_napi, skb); 7294195e54aSToshiaki Makita else if (!xdp_xmit_one) 7304195e54aSToshiaki Makita drops++; 731948d4f21SToshiaki Makita 732948d4f21SToshiaki Makita done++; 733948d4f21SToshiaki Makita } 734948d4f21SToshiaki Makita 7354195e54aSToshiaki Makita u64_stats_update_begin(&rq->stats.syncp); 7364195e54aSToshiaki Makita rq->stats.xdp_packets += done; 7374195e54aSToshiaki Makita rq->stats.xdp_bytes += bytes; 7384195e54aSToshiaki Makita rq->stats.xdp_drops += drops; 7394195e54aSToshiaki Makita u64_stats_update_end(&rq->stats.syncp); 7404195e54aSToshiaki Makita 741948d4f21SToshiaki Makita return done; 742948d4f21SToshiaki Makita } 743948d4f21SToshiaki Makita 744948d4f21SToshiaki Makita static int veth_poll(struct napi_struct *napi, int budget) 745948d4f21SToshiaki Makita { 746638264dcSToshiaki Makita struct veth_rq *rq = 747638264dcSToshiaki Makita container_of(napi, struct veth_rq, xdp_napi); 748d1396004SToshiaki Makita unsigned int xdp_xmit = 0; 749948d4f21SToshiaki Makita int done; 750948d4f21SToshiaki Makita 751d1396004SToshiaki Makita xdp_set_return_frame_no_direct(); 752638264dcSToshiaki Makita done = veth_xdp_rcv(rq, budget, &xdp_xmit); 753948d4f21SToshiaki Makita 754948d4f21SToshiaki Makita if (done < budget && napi_complete_done(napi, done)) { 755948d4f21SToshiaki Makita /* Write rx_notify_masked before reading ptr_ring */ 756638264dcSToshiaki Makita smp_store_mb(rq->rx_notify_masked, false); 757638264dcSToshiaki Makita if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 758638264dcSToshiaki Makita rq->rx_notify_masked = true; 759638264dcSToshiaki Makita napi_schedule(&rq->xdp_napi); 760948d4f21SToshiaki Makita } 761948d4f21SToshiaki Makita } 762948d4f21SToshiaki Makita 763d1396004SToshiaki Makita if (xdp_xmit & VETH_XDP_TX) 764638264dcSToshiaki Makita veth_xdp_flush(rq->dev); 765d1396004SToshiaki Makita if (xdp_xmit & VETH_XDP_REDIR) 766d1396004SToshiaki Makita xdp_do_flush_map(); 767d1396004SToshiaki Makita xdp_clear_return_frame_no_direct(); 768d1396004SToshiaki Makita 769948d4f21SToshiaki Makita return done; 770948d4f21SToshiaki Makita } 771948d4f21SToshiaki Makita 772948d4f21SToshiaki Makita static int veth_napi_add(struct net_device *dev) 773948d4f21SToshiaki Makita { 774948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 775638264dcSToshiaki Makita int err, i; 776948d4f21SToshiaki Makita 777638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 778638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 779638264dcSToshiaki Makita 780638264dcSToshiaki Makita err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 781948d4f21SToshiaki Makita if (err) 782638264dcSToshiaki Makita goto err_xdp_ring; 783638264dcSToshiaki Makita } 784948d4f21SToshiaki Makita 785638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 786638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 787638264dcSToshiaki Makita 788638264dcSToshiaki Makita netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); 789638264dcSToshiaki Makita napi_enable(&rq->xdp_napi); 790638264dcSToshiaki Makita } 791948d4f21SToshiaki Makita 792948d4f21SToshiaki Makita return 0; 793638264dcSToshiaki Makita err_xdp_ring: 794638264dcSToshiaki Makita for (i--; i >= 0; i--) 795638264dcSToshiaki Makita ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 796638264dcSToshiaki Makita 797638264dcSToshiaki Makita return err; 798948d4f21SToshiaki Makita } 799948d4f21SToshiaki Makita 800948d4f21SToshiaki Makita static void veth_napi_del(struct net_device *dev) 801948d4f21SToshiaki Makita { 802948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 803638264dcSToshiaki Makita int i; 804948d4f21SToshiaki Makita 805638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 806638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 807638264dcSToshiaki Makita 808638264dcSToshiaki Makita napi_disable(&rq->xdp_napi); 809638264dcSToshiaki Makita napi_hash_del(&rq->xdp_napi); 810638264dcSToshiaki Makita } 811638264dcSToshiaki Makita synchronize_net(); 812638264dcSToshiaki Makita 813638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 814638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 815638264dcSToshiaki Makita 816638264dcSToshiaki Makita netif_napi_del(&rq->xdp_napi); 817638264dcSToshiaki Makita rq->rx_notify_masked = false; 818638264dcSToshiaki Makita ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 819638264dcSToshiaki Makita } 820948d4f21SToshiaki Makita } 821948d4f21SToshiaki Makita 822948d4f21SToshiaki Makita static int veth_enable_xdp(struct net_device *dev) 823948d4f21SToshiaki Makita { 824948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 825638264dcSToshiaki Makita int err, i; 826948d4f21SToshiaki Makita 827638264dcSToshiaki Makita if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 828638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 829638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 830948d4f21SToshiaki Makita 831638264dcSToshiaki Makita err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i); 832948d4f21SToshiaki Makita if (err < 0) 833638264dcSToshiaki Makita goto err_rxq_reg; 834638264dcSToshiaki Makita 835638264dcSToshiaki Makita err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 836638264dcSToshiaki Makita MEM_TYPE_PAGE_SHARED, 837638264dcSToshiaki Makita NULL); 838638264dcSToshiaki Makita if (err < 0) 839638264dcSToshiaki Makita goto err_reg_mem; 840638264dcSToshiaki Makita 841638264dcSToshiaki Makita /* Save original mem info as it can be overwritten */ 842638264dcSToshiaki Makita rq->xdp_mem = rq->xdp_rxq.mem; 843638264dcSToshiaki Makita } 844948d4f21SToshiaki Makita 845948d4f21SToshiaki Makita err = veth_napi_add(dev); 846948d4f21SToshiaki Makita if (err) 847638264dcSToshiaki Makita goto err_rxq_reg; 848948d4f21SToshiaki Makita } 849948d4f21SToshiaki Makita 850638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) 851638264dcSToshiaki Makita rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 852948d4f21SToshiaki Makita 853948d4f21SToshiaki Makita return 0; 854638264dcSToshiaki Makita err_reg_mem: 855638264dcSToshiaki Makita xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 856638264dcSToshiaki Makita err_rxq_reg: 857638264dcSToshiaki Makita for (i--; i >= 0; i--) 858638264dcSToshiaki Makita xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 859948d4f21SToshiaki Makita 860948d4f21SToshiaki Makita return err; 861948d4f21SToshiaki Makita } 862948d4f21SToshiaki Makita 863948d4f21SToshiaki Makita static void veth_disable_xdp(struct net_device *dev) 864948d4f21SToshiaki Makita { 865948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 866638264dcSToshiaki Makita int i; 867948d4f21SToshiaki Makita 868638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) 869638264dcSToshiaki Makita rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 870948d4f21SToshiaki Makita veth_napi_del(dev); 871638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 872638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 873638264dcSToshiaki Makita 874638264dcSToshiaki Makita rq->xdp_rxq.mem = rq->xdp_mem; 875638264dcSToshiaki Makita xdp_rxq_info_unreg(&rq->xdp_rxq); 876638264dcSToshiaki Makita } 877948d4f21SToshiaki Makita } 878948d4f21SToshiaki Makita 879e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev) 880e314dbdcSPavel Emelyanov { 881d0e2c55eSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 882d0e2c55eSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 883948d4f21SToshiaki Makita int err; 884e314dbdcSPavel Emelyanov 885d0e2c55eSEric Dumazet if (!peer) 886e314dbdcSPavel Emelyanov return -ENOTCONN; 887e314dbdcSPavel Emelyanov 888948d4f21SToshiaki Makita if (priv->_xdp_prog) { 889948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 890948d4f21SToshiaki Makita if (err) 891948d4f21SToshiaki Makita return err; 892948d4f21SToshiaki Makita } 893948d4f21SToshiaki Makita 894d0e2c55eSEric Dumazet if (peer->flags & IFF_UP) { 895e314dbdcSPavel Emelyanov netif_carrier_on(dev); 896d0e2c55eSEric Dumazet netif_carrier_on(peer); 897e314dbdcSPavel Emelyanov } 898948d4f21SToshiaki Makita 899e314dbdcSPavel Emelyanov return 0; 900e314dbdcSPavel Emelyanov } 901e314dbdcSPavel Emelyanov 9022cf48a10SEric W. Biederman static int veth_close(struct net_device *dev) 9032cf48a10SEric W. Biederman { 9042cf48a10SEric W. Biederman struct veth_priv *priv = netdev_priv(dev); 9052efd32eeSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 9062cf48a10SEric W. Biederman 9072cf48a10SEric W. Biederman netif_carrier_off(dev); 9082efd32eeSEric Dumazet if (peer) 9092efd32eeSEric Dumazet netif_carrier_off(peer); 9102cf48a10SEric W. Biederman 911948d4f21SToshiaki Makita if (priv->_xdp_prog) 912948d4f21SToshiaki Makita veth_disable_xdp(dev); 913948d4f21SToshiaki Makita 9142cf48a10SEric W. Biederman return 0; 9152cf48a10SEric W. Biederman } 9162cf48a10SEric W. Biederman 91791572088SJarod Wilson static int is_valid_veth_mtu(int mtu) 91838d40815SEric Biederman { 91991572088SJarod Wilson return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 92038d40815SEric Biederman } 92138d40815SEric Biederman 9227797b93bSToshiaki Makita static int veth_alloc_queues(struct net_device *dev) 9237797b93bSToshiaki Makita { 9247797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 9257797b93bSToshiaki Makita int i; 9267797b93bSToshiaki Makita 9277797b93bSToshiaki Makita priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL); 9287797b93bSToshiaki Makita if (!priv->rq) 9297797b93bSToshiaki Makita return -ENOMEM; 9307797b93bSToshiaki Makita 9314195e54aSToshiaki Makita for (i = 0; i < dev->num_rx_queues; i++) { 9327797b93bSToshiaki Makita priv->rq[i].dev = dev; 9334195e54aSToshiaki Makita u64_stats_init(&priv->rq[i].stats.syncp); 9344195e54aSToshiaki Makita } 9357797b93bSToshiaki Makita 9367797b93bSToshiaki Makita return 0; 9377797b93bSToshiaki Makita } 9387797b93bSToshiaki Makita 9397797b93bSToshiaki Makita static void veth_free_queues(struct net_device *dev) 9407797b93bSToshiaki Makita { 9417797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 9427797b93bSToshiaki Makita 9437797b93bSToshiaki Makita kfree(priv->rq); 9447797b93bSToshiaki Makita } 9457797b93bSToshiaki Makita 946e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev) 947e314dbdcSPavel Emelyanov { 9487797b93bSToshiaki Makita int err; 9497797b93bSToshiaki Makita 95014d73416SLi RongQing dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 95114d73416SLi RongQing if (!dev->lstats) 952e314dbdcSPavel Emelyanov return -ENOMEM; 9537797b93bSToshiaki Makita 9547797b93bSToshiaki Makita err = veth_alloc_queues(dev); 9557797b93bSToshiaki Makita if (err) { 95614d73416SLi RongQing free_percpu(dev->lstats); 9577797b93bSToshiaki Makita return err; 9587797b93bSToshiaki Makita } 9597797b93bSToshiaki Makita 960e314dbdcSPavel Emelyanov return 0; 961e314dbdcSPavel Emelyanov } 962e314dbdcSPavel Emelyanov 96311687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev) 96411687a10SDavid S. Miller { 9657797b93bSToshiaki Makita veth_free_queues(dev); 96614d73416SLi RongQing free_percpu(dev->lstats); 96711687a10SDavid S. Miller } 96811687a10SDavid S. Miller 969bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 970bb446c19SWANG Cong static void veth_poll_controller(struct net_device *dev) 971bb446c19SWANG Cong { 972bb446c19SWANG Cong /* veth only receives frames when its peer sends one 973948d4f21SToshiaki Makita * Since it has nothing to do with disabling irqs, we are guaranteed 974bb446c19SWANG Cong * never to have pending data when we poll for it so 975bb446c19SWANG Cong * there is nothing to do here. 976bb446c19SWANG Cong * 977bb446c19SWANG Cong * We need this though so netpoll recognizes us as an interface that 978bb446c19SWANG Cong * supports polling, which enables bridge devices in virt setups to 979bb446c19SWANG Cong * still use netconsole 980bb446c19SWANG Cong */ 981bb446c19SWANG Cong } 982bb446c19SWANG Cong #endif /* CONFIG_NET_POLL_CONTROLLER */ 983bb446c19SWANG Cong 984a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev) 985a45253bfSNicolas Dichtel { 986a45253bfSNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 987a45253bfSNicolas Dichtel struct net_device *peer; 988a45253bfSNicolas Dichtel int iflink; 989a45253bfSNicolas Dichtel 990a45253bfSNicolas Dichtel rcu_read_lock(); 991a45253bfSNicolas Dichtel peer = rcu_dereference(priv->peer); 992a45253bfSNicolas Dichtel iflink = peer ? peer->ifindex : 0; 993a45253bfSNicolas Dichtel rcu_read_unlock(); 994a45253bfSNicolas Dichtel 995a45253bfSNicolas Dichtel return iflink; 996a45253bfSNicolas Dichtel } 997a45253bfSNicolas Dichtel 998dc224822SToshiaki Makita static netdev_features_t veth_fix_features(struct net_device *dev, 999dc224822SToshiaki Makita netdev_features_t features) 1000dc224822SToshiaki Makita { 1001dc224822SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 1002dc224822SToshiaki Makita struct net_device *peer; 1003dc224822SToshiaki Makita 1004dc224822SToshiaki Makita peer = rtnl_dereference(priv->peer); 1005dc224822SToshiaki Makita if (peer) { 1006dc224822SToshiaki Makita struct veth_priv *peer_priv = netdev_priv(peer); 1007dc224822SToshiaki Makita 1008dc224822SToshiaki Makita if (peer_priv->_xdp_prog) 1009dc224822SToshiaki Makita features &= ~NETIF_F_GSO_SOFTWARE; 1010dc224822SToshiaki Makita } 1011dc224822SToshiaki Makita 1012dc224822SToshiaki Makita return features; 1013dc224822SToshiaki Makita } 1014dc224822SToshiaki Makita 1015163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1016163e5292SPaolo Abeni { 1017163e5292SPaolo Abeni struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1018163e5292SPaolo Abeni struct net_device *peer; 1019163e5292SPaolo Abeni 1020163e5292SPaolo Abeni if (new_hr < 0) 1021163e5292SPaolo Abeni new_hr = 0; 1022163e5292SPaolo Abeni 1023163e5292SPaolo Abeni rcu_read_lock(); 1024163e5292SPaolo Abeni peer = rcu_dereference(priv->peer); 1025163e5292SPaolo Abeni if (unlikely(!peer)) 1026163e5292SPaolo Abeni goto out; 1027163e5292SPaolo Abeni 1028163e5292SPaolo Abeni peer_priv = netdev_priv(peer); 1029163e5292SPaolo Abeni priv->requested_headroom = new_hr; 1030163e5292SPaolo Abeni new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1031163e5292SPaolo Abeni dev->needed_headroom = new_hr; 1032163e5292SPaolo Abeni peer->needed_headroom = new_hr; 1033163e5292SPaolo Abeni 1034163e5292SPaolo Abeni out: 1035163e5292SPaolo Abeni rcu_read_unlock(); 1036163e5292SPaolo Abeni } 1037163e5292SPaolo Abeni 1038948d4f21SToshiaki Makita static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1039948d4f21SToshiaki Makita struct netlink_ext_ack *extack) 1040948d4f21SToshiaki Makita { 1041948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 1042948d4f21SToshiaki Makita struct bpf_prog *old_prog; 1043948d4f21SToshiaki Makita struct net_device *peer; 1044dc224822SToshiaki Makita unsigned int max_mtu; 1045948d4f21SToshiaki Makita int err; 1046948d4f21SToshiaki Makita 1047948d4f21SToshiaki Makita old_prog = priv->_xdp_prog; 1048948d4f21SToshiaki Makita priv->_xdp_prog = prog; 1049948d4f21SToshiaki Makita peer = rtnl_dereference(priv->peer); 1050948d4f21SToshiaki Makita 1051948d4f21SToshiaki Makita if (prog) { 1052948d4f21SToshiaki Makita if (!peer) { 1053948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1054948d4f21SToshiaki Makita err = -ENOTCONN; 1055948d4f21SToshiaki Makita goto err; 1056948d4f21SToshiaki Makita } 1057948d4f21SToshiaki Makita 1058dc224822SToshiaki Makita max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - 1059dc224822SToshiaki Makita peer->hard_header_len - 1060dc224822SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1061dc224822SToshiaki Makita if (peer->mtu > max_mtu) { 1062dc224822SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1063dc224822SToshiaki Makita err = -ERANGE; 1064dc224822SToshiaki Makita goto err; 1065dc224822SToshiaki Makita } 1066dc224822SToshiaki Makita 1067638264dcSToshiaki Makita if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1068638264dcSToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1069638264dcSToshiaki Makita err = -ENOSPC; 1070638264dcSToshiaki Makita goto err; 1071638264dcSToshiaki Makita } 1072638264dcSToshiaki Makita 1073948d4f21SToshiaki Makita if (dev->flags & IFF_UP) { 1074948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 1075948d4f21SToshiaki Makita if (err) { 1076948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1077948d4f21SToshiaki Makita goto err; 1078948d4f21SToshiaki Makita } 1079948d4f21SToshiaki Makita } 1080dc224822SToshiaki Makita 1081dc224822SToshiaki Makita if (!old_prog) { 1082dc224822SToshiaki Makita peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1083dc224822SToshiaki Makita peer->max_mtu = max_mtu; 1084dc224822SToshiaki Makita } 1085948d4f21SToshiaki Makita } 1086948d4f21SToshiaki Makita 1087948d4f21SToshiaki Makita if (old_prog) { 1088dc224822SToshiaki Makita if (!prog) { 1089dc224822SToshiaki Makita if (dev->flags & IFF_UP) 1090948d4f21SToshiaki Makita veth_disable_xdp(dev); 1091dc224822SToshiaki Makita 1092dc224822SToshiaki Makita if (peer) { 1093dc224822SToshiaki Makita peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1094dc224822SToshiaki Makita peer->max_mtu = ETH_MAX_MTU; 1095dc224822SToshiaki Makita } 1096dc224822SToshiaki Makita } 1097948d4f21SToshiaki Makita bpf_prog_put(old_prog); 1098948d4f21SToshiaki Makita } 1099948d4f21SToshiaki Makita 1100dc224822SToshiaki Makita if ((!!old_prog ^ !!prog) && peer) 1101dc224822SToshiaki Makita netdev_update_features(peer); 1102dc224822SToshiaki Makita 1103948d4f21SToshiaki Makita return 0; 1104948d4f21SToshiaki Makita err: 1105948d4f21SToshiaki Makita priv->_xdp_prog = old_prog; 1106948d4f21SToshiaki Makita 1107948d4f21SToshiaki Makita return err; 1108948d4f21SToshiaki Makita } 1109948d4f21SToshiaki Makita 1110948d4f21SToshiaki Makita static u32 veth_xdp_query(struct net_device *dev) 1111948d4f21SToshiaki Makita { 1112948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 1113948d4f21SToshiaki Makita const struct bpf_prog *xdp_prog; 1114948d4f21SToshiaki Makita 1115948d4f21SToshiaki Makita xdp_prog = priv->_xdp_prog; 1116948d4f21SToshiaki Makita if (xdp_prog) 1117948d4f21SToshiaki Makita return xdp_prog->aux->id; 1118948d4f21SToshiaki Makita 1119948d4f21SToshiaki Makita return 0; 1120948d4f21SToshiaki Makita } 1121948d4f21SToshiaki Makita 1122948d4f21SToshiaki Makita static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1123948d4f21SToshiaki Makita { 1124948d4f21SToshiaki Makita switch (xdp->command) { 1125948d4f21SToshiaki Makita case XDP_SETUP_PROG: 1126948d4f21SToshiaki Makita return veth_xdp_set(dev, xdp->prog, xdp->extack); 1127948d4f21SToshiaki Makita case XDP_QUERY_PROG: 1128948d4f21SToshiaki Makita xdp->prog_id = veth_xdp_query(dev); 1129948d4f21SToshiaki Makita return 0; 1130948d4f21SToshiaki Makita default: 1131948d4f21SToshiaki Makita return -EINVAL; 1132948d4f21SToshiaki Makita } 1133948d4f21SToshiaki Makita } 1134948d4f21SToshiaki Makita 11354456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = { 11364456e7bdSStephen Hemminger .ndo_init = veth_dev_init, 11374456e7bdSStephen Hemminger .ndo_open = veth_open, 11382cf48a10SEric W. Biederman .ndo_stop = veth_close, 113900829823SStephen Hemminger .ndo_start_xmit = veth_xmit, 11406311cc44Sstephen hemminger .ndo_get_stats64 = veth_get_stats64, 11415c70ef85SGao feng .ndo_set_rx_mode = veth_set_multicast_list, 1142ee923623SDaniel Lezcano .ndo_set_mac_address = eth_mac_addr, 1143bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 1144bb446c19SWANG Cong .ndo_poll_controller = veth_poll_controller, 1145bb446c19SWANG Cong #endif 1146a45253bfSNicolas Dichtel .ndo_get_iflink = veth_get_iflink, 1147dc224822SToshiaki Makita .ndo_fix_features = veth_fix_features, 11481a04a821SToshiaki Makita .ndo_features_check = passthru_features_check, 1149163e5292SPaolo Abeni .ndo_set_rx_headroom = veth_set_rx_headroom, 1150948d4f21SToshiaki Makita .ndo_bpf = veth_xdp, 1151af87a3aaSToshiaki Makita .ndo_xdp_xmit = veth_xdp_xmit, 11524456e7bdSStephen Hemminger }; 11534456e7bdSStephen Hemminger 1154732912d7SAlexander Duyck #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1155c80fafbbSXin Long NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1156732912d7SAlexander Duyck NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 115728d2b136SPatrick McHardy NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 115828d2b136SPatrick McHardy NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 11598093315aSEric Dumazet 1160e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev) 1161e314dbdcSPavel Emelyanov { 1162e314dbdcSPavel Emelyanov ether_setup(dev); 1163e314dbdcSPavel Emelyanov 1164550fd08cSNeil Horman dev->priv_flags &= ~IFF_TX_SKB_SHARING; 116523ea5a96SHannes Frederic Sowa dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 116602f01ec1SPhil Sutter dev->priv_flags |= IFF_NO_QUEUE; 1167163e5292SPaolo Abeni dev->priv_flags |= IFF_PHONY_HEADROOM; 1168550fd08cSNeil Horman 11694456e7bdSStephen Hemminger dev->netdev_ops = &veth_netdev_ops; 1170e314dbdcSPavel Emelyanov dev->ethtool_ops = &veth_ethtool_ops; 1171e314dbdcSPavel Emelyanov dev->features |= NETIF_F_LLTX; 11728093315aSEric Dumazet dev->features |= VETH_FEATURES; 11738d0d21f4SToshiaki Makita dev->vlan_features = dev->features & 11743f8c707bSVlad Yasevich ~(NETIF_F_HW_VLAN_CTAG_TX | 11753f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_TX | 11763f8c707bSVlad Yasevich NETIF_F_HW_VLAN_CTAG_RX | 11773f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_RX); 1178cf124db5SDavid S. Miller dev->needs_free_netdev = true; 1179cf124db5SDavid S. Miller dev->priv_destructor = veth_dev_free; 118091572088SJarod Wilson dev->max_mtu = ETH_MAX_MTU; 1181a2c725faSMichał Mirosław 11828093315aSEric Dumazet dev->hw_features = VETH_FEATURES; 118382d81898SEric Dumazet dev->hw_enc_features = VETH_FEATURES; 1184607fca9aSDavid Ahern dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1185e314dbdcSPavel Emelyanov } 1186e314dbdcSPavel Emelyanov 1187e314dbdcSPavel Emelyanov /* 1188e314dbdcSPavel Emelyanov * netlink interface 1189e314dbdcSPavel Emelyanov */ 1190e314dbdcSPavel Emelyanov 1191a8b8a889SMatthias Schiffer static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1192a8b8a889SMatthias Schiffer struct netlink_ext_ack *extack) 1193e314dbdcSPavel Emelyanov { 1194e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS]) { 1195e314dbdcSPavel Emelyanov if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1196e314dbdcSPavel Emelyanov return -EINVAL; 1197e314dbdcSPavel Emelyanov if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1198e314dbdcSPavel Emelyanov return -EADDRNOTAVAIL; 1199e314dbdcSPavel Emelyanov } 120038d40815SEric Biederman if (tb[IFLA_MTU]) { 120138d40815SEric Biederman if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 120238d40815SEric Biederman return -EINVAL; 120338d40815SEric Biederman } 1204e314dbdcSPavel Emelyanov return 0; 1205e314dbdcSPavel Emelyanov } 1206e314dbdcSPavel Emelyanov 1207e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops; 1208e314dbdcSPavel Emelyanov 120981adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev, 12107a3f4a18SMatthias Schiffer struct nlattr *tb[], struct nlattr *data[], 12117a3f4a18SMatthias Schiffer struct netlink_ext_ack *extack) 1212e314dbdcSPavel Emelyanov { 12137797b93bSToshiaki Makita int err; 1214e314dbdcSPavel Emelyanov struct net_device *peer; 1215e314dbdcSPavel Emelyanov struct veth_priv *priv; 1216e314dbdcSPavel Emelyanov char ifname[IFNAMSIZ]; 1217e314dbdcSPavel Emelyanov struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 12185517750fSTom Gundersen unsigned char name_assign_type; 12193729d502SPatrick McHardy struct ifinfomsg *ifmp; 122081adee47SEric W. Biederman struct net *net; 1221e314dbdcSPavel Emelyanov 1222e314dbdcSPavel Emelyanov /* 1223e314dbdcSPavel Emelyanov * create and register peer first 1224e314dbdcSPavel Emelyanov */ 1225e314dbdcSPavel Emelyanov if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1226e314dbdcSPavel Emelyanov struct nlattr *nla_peer; 1227e314dbdcSPavel Emelyanov 1228e314dbdcSPavel Emelyanov nla_peer = data[VETH_INFO_PEER]; 12293729d502SPatrick McHardy ifmp = nla_data(nla_peer); 1230f7b12606SJiri Pirko err = rtnl_nla_parse_ifla(peer_tb, 1231e314dbdcSPavel Emelyanov nla_data(nla_peer) + sizeof(struct ifinfomsg), 1232fceb6435SJohannes Berg nla_len(nla_peer) - sizeof(struct ifinfomsg), 1233fceb6435SJohannes Berg NULL); 1234e314dbdcSPavel Emelyanov if (err < 0) 1235e314dbdcSPavel Emelyanov return err; 1236e314dbdcSPavel Emelyanov 1237a8b8a889SMatthias Schiffer err = veth_validate(peer_tb, NULL, extack); 1238e314dbdcSPavel Emelyanov if (err < 0) 1239e314dbdcSPavel Emelyanov return err; 1240e314dbdcSPavel Emelyanov 1241e314dbdcSPavel Emelyanov tbp = peer_tb; 12423729d502SPatrick McHardy } else { 12433729d502SPatrick McHardy ifmp = NULL; 1244e314dbdcSPavel Emelyanov tbp = tb; 12453729d502SPatrick McHardy } 1246e314dbdcSPavel Emelyanov 1247191cdb38SSerhey Popovych if (ifmp && tbp[IFLA_IFNAME]) { 1248e314dbdcSPavel Emelyanov nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 12495517750fSTom Gundersen name_assign_type = NET_NAME_USER; 12505517750fSTom Gundersen } else { 1251e314dbdcSPavel Emelyanov snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 12525517750fSTom Gundersen name_assign_type = NET_NAME_ENUM; 12535517750fSTom Gundersen } 1254e314dbdcSPavel Emelyanov 125581adee47SEric W. Biederman net = rtnl_link_get_net(src_net, tbp); 125681adee47SEric W. Biederman if (IS_ERR(net)) 125781adee47SEric W. Biederman return PTR_ERR(net); 125881adee47SEric W. Biederman 12595517750fSTom Gundersen peer = rtnl_create_link(net, ifname, name_assign_type, 1260d0522f1cSDavid Ahern &veth_link_ops, tbp, extack); 126181adee47SEric W. Biederman if (IS_ERR(peer)) { 126281adee47SEric W. Biederman put_net(net); 1263e314dbdcSPavel Emelyanov return PTR_ERR(peer); 126481adee47SEric W. Biederman } 1265e314dbdcSPavel Emelyanov 1266191cdb38SSerhey Popovych if (!ifmp || !tbp[IFLA_ADDRESS]) 1267f2cedb63SDanny Kukawka eth_hw_addr_random(peer); 1268e314dbdcSPavel Emelyanov 1269e6f8f1a7SPavel Emelyanov if (ifmp && (dev->ifindex != 0)) 1270e6f8f1a7SPavel Emelyanov peer->ifindex = ifmp->ifi_index; 1271e6f8f1a7SPavel Emelyanov 127272d24955SStephen Hemminger peer->gso_max_size = dev->gso_max_size; 127372d24955SStephen Hemminger peer->gso_max_segs = dev->gso_max_segs; 127472d24955SStephen Hemminger 1275e314dbdcSPavel Emelyanov err = register_netdevice(peer); 127681adee47SEric W. Biederman put_net(net); 127781adee47SEric W. Biederman net = NULL; 1278e314dbdcSPavel Emelyanov if (err < 0) 1279e314dbdcSPavel Emelyanov goto err_register_peer; 1280e314dbdcSPavel Emelyanov 1281e314dbdcSPavel Emelyanov netif_carrier_off(peer); 1282e314dbdcSPavel Emelyanov 12833729d502SPatrick McHardy err = rtnl_configure_link(peer, ifmp); 12843729d502SPatrick McHardy if (err < 0) 12853729d502SPatrick McHardy goto err_configure_peer; 12863729d502SPatrick McHardy 1287e314dbdcSPavel Emelyanov /* 1288e314dbdcSPavel Emelyanov * register dev last 1289e314dbdcSPavel Emelyanov * 1290e314dbdcSPavel Emelyanov * note, that since we've registered new device the dev's name 1291e314dbdcSPavel Emelyanov * should be re-allocated 1292e314dbdcSPavel Emelyanov */ 1293e314dbdcSPavel Emelyanov 1294e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS] == NULL) 1295f2cedb63SDanny Kukawka eth_hw_addr_random(dev); 1296e314dbdcSPavel Emelyanov 12976c8c4446SJiri Pirko if (tb[IFLA_IFNAME]) 12986c8c4446SJiri Pirko nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 12996c8c4446SJiri Pirko else 13006c8c4446SJiri Pirko snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 13016c8c4446SJiri Pirko 1302e314dbdcSPavel Emelyanov err = register_netdevice(dev); 1303e314dbdcSPavel Emelyanov if (err < 0) 1304e314dbdcSPavel Emelyanov goto err_register_dev; 1305e314dbdcSPavel Emelyanov 1306e314dbdcSPavel Emelyanov netif_carrier_off(dev); 1307e314dbdcSPavel Emelyanov 1308e314dbdcSPavel Emelyanov /* 1309e314dbdcSPavel Emelyanov * tie the deviced together 1310e314dbdcSPavel Emelyanov */ 1311e314dbdcSPavel Emelyanov 1312e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1313d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, peer); 1314e314dbdcSPavel Emelyanov 1315e314dbdcSPavel Emelyanov priv = netdev_priv(peer); 1316d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, dev); 1317948d4f21SToshiaki Makita 1318e314dbdcSPavel Emelyanov return 0; 1319e314dbdcSPavel Emelyanov 1320e314dbdcSPavel Emelyanov err_register_dev: 1321e314dbdcSPavel Emelyanov /* nothing to do */ 13223729d502SPatrick McHardy err_configure_peer: 1323e314dbdcSPavel Emelyanov unregister_netdevice(peer); 1324e314dbdcSPavel Emelyanov return err; 1325e314dbdcSPavel Emelyanov 1326e314dbdcSPavel Emelyanov err_register_peer: 1327e314dbdcSPavel Emelyanov free_netdev(peer); 1328e314dbdcSPavel Emelyanov return err; 1329e314dbdcSPavel Emelyanov } 1330e314dbdcSPavel Emelyanov 133123289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head) 1332e314dbdcSPavel Emelyanov { 1333e314dbdcSPavel Emelyanov struct veth_priv *priv; 1334e314dbdcSPavel Emelyanov struct net_device *peer; 1335e314dbdcSPavel Emelyanov 1336e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1337d0e2c55eSEric Dumazet peer = rtnl_dereference(priv->peer); 1338d0e2c55eSEric Dumazet 1339d0e2c55eSEric Dumazet /* Note : dellink() is called from default_device_exit_batch(), 1340d0e2c55eSEric Dumazet * before a rcu_synchronize() point. The devices are guaranteed 1341d0e2c55eSEric Dumazet * not being freed before one RCU grace period. 1342d0e2c55eSEric Dumazet */ 1343d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 1344f45a5c26SEric Dumazet unregister_netdevice_queue(dev, head); 1345d0e2c55eSEric Dumazet 1346f45a5c26SEric Dumazet if (peer) { 1347d0e2c55eSEric Dumazet priv = netdev_priv(peer); 1348d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 134924540535SEric Dumazet unregister_netdevice_queue(peer, head); 1350e314dbdcSPavel Emelyanov } 1351f45a5c26SEric Dumazet } 1352e314dbdcSPavel Emelyanov 135323711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 135423711438SThomas Graf [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 135523711438SThomas Graf }; 1356e314dbdcSPavel Emelyanov 1357e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev) 1358e5f4e7b9SNicolas Dichtel { 1359e5f4e7b9SNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 1360e5f4e7b9SNicolas Dichtel struct net_device *peer = rtnl_dereference(priv->peer); 1361e5f4e7b9SNicolas Dichtel 1362e5f4e7b9SNicolas Dichtel return peer ? dev_net(peer) : dev_net(dev); 1363e5f4e7b9SNicolas Dichtel } 1364e5f4e7b9SNicolas Dichtel 1365e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = { 1366e314dbdcSPavel Emelyanov .kind = DRV_NAME, 1367e314dbdcSPavel Emelyanov .priv_size = sizeof(struct veth_priv), 1368e314dbdcSPavel Emelyanov .setup = veth_setup, 1369e314dbdcSPavel Emelyanov .validate = veth_validate, 1370e314dbdcSPavel Emelyanov .newlink = veth_newlink, 1371e314dbdcSPavel Emelyanov .dellink = veth_dellink, 1372e314dbdcSPavel Emelyanov .policy = veth_policy, 1373e314dbdcSPavel Emelyanov .maxtype = VETH_INFO_MAX, 1374e5f4e7b9SNicolas Dichtel .get_link_net = veth_get_link_net, 1375e314dbdcSPavel Emelyanov }; 1376e314dbdcSPavel Emelyanov 1377e314dbdcSPavel Emelyanov /* 1378e314dbdcSPavel Emelyanov * init/fini 1379e314dbdcSPavel Emelyanov */ 1380e314dbdcSPavel Emelyanov 1381e314dbdcSPavel Emelyanov static __init int veth_init(void) 1382e314dbdcSPavel Emelyanov { 1383e314dbdcSPavel Emelyanov return rtnl_link_register(&veth_link_ops); 1384e314dbdcSPavel Emelyanov } 1385e314dbdcSPavel Emelyanov 1386e314dbdcSPavel Emelyanov static __exit void veth_exit(void) 1387e314dbdcSPavel Emelyanov { 138868365458SPatrick McHardy rtnl_link_unregister(&veth_link_ops); 1389e314dbdcSPavel Emelyanov } 1390e314dbdcSPavel Emelyanov 1391e314dbdcSPavel Emelyanov module_init(veth_init); 1392e314dbdcSPavel Emelyanov module_exit(veth_exit); 1393e314dbdcSPavel Emelyanov 1394e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 1395e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2"); 1396e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME); 1397