1e314dbdcSPavel Emelyanov /* 2e314dbdcSPavel Emelyanov * drivers/net/veth.c 3e314dbdcSPavel Emelyanov * 4e314dbdcSPavel Emelyanov * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5e314dbdcSPavel Emelyanov * 6e314dbdcSPavel Emelyanov * Author: Pavel Emelianov <xemul@openvz.org> 7e314dbdcSPavel Emelyanov * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8e314dbdcSPavel Emelyanov * 9e314dbdcSPavel Emelyanov */ 10e314dbdcSPavel Emelyanov 11e314dbdcSPavel Emelyanov #include <linux/netdevice.h> 125a0e3ad6STejun Heo #include <linux/slab.h> 13e314dbdcSPavel Emelyanov #include <linux/ethtool.h> 14e314dbdcSPavel Emelyanov #include <linux/etherdevice.h> 15cf05c700SEric Dumazet #include <linux/u64_stats_sync.h> 16e314dbdcSPavel Emelyanov 17f7b12606SJiri Pirko #include <net/rtnetlink.h> 18e314dbdcSPavel Emelyanov #include <net/dst.h> 19e314dbdcSPavel Emelyanov #include <net/xfrm.h> 20af87a3aaSToshiaki Makita #include <net/xdp.h> 21ecef969eSStephen Hemminger #include <linux/veth.h> 229d9779e7SPaul Gortmaker #include <linux/module.h> 23948d4f21SToshiaki Makita #include <linux/bpf.h> 24948d4f21SToshiaki Makita #include <linux/filter.h> 25948d4f21SToshiaki Makita #include <linux/ptr_ring.h> 26948d4f21SToshiaki Makita #include <linux/bpf_trace.h> 27aa4e689eSMichael Walle #include <linux/net_tstamp.h> 28e314dbdcSPavel Emelyanov 29e314dbdcSPavel Emelyanov #define DRV_NAME "veth" 30e314dbdcSPavel Emelyanov #define DRV_VERSION "1.0" 31e314dbdcSPavel Emelyanov 329fc8d518SToshiaki Makita #define VETH_XDP_FLAG BIT(0) 33948d4f21SToshiaki Makita #define VETH_RING_SIZE 256 34948d4f21SToshiaki Makita #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 35948d4f21SToshiaki Makita 36d1396004SToshiaki Makita /* Separating two types of XDP xmit */ 37d1396004SToshiaki Makita #define VETH_XDP_TX BIT(0) 38d1396004SToshiaki Makita #define VETH_XDP_REDIR BIT(1) 39d1396004SToshiaki Makita 402681128fSEric Dumazet struct pcpu_vstats { 412681128fSEric Dumazet u64 packets; 422681128fSEric Dumazet u64 bytes; 43cf05c700SEric Dumazet struct u64_stats_sync syncp; 44e314dbdcSPavel Emelyanov }; 45e314dbdcSPavel Emelyanov 46638264dcSToshiaki Makita struct veth_rq { 47948d4f21SToshiaki Makita struct napi_struct xdp_napi; 48948d4f21SToshiaki Makita struct net_device *dev; 49948d4f21SToshiaki Makita struct bpf_prog __rcu *xdp_prog; 50d1396004SToshiaki Makita struct xdp_mem_info xdp_mem; 51948d4f21SToshiaki Makita bool rx_notify_masked; 52948d4f21SToshiaki Makita struct ptr_ring xdp_ring; 53948d4f21SToshiaki Makita struct xdp_rxq_info xdp_rxq; 54e314dbdcSPavel Emelyanov }; 55e314dbdcSPavel Emelyanov 56638264dcSToshiaki Makita struct veth_priv { 57638264dcSToshiaki Makita struct net_device __rcu *peer; 58638264dcSToshiaki Makita atomic64_t dropped; 59638264dcSToshiaki Makita struct bpf_prog *_xdp_prog; 60638264dcSToshiaki Makita struct veth_rq *rq; 61638264dcSToshiaki Makita unsigned int requested_headroom; 62638264dcSToshiaki Makita }; 63638264dcSToshiaki Makita 64e314dbdcSPavel Emelyanov /* 65e314dbdcSPavel Emelyanov * ethtool interface 66e314dbdcSPavel Emelyanov */ 67e314dbdcSPavel Emelyanov 68e314dbdcSPavel Emelyanov static struct { 69e314dbdcSPavel Emelyanov const char string[ETH_GSTRING_LEN]; 70e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = { 71e314dbdcSPavel Emelyanov { "peer_ifindex" }, 72e314dbdcSPavel Emelyanov }; 73e314dbdcSPavel Emelyanov 7456607b98SPhilippe Reynes static int veth_get_link_ksettings(struct net_device *dev, 7556607b98SPhilippe Reynes struct ethtool_link_ksettings *cmd) 76e314dbdcSPavel Emelyanov { 7756607b98SPhilippe Reynes cmd->base.speed = SPEED_10000; 7856607b98SPhilippe Reynes cmd->base.duplex = DUPLEX_FULL; 7956607b98SPhilippe Reynes cmd->base.port = PORT_TP; 8056607b98SPhilippe Reynes cmd->base.autoneg = AUTONEG_DISABLE; 81e314dbdcSPavel Emelyanov return 0; 82e314dbdcSPavel Emelyanov } 83e314dbdcSPavel Emelyanov 84e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 85e314dbdcSPavel Emelyanov { 8633a5ba14SRick Jones strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 8733a5ba14SRick Jones strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 88e314dbdcSPavel Emelyanov } 89e314dbdcSPavel Emelyanov 90e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 91e314dbdcSPavel Emelyanov { 92e314dbdcSPavel Emelyanov switch(stringset) { 93e314dbdcSPavel Emelyanov case ETH_SS_STATS: 94e314dbdcSPavel Emelyanov memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 95e314dbdcSPavel Emelyanov break; 96e314dbdcSPavel Emelyanov } 97e314dbdcSPavel Emelyanov } 98e314dbdcSPavel Emelyanov 99b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset) 100e314dbdcSPavel Emelyanov { 101b9f2c044SJeff Garzik switch (sset) { 102b9f2c044SJeff Garzik case ETH_SS_STATS: 103e314dbdcSPavel Emelyanov return ARRAY_SIZE(ethtool_stats_keys); 104b9f2c044SJeff Garzik default: 105b9f2c044SJeff Garzik return -EOPNOTSUPP; 106b9f2c044SJeff Garzik } 107e314dbdcSPavel Emelyanov } 108e314dbdcSPavel Emelyanov 109e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev, 110e314dbdcSPavel Emelyanov struct ethtool_stats *stats, u64 *data) 111e314dbdcSPavel Emelyanov { 112d0e2c55eSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 113d0e2c55eSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 114e314dbdcSPavel Emelyanov 115d0e2c55eSEric Dumazet data[0] = peer ? peer->ifindex : 0; 116e314dbdcSPavel Emelyanov } 117e314dbdcSPavel Emelyanov 118aa4e689eSMichael Walle static int veth_get_ts_info(struct net_device *dev, 119aa4e689eSMichael Walle struct ethtool_ts_info *info) 120aa4e689eSMichael Walle { 121aa4e689eSMichael Walle info->so_timestamping = 122aa4e689eSMichael Walle SOF_TIMESTAMPING_TX_SOFTWARE | 123aa4e689eSMichael Walle SOF_TIMESTAMPING_RX_SOFTWARE | 124aa4e689eSMichael Walle SOF_TIMESTAMPING_SOFTWARE; 125aa4e689eSMichael Walle info->phc_index = -1; 126aa4e689eSMichael Walle 127aa4e689eSMichael Walle return 0; 128aa4e689eSMichael Walle } 129aa4e689eSMichael Walle 1300fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = { 131e314dbdcSPavel Emelyanov .get_drvinfo = veth_get_drvinfo, 132e314dbdcSPavel Emelyanov .get_link = ethtool_op_get_link, 133e314dbdcSPavel Emelyanov .get_strings = veth_get_strings, 134b9f2c044SJeff Garzik .get_sset_count = veth_get_sset_count, 135e314dbdcSPavel Emelyanov .get_ethtool_stats = veth_get_ethtool_stats, 13656607b98SPhilippe Reynes .get_link_ksettings = veth_get_link_ksettings, 137aa4e689eSMichael Walle .get_ts_info = veth_get_ts_info, 138e314dbdcSPavel Emelyanov }; 139e314dbdcSPavel Emelyanov 140948d4f21SToshiaki Makita /* general routines */ 141948d4f21SToshiaki Makita 1429fc8d518SToshiaki Makita static bool veth_is_xdp_frame(void *ptr) 1439fc8d518SToshiaki Makita { 1449fc8d518SToshiaki Makita return (unsigned long)ptr & VETH_XDP_FLAG; 1459fc8d518SToshiaki Makita } 1469fc8d518SToshiaki Makita 1479fc8d518SToshiaki Makita static void *veth_ptr_to_xdp(void *ptr) 1489fc8d518SToshiaki Makita { 1499fc8d518SToshiaki Makita return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 1509fc8d518SToshiaki Makita } 1519fc8d518SToshiaki Makita 152af87a3aaSToshiaki Makita static void *veth_xdp_to_ptr(void *ptr) 153af87a3aaSToshiaki Makita { 154af87a3aaSToshiaki Makita return (void *)((unsigned long)ptr | VETH_XDP_FLAG); 155af87a3aaSToshiaki Makita } 156af87a3aaSToshiaki Makita 1579fc8d518SToshiaki Makita static void veth_ptr_free(void *ptr) 1589fc8d518SToshiaki Makita { 1599fc8d518SToshiaki Makita if (veth_is_xdp_frame(ptr)) 1609fc8d518SToshiaki Makita xdp_return_frame(veth_ptr_to_xdp(ptr)); 1619fc8d518SToshiaki Makita else 1629fc8d518SToshiaki Makita kfree_skb(ptr); 1639fc8d518SToshiaki Makita } 1649fc8d518SToshiaki Makita 165638264dcSToshiaki Makita static void __veth_xdp_flush(struct veth_rq *rq) 166948d4f21SToshiaki Makita { 167948d4f21SToshiaki Makita /* Write ptr_ring before reading rx_notify_masked */ 168948d4f21SToshiaki Makita smp_mb(); 169638264dcSToshiaki Makita if (!rq->rx_notify_masked) { 170638264dcSToshiaki Makita rq->rx_notify_masked = true; 171638264dcSToshiaki Makita napi_schedule(&rq->xdp_napi); 172948d4f21SToshiaki Makita } 173948d4f21SToshiaki Makita } 174948d4f21SToshiaki Makita 175638264dcSToshiaki Makita static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 176948d4f21SToshiaki Makita { 177638264dcSToshiaki Makita if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 178948d4f21SToshiaki Makita dev_kfree_skb_any(skb); 179948d4f21SToshiaki Makita return NET_RX_DROP; 180948d4f21SToshiaki Makita } 181948d4f21SToshiaki Makita 182948d4f21SToshiaki Makita return NET_RX_SUCCESS; 183948d4f21SToshiaki Makita } 184948d4f21SToshiaki Makita 185638264dcSToshiaki Makita static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 186638264dcSToshiaki Makita struct veth_rq *rq, bool xdp) 187e314dbdcSPavel Emelyanov { 188948d4f21SToshiaki Makita return __dev_forward_skb(dev, skb) ?: xdp ? 189638264dcSToshiaki Makita veth_xdp_rx(rq, skb) : 190948d4f21SToshiaki Makita netif_rx(skb); 191948d4f21SToshiaki Makita } 192948d4f21SToshiaki Makita 193948d4f21SToshiaki Makita static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 194948d4f21SToshiaki Makita { 195948d4f21SToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 196638264dcSToshiaki Makita struct veth_rq *rq = NULL; 197d0e2c55eSEric Dumazet struct net_device *rcv; 1982681128fSEric Dumazet int length = skb->len; 199948d4f21SToshiaki Makita bool rcv_xdp = false; 200638264dcSToshiaki Makita int rxq; 201e314dbdcSPavel Emelyanov 202d0e2c55eSEric Dumazet rcu_read_lock(); 203d0e2c55eSEric Dumazet rcv = rcu_dereference(priv->peer); 204d0e2c55eSEric Dumazet if (unlikely(!rcv)) { 205d0e2c55eSEric Dumazet kfree_skb(skb); 206d0e2c55eSEric Dumazet goto drop; 207d0e2c55eSEric Dumazet } 208e314dbdcSPavel Emelyanov 209948d4f21SToshiaki Makita rcv_priv = netdev_priv(rcv); 210638264dcSToshiaki Makita rxq = skb_get_queue_mapping(skb); 211638264dcSToshiaki Makita if (rxq < rcv->real_num_rx_queues) { 212638264dcSToshiaki Makita rq = &rcv_priv->rq[rxq]; 213638264dcSToshiaki Makita rcv_xdp = rcu_access_pointer(rq->xdp_prog); 214638264dcSToshiaki Makita if (rcv_xdp) 215638264dcSToshiaki Makita skb_record_rx_queue(skb, rxq); 216638264dcSToshiaki Makita } 217948d4f21SToshiaki Makita 218aa4e689eSMichael Walle skb_tx_timestamp(skb); 219638264dcSToshiaki Makita if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { 2202681128fSEric Dumazet struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 221e314dbdcSPavel Emelyanov 222cf05c700SEric Dumazet u64_stats_update_begin(&stats->syncp); 2232681128fSEric Dumazet stats->bytes += length; 2242681128fSEric Dumazet stats->packets++; 225cf05c700SEric Dumazet u64_stats_update_end(&stats->syncp); 2262681128fSEric Dumazet } else { 227d0e2c55eSEric Dumazet drop: 2282681128fSEric Dumazet atomic64_inc(&priv->dropped); 2292681128fSEric Dumazet } 230948d4f21SToshiaki Makita 231948d4f21SToshiaki Makita if (rcv_xdp) 232638264dcSToshiaki Makita __veth_xdp_flush(rq); 233948d4f21SToshiaki Makita 234d0e2c55eSEric Dumazet rcu_read_unlock(); 235948d4f21SToshiaki Makita 2366ed10654SPatrick McHardy return NETDEV_TX_OK; 237e314dbdcSPavel Emelyanov } 238e314dbdcSPavel Emelyanov 2392681128fSEric Dumazet static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 240e314dbdcSPavel Emelyanov { 241cf05c700SEric Dumazet struct veth_priv *priv = netdev_priv(dev); 24211687a10SDavid S. Miller int cpu; 24311687a10SDavid S. Miller 2442681128fSEric Dumazet result->packets = 0; 2452681128fSEric Dumazet result->bytes = 0; 2462b1c8b0fSEric Dumazet for_each_possible_cpu(cpu) { 2472681128fSEric Dumazet struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 2482681128fSEric Dumazet u64 packets, bytes; 249cf05c700SEric Dumazet unsigned int start; 250e314dbdcSPavel Emelyanov 251cf05c700SEric Dumazet do { 25257a7744eSEric W. Biederman start = u64_stats_fetch_begin_irq(&stats->syncp); 2532681128fSEric Dumazet packets = stats->packets; 2542681128fSEric Dumazet bytes = stats->bytes; 25557a7744eSEric W. Biederman } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 2562681128fSEric Dumazet result->packets += packets; 2572681128fSEric Dumazet result->bytes += bytes; 258e314dbdcSPavel Emelyanov } 2592681128fSEric Dumazet return atomic64_read(&priv->dropped); 2602681128fSEric Dumazet } 2612681128fSEric Dumazet 262bc1f4470Sstephen hemminger static void veth_get_stats64(struct net_device *dev, 2632681128fSEric Dumazet struct rtnl_link_stats64 *tot) 2642681128fSEric Dumazet { 2652681128fSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 266d0e2c55eSEric Dumazet struct net_device *peer; 2672681128fSEric Dumazet struct pcpu_vstats one; 2682681128fSEric Dumazet 2692681128fSEric Dumazet tot->tx_dropped = veth_stats_one(&one, dev); 2702681128fSEric Dumazet tot->tx_bytes = one.bytes; 2712681128fSEric Dumazet tot->tx_packets = one.packets; 2722681128fSEric Dumazet 273d0e2c55eSEric Dumazet rcu_read_lock(); 274d0e2c55eSEric Dumazet peer = rcu_dereference(priv->peer); 275d0e2c55eSEric Dumazet if (peer) { 276d0e2c55eSEric Dumazet tot->rx_dropped = veth_stats_one(&one, peer); 2772681128fSEric Dumazet tot->rx_bytes = one.bytes; 2782681128fSEric Dumazet tot->rx_packets = one.packets; 279d0e2c55eSEric Dumazet } 280d0e2c55eSEric Dumazet rcu_read_unlock(); 281e314dbdcSPavel Emelyanov } 282e314dbdcSPavel Emelyanov 2835c70ef85SGao feng /* fake multicast ability */ 2845c70ef85SGao feng static void veth_set_multicast_list(struct net_device *dev) 2855c70ef85SGao feng { 2865c70ef85SGao feng } 2875c70ef85SGao feng 288948d4f21SToshiaki Makita static struct sk_buff *veth_build_skb(void *head, int headroom, int len, 289948d4f21SToshiaki Makita int buflen) 290948d4f21SToshiaki Makita { 291948d4f21SToshiaki Makita struct sk_buff *skb; 292948d4f21SToshiaki Makita 293948d4f21SToshiaki Makita if (!buflen) { 294948d4f21SToshiaki Makita buflen = SKB_DATA_ALIGN(headroom + len) + 295948d4f21SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 296948d4f21SToshiaki Makita } 297948d4f21SToshiaki Makita skb = build_skb(head, buflen); 298948d4f21SToshiaki Makita if (!skb) 299948d4f21SToshiaki Makita return NULL; 300948d4f21SToshiaki Makita 301948d4f21SToshiaki Makita skb_reserve(skb, headroom); 302948d4f21SToshiaki Makita skb_put(skb, len); 303948d4f21SToshiaki Makita 304948d4f21SToshiaki Makita return skb; 305948d4f21SToshiaki Makita } 306948d4f21SToshiaki Makita 307638264dcSToshiaki Makita static int veth_select_rxq(struct net_device *dev) 308638264dcSToshiaki Makita { 309638264dcSToshiaki Makita return smp_processor_id() % dev->real_num_rx_queues; 310638264dcSToshiaki Makita } 311638264dcSToshiaki Makita 312af87a3aaSToshiaki Makita static int veth_xdp_xmit(struct net_device *dev, int n, 313af87a3aaSToshiaki Makita struct xdp_frame **frames, u32 flags) 314af87a3aaSToshiaki Makita { 315af87a3aaSToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 316af87a3aaSToshiaki Makita struct net_device *rcv; 317af87a3aaSToshiaki Makita unsigned int max_len; 318638264dcSToshiaki Makita struct veth_rq *rq; 319af87a3aaSToshiaki Makita int i, drops = 0; 320af87a3aaSToshiaki Makita 321af87a3aaSToshiaki Makita if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 322af87a3aaSToshiaki Makita return -EINVAL; 323af87a3aaSToshiaki Makita 324af87a3aaSToshiaki Makita rcv = rcu_dereference(priv->peer); 325af87a3aaSToshiaki Makita if (unlikely(!rcv)) 326af87a3aaSToshiaki Makita return -ENXIO; 327af87a3aaSToshiaki Makita 328af87a3aaSToshiaki Makita rcv_priv = netdev_priv(rcv); 329638264dcSToshiaki Makita rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 330af87a3aaSToshiaki Makita /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive 331af87a3aaSToshiaki Makita * side. This means an XDP program is loaded on the peer and the peer 332af87a3aaSToshiaki Makita * device is up. 333af87a3aaSToshiaki Makita */ 334638264dcSToshiaki Makita if (!rcu_access_pointer(rq->xdp_prog)) 335af87a3aaSToshiaki Makita return -ENXIO; 336af87a3aaSToshiaki Makita 337af87a3aaSToshiaki Makita max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 338af87a3aaSToshiaki Makita 339638264dcSToshiaki Makita spin_lock(&rq->xdp_ring.producer_lock); 340af87a3aaSToshiaki Makita for (i = 0; i < n; i++) { 341af87a3aaSToshiaki Makita struct xdp_frame *frame = frames[i]; 342af87a3aaSToshiaki Makita void *ptr = veth_xdp_to_ptr(frame); 343af87a3aaSToshiaki Makita 344af87a3aaSToshiaki Makita if (unlikely(frame->len > max_len || 345638264dcSToshiaki Makita __ptr_ring_produce(&rq->xdp_ring, ptr))) { 346af87a3aaSToshiaki Makita xdp_return_frame_rx_napi(frame); 347af87a3aaSToshiaki Makita drops++; 348af87a3aaSToshiaki Makita } 349af87a3aaSToshiaki Makita } 350638264dcSToshiaki Makita spin_unlock(&rq->xdp_ring.producer_lock); 351af87a3aaSToshiaki Makita 352af87a3aaSToshiaki Makita if (flags & XDP_XMIT_FLUSH) 353638264dcSToshiaki Makita __veth_xdp_flush(rq); 354af87a3aaSToshiaki Makita 355af87a3aaSToshiaki Makita return n - drops; 356af87a3aaSToshiaki Makita } 357af87a3aaSToshiaki Makita 358d1396004SToshiaki Makita static void veth_xdp_flush(struct net_device *dev) 359d1396004SToshiaki Makita { 360d1396004SToshiaki Makita struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 361d1396004SToshiaki Makita struct net_device *rcv; 362638264dcSToshiaki Makita struct veth_rq *rq; 363d1396004SToshiaki Makita 364d1396004SToshiaki Makita rcu_read_lock(); 365d1396004SToshiaki Makita rcv = rcu_dereference(priv->peer); 366d1396004SToshiaki Makita if (unlikely(!rcv)) 367d1396004SToshiaki Makita goto out; 368d1396004SToshiaki Makita 369d1396004SToshiaki Makita rcv_priv = netdev_priv(rcv); 370638264dcSToshiaki Makita rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 371d1396004SToshiaki Makita /* xdp_ring is initialized on receive side? */ 372638264dcSToshiaki Makita if (unlikely(!rcu_access_pointer(rq->xdp_prog))) 373d1396004SToshiaki Makita goto out; 374d1396004SToshiaki Makita 375638264dcSToshiaki Makita __veth_xdp_flush(rq); 376d1396004SToshiaki Makita out: 377d1396004SToshiaki Makita rcu_read_unlock(); 378d1396004SToshiaki Makita } 379d1396004SToshiaki Makita 380d1396004SToshiaki Makita static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) 381d1396004SToshiaki Makita { 382d1396004SToshiaki Makita struct xdp_frame *frame = convert_to_xdp_frame(xdp); 383d1396004SToshiaki Makita 384d1396004SToshiaki Makita if (unlikely(!frame)) 385d1396004SToshiaki Makita return -EOVERFLOW; 386d1396004SToshiaki Makita 387d1396004SToshiaki Makita return veth_xdp_xmit(dev, 1, &frame, 0); 388d1396004SToshiaki Makita } 389d1396004SToshiaki Makita 390638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, 391d1396004SToshiaki Makita struct xdp_frame *frame, 392d1396004SToshiaki Makita unsigned int *xdp_xmit) 3939fc8d518SToshiaki Makita { 3949fc8d518SToshiaki Makita void *hard_start = frame->data - frame->headroom; 3959fc8d518SToshiaki Makita void *head = hard_start - sizeof(struct xdp_frame); 3969fc8d518SToshiaki Makita int len = frame->len, delta = 0; 397d1396004SToshiaki Makita struct xdp_frame orig_frame; 3989fc8d518SToshiaki Makita struct bpf_prog *xdp_prog; 3999fc8d518SToshiaki Makita unsigned int headroom; 4009fc8d518SToshiaki Makita struct sk_buff *skb; 4019fc8d518SToshiaki Makita 4029fc8d518SToshiaki Makita rcu_read_lock(); 403638264dcSToshiaki Makita xdp_prog = rcu_dereference(rq->xdp_prog); 4049fc8d518SToshiaki Makita if (likely(xdp_prog)) { 4059fc8d518SToshiaki Makita struct xdp_buff xdp; 4069fc8d518SToshiaki Makita u32 act; 4079fc8d518SToshiaki Makita 4089fc8d518SToshiaki Makita xdp.data_hard_start = hard_start; 4099fc8d518SToshiaki Makita xdp.data = frame->data; 4109fc8d518SToshiaki Makita xdp.data_end = frame->data + frame->len; 4119fc8d518SToshiaki Makita xdp.data_meta = frame->data - frame->metasize; 412638264dcSToshiaki Makita xdp.rxq = &rq->xdp_rxq; 4139fc8d518SToshiaki Makita 4149fc8d518SToshiaki Makita act = bpf_prog_run_xdp(xdp_prog, &xdp); 4159fc8d518SToshiaki Makita 4169fc8d518SToshiaki Makita switch (act) { 4179fc8d518SToshiaki Makita case XDP_PASS: 4189fc8d518SToshiaki Makita delta = frame->data - xdp.data; 4199fc8d518SToshiaki Makita len = xdp.data_end - xdp.data; 4209fc8d518SToshiaki Makita break; 421d1396004SToshiaki Makita case XDP_TX: 422d1396004SToshiaki Makita orig_frame = *frame; 423d1396004SToshiaki Makita xdp.data_hard_start = head; 424d1396004SToshiaki Makita xdp.rxq->mem = frame->mem; 425638264dcSToshiaki Makita if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) { 426638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 427d1396004SToshiaki Makita frame = &orig_frame; 428d1396004SToshiaki Makita goto err_xdp; 429d1396004SToshiaki Makita } 430d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_TX; 431d1396004SToshiaki Makita rcu_read_unlock(); 432d1396004SToshiaki Makita goto xdp_xmit; 433d1396004SToshiaki Makita case XDP_REDIRECT: 434d1396004SToshiaki Makita orig_frame = *frame; 435d1396004SToshiaki Makita xdp.data_hard_start = head; 436d1396004SToshiaki Makita xdp.rxq->mem = frame->mem; 437638264dcSToshiaki Makita if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { 438d1396004SToshiaki Makita frame = &orig_frame; 439d1396004SToshiaki Makita goto err_xdp; 440d1396004SToshiaki Makita } 441d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_REDIR; 442d1396004SToshiaki Makita rcu_read_unlock(); 443d1396004SToshiaki Makita goto xdp_xmit; 4449fc8d518SToshiaki Makita default: 4459fc8d518SToshiaki Makita bpf_warn_invalid_xdp_action(act); 4469fc8d518SToshiaki Makita case XDP_ABORTED: 447638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 4489fc8d518SToshiaki Makita case XDP_DROP: 4499fc8d518SToshiaki Makita goto err_xdp; 4509fc8d518SToshiaki Makita } 4519fc8d518SToshiaki Makita } 4529fc8d518SToshiaki Makita rcu_read_unlock(); 4539fc8d518SToshiaki Makita 4549fc8d518SToshiaki Makita headroom = sizeof(struct xdp_frame) + frame->headroom - delta; 4559fc8d518SToshiaki Makita skb = veth_build_skb(head, headroom, len, 0); 4569fc8d518SToshiaki Makita if (!skb) { 4579fc8d518SToshiaki Makita xdp_return_frame(frame); 4589fc8d518SToshiaki Makita goto err; 4599fc8d518SToshiaki Makita } 4609fc8d518SToshiaki Makita 4619fc8d518SToshiaki Makita xdp_scrub_frame(frame); 462638264dcSToshiaki Makita skb->protocol = eth_type_trans(skb, rq->dev); 4639fc8d518SToshiaki Makita err: 4649fc8d518SToshiaki Makita return skb; 4659fc8d518SToshiaki Makita err_xdp: 4669fc8d518SToshiaki Makita rcu_read_unlock(); 4679fc8d518SToshiaki Makita xdp_return_frame(frame); 468d1396004SToshiaki Makita xdp_xmit: 4699fc8d518SToshiaki Makita return NULL; 4709fc8d518SToshiaki Makita } 4719fc8d518SToshiaki Makita 472638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb, 473d1396004SToshiaki Makita unsigned int *xdp_xmit) 474948d4f21SToshiaki Makita { 475948d4f21SToshiaki Makita u32 pktlen, headroom, act, metalen; 476948d4f21SToshiaki Makita void *orig_data, *orig_data_end; 477948d4f21SToshiaki Makita struct bpf_prog *xdp_prog; 478948d4f21SToshiaki Makita int mac_len, delta, off; 479948d4f21SToshiaki Makita struct xdp_buff xdp; 480948d4f21SToshiaki Makita 481948d4f21SToshiaki Makita rcu_read_lock(); 482638264dcSToshiaki Makita xdp_prog = rcu_dereference(rq->xdp_prog); 483948d4f21SToshiaki Makita if (unlikely(!xdp_prog)) { 484948d4f21SToshiaki Makita rcu_read_unlock(); 485948d4f21SToshiaki Makita goto out; 486948d4f21SToshiaki Makita } 487948d4f21SToshiaki Makita 488948d4f21SToshiaki Makita mac_len = skb->data - skb_mac_header(skb); 489948d4f21SToshiaki Makita pktlen = skb->len + mac_len; 490948d4f21SToshiaki Makita headroom = skb_headroom(skb) - mac_len; 491948d4f21SToshiaki Makita 492948d4f21SToshiaki Makita if (skb_shared(skb) || skb_head_is_locked(skb) || 493948d4f21SToshiaki Makita skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { 494948d4f21SToshiaki Makita struct sk_buff *nskb; 495948d4f21SToshiaki Makita int size, head_off; 496948d4f21SToshiaki Makita void *head, *start; 497948d4f21SToshiaki Makita struct page *page; 498948d4f21SToshiaki Makita 499948d4f21SToshiaki Makita size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) + 500948d4f21SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 501948d4f21SToshiaki Makita if (size > PAGE_SIZE) 502948d4f21SToshiaki Makita goto drop; 503948d4f21SToshiaki Makita 504948d4f21SToshiaki Makita page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 505948d4f21SToshiaki Makita if (!page) 506948d4f21SToshiaki Makita goto drop; 507948d4f21SToshiaki Makita 508948d4f21SToshiaki Makita head = page_address(page); 509948d4f21SToshiaki Makita start = head + VETH_XDP_HEADROOM; 510948d4f21SToshiaki Makita if (skb_copy_bits(skb, -mac_len, start, pktlen)) { 511948d4f21SToshiaki Makita page_frag_free(head); 512948d4f21SToshiaki Makita goto drop; 513948d4f21SToshiaki Makita } 514948d4f21SToshiaki Makita 515948d4f21SToshiaki Makita nskb = veth_build_skb(head, 516948d4f21SToshiaki Makita VETH_XDP_HEADROOM + mac_len, skb->len, 517948d4f21SToshiaki Makita PAGE_SIZE); 518948d4f21SToshiaki Makita if (!nskb) { 519948d4f21SToshiaki Makita page_frag_free(head); 520948d4f21SToshiaki Makita goto drop; 521948d4f21SToshiaki Makita } 522948d4f21SToshiaki Makita 523948d4f21SToshiaki Makita skb_copy_header(nskb, skb); 524948d4f21SToshiaki Makita head_off = skb_headroom(nskb) - skb_headroom(skb); 525948d4f21SToshiaki Makita skb_headers_offset_update(nskb, head_off); 526948d4f21SToshiaki Makita if (skb->sk) 527948d4f21SToshiaki Makita skb_set_owner_w(nskb, skb->sk); 528948d4f21SToshiaki Makita consume_skb(skb); 529948d4f21SToshiaki Makita skb = nskb; 530948d4f21SToshiaki Makita } 531948d4f21SToshiaki Makita 532948d4f21SToshiaki Makita xdp.data_hard_start = skb->head; 533948d4f21SToshiaki Makita xdp.data = skb_mac_header(skb); 534948d4f21SToshiaki Makita xdp.data_end = xdp.data + pktlen; 535948d4f21SToshiaki Makita xdp.data_meta = xdp.data; 536638264dcSToshiaki Makita xdp.rxq = &rq->xdp_rxq; 537948d4f21SToshiaki Makita orig_data = xdp.data; 538948d4f21SToshiaki Makita orig_data_end = xdp.data_end; 539948d4f21SToshiaki Makita 540948d4f21SToshiaki Makita act = bpf_prog_run_xdp(xdp_prog, &xdp); 541948d4f21SToshiaki Makita 542948d4f21SToshiaki Makita switch (act) { 543948d4f21SToshiaki Makita case XDP_PASS: 544948d4f21SToshiaki Makita break; 545d1396004SToshiaki Makita case XDP_TX: 546d1396004SToshiaki Makita get_page(virt_to_page(xdp.data)); 547d1396004SToshiaki Makita consume_skb(skb); 548638264dcSToshiaki Makita xdp.rxq->mem = rq->xdp_mem; 549638264dcSToshiaki Makita if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) { 550638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 551d1396004SToshiaki Makita goto err_xdp; 552d1396004SToshiaki Makita } 553d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_TX; 554d1396004SToshiaki Makita rcu_read_unlock(); 555d1396004SToshiaki Makita goto xdp_xmit; 556d1396004SToshiaki Makita case XDP_REDIRECT: 557d1396004SToshiaki Makita get_page(virt_to_page(xdp.data)); 558d1396004SToshiaki Makita consume_skb(skb); 559638264dcSToshiaki Makita xdp.rxq->mem = rq->xdp_mem; 560638264dcSToshiaki Makita if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) 561d1396004SToshiaki Makita goto err_xdp; 562d1396004SToshiaki Makita *xdp_xmit |= VETH_XDP_REDIR; 563d1396004SToshiaki Makita rcu_read_unlock(); 564d1396004SToshiaki Makita goto xdp_xmit; 565948d4f21SToshiaki Makita default: 566948d4f21SToshiaki Makita bpf_warn_invalid_xdp_action(act); 567948d4f21SToshiaki Makita case XDP_ABORTED: 568638264dcSToshiaki Makita trace_xdp_exception(rq->dev, xdp_prog, act); 569948d4f21SToshiaki Makita case XDP_DROP: 570948d4f21SToshiaki Makita goto drop; 571948d4f21SToshiaki Makita } 572948d4f21SToshiaki Makita rcu_read_unlock(); 573948d4f21SToshiaki Makita 574948d4f21SToshiaki Makita delta = orig_data - xdp.data; 575948d4f21SToshiaki Makita off = mac_len + delta; 576948d4f21SToshiaki Makita if (off > 0) 577948d4f21SToshiaki Makita __skb_push(skb, off); 578948d4f21SToshiaki Makita else if (off < 0) 579948d4f21SToshiaki Makita __skb_pull(skb, -off); 580948d4f21SToshiaki Makita skb->mac_header -= delta; 581948d4f21SToshiaki Makita off = xdp.data_end - orig_data_end; 582948d4f21SToshiaki Makita if (off != 0) 583948d4f21SToshiaki Makita __skb_put(skb, off); 584638264dcSToshiaki Makita skb->protocol = eth_type_trans(skb, rq->dev); 585948d4f21SToshiaki Makita 586948d4f21SToshiaki Makita metalen = xdp.data - xdp.data_meta; 587948d4f21SToshiaki Makita if (metalen) 588948d4f21SToshiaki Makita skb_metadata_set(skb, metalen); 589948d4f21SToshiaki Makita out: 590948d4f21SToshiaki Makita return skb; 591948d4f21SToshiaki Makita drop: 592948d4f21SToshiaki Makita rcu_read_unlock(); 593948d4f21SToshiaki Makita kfree_skb(skb); 594948d4f21SToshiaki Makita return NULL; 595d1396004SToshiaki Makita err_xdp: 596d1396004SToshiaki Makita rcu_read_unlock(); 597d1396004SToshiaki Makita page_frag_free(xdp.data); 598d1396004SToshiaki Makita xdp_xmit: 599d1396004SToshiaki Makita return NULL; 600948d4f21SToshiaki Makita } 601948d4f21SToshiaki Makita 602638264dcSToshiaki Makita static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit) 603948d4f21SToshiaki Makita { 604948d4f21SToshiaki Makita int i, done = 0; 605948d4f21SToshiaki Makita 606948d4f21SToshiaki Makita for (i = 0; i < budget; i++) { 607638264dcSToshiaki Makita void *ptr = __ptr_ring_consume(&rq->xdp_ring); 6089fc8d518SToshiaki Makita struct sk_buff *skb; 609948d4f21SToshiaki Makita 6109fc8d518SToshiaki Makita if (!ptr) 611948d4f21SToshiaki Makita break; 612948d4f21SToshiaki Makita 613d1396004SToshiaki Makita if (veth_is_xdp_frame(ptr)) { 614638264dcSToshiaki Makita skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr), 615d1396004SToshiaki Makita xdp_xmit); 616d1396004SToshiaki Makita } else { 617638264dcSToshiaki Makita skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit); 618d1396004SToshiaki Makita } 619948d4f21SToshiaki Makita 620948d4f21SToshiaki Makita if (skb) 621638264dcSToshiaki Makita napi_gro_receive(&rq->xdp_napi, skb); 622948d4f21SToshiaki Makita 623948d4f21SToshiaki Makita done++; 624948d4f21SToshiaki Makita } 625948d4f21SToshiaki Makita 626948d4f21SToshiaki Makita return done; 627948d4f21SToshiaki Makita } 628948d4f21SToshiaki Makita 629948d4f21SToshiaki Makita static int veth_poll(struct napi_struct *napi, int budget) 630948d4f21SToshiaki Makita { 631638264dcSToshiaki Makita struct veth_rq *rq = 632638264dcSToshiaki Makita container_of(napi, struct veth_rq, xdp_napi); 633d1396004SToshiaki Makita unsigned int xdp_xmit = 0; 634948d4f21SToshiaki Makita int done; 635948d4f21SToshiaki Makita 636d1396004SToshiaki Makita xdp_set_return_frame_no_direct(); 637638264dcSToshiaki Makita done = veth_xdp_rcv(rq, budget, &xdp_xmit); 638948d4f21SToshiaki Makita 639948d4f21SToshiaki Makita if (done < budget && napi_complete_done(napi, done)) { 640948d4f21SToshiaki Makita /* Write rx_notify_masked before reading ptr_ring */ 641638264dcSToshiaki Makita smp_store_mb(rq->rx_notify_masked, false); 642638264dcSToshiaki Makita if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 643638264dcSToshiaki Makita rq->rx_notify_masked = true; 644638264dcSToshiaki Makita napi_schedule(&rq->xdp_napi); 645948d4f21SToshiaki Makita } 646948d4f21SToshiaki Makita } 647948d4f21SToshiaki Makita 648d1396004SToshiaki Makita if (xdp_xmit & VETH_XDP_TX) 649638264dcSToshiaki Makita veth_xdp_flush(rq->dev); 650d1396004SToshiaki Makita if (xdp_xmit & VETH_XDP_REDIR) 651d1396004SToshiaki Makita xdp_do_flush_map(); 652d1396004SToshiaki Makita xdp_clear_return_frame_no_direct(); 653d1396004SToshiaki Makita 654948d4f21SToshiaki Makita return done; 655948d4f21SToshiaki Makita } 656948d4f21SToshiaki Makita 657948d4f21SToshiaki Makita static int veth_napi_add(struct net_device *dev) 658948d4f21SToshiaki Makita { 659948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 660638264dcSToshiaki Makita int err, i; 661948d4f21SToshiaki Makita 662638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 663638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 664638264dcSToshiaki Makita 665638264dcSToshiaki Makita err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 666948d4f21SToshiaki Makita if (err) 667638264dcSToshiaki Makita goto err_xdp_ring; 668638264dcSToshiaki Makita } 669948d4f21SToshiaki Makita 670638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 671638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 672638264dcSToshiaki Makita 673638264dcSToshiaki Makita netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); 674638264dcSToshiaki Makita napi_enable(&rq->xdp_napi); 675638264dcSToshiaki Makita } 676948d4f21SToshiaki Makita 677948d4f21SToshiaki Makita return 0; 678638264dcSToshiaki Makita err_xdp_ring: 679638264dcSToshiaki Makita for (i--; i >= 0; i--) 680638264dcSToshiaki Makita ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 681638264dcSToshiaki Makita 682638264dcSToshiaki Makita return err; 683948d4f21SToshiaki Makita } 684948d4f21SToshiaki Makita 685948d4f21SToshiaki Makita static void veth_napi_del(struct net_device *dev) 686948d4f21SToshiaki Makita { 687948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 688638264dcSToshiaki Makita int i; 689948d4f21SToshiaki Makita 690638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 691638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 692638264dcSToshiaki Makita 693638264dcSToshiaki Makita napi_disable(&rq->xdp_napi); 694638264dcSToshiaki Makita napi_hash_del(&rq->xdp_napi); 695638264dcSToshiaki Makita } 696638264dcSToshiaki Makita synchronize_net(); 697638264dcSToshiaki Makita 698638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 699638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 700638264dcSToshiaki Makita 701638264dcSToshiaki Makita netif_napi_del(&rq->xdp_napi); 702638264dcSToshiaki Makita rq->rx_notify_masked = false; 703638264dcSToshiaki Makita ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 704638264dcSToshiaki Makita } 705948d4f21SToshiaki Makita } 706948d4f21SToshiaki Makita 707948d4f21SToshiaki Makita static int veth_enable_xdp(struct net_device *dev) 708948d4f21SToshiaki Makita { 709948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 710638264dcSToshiaki Makita int err, i; 711948d4f21SToshiaki Makita 712638264dcSToshiaki Makita if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 713638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 714638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 715948d4f21SToshiaki Makita 716638264dcSToshiaki Makita err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i); 717948d4f21SToshiaki Makita if (err < 0) 718638264dcSToshiaki Makita goto err_rxq_reg; 719638264dcSToshiaki Makita 720638264dcSToshiaki Makita err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 721638264dcSToshiaki Makita MEM_TYPE_PAGE_SHARED, 722638264dcSToshiaki Makita NULL); 723638264dcSToshiaki Makita if (err < 0) 724638264dcSToshiaki Makita goto err_reg_mem; 725638264dcSToshiaki Makita 726638264dcSToshiaki Makita /* Save original mem info as it can be overwritten */ 727638264dcSToshiaki Makita rq->xdp_mem = rq->xdp_rxq.mem; 728638264dcSToshiaki Makita } 729948d4f21SToshiaki Makita 730948d4f21SToshiaki Makita err = veth_napi_add(dev); 731948d4f21SToshiaki Makita if (err) 732638264dcSToshiaki Makita goto err_rxq_reg; 733948d4f21SToshiaki Makita } 734948d4f21SToshiaki Makita 735638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) 736638264dcSToshiaki Makita rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 737948d4f21SToshiaki Makita 738948d4f21SToshiaki Makita return 0; 739638264dcSToshiaki Makita err_reg_mem: 740638264dcSToshiaki Makita xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 741638264dcSToshiaki Makita err_rxq_reg: 742638264dcSToshiaki Makita for (i--; i >= 0; i--) 743638264dcSToshiaki Makita xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 744948d4f21SToshiaki Makita 745948d4f21SToshiaki Makita return err; 746948d4f21SToshiaki Makita } 747948d4f21SToshiaki Makita 748948d4f21SToshiaki Makita static void veth_disable_xdp(struct net_device *dev) 749948d4f21SToshiaki Makita { 750948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 751638264dcSToshiaki Makita int i; 752948d4f21SToshiaki Makita 753638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) 754638264dcSToshiaki Makita rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 755948d4f21SToshiaki Makita veth_napi_del(dev); 756638264dcSToshiaki Makita for (i = 0; i < dev->real_num_rx_queues; i++) { 757638264dcSToshiaki Makita struct veth_rq *rq = &priv->rq[i]; 758638264dcSToshiaki Makita 759638264dcSToshiaki Makita rq->xdp_rxq.mem = rq->xdp_mem; 760638264dcSToshiaki Makita xdp_rxq_info_unreg(&rq->xdp_rxq); 761638264dcSToshiaki Makita } 762948d4f21SToshiaki Makita } 763948d4f21SToshiaki Makita 764e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev) 765e314dbdcSPavel Emelyanov { 766d0e2c55eSEric Dumazet struct veth_priv *priv = netdev_priv(dev); 767d0e2c55eSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 768948d4f21SToshiaki Makita int err; 769e314dbdcSPavel Emelyanov 770d0e2c55eSEric Dumazet if (!peer) 771e314dbdcSPavel Emelyanov return -ENOTCONN; 772e314dbdcSPavel Emelyanov 773948d4f21SToshiaki Makita if (priv->_xdp_prog) { 774948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 775948d4f21SToshiaki Makita if (err) 776948d4f21SToshiaki Makita return err; 777948d4f21SToshiaki Makita } 778948d4f21SToshiaki Makita 779d0e2c55eSEric Dumazet if (peer->flags & IFF_UP) { 780e314dbdcSPavel Emelyanov netif_carrier_on(dev); 781d0e2c55eSEric Dumazet netif_carrier_on(peer); 782e314dbdcSPavel Emelyanov } 783948d4f21SToshiaki Makita 784e314dbdcSPavel Emelyanov return 0; 785e314dbdcSPavel Emelyanov } 786e314dbdcSPavel Emelyanov 7872cf48a10SEric W. Biederman static int veth_close(struct net_device *dev) 7882cf48a10SEric W. Biederman { 7892cf48a10SEric W. Biederman struct veth_priv *priv = netdev_priv(dev); 7902efd32eeSEric Dumazet struct net_device *peer = rtnl_dereference(priv->peer); 7912cf48a10SEric W. Biederman 7922cf48a10SEric W. Biederman netif_carrier_off(dev); 7932efd32eeSEric Dumazet if (peer) 7942efd32eeSEric Dumazet netif_carrier_off(peer); 7952cf48a10SEric W. Biederman 796948d4f21SToshiaki Makita if (priv->_xdp_prog) 797948d4f21SToshiaki Makita veth_disable_xdp(dev); 798948d4f21SToshiaki Makita 7992cf48a10SEric W. Biederman return 0; 8002cf48a10SEric W. Biederman } 8012cf48a10SEric W. Biederman 80291572088SJarod Wilson static int is_valid_veth_mtu(int mtu) 80338d40815SEric Biederman { 80491572088SJarod Wilson return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 80538d40815SEric Biederman } 80638d40815SEric Biederman 8077797b93bSToshiaki Makita static int veth_alloc_queues(struct net_device *dev) 8087797b93bSToshiaki Makita { 8097797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 8107797b93bSToshiaki Makita int i; 8117797b93bSToshiaki Makita 8127797b93bSToshiaki Makita priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL); 8137797b93bSToshiaki Makita if (!priv->rq) 8147797b93bSToshiaki Makita return -ENOMEM; 8157797b93bSToshiaki Makita 8167797b93bSToshiaki Makita for (i = 0; i < dev->num_rx_queues; i++) 8177797b93bSToshiaki Makita priv->rq[i].dev = dev; 8187797b93bSToshiaki Makita 8197797b93bSToshiaki Makita return 0; 8207797b93bSToshiaki Makita } 8217797b93bSToshiaki Makita 8227797b93bSToshiaki Makita static void veth_free_queues(struct net_device *dev) 8237797b93bSToshiaki Makita { 8247797b93bSToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 8257797b93bSToshiaki Makita 8267797b93bSToshiaki Makita kfree(priv->rq); 8277797b93bSToshiaki Makita } 8287797b93bSToshiaki Makita 829e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev) 830e314dbdcSPavel Emelyanov { 8317797b93bSToshiaki Makita int err; 8327797b93bSToshiaki Makita 8331c213bd2SWANG Cong dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 8342681128fSEric Dumazet if (!dev->vstats) 835e314dbdcSPavel Emelyanov return -ENOMEM; 8367797b93bSToshiaki Makita 8377797b93bSToshiaki Makita err = veth_alloc_queues(dev); 8387797b93bSToshiaki Makita if (err) { 8397797b93bSToshiaki Makita free_percpu(dev->vstats); 8407797b93bSToshiaki Makita return err; 8417797b93bSToshiaki Makita } 8427797b93bSToshiaki Makita 843e314dbdcSPavel Emelyanov return 0; 844e314dbdcSPavel Emelyanov } 845e314dbdcSPavel Emelyanov 84611687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev) 84711687a10SDavid S. Miller { 8487797b93bSToshiaki Makita veth_free_queues(dev); 8492681128fSEric Dumazet free_percpu(dev->vstats); 85011687a10SDavid S. Miller } 85111687a10SDavid S. Miller 852bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 853bb446c19SWANG Cong static void veth_poll_controller(struct net_device *dev) 854bb446c19SWANG Cong { 855bb446c19SWANG Cong /* veth only receives frames when its peer sends one 856948d4f21SToshiaki Makita * Since it has nothing to do with disabling irqs, we are guaranteed 857bb446c19SWANG Cong * never to have pending data when we poll for it so 858bb446c19SWANG Cong * there is nothing to do here. 859bb446c19SWANG Cong * 860bb446c19SWANG Cong * We need this though so netpoll recognizes us as an interface that 861bb446c19SWANG Cong * supports polling, which enables bridge devices in virt setups to 862bb446c19SWANG Cong * still use netconsole 863bb446c19SWANG Cong */ 864bb446c19SWANG Cong } 865bb446c19SWANG Cong #endif /* CONFIG_NET_POLL_CONTROLLER */ 866bb446c19SWANG Cong 867a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev) 868a45253bfSNicolas Dichtel { 869a45253bfSNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 870a45253bfSNicolas Dichtel struct net_device *peer; 871a45253bfSNicolas Dichtel int iflink; 872a45253bfSNicolas Dichtel 873a45253bfSNicolas Dichtel rcu_read_lock(); 874a45253bfSNicolas Dichtel peer = rcu_dereference(priv->peer); 875a45253bfSNicolas Dichtel iflink = peer ? peer->ifindex : 0; 876a45253bfSNicolas Dichtel rcu_read_unlock(); 877a45253bfSNicolas Dichtel 878a45253bfSNicolas Dichtel return iflink; 879a45253bfSNicolas Dichtel } 880a45253bfSNicolas Dichtel 881dc224822SToshiaki Makita static netdev_features_t veth_fix_features(struct net_device *dev, 882dc224822SToshiaki Makita netdev_features_t features) 883dc224822SToshiaki Makita { 884dc224822SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 885dc224822SToshiaki Makita struct net_device *peer; 886dc224822SToshiaki Makita 887dc224822SToshiaki Makita peer = rtnl_dereference(priv->peer); 888dc224822SToshiaki Makita if (peer) { 889dc224822SToshiaki Makita struct veth_priv *peer_priv = netdev_priv(peer); 890dc224822SToshiaki Makita 891dc224822SToshiaki Makita if (peer_priv->_xdp_prog) 892dc224822SToshiaki Makita features &= ~NETIF_F_GSO_SOFTWARE; 893dc224822SToshiaki Makita } 894dc224822SToshiaki Makita 895dc224822SToshiaki Makita return features; 896dc224822SToshiaki Makita } 897dc224822SToshiaki Makita 898163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 899163e5292SPaolo Abeni { 900163e5292SPaolo Abeni struct veth_priv *peer_priv, *priv = netdev_priv(dev); 901163e5292SPaolo Abeni struct net_device *peer; 902163e5292SPaolo Abeni 903163e5292SPaolo Abeni if (new_hr < 0) 904163e5292SPaolo Abeni new_hr = 0; 905163e5292SPaolo Abeni 906163e5292SPaolo Abeni rcu_read_lock(); 907163e5292SPaolo Abeni peer = rcu_dereference(priv->peer); 908163e5292SPaolo Abeni if (unlikely(!peer)) 909163e5292SPaolo Abeni goto out; 910163e5292SPaolo Abeni 911163e5292SPaolo Abeni peer_priv = netdev_priv(peer); 912163e5292SPaolo Abeni priv->requested_headroom = new_hr; 913163e5292SPaolo Abeni new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 914163e5292SPaolo Abeni dev->needed_headroom = new_hr; 915163e5292SPaolo Abeni peer->needed_headroom = new_hr; 916163e5292SPaolo Abeni 917163e5292SPaolo Abeni out: 918163e5292SPaolo Abeni rcu_read_unlock(); 919163e5292SPaolo Abeni } 920163e5292SPaolo Abeni 921948d4f21SToshiaki Makita static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 922948d4f21SToshiaki Makita struct netlink_ext_ack *extack) 923948d4f21SToshiaki Makita { 924948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 925948d4f21SToshiaki Makita struct bpf_prog *old_prog; 926948d4f21SToshiaki Makita struct net_device *peer; 927dc224822SToshiaki Makita unsigned int max_mtu; 928948d4f21SToshiaki Makita int err; 929948d4f21SToshiaki Makita 930948d4f21SToshiaki Makita old_prog = priv->_xdp_prog; 931948d4f21SToshiaki Makita priv->_xdp_prog = prog; 932948d4f21SToshiaki Makita peer = rtnl_dereference(priv->peer); 933948d4f21SToshiaki Makita 934948d4f21SToshiaki Makita if (prog) { 935948d4f21SToshiaki Makita if (!peer) { 936948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 937948d4f21SToshiaki Makita err = -ENOTCONN; 938948d4f21SToshiaki Makita goto err; 939948d4f21SToshiaki Makita } 940948d4f21SToshiaki Makita 941dc224822SToshiaki Makita max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - 942dc224822SToshiaki Makita peer->hard_header_len - 943dc224822SToshiaki Makita SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 944dc224822SToshiaki Makita if (peer->mtu > max_mtu) { 945dc224822SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 946dc224822SToshiaki Makita err = -ERANGE; 947dc224822SToshiaki Makita goto err; 948dc224822SToshiaki Makita } 949dc224822SToshiaki Makita 950638264dcSToshiaki Makita if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 951638264dcSToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 952638264dcSToshiaki Makita err = -ENOSPC; 953638264dcSToshiaki Makita goto err; 954638264dcSToshiaki Makita } 955638264dcSToshiaki Makita 956948d4f21SToshiaki Makita if (dev->flags & IFF_UP) { 957948d4f21SToshiaki Makita err = veth_enable_xdp(dev); 958948d4f21SToshiaki Makita if (err) { 959948d4f21SToshiaki Makita NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 960948d4f21SToshiaki Makita goto err; 961948d4f21SToshiaki Makita } 962948d4f21SToshiaki Makita } 963dc224822SToshiaki Makita 964dc224822SToshiaki Makita if (!old_prog) { 965dc224822SToshiaki Makita peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 966dc224822SToshiaki Makita peer->max_mtu = max_mtu; 967dc224822SToshiaki Makita } 968948d4f21SToshiaki Makita } 969948d4f21SToshiaki Makita 970948d4f21SToshiaki Makita if (old_prog) { 971dc224822SToshiaki Makita if (!prog) { 972dc224822SToshiaki Makita if (dev->flags & IFF_UP) 973948d4f21SToshiaki Makita veth_disable_xdp(dev); 974dc224822SToshiaki Makita 975dc224822SToshiaki Makita if (peer) { 976dc224822SToshiaki Makita peer->hw_features |= NETIF_F_GSO_SOFTWARE; 977dc224822SToshiaki Makita peer->max_mtu = ETH_MAX_MTU; 978dc224822SToshiaki Makita } 979dc224822SToshiaki Makita } 980948d4f21SToshiaki Makita bpf_prog_put(old_prog); 981948d4f21SToshiaki Makita } 982948d4f21SToshiaki Makita 983dc224822SToshiaki Makita if ((!!old_prog ^ !!prog) && peer) 984dc224822SToshiaki Makita netdev_update_features(peer); 985dc224822SToshiaki Makita 986948d4f21SToshiaki Makita return 0; 987948d4f21SToshiaki Makita err: 988948d4f21SToshiaki Makita priv->_xdp_prog = old_prog; 989948d4f21SToshiaki Makita 990948d4f21SToshiaki Makita return err; 991948d4f21SToshiaki Makita } 992948d4f21SToshiaki Makita 993948d4f21SToshiaki Makita static u32 veth_xdp_query(struct net_device *dev) 994948d4f21SToshiaki Makita { 995948d4f21SToshiaki Makita struct veth_priv *priv = netdev_priv(dev); 996948d4f21SToshiaki Makita const struct bpf_prog *xdp_prog; 997948d4f21SToshiaki Makita 998948d4f21SToshiaki Makita xdp_prog = priv->_xdp_prog; 999948d4f21SToshiaki Makita if (xdp_prog) 1000948d4f21SToshiaki Makita return xdp_prog->aux->id; 1001948d4f21SToshiaki Makita 1002948d4f21SToshiaki Makita return 0; 1003948d4f21SToshiaki Makita } 1004948d4f21SToshiaki Makita 1005948d4f21SToshiaki Makita static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1006948d4f21SToshiaki Makita { 1007948d4f21SToshiaki Makita switch (xdp->command) { 1008948d4f21SToshiaki Makita case XDP_SETUP_PROG: 1009948d4f21SToshiaki Makita return veth_xdp_set(dev, xdp->prog, xdp->extack); 1010948d4f21SToshiaki Makita case XDP_QUERY_PROG: 1011948d4f21SToshiaki Makita xdp->prog_id = veth_xdp_query(dev); 1012948d4f21SToshiaki Makita return 0; 1013948d4f21SToshiaki Makita default: 1014948d4f21SToshiaki Makita return -EINVAL; 1015948d4f21SToshiaki Makita } 1016948d4f21SToshiaki Makita } 1017948d4f21SToshiaki Makita 10184456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = { 10194456e7bdSStephen Hemminger .ndo_init = veth_dev_init, 10204456e7bdSStephen Hemminger .ndo_open = veth_open, 10212cf48a10SEric W. Biederman .ndo_stop = veth_close, 102200829823SStephen Hemminger .ndo_start_xmit = veth_xmit, 10236311cc44Sstephen hemminger .ndo_get_stats64 = veth_get_stats64, 10245c70ef85SGao feng .ndo_set_rx_mode = veth_set_multicast_list, 1025ee923623SDaniel Lezcano .ndo_set_mac_address = eth_mac_addr, 1026bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER 1027bb446c19SWANG Cong .ndo_poll_controller = veth_poll_controller, 1028bb446c19SWANG Cong #endif 1029a45253bfSNicolas Dichtel .ndo_get_iflink = veth_get_iflink, 1030dc224822SToshiaki Makita .ndo_fix_features = veth_fix_features, 10311a04a821SToshiaki Makita .ndo_features_check = passthru_features_check, 1032163e5292SPaolo Abeni .ndo_set_rx_headroom = veth_set_rx_headroom, 1033948d4f21SToshiaki Makita .ndo_bpf = veth_xdp, 1034af87a3aaSToshiaki Makita .ndo_xdp_xmit = veth_xdp_xmit, 10354456e7bdSStephen Hemminger }; 10364456e7bdSStephen Hemminger 1037732912d7SAlexander Duyck #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1038c80fafbbSXin Long NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1039732912d7SAlexander Duyck NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 104028d2b136SPatrick McHardy NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 104128d2b136SPatrick McHardy NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 10428093315aSEric Dumazet 1043e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev) 1044e314dbdcSPavel Emelyanov { 1045e314dbdcSPavel Emelyanov ether_setup(dev); 1046e314dbdcSPavel Emelyanov 1047550fd08cSNeil Horman dev->priv_flags &= ~IFF_TX_SKB_SHARING; 104823ea5a96SHannes Frederic Sowa dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 104902f01ec1SPhil Sutter dev->priv_flags |= IFF_NO_QUEUE; 1050163e5292SPaolo Abeni dev->priv_flags |= IFF_PHONY_HEADROOM; 1051550fd08cSNeil Horman 10524456e7bdSStephen Hemminger dev->netdev_ops = &veth_netdev_ops; 1053e314dbdcSPavel Emelyanov dev->ethtool_ops = &veth_ethtool_ops; 1054e314dbdcSPavel Emelyanov dev->features |= NETIF_F_LLTX; 10558093315aSEric Dumazet dev->features |= VETH_FEATURES; 10568d0d21f4SToshiaki Makita dev->vlan_features = dev->features & 10573f8c707bSVlad Yasevich ~(NETIF_F_HW_VLAN_CTAG_TX | 10583f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_TX | 10593f8c707bSVlad Yasevich NETIF_F_HW_VLAN_CTAG_RX | 10603f8c707bSVlad Yasevich NETIF_F_HW_VLAN_STAG_RX); 1061cf124db5SDavid S. Miller dev->needs_free_netdev = true; 1062cf124db5SDavid S. Miller dev->priv_destructor = veth_dev_free; 106391572088SJarod Wilson dev->max_mtu = ETH_MAX_MTU; 1064a2c725faSMichał Mirosław 10658093315aSEric Dumazet dev->hw_features = VETH_FEATURES; 106682d81898SEric Dumazet dev->hw_enc_features = VETH_FEATURES; 1067607fca9aSDavid Ahern dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1068e314dbdcSPavel Emelyanov } 1069e314dbdcSPavel Emelyanov 1070e314dbdcSPavel Emelyanov /* 1071e314dbdcSPavel Emelyanov * netlink interface 1072e314dbdcSPavel Emelyanov */ 1073e314dbdcSPavel Emelyanov 1074a8b8a889SMatthias Schiffer static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1075a8b8a889SMatthias Schiffer struct netlink_ext_ack *extack) 1076e314dbdcSPavel Emelyanov { 1077e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS]) { 1078e314dbdcSPavel Emelyanov if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1079e314dbdcSPavel Emelyanov return -EINVAL; 1080e314dbdcSPavel Emelyanov if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1081e314dbdcSPavel Emelyanov return -EADDRNOTAVAIL; 1082e314dbdcSPavel Emelyanov } 108338d40815SEric Biederman if (tb[IFLA_MTU]) { 108438d40815SEric Biederman if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 108538d40815SEric Biederman return -EINVAL; 108638d40815SEric Biederman } 1087e314dbdcSPavel Emelyanov return 0; 1088e314dbdcSPavel Emelyanov } 1089e314dbdcSPavel Emelyanov 1090e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops; 1091e314dbdcSPavel Emelyanov 109281adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev, 10937a3f4a18SMatthias Schiffer struct nlattr *tb[], struct nlattr *data[], 10947a3f4a18SMatthias Schiffer struct netlink_ext_ack *extack) 1095e314dbdcSPavel Emelyanov { 10967797b93bSToshiaki Makita int err; 1097e314dbdcSPavel Emelyanov struct net_device *peer; 1098e314dbdcSPavel Emelyanov struct veth_priv *priv; 1099e314dbdcSPavel Emelyanov char ifname[IFNAMSIZ]; 1100e314dbdcSPavel Emelyanov struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 11015517750fSTom Gundersen unsigned char name_assign_type; 11023729d502SPatrick McHardy struct ifinfomsg *ifmp; 110381adee47SEric W. Biederman struct net *net; 1104e314dbdcSPavel Emelyanov 1105e314dbdcSPavel Emelyanov /* 1106e314dbdcSPavel Emelyanov * create and register peer first 1107e314dbdcSPavel Emelyanov */ 1108e314dbdcSPavel Emelyanov if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1109e314dbdcSPavel Emelyanov struct nlattr *nla_peer; 1110e314dbdcSPavel Emelyanov 1111e314dbdcSPavel Emelyanov nla_peer = data[VETH_INFO_PEER]; 11123729d502SPatrick McHardy ifmp = nla_data(nla_peer); 1113f7b12606SJiri Pirko err = rtnl_nla_parse_ifla(peer_tb, 1114e314dbdcSPavel Emelyanov nla_data(nla_peer) + sizeof(struct ifinfomsg), 1115fceb6435SJohannes Berg nla_len(nla_peer) - sizeof(struct ifinfomsg), 1116fceb6435SJohannes Berg NULL); 1117e314dbdcSPavel Emelyanov if (err < 0) 1118e314dbdcSPavel Emelyanov return err; 1119e314dbdcSPavel Emelyanov 1120a8b8a889SMatthias Schiffer err = veth_validate(peer_tb, NULL, extack); 1121e314dbdcSPavel Emelyanov if (err < 0) 1122e314dbdcSPavel Emelyanov return err; 1123e314dbdcSPavel Emelyanov 1124e314dbdcSPavel Emelyanov tbp = peer_tb; 11253729d502SPatrick McHardy } else { 11263729d502SPatrick McHardy ifmp = NULL; 1127e314dbdcSPavel Emelyanov tbp = tb; 11283729d502SPatrick McHardy } 1129e314dbdcSPavel Emelyanov 1130191cdb38SSerhey Popovych if (ifmp && tbp[IFLA_IFNAME]) { 1131e314dbdcSPavel Emelyanov nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 11325517750fSTom Gundersen name_assign_type = NET_NAME_USER; 11335517750fSTom Gundersen } else { 1134e314dbdcSPavel Emelyanov snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 11355517750fSTom Gundersen name_assign_type = NET_NAME_ENUM; 11365517750fSTom Gundersen } 1137e314dbdcSPavel Emelyanov 113881adee47SEric W. Biederman net = rtnl_link_get_net(src_net, tbp); 113981adee47SEric W. Biederman if (IS_ERR(net)) 114081adee47SEric W. Biederman return PTR_ERR(net); 114181adee47SEric W. Biederman 11425517750fSTom Gundersen peer = rtnl_create_link(net, ifname, name_assign_type, 11435517750fSTom Gundersen &veth_link_ops, tbp); 114481adee47SEric W. Biederman if (IS_ERR(peer)) { 114581adee47SEric W. Biederman put_net(net); 1146e314dbdcSPavel Emelyanov return PTR_ERR(peer); 114781adee47SEric W. Biederman } 1148e314dbdcSPavel Emelyanov 1149191cdb38SSerhey Popovych if (!ifmp || !tbp[IFLA_ADDRESS]) 1150f2cedb63SDanny Kukawka eth_hw_addr_random(peer); 1151e314dbdcSPavel Emelyanov 1152e6f8f1a7SPavel Emelyanov if (ifmp && (dev->ifindex != 0)) 1153e6f8f1a7SPavel Emelyanov peer->ifindex = ifmp->ifi_index; 1154e6f8f1a7SPavel Emelyanov 115572d24955SStephen Hemminger peer->gso_max_size = dev->gso_max_size; 115672d24955SStephen Hemminger peer->gso_max_segs = dev->gso_max_segs; 115772d24955SStephen Hemminger 1158e314dbdcSPavel Emelyanov err = register_netdevice(peer); 115981adee47SEric W. Biederman put_net(net); 116081adee47SEric W. Biederman net = NULL; 1161e314dbdcSPavel Emelyanov if (err < 0) 1162e314dbdcSPavel Emelyanov goto err_register_peer; 1163e314dbdcSPavel Emelyanov 1164e314dbdcSPavel Emelyanov netif_carrier_off(peer); 1165e314dbdcSPavel Emelyanov 11663729d502SPatrick McHardy err = rtnl_configure_link(peer, ifmp); 11673729d502SPatrick McHardy if (err < 0) 11683729d502SPatrick McHardy goto err_configure_peer; 11693729d502SPatrick McHardy 1170e314dbdcSPavel Emelyanov /* 1171e314dbdcSPavel Emelyanov * register dev last 1172e314dbdcSPavel Emelyanov * 1173e314dbdcSPavel Emelyanov * note, that since we've registered new device the dev's name 1174e314dbdcSPavel Emelyanov * should be re-allocated 1175e314dbdcSPavel Emelyanov */ 1176e314dbdcSPavel Emelyanov 1177e314dbdcSPavel Emelyanov if (tb[IFLA_ADDRESS] == NULL) 1178f2cedb63SDanny Kukawka eth_hw_addr_random(dev); 1179e314dbdcSPavel Emelyanov 11806c8c4446SJiri Pirko if (tb[IFLA_IFNAME]) 11816c8c4446SJiri Pirko nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 11826c8c4446SJiri Pirko else 11836c8c4446SJiri Pirko snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 11846c8c4446SJiri Pirko 1185e314dbdcSPavel Emelyanov err = register_netdevice(dev); 1186e314dbdcSPavel Emelyanov if (err < 0) 1187e314dbdcSPavel Emelyanov goto err_register_dev; 1188e314dbdcSPavel Emelyanov 1189e314dbdcSPavel Emelyanov netif_carrier_off(dev); 1190e314dbdcSPavel Emelyanov 1191e314dbdcSPavel Emelyanov /* 1192e314dbdcSPavel Emelyanov * tie the deviced together 1193e314dbdcSPavel Emelyanov */ 1194e314dbdcSPavel Emelyanov 1195e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1196d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, peer); 1197e314dbdcSPavel Emelyanov 1198e314dbdcSPavel Emelyanov priv = netdev_priv(peer); 1199d0e2c55eSEric Dumazet rcu_assign_pointer(priv->peer, dev); 1200948d4f21SToshiaki Makita 1201e314dbdcSPavel Emelyanov return 0; 1202e314dbdcSPavel Emelyanov 1203e314dbdcSPavel Emelyanov err_register_dev: 1204e314dbdcSPavel Emelyanov /* nothing to do */ 12053729d502SPatrick McHardy err_configure_peer: 1206e314dbdcSPavel Emelyanov unregister_netdevice(peer); 1207e314dbdcSPavel Emelyanov return err; 1208e314dbdcSPavel Emelyanov 1209e314dbdcSPavel Emelyanov err_register_peer: 1210e314dbdcSPavel Emelyanov free_netdev(peer); 1211e314dbdcSPavel Emelyanov return err; 1212e314dbdcSPavel Emelyanov } 1213e314dbdcSPavel Emelyanov 121423289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head) 1215e314dbdcSPavel Emelyanov { 1216e314dbdcSPavel Emelyanov struct veth_priv *priv; 1217e314dbdcSPavel Emelyanov struct net_device *peer; 1218e314dbdcSPavel Emelyanov 1219e314dbdcSPavel Emelyanov priv = netdev_priv(dev); 1220d0e2c55eSEric Dumazet peer = rtnl_dereference(priv->peer); 1221d0e2c55eSEric Dumazet 1222d0e2c55eSEric Dumazet /* Note : dellink() is called from default_device_exit_batch(), 1223d0e2c55eSEric Dumazet * before a rcu_synchronize() point. The devices are guaranteed 1224d0e2c55eSEric Dumazet * not being freed before one RCU grace period. 1225d0e2c55eSEric Dumazet */ 1226d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 1227f45a5c26SEric Dumazet unregister_netdevice_queue(dev, head); 1228d0e2c55eSEric Dumazet 1229f45a5c26SEric Dumazet if (peer) { 1230d0e2c55eSEric Dumazet priv = netdev_priv(peer); 1231d0e2c55eSEric Dumazet RCU_INIT_POINTER(priv->peer, NULL); 123224540535SEric Dumazet unregister_netdevice_queue(peer, head); 1233e314dbdcSPavel Emelyanov } 1234f45a5c26SEric Dumazet } 1235e314dbdcSPavel Emelyanov 123623711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 123723711438SThomas Graf [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 123823711438SThomas Graf }; 1239e314dbdcSPavel Emelyanov 1240e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev) 1241e5f4e7b9SNicolas Dichtel { 1242e5f4e7b9SNicolas Dichtel struct veth_priv *priv = netdev_priv(dev); 1243e5f4e7b9SNicolas Dichtel struct net_device *peer = rtnl_dereference(priv->peer); 1244e5f4e7b9SNicolas Dichtel 1245e5f4e7b9SNicolas Dichtel return peer ? dev_net(peer) : dev_net(dev); 1246e5f4e7b9SNicolas Dichtel } 1247e5f4e7b9SNicolas Dichtel 1248e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = { 1249e314dbdcSPavel Emelyanov .kind = DRV_NAME, 1250e314dbdcSPavel Emelyanov .priv_size = sizeof(struct veth_priv), 1251e314dbdcSPavel Emelyanov .setup = veth_setup, 1252e314dbdcSPavel Emelyanov .validate = veth_validate, 1253e314dbdcSPavel Emelyanov .newlink = veth_newlink, 1254e314dbdcSPavel Emelyanov .dellink = veth_dellink, 1255e314dbdcSPavel Emelyanov .policy = veth_policy, 1256e314dbdcSPavel Emelyanov .maxtype = VETH_INFO_MAX, 1257e5f4e7b9SNicolas Dichtel .get_link_net = veth_get_link_net, 1258e314dbdcSPavel Emelyanov }; 1259e314dbdcSPavel Emelyanov 1260e314dbdcSPavel Emelyanov /* 1261e314dbdcSPavel Emelyanov * init/fini 1262e314dbdcSPavel Emelyanov */ 1263e314dbdcSPavel Emelyanov 1264e314dbdcSPavel Emelyanov static __init int veth_init(void) 1265e314dbdcSPavel Emelyanov { 1266e314dbdcSPavel Emelyanov return rtnl_link_register(&veth_link_ops); 1267e314dbdcSPavel Emelyanov } 1268e314dbdcSPavel Emelyanov 1269e314dbdcSPavel Emelyanov static __exit void veth_exit(void) 1270e314dbdcSPavel Emelyanov { 127168365458SPatrick McHardy rtnl_link_unregister(&veth_link_ops); 1272e314dbdcSPavel Emelyanov } 1273e314dbdcSPavel Emelyanov 1274e314dbdcSPavel Emelyanov module_init(veth_init); 1275e314dbdcSPavel Emelyanov module_exit(veth_exit); 1276e314dbdcSPavel Emelyanov 1277e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 1278e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2"); 1279e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME); 1280