xref: /openbmc/linux/drivers/net/veth.c (revision 2131479d)
/*
 *  drivers/net/veth.c
 *
 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
 *
 * Author: Pavel Emelianov <xemul@openvz.org>
 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
 *
 */
10e314dbdcSPavel Emelyanov 
11e314dbdcSPavel Emelyanov #include <linux/netdevice.h>
125a0e3ad6STejun Heo #include <linux/slab.h>
13e314dbdcSPavel Emelyanov #include <linux/ethtool.h>
14e314dbdcSPavel Emelyanov #include <linux/etherdevice.h>
15cf05c700SEric Dumazet #include <linux/u64_stats_sync.h>
16e314dbdcSPavel Emelyanov 
17f7b12606SJiri Pirko #include <net/rtnetlink.h>
18e314dbdcSPavel Emelyanov #include <net/dst.h>
19e314dbdcSPavel Emelyanov #include <net/xfrm.h>
20af87a3aaSToshiaki Makita #include <net/xdp.h>
21ecef969eSStephen Hemminger #include <linux/veth.h>
229d9779e7SPaul Gortmaker #include <linux/module.h>
23948d4f21SToshiaki Makita #include <linux/bpf.h>
24948d4f21SToshiaki Makita #include <linux/filter.h>
25948d4f21SToshiaki Makita #include <linux/ptr_ring.h>
26948d4f21SToshiaki Makita #include <linux/bpf_trace.h>
27aa4e689eSMichael Walle #include <linux/net_tstamp.h>
28e314dbdcSPavel Emelyanov 
/* Driver identification reported through ethtool's drvinfo. */
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

/* Low pointer bit marking a ptr_ring entry as an xdp_frame (not an skb). */
#define VETH_XDP_FLAG		BIT(0)
/* Number of slots in each per-queue xdp_ring. */
#define VETH_RING_SIZE		256
/* Headroom reserved when an skb is copied into a fresh page for XDP. */
#define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

/*
 * Bits accumulated during one poll to record which kind of XDP transmit
 * happened, so the matching flush can be issued afterwards.
 */
#define VETH_XDP_TX		BIT(0)
#define VETH_XDP_REDIR		BIT(1)
39d1396004SToshiaki Makita 
40638264dcSToshiaki Makita struct veth_rq {
41948d4f21SToshiaki Makita 	struct napi_struct	xdp_napi;
42948d4f21SToshiaki Makita 	struct net_device	*dev;
43948d4f21SToshiaki Makita 	struct bpf_prog __rcu	*xdp_prog;
44d1396004SToshiaki Makita 	struct xdp_mem_info	xdp_mem;
45948d4f21SToshiaki Makita 	bool			rx_notify_masked;
46948d4f21SToshiaki Makita 	struct ptr_ring		xdp_ring;
47948d4f21SToshiaki Makita 	struct xdp_rxq_info	xdp_rxq;
48e314dbdcSPavel Emelyanov };
49e314dbdcSPavel Emelyanov 
50638264dcSToshiaki Makita struct veth_priv {
51638264dcSToshiaki Makita 	struct net_device __rcu	*peer;
52638264dcSToshiaki Makita 	atomic64_t		dropped;
53638264dcSToshiaki Makita 	struct bpf_prog		*_xdp_prog;
54638264dcSToshiaki Makita 	struct veth_rq		*rq;
55638264dcSToshiaki Makita 	unsigned int		requested_headroom;
56638264dcSToshiaki Makita };
57638264dcSToshiaki Makita 
58e314dbdcSPavel Emelyanov /*
59e314dbdcSPavel Emelyanov  * ethtool interface
60e314dbdcSPavel Emelyanov  */
61e314dbdcSPavel Emelyanov 
62e314dbdcSPavel Emelyanov static struct {
63e314dbdcSPavel Emelyanov 	const char string[ETH_GSTRING_LEN];
64e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = {
65e314dbdcSPavel Emelyanov 	{ "peer_ifindex" },
66e314dbdcSPavel Emelyanov };
67e314dbdcSPavel Emelyanov 
6856607b98SPhilippe Reynes static int veth_get_link_ksettings(struct net_device *dev,
6956607b98SPhilippe Reynes 				   struct ethtool_link_ksettings *cmd)
70e314dbdcSPavel Emelyanov {
7156607b98SPhilippe Reynes 	cmd->base.speed		= SPEED_10000;
7256607b98SPhilippe Reynes 	cmd->base.duplex	= DUPLEX_FULL;
7356607b98SPhilippe Reynes 	cmd->base.port		= PORT_TP;
7456607b98SPhilippe Reynes 	cmd->base.autoneg	= AUTONEG_DISABLE;
75e314dbdcSPavel Emelyanov 	return 0;
76e314dbdcSPavel Emelyanov }
77e314dbdcSPavel Emelyanov 
78e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
79e314dbdcSPavel Emelyanov {
8033a5ba14SRick Jones 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
8133a5ba14SRick Jones 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
82e314dbdcSPavel Emelyanov }
83e314dbdcSPavel Emelyanov 
84e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
85e314dbdcSPavel Emelyanov {
86e314dbdcSPavel Emelyanov 	switch(stringset) {
87e314dbdcSPavel Emelyanov 	case ETH_SS_STATS:
88e314dbdcSPavel Emelyanov 		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
89e314dbdcSPavel Emelyanov 		break;
90e314dbdcSPavel Emelyanov 	}
91e314dbdcSPavel Emelyanov }
92e314dbdcSPavel Emelyanov 
93b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset)
94e314dbdcSPavel Emelyanov {
95b9f2c044SJeff Garzik 	switch (sset) {
96b9f2c044SJeff Garzik 	case ETH_SS_STATS:
97e314dbdcSPavel Emelyanov 		return ARRAY_SIZE(ethtool_stats_keys);
98b9f2c044SJeff Garzik 	default:
99b9f2c044SJeff Garzik 		return -EOPNOTSUPP;
100b9f2c044SJeff Garzik 	}
101e314dbdcSPavel Emelyanov }
102e314dbdcSPavel Emelyanov 
103e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev,
104e314dbdcSPavel Emelyanov 		struct ethtool_stats *stats, u64 *data)
105e314dbdcSPavel Emelyanov {
106d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
107d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
108e314dbdcSPavel Emelyanov 
109d0e2c55eSEric Dumazet 	data[0] = peer ? peer->ifindex : 0;
110e314dbdcSPavel Emelyanov }
111e314dbdcSPavel Emelyanov 
112aa4e689eSMichael Walle static int veth_get_ts_info(struct net_device *dev,
113aa4e689eSMichael Walle 			    struct ethtool_ts_info *info)
114aa4e689eSMichael Walle {
115aa4e689eSMichael Walle 	info->so_timestamping =
116aa4e689eSMichael Walle 		SOF_TIMESTAMPING_TX_SOFTWARE |
117aa4e689eSMichael Walle 		SOF_TIMESTAMPING_RX_SOFTWARE |
118aa4e689eSMichael Walle 		SOF_TIMESTAMPING_SOFTWARE;
119aa4e689eSMichael Walle 	info->phc_index = -1;
120aa4e689eSMichael Walle 
121aa4e689eSMichael Walle 	return 0;
122aa4e689eSMichael Walle }
123aa4e689eSMichael Walle 
1240fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = {
125e314dbdcSPavel Emelyanov 	.get_drvinfo		= veth_get_drvinfo,
126e314dbdcSPavel Emelyanov 	.get_link		= ethtool_op_get_link,
127e314dbdcSPavel Emelyanov 	.get_strings		= veth_get_strings,
128b9f2c044SJeff Garzik 	.get_sset_count		= veth_get_sset_count,
129e314dbdcSPavel Emelyanov 	.get_ethtool_stats	= veth_get_ethtool_stats,
13056607b98SPhilippe Reynes 	.get_link_ksettings	= veth_get_link_ksettings,
131aa4e689eSMichael Walle 	.get_ts_info		= veth_get_ts_info,
132e314dbdcSPavel Emelyanov };
133e314dbdcSPavel Emelyanov 
134948d4f21SToshiaki Makita /* general routines */
135948d4f21SToshiaki Makita 
1369fc8d518SToshiaki Makita static bool veth_is_xdp_frame(void *ptr)
1379fc8d518SToshiaki Makita {
1389fc8d518SToshiaki Makita 	return (unsigned long)ptr & VETH_XDP_FLAG;
1399fc8d518SToshiaki Makita }
1409fc8d518SToshiaki Makita 
1419fc8d518SToshiaki Makita static void *veth_ptr_to_xdp(void *ptr)
1429fc8d518SToshiaki Makita {
1439fc8d518SToshiaki Makita 	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
1449fc8d518SToshiaki Makita }
1459fc8d518SToshiaki Makita 
146af87a3aaSToshiaki Makita static void *veth_xdp_to_ptr(void *ptr)
147af87a3aaSToshiaki Makita {
148af87a3aaSToshiaki Makita 	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
149af87a3aaSToshiaki Makita }
150af87a3aaSToshiaki Makita 
/*
 * Release a ring entry: untagged entries are skbs and go to kfree_skb(),
 * tagged entries are xdp_frames and go to xdp_return_frame().
 */
static void veth_ptr_free(void *ptr)
{
	if (!veth_is_xdp_frame(ptr)) {
		kfree_skb(ptr);
		return;
	}

	xdp_return_frame(veth_ptr_to_xdp(ptr));
}
1589fc8d518SToshiaki Makita 
159638264dcSToshiaki Makita static void __veth_xdp_flush(struct veth_rq *rq)
160948d4f21SToshiaki Makita {
161948d4f21SToshiaki Makita 	/* Write ptr_ring before reading rx_notify_masked */
162948d4f21SToshiaki Makita 	smp_mb();
163638264dcSToshiaki Makita 	if (!rq->rx_notify_masked) {
164638264dcSToshiaki Makita 		rq->rx_notify_masked = true;
165638264dcSToshiaki Makita 		napi_schedule(&rq->xdp_napi);
166948d4f21SToshiaki Makita 	}
167948d4f21SToshiaki Makita }
168948d4f21SToshiaki Makita 
169638264dcSToshiaki Makita static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
170948d4f21SToshiaki Makita {
171638264dcSToshiaki Makita 	if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
172948d4f21SToshiaki Makita 		dev_kfree_skb_any(skb);
173948d4f21SToshiaki Makita 		return NET_RX_DROP;
174948d4f21SToshiaki Makita 	}
175948d4f21SToshiaki Makita 
176948d4f21SToshiaki Makita 	return NET_RX_SUCCESS;
177948d4f21SToshiaki Makita }
178948d4f21SToshiaki Makita 
179638264dcSToshiaki Makita static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
180638264dcSToshiaki Makita 			    struct veth_rq *rq, bool xdp)
181e314dbdcSPavel Emelyanov {
182948d4f21SToshiaki Makita 	return __dev_forward_skb(dev, skb) ?: xdp ?
183638264dcSToshiaki Makita 		veth_xdp_rx(rq, skb) :
184948d4f21SToshiaki Makita 		netif_rx(skb);
185948d4f21SToshiaki Makita }
186948d4f21SToshiaki Makita 
187948d4f21SToshiaki Makita static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
188948d4f21SToshiaki Makita {
189948d4f21SToshiaki Makita 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
190638264dcSToshiaki Makita 	struct veth_rq *rq = NULL;
191d0e2c55eSEric Dumazet 	struct net_device *rcv;
1922681128fSEric Dumazet 	int length = skb->len;
193948d4f21SToshiaki Makita 	bool rcv_xdp = false;
194638264dcSToshiaki Makita 	int rxq;
195e314dbdcSPavel Emelyanov 
196d0e2c55eSEric Dumazet 	rcu_read_lock();
197d0e2c55eSEric Dumazet 	rcv = rcu_dereference(priv->peer);
198d0e2c55eSEric Dumazet 	if (unlikely(!rcv)) {
199d0e2c55eSEric Dumazet 		kfree_skb(skb);
200d0e2c55eSEric Dumazet 		goto drop;
201d0e2c55eSEric Dumazet 	}
202e314dbdcSPavel Emelyanov 
203948d4f21SToshiaki Makita 	rcv_priv = netdev_priv(rcv);
204638264dcSToshiaki Makita 	rxq = skb_get_queue_mapping(skb);
205638264dcSToshiaki Makita 	if (rxq < rcv->real_num_rx_queues) {
206638264dcSToshiaki Makita 		rq = &rcv_priv->rq[rxq];
207638264dcSToshiaki Makita 		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
208638264dcSToshiaki Makita 		if (rcv_xdp)
209638264dcSToshiaki Makita 			skb_record_rx_queue(skb, rxq);
210638264dcSToshiaki Makita 	}
211948d4f21SToshiaki Makita 
212aa4e689eSMichael Walle 	skb_tx_timestamp(skb);
213638264dcSToshiaki Makita 	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
21414d73416SLi RongQing 		struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
215e314dbdcSPavel Emelyanov 
216cf05c700SEric Dumazet 		u64_stats_update_begin(&stats->syncp);
2172681128fSEric Dumazet 		stats->bytes += length;
2182681128fSEric Dumazet 		stats->packets++;
219cf05c700SEric Dumazet 		u64_stats_update_end(&stats->syncp);
2202681128fSEric Dumazet 	} else {
221d0e2c55eSEric Dumazet drop:
2222681128fSEric Dumazet 		atomic64_inc(&priv->dropped);
2232681128fSEric Dumazet 	}
224948d4f21SToshiaki Makita 
225948d4f21SToshiaki Makita 	if (rcv_xdp)
226638264dcSToshiaki Makita 		__veth_xdp_flush(rq);
227948d4f21SToshiaki Makita 
228d0e2c55eSEric Dumazet 	rcu_read_unlock();
229948d4f21SToshiaki Makita 
2306ed10654SPatrick McHardy 	return NETDEV_TX_OK;
231e314dbdcSPavel Emelyanov }
232e314dbdcSPavel Emelyanov 
23314d73416SLi RongQing static u64 veth_stats_one(struct pcpu_lstats *result, struct net_device *dev)
234e314dbdcSPavel Emelyanov {
235cf05c700SEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
23611687a10SDavid S. Miller 	int cpu;
23711687a10SDavid S. Miller 
2382681128fSEric Dumazet 	result->packets = 0;
2392681128fSEric Dumazet 	result->bytes = 0;
2402b1c8b0fSEric Dumazet 	for_each_possible_cpu(cpu) {
24114d73416SLi RongQing 		struct pcpu_lstats *stats = per_cpu_ptr(dev->lstats, cpu);
2422681128fSEric Dumazet 		u64 packets, bytes;
243cf05c700SEric Dumazet 		unsigned int start;
244e314dbdcSPavel Emelyanov 
245cf05c700SEric Dumazet 		do {
24657a7744eSEric W. Biederman 			start = u64_stats_fetch_begin_irq(&stats->syncp);
2472681128fSEric Dumazet 			packets = stats->packets;
2482681128fSEric Dumazet 			bytes = stats->bytes;
24957a7744eSEric W. Biederman 		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
2502681128fSEric Dumazet 		result->packets += packets;
2512681128fSEric Dumazet 		result->bytes += bytes;
252e314dbdcSPavel Emelyanov 	}
2532681128fSEric Dumazet 	return atomic64_read(&priv->dropped);
2542681128fSEric Dumazet }
2552681128fSEric Dumazet 
256bc1f4470Sstephen hemminger static void veth_get_stats64(struct net_device *dev,
2572681128fSEric Dumazet 			     struct rtnl_link_stats64 *tot)
2582681128fSEric Dumazet {
2592681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
260d0e2c55eSEric Dumazet 	struct net_device *peer;
26114d73416SLi RongQing 	struct pcpu_lstats one;
2622681128fSEric Dumazet 
2632681128fSEric Dumazet 	tot->tx_dropped = veth_stats_one(&one, dev);
2642681128fSEric Dumazet 	tot->tx_bytes = one.bytes;
2652681128fSEric Dumazet 	tot->tx_packets = one.packets;
2662681128fSEric Dumazet 
267d0e2c55eSEric Dumazet 	rcu_read_lock();
268d0e2c55eSEric Dumazet 	peer = rcu_dereference(priv->peer);
269d0e2c55eSEric Dumazet 	if (peer) {
270d0e2c55eSEric Dumazet 		tot->rx_dropped = veth_stats_one(&one, peer);
2712681128fSEric Dumazet 		tot->rx_bytes = one.bytes;
2722681128fSEric Dumazet 		tot->rx_packets = one.packets;
273d0e2c55eSEric Dumazet 	}
274d0e2c55eSEric Dumazet 	rcu_read_unlock();
275e314dbdcSPavel Emelyanov }
276e314dbdcSPavel Emelyanov 
/*
 * Fake multicast ability: the handler is deliberately empty and does
 * nothing with the device it is given.
 */
static void veth_set_multicast_list(struct net_device *dev)
{
}
2815c70ef85SGao feng 
282948d4f21SToshiaki Makita static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
283948d4f21SToshiaki Makita 				      int buflen)
284948d4f21SToshiaki Makita {
285948d4f21SToshiaki Makita 	struct sk_buff *skb;
286948d4f21SToshiaki Makita 
287948d4f21SToshiaki Makita 	if (!buflen) {
288948d4f21SToshiaki Makita 		buflen = SKB_DATA_ALIGN(headroom + len) +
289948d4f21SToshiaki Makita 			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
290948d4f21SToshiaki Makita 	}
291948d4f21SToshiaki Makita 	skb = build_skb(head, buflen);
292948d4f21SToshiaki Makita 	if (!skb)
293948d4f21SToshiaki Makita 		return NULL;
294948d4f21SToshiaki Makita 
295948d4f21SToshiaki Makita 	skb_reserve(skb, headroom);
296948d4f21SToshiaki Makita 	skb_put(skb, len);
297948d4f21SToshiaki Makita 
298948d4f21SToshiaki Makita 	return skb;
299948d4f21SToshiaki Makita }
300948d4f21SToshiaki Makita 
301638264dcSToshiaki Makita static int veth_select_rxq(struct net_device *dev)
302638264dcSToshiaki Makita {
303638264dcSToshiaki Makita 	return smp_processor_id() % dev->real_num_rx_queues;
304638264dcSToshiaki Makita }
305638264dcSToshiaki Makita 
306af87a3aaSToshiaki Makita static int veth_xdp_xmit(struct net_device *dev, int n,
307af87a3aaSToshiaki Makita 			 struct xdp_frame **frames, u32 flags)
308af87a3aaSToshiaki Makita {
309af87a3aaSToshiaki Makita 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
310af87a3aaSToshiaki Makita 	struct net_device *rcv;
3112131479dSToshiaki Makita 	int i, ret, drops = n;
312af87a3aaSToshiaki Makita 	unsigned int max_len;
313638264dcSToshiaki Makita 	struct veth_rq *rq;
314af87a3aaSToshiaki Makita 
3152131479dSToshiaki Makita 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
3162131479dSToshiaki Makita 		ret = -EINVAL;
3172131479dSToshiaki Makita 		goto drop;
3182131479dSToshiaki Makita 	}
319af87a3aaSToshiaki Makita 
320af87a3aaSToshiaki Makita 	rcv = rcu_dereference(priv->peer);
3212131479dSToshiaki Makita 	if (unlikely(!rcv)) {
3222131479dSToshiaki Makita 		ret = -ENXIO;
3232131479dSToshiaki Makita 		goto drop;
3242131479dSToshiaki Makita 	}
325af87a3aaSToshiaki Makita 
326af87a3aaSToshiaki Makita 	rcv_priv = netdev_priv(rcv);
327638264dcSToshiaki Makita 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
328af87a3aaSToshiaki Makita 	/* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
329af87a3aaSToshiaki Makita 	 * side. This means an XDP program is loaded on the peer and the peer
330af87a3aaSToshiaki Makita 	 * device is up.
331af87a3aaSToshiaki Makita 	 */
3322131479dSToshiaki Makita 	if (!rcu_access_pointer(rq->xdp_prog)) {
3332131479dSToshiaki Makita 		ret = -ENXIO;
3342131479dSToshiaki Makita 		goto drop;
3352131479dSToshiaki Makita 	}
336af87a3aaSToshiaki Makita 
3372131479dSToshiaki Makita 	drops = 0;
338af87a3aaSToshiaki Makita 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
339af87a3aaSToshiaki Makita 
340638264dcSToshiaki Makita 	spin_lock(&rq->xdp_ring.producer_lock);
341af87a3aaSToshiaki Makita 	for (i = 0; i < n; i++) {
342af87a3aaSToshiaki Makita 		struct xdp_frame *frame = frames[i];
343af87a3aaSToshiaki Makita 		void *ptr = veth_xdp_to_ptr(frame);
344af87a3aaSToshiaki Makita 
345af87a3aaSToshiaki Makita 		if (unlikely(frame->len > max_len ||
346638264dcSToshiaki Makita 			     __ptr_ring_produce(&rq->xdp_ring, ptr))) {
347af87a3aaSToshiaki Makita 			xdp_return_frame_rx_napi(frame);
348af87a3aaSToshiaki Makita 			drops++;
349af87a3aaSToshiaki Makita 		}
350af87a3aaSToshiaki Makita 	}
351638264dcSToshiaki Makita 	spin_unlock(&rq->xdp_ring.producer_lock);
352af87a3aaSToshiaki Makita 
353af87a3aaSToshiaki Makita 	if (flags & XDP_XMIT_FLUSH)
354638264dcSToshiaki Makita 		__veth_xdp_flush(rq);
355af87a3aaSToshiaki Makita 
3562131479dSToshiaki Makita 	if (likely(!drops))
3572131479dSToshiaki Makita 		return n;
3582131479dSToshiaki Makita 
3592131479dSToshiaki Makita 	ret = n - drops;
3602131479dSToshiaki Makita drop:
3612131479dSToshiaki Makita 	atomic64_add(drops, &priv->dropped);
3622131479dSToshiaki Makita 
3632131479dSToshiaki Makita 	return ret;
364af87a3aaSToshiaki Makita }
365af87a3aaSToshiaki Makita 
366d1396004SToshiaki Makita static void veth_xdp_flush(struct net_device *dev)
367d1396004SToshiaki Makita {
368d1396004SToshiaki Makita 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
369d1396004SToshiaki Makita 	struct net_device *rcv;
370638264dcSToshiaki Makita 	struct veth_rq *rq;
371d1396004SToshiaki Makita 
372d1396004SToshiaki Makita 	rcu_read_lock();
373d1396004SToshiaki Makita 	rcv = rcu_dereference(priv->peer);
374d1396004SToshiaki Makita 	if (unlikely(!rcv))
375d1396004SToshiaki Makita 		goto out;
376d1396004SToshiaki Makita 
377d1396004SToshiaki Makita 	rcv_priv = netdev_priv(rcv);
378638264dcSToshiaki Makita 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
379d1396004SToshiaki Makita 	/* xdp_ring is initialized on receive side? */
380638264dcSToshiaki Makita 	if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
381d1396004SToshiaki Makita 		goto out;
382d1396004SToshiaki Makita 
383638264dcSToshiaki Makita 	__veth_xdp_flush(rq);
384d1396004SToshiaki Makita out:
385d1396004SToshiaki Makita 	rcu_read_unlock();
386d1396004SToshiaki Makita }
387d1396004SToshiaki Makita 
388d1396004SToshiaki Makita static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
389d1396004SToshiaki Makita {
390d1396004SToshiaki Makita 	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
391d1396004SToshiaki Makita 
392d1396004SToshiaki Makita 	if (unlikely(!frame))
393d1396004SToshiaki Makita 		return -EOVERFLOW;
394d1396004SToshiaki Makita 
395d1396004SToshiaki Makita 	return veth_xdp_xmit(dev, 1, &frame, 0);
396d1396004SToshiaki Makita }
397d1396004SToshiaki Makita 
398638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
399d1396004SToshiaki Makita 					struct xdp_frame *frame,
400d1396004SToshiaki Makita 					unsigned int *xdp_xmit)
4019fc8d518SToshiaki Makita {
4029fc8d518SToshiaki Makita 	void *hard_start = frame->data - frame->headroom;
4039fc8d518SToshiaki Makita 	void *head = hard_start - sizeof(struct xdp_frame);
4049fc8d518SToshiaki Makita 	int len = frame->len, delta = 0;
405d1396004SToshiaki Makita 	struct xdp_frame orig_frame;
4069fc8d518SToshiaki Makita 	struct bpf_prog *xdp_prog;
4079fc8d518SToshiaki Makita 	unsigned int headroom;
4089fc8d518SToshiaki Makita 	struct sk_buff *skb;
4099fc8d518SToshiaki Makita 
4109fc8d518SToshiaki Makita 	rcu_read_lock();
411638264dcSToshiaki Makita 	xdp_prog = rcu_dereference(rq->xdp_prog);
4129fc8d518SToshiaki Makita 	if (likely(xdp_prog)) {
4139fc8d518SToshiaki Makita 		struct xdp_buff xdp;
4149fc8d518SToshiaki Makita 		u32 act;
4159fc8d518SToshiaki Makita 
4169fc8d518SToshiaki Makita 		xdp.data_hard_start = hard_start;
4179fc8d518SToshiaki Makita 		xdp.data = frame->data;
4189fc8d518SToshiaki Makita 		xdp.data_end = frame->data + frame->len;
4199fc8d518SToshiaki Makita 		xdp.data_meta = frame->data - frame->metasize;
420638264dcSToshiaki Makita 		xdp.rxq = &rq->xdp_rxq;
4219fc8d518SToshiaki Makita 
4229fc8d518SToshiaki Makita 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
4239fc8d518SToshiaki Makita 
4249fc8d518SToshiaki Makita 		switch (act) {
4259fc8d518SToshiaki Makita 		case XDP_PASS:
4269fc8d518SToshiaki Makita 			delta = frame->data - xdp.data;
4279fc8d518SToshiaki Makita 			len = xdp.data_end - xdp.data;
4289fc8d518SToshiaki Makita 			break;
429d1396004SToshiaki Makita 		case XDP_TX:
430d1396004SToshiaki Makita 			orig_frame = *frame;
431d1396004SToshiaki Makita 			xdp.data_hard_start = head;
432d1396004SToshiaki Makita 			xdp.rxq->mem = frame->mem;
433638264dcSToshiaki Makita 			if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
434638264dcSToshiaki Makita 				trace_xdp_exception(rq->dev, xdp_prog, act);
435d1396004SToshiaki Makita 				frame = &orig_frame;
436d1396004SToshiaki Makita 				goto err_xdp;
437d1396004SToshiaki Makita 			}
438d1396004SToshiaki Makita 			*xdp_xmit |= VETH_XDP_TX;
439d1396004SToshiaki Makita 			rcu_read_unlock();
440d1396004SToshiaki Makita 			goto xdp_xmit;
441d1396004SToshiaki Makita 		case XDP_REDIRECT:
442d1396004SToshiaki Makita 			orig_frame = *frame;
443d1396004SToshiaki Makita 			xdp.data_hard_start = head;
444d1396004SToshiaki Makita 			xdp.rxq->mem = frame->mem;
445638264dcSToshiaki Makita 			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
446d1396004SToshiaki Makita 				frame = &orig_frame;
447d1396004SToshiaki Makita 				goto err_xdp;
448d1396004SToshiaki Makita 			}
449d1396004SToshiaki Makita 			*xdp_xmit |= VETH_XDP_REDIR;
450d1396004SToshiaki Makita 			rcu_read_unlock();
451d1396004SToshiaki Makita 			goto xdp_xmit;
4529fc8d518SToshiaki Makita 		default:
4539fc8d518SToshiaki Makita 			bpf_warn_invalid_xdp_action(act);
4549fc8d518SToshiaki Makita 		case XDP_ABORTED:
455638264dcSToshiaki Makita 			trace_xdp_exception(rq->dev, xdp_prog, act);
4569fc8d518SToshiaki Makita 		case XDP_DROP:
4579fc8d518SToshiaki Makita 			goto err_xdp;
4589fc8d518SToshiaki Makita 		}
4599fc8d518SToshiaki Makita 	}
4609fc8d518SToshiaki Makita 	rcu_read_unlock();
4619fc8d518SToshiaki Makita 
4629fc8d518SToshiaki Makita 	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
4639fc8d518SToshiaki Makita 	skb = veth_build_skb(head, headroom, len, 0);
4649fc8d518SToshiaki Makita 	if (!skb) {
4659fc8d518SToshiaki Makita 		xdp_return_frame(frame);
4669fc8d518SToshiaki Makita 		goto err;
4679fc8d518SToshiaki Makita 	}
4689fc8d518SToshiaki Makita 
4699fc8d518SToshiaki Makita 	xdp_scrub_frame(frame);
470638264dcSToshiaki Makita 	skb->protocol = eth_type_trans(skb, rq->dev);
4719fc8d518SToshiaki Makita err:
4729fc8d518SToshiaki Makita 	return skb;
4739fc8d518SToshiaki Makita err_xdp:
4749fc8d518SToshiaki Makita 	rcu_read_unlock();
4759fc8d518SToshiaki Makita 	xdp_return_frame(frame);
476d1396004SToshiaki Makita xdp_xmit:
4779fc8d518SToshiaki Makita 	return NULL;
4789fc8d518SToshiaki Makita }
4799fc8d518SToshiaki Makita 
480638264dcSToshiaki Makita static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
481d1396004SToshiaki Makita 					unsigned int *xdp_xmit)
482948d4f21SToshiaki Makita {
483948d4f21SToshiaki Makita 	u32 pktlen, headroom, act, metalen;
484948d4f21SToshiaki Makita 	void *orig_data, *orig_data_end;
485948d4f21SToshiaki Makita 	struct bpf_prog *xdp_prog;
486948d4f21SToshiaki Makita 	int mac_len, delta, off;
487948d4f21SToshiaki Makita 	struct xdp_buff xdp;
488948d4f21SToshiaki Makita 
4894bf9ffa0SToshiaki Makita 	skb_orphan(skb);
4904bf9ffa0SToshiaki Makita 
491948d4f21SToshiaki Makita 	rcu_read_lock();
492638264dcSToshiaki Makita 	xdp_prog = rcu_dereference(rq->xdp_prog);
493948d4f21SToshiaki Makita 	if (unlikely(!xdp_prog)) {
494948d4f21SToshiaki Makita 		rcu_read_unlock();
495948d4f21SToshiaki Makita 		goto out;
496948d4f21SToshiaki Makita 	}
497948d4f21SToshiaki Makita 
498948d4f21SToshiaki Makita 	mac_len = skb->data - skb_mac_header(skb);
499948d4f21SToshiaki Makita 	pktlen = skb->len + mac_len;
500948d4f21SToshiaki Makita 	headroom = skb_headroom(skb) - mac_len;
501948d4f21SToshiaki Makita 
502948d4f21SToshiaki Makita 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
503948d4f21SToshiaki Makita 	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
504948d4f21SToshiaki Makita 		struct sk_buff *nskb;
505948d4f21SToshiaki Makita 		int size, head_off;
506948d4f21SToshiaki Makita 		void *head, *start;
507948d4f21SToshiaki Makita 		struct page *page;
508948d4f21SToshiaki Makita 
509948d4f21SToshiaki Makita 		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
510948d4f21SToshiaki Makita 		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
511948d4f21SToshiaki Makita 		if (size > PAGE_SIZE)
512948d4f21SToshiaki Makita 			goto drop;
513948d4f21SToshiaki Makita 
514948d4f21SToshiaki Makita 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
515948d4f21SToshiaki Makita 		if (!page)
516948d4f21SToshiaki Makita 			goto drop;
517948d4f21SToshiaki Makita 
518948d4f21SToshiaki Makita 		head = page_address(page);
519948d4f21SToshiaki Makita 		start = head + VETH_XDP_HEADROOM;
520948d4f21SToshiaki Makita 		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
521948d4f21SToshiaki Makita 			page_frag_free(head);
522948d4f21SToshiaki Makita 			goto drop;
523948d4f21SToshiaki Makita 		}
524948d4f21SToshiaki Makita 
525948d4f21SToshiaki Makita 		nskb = veth_build_skb(head,
526948d4f21SToshiaki Makita 				      VETH_XDP_HEADROOM + mac_len, skb->len,
527948d4f21SToshiaki Makita 				      PAGE_SIZE);
528948d4f21SToshiaki Makita 		if (!nskb) {
529948d4f21SToshiaki Makita 			page_frag_free(head);
530948d4f21SToshiaki Makita 			goto drop;
531948d4f21SToshiaki Makita 		}
532948d4f21SToshiaki Makita 
533948d4f21SToshiaki Makita 		skb_copy_header(nskb, skb);
534948d4f21SToshiaki Makita 		head_off = skb_headroom(nskb) - skb_headroom(skb);
535948d4f21SToshiaki Makita 		skb_headers_offset_update(nskb, head_off);
536948d4f21SToshiaki Makita 		consume_skb(skb);
537948d4f21SToshiaki Makita 		skb = nskb;
538948d4f21SToshiaki Makita 	}
539948d4f21SToshiaki Makita 
540948d4f21SToshiaki Makita 	xdp.data_hard_start = skb->head;
541948d4f21SToshiaki Makita 	xdp.data = skb_mac_header(skb);
542948d4f21SToshiaki Makita 	xdp.data_end = xdp.data + pktlen;
543948d4f21SToshiaki Makita 	xdp.data_meta = xdp.data;
544638264dcSToshiaki Makita 	xdp.rxq = &rq->xdp_rxq;
545948d4f21SToshiaki Makita 	orig_data = xdp.data;
546948d4f21SToshiaki Makita 	orig_data_end = xdp.data_end;
547948d4f21SToshiaki Makita 
548948d4f21SToshiaki Makita 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
549948d4f21SToshiaki Makita 
550948d4f21SToshiaki Makita 	switch (act) {
551948d4f21SToshiaki Makita 	case XDP_PASS:
552948d4f21SToshiaki Makita 		break;
553d1396004SToshiaki Makita 	case XDP_TX:
554d1396004SToshiaki Makita 		get_page(virt_to_page(xdp.data));
555d1396004SToshiaki Makita 		consume_skb(skb);
556638264dcSToshiaki Makita 		xdp.rxq->mem = rq->xdp_mem;
557638264dcSToshiaki Makita 		if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
558638264dcSToshiaki Makita 			trace_xdp_exception(rq->dev, xdp_prog, act);
559d1396004SToshiaki Makita 			goto err_xdp;
560d1396004SToshiaki Makita 		}
561d1396004SToshiaki Makita 		*xdp_xmit |= VETH_XDP_TX;
562d1396004SToshiaki Makita 		rcu_read_unlock();
563d1396004SToshiaki Makita 		goto xdp_xmit;
564d1396004SToshiaki Makita 	case XDP_REDIRECT:
565d1396004SToshiaki Makita 		get_page(virt_to_page(xdp.data));
566d1396004SToshiaki Makita 		consume_skb(skb);
567638264dcSToshiaki Makita 		xdp.rxq->mem = rq->xdp_mem;
568638264dcSToshiaki Makita 		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog))
569d1396004SToshiaki Makita 			goto err_xdp;
570d1396004SToshiaki Makita 		*xdp_xmit |= VETH_XDP_REDIR;
571d1396004SToshiaki Makita 		rcu_read_unlock();
572d1396004SToshiaki Makita 		goto xdp_xmit;
573948d4f21SToshiaki Makita 	default:
574948d4f21SToshiaki Makita 		bpf_warn_invalid_xdp_action(act);
575948d4f21SToshiaki Makita 	case XDP_ABORTED:
576638264dcSToshiaki Makita 		trace_xdp_exception(rq->dev, xdp_prog, act);
577948d4f21SToshiaki Makita 	case XDP_DROP:
578948d4f21SToshiaki Makita 		goto drop;
579948d4f21SToshiaki Makita 	}
580948d4f21SToshiaki Makita 	rcu_read_unlock();
581948d4f21SToshiaki Makita 
582948d4f21SToshiaki Makita 	delta = orig_data - xdp.data;
583948d4f21SToshiaki Makita 	off = mac_len + delta;
584948d4f21SToshiaki Makita 	if (off > 0)
585948d4f21SToshiaki Makita 		__skb_push(skb, off);
586948d4f21SToshiaki Makita 	else if (off < 0)
587948d4f21SToshiaki Makita 		__skb_pull(skb, -off);
588948d4f21SToshiaki Makita 	skb->mac_header -= delta;
589948d4f21SToshiaki Makita 	off = xdp.data_end - orig_data_end;
590948d4f21SToshiaki Makita 	if (off != 0)
591948d4f21SToshiaki Makita 		__skb_put(skb, off);
592638264dcSToshiaki Makita 	skb->protocol = eth_type_trans(skb, rq->dev);
593948d4f21SToshiaki Makita 
594948d4f21SToshiaki Makita 	metalen = xdp.data - xdp.data_meta;
595948d4f21SToshiaki Makita 	if (metalen)
596948d4f21SToshiaki Makita 		skb_metadata_set(skb, metalen);
597948d4f21SToshiaki Makita out:
598948d4f21SToshiaki Makita 	return skb;
599948d4f21SToshiaki Makita drop:
600948d4f21SToshiaki Makita 	rcu_read_unlock();
601948d4f21SToshiaki Makita 	kfree_skb(skb);
602948d4f21SToshiaki Makita 	return NULL;
603d1396004SToshiaki Makita err_xdp:
604d1396004SToshiaki Makita 	rcu_read_unlock();
605d1396004SToshiaki Makita 	page_frag_free(xdp.data);
606d1396004SToshiaki Makita xdp_xmit:
607d1396004SToshiaki Makita 	return NULL;
608948d4f21SToshiaki Makita }
609948d4f21SToshiaki Makita 
610638264dcSToshiaki Makita static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
611948d4f21SToshiaki Makita {
612948d4f21SToshiaki Makita 	int i, done = 0;
613948d4f21SToshiaki Makita 
614948d4f21SToshiaki Makita 	for (i = 0; i < budget; i++) {
615638264dcSToshiaki Makita 		void *ptr = __ptr_ring_consume(&rq->xdp_ring);
6169fc8d518SToshiaki Makita 		struct sk_buff *skb;
617948d4f21SToshiaki Makita 
6189fc8d518SToshiaki Makita 		if (!ptr)
619948d4f21SToshiaki Makita 			break;
620948d4f21SToshiaki Makita 
621d1396004SToshiaki Makita 		if (veth_is_xdp_frame(ptr)) {
622638264dcSToshiaki Makita 			skb = veth_xdp_rcv_one(rq, veth_ptr_to_xdp(ptr),
623d1396004SToshiaki Makita 					       xdp_xmit);
624d1396004SToshiaki Makita 		} else {
625638264dcSToshiaki Makita 			skb = veth_xdp_rcv_skb(rq, ptr, xdp_xmit);
626d1396004SToshiaki Makita 		}
627948d4f21SToshiaki Makita 
628948d4f21SToshiaki Makita 		if (skb)
629638264dcSToshiaki Makita 			napi_gro_receive(&rq->xdp_napi, skb);
630948d4f21SToshiaki Makita 
631948d4f21SToshiaki Makita 		done++;
632948d4f21SToshiaki Makita 	}
633948d4f21SToshiaki Makita 
634948d4f21SToshiaki Makita 	return done;
635948d4f21SToshiaki Makita }
636948d4f21SToshiaki Makita 
637948d4f21SToshiaki Makita static int veth_poll(struct napi_struct *napi, int budget)
638948d4f21SToshiaki Makita {
639638264dcSToshiaki Makita 	struct veth_rq *rq =
640638264dcSToshiaki Makita 		container_of(napi, struct veth_rq, xdp_napi);
641d1396004SToshiaki Makita 	unsigned int xdp_xmit = 0;
642948d4f21SToshiaki Makita 	int done;
643948d4f21SToshiaki Makita 
644d1396004SToshiaki Makita 	xdp_set_return_frame_no_direct();
645638264dcSToshiaki Makita 	done = veth_xdp_rcv(rq, budget, &xdp_xmit);
646948d4f21SToshiaki Makita 
647948d4f21SToshiaki Makita 	if (done < budget && napi_complete_done(napi, done)) {
648948d4f21SToshiaki Makita 		/* Write rx_notify_masked before reading ptr_ring */
649638264dcSToshiaki Makita 		smp_store_mb(rq->rx_notify_masked, false);
650638264dcSToshiaki Makita 		if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
651638264dcSToshiaki Makita 			rq->rx_notify_masked = true;
652638264dcSToshiaki Makita 			napi_schedule(&rq->xdp_napi);
653948d4f21SToshiaki Makita 		}
654948d4f21SToshiaki Makita 	}
655948d4f21SToshiaki Makita 
656d1396004SToshiaki Makita 	if (xdp_xmit & VETH_XDP_TX)
657638264dcSToshiaki Makita 		veth_xdp_flush(rq->dev);
658d1396004SToshiaki Makita 	if (xdp_xmit & VETH_XDP_REDIR)
659d1396004SToshiaki Makita 		xdp_do_flush_map();
660d1396004SToshiaki Makita 	xdp_clear_return_frame_no_direct();
661d1396004SToshiaki Makita 
662948d4f21SToshiaki Makita 	return done;
663948d4f21SToshiaki Makita }
664948d4f21SToshiaki Makita 
665948d4f21SToshiaki Makita static int veth_napi_add(struct net_device *dev)
666948d4f21SToshiaki Makita {
667948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
668638264dcSToshiaki Makita 	int err, i;
669948d4f21SToshiaki Makita 
670638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
671638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
672638264dcSToshiaki Makita 
673638264dcSToshiaki Makita 		err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
674948d4f21SToshiaki Makita 		if (err)
675638264dcSToshiaki Makita 			goto err_xdp_ring;
676638264dcSToshiaki Makita 	}
677948d4f21SToshiaki Makita 
678638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
679638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
680638264dcSToshiaki Makita 
681638264dcSToshiaki Makita 		netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
682638264dcSToshiaki Makita 		napi_enable(&rq->xdp_napi);
683638264dcSToshiaki Makita 	}
684948d4f21SToshiaki Makita 
685948d4f21SToshiaki Makita 	return 0;
686638264dcSToshiaki Makita err_xdp_ring:
687638264dcSToshiaki Makita 	for (i--; i >= 0; i--)
688638264dcSToshiaki Makita 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
689638264dcSToshiaki Makita 
690638264dcSToshiaki Makita 	return err;
691948d4f21SToshiaki Makita }
692948d4f21SToshiaki Makita 
693948d4f21SToshiaki Makita static void veth_napi_del(struct net_device *dev)
694948d4f21SToshiaki Makita {
695948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
696638264dcSToshiaki Makita 	int i;
697948d4f21SToshiaki Makita 
698638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
699638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
700638264dcSToshiaki Makita 
701638264dcSToshiaki Makita 		napi_disable(&rq->xdp_napi);
702638264dcSToshiaki Makita 		napi_hash_del(&rq->xdp_napi);
703638264dcSToshiaki Makita 	}
704638264dcSToshiaki Makita 	synchronize_net();
705638264dcSToshiaki Makita 
706638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
707638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
708638264dcSToshiaki Makita 
709638264dcSToshiaki Makita 		netif_napi_del(&rq->xdp_napi);
710638264dcSToshiaki Makita 		rq->rx_notify_masked = false;
711638264dcSToshiaki Makita 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
712638264dcSToshiaki Makita 	}
713948d4f21SToshiaki Makita }
714948d4f21SToshiaki Makita 
715948d4f21SToshiaki Makita static int veth_enable_xdp(struct net_device *dev)
716948d4f21SToshiaki Makita {
717948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
718638264dcSToshiaki Makita 	int err, i;
719948d4f21SToshiaki Makita 
720638264dcSToshiaki Makita 	if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
721638264dcSToshiaki Makita 		for (i = 0; i < dev->real_num_rx_queues; i++) {
722638264dcSToshiaki Makita 			struct veth_rq *rq = &priv->rq[i];
723948d4f21SToshiaki Makita 
724638264dcSToshiaki Makita 			err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
725948d4f21SToshiaki Makita 			if (err < 0)
726638264dcSToshiaki Makita 				goto err_rxq_reg;
727638264dcSToshiaki Makita 
728638264dcSToshiaki Makita 			err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
729638264dcSToshiaki Makita 							 MEM_TYPE_PAGE_SHARED,
730638264dcSToshiaki Makita 							 NULL);
731638264dcSToshiaki Makita 			if (err < 0)
732638264dcSToshiaki Makita 				goto err_reg_mem;
733638264dcSToshiaki Makita 
734638264dcSToshiaki Makita 			/* Save original mem info as it can be overwritten */
735638264dcSToshiaki Makita 			rq->xdp_mem = rq->xdp_rxq.mem;
736638264dcSToshiaki Makita 		}
737948d4f21SToshiaki Makita 
738948d4f21SToshiaki Makita 		err = veth_napi_add(dev);
739948d4f21SToshiaki Makita 		if (err)
740638264dcSToshiaki Makita 			goto err_rxq_reg;
741948d4f21SToshiaki Makita 	}
742948d4f21SToshiaki Makita 
743638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++)
744638264dcSToshiaki Makita 		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
745948d4f21SToshiaki Makita 
746948d4f21SToshiaki Makita 	return 0;
747638264dcSToshiaki Makita err_reg_mem:
748638264dcSToshiaki Makita 	xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
749638264dcSToshiaki Makita err_rxq_reg:
750638264dcSToshiaki Makita 	for (i--; i >= 0; i--)
751638264dcSToshiaki Makita 		xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
752948d4f21SToshiaki Makita 
753948d4f21SToshiaki Makita 	return err;
754948d4f21SToshiaki Makita }
755948d4f21SToshiaki Makita 
756948d4f21SToshiaki Makita static void veth_disable_xdp(struct net_device *dev)
757948d4f21SToshiaki Makita {
758948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
759638264dcSToshiaki Makita 	int i;
760948d4f21SToshiaki Makita 
761638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++)
762638264dcSToshiaki Makita 		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
763948d4f21SToshiaki Makita 	veth_napi_del(dev);
764638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
765638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
766638264dcSToshiaki Makita 
767638264dcSToshiaki Makita 		rq->xdp_rxq.mem = rq->xdp_mem;
768638264dcSToshiaki Makita 		xdp_rxq_info_unreg(&rq->xdp_rxq);
769638264dcSToshiaki Makita 	}
770948d4f21SToshiaki Makita }
771948d4f21SToshiaki Makita 
772e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev)
773e314dbdcSPavel Emelyanov {
774d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
775d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
776948d4f21SToshiaki Makita 	int err;
777e314dbdcSPavel Emelyanov 
778d0e2c55eSEric Dumazet 	if (!peer)
779e314dbdcSPavel Emelyanov 		return -ENOTCONN;
780e314dbdcSPavel Emelyanov 
781948d4f21SToshiaki Makita 	if (priv->_xdp_prog) {
782948d4f21SToshiaki Makita 		err = veth_enable_xdp(dev);
783948d4f21SToshiaki Makita 		if (err)
784948d4f21SToshiaki Makita 			return err;
785948d4f21SToshiaki Makita 	}
786948d4f21SToshiaki Makita 
787d0e2c55eSEric Dumazet 	if (peer->flags & IFF_UP) {
788e314dbdcSPavel Emelyanov 		netif_carrier_on(dev);
789d0e2c55eSEric Dumazet 		netif_carrier_on(peer);
790e314dbdcSPavel Emelyanov 	}
791948d4f21SToshiaki Makita 
792e314dbdcSPavel Emelyanov 	return 0;
793e314dbdcSPavel Emelyanov }
794e314dbdcSPavel Emelyanov 
7952cf48a10SEric W. Biederman static int veth_close(struct net_device *dev)
7962cf48a10SEric W. Biederman {
7972cf48a10SEric W. Biederman 	struct veth_priv *priv = netdev_priv(dev);
7982efd32eeSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
7992cf48a10SEric W. Biederman 
8002cf48a10SEric W. Biederman 	netif_carrier_off(dev);
8012efd32eeSEric Dumazet 	if (peer)
8022efd32eeSEric Dumazet 		netif_carrier_off(peer);
8032cf48a10SEric W. Biederman 
804948d4f21SToshiaki Makita 	if (priv->_xdp_prog)
805948d4f21SToshiaki Makita 		veth_disable_xdp(dev);
806948d4f21SToshiaki Makita 
8072cf48a10SEric W. Biederman 	return 0;
8082cf48a10SEric W. Biederman }
8092cf48a10SEric W. Biederman 
81091572088SJarod Wilson static int is_valid_veth_mtu(int mtu)
81138d40815SEric Biederman {
81291572088SJarod Wilson 	return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
81338d40815SEric Biederman }
81438d40815SEric Biederman 
8157797b93bSToshiaki Makita static int veth_alloc_queues(struct net_device *dev)
8167797b93bSToshiaki Makita {
8177797b93bSToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
8187797b93bSToshiaki Makita 	int i;
8197797b93bSToshiaki Makita 
8207797b93bSToshiaki Makita 	priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
8217797b93bSToshiaki Makita 	if (!priv->rq)
8227797b93bSToshiaki Makita 		return -ENOMEM;
8237797b93bSToshiaki Makita 
8247797b93bSToshiaki Makita 	for (i = 0; i < dev->num_rx_queues; i++)
8257797b93bSToshiaki Makita 		priv->rq[i].dev = dev;
8267797b93bSToshiaki Makita 
8277797b93bSToshiaki Makita 	return 0;
8287797b93bSToshiaki Makita }
8297797b93bSToshiaki Makita 
8307797b93bSToshiaki Makita static void veth_free_queues(struct net_device *dev)
8317797b93bSToshiaki Makita {
8327797b93bSToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
8337797b93bSToshiaki Makita 
8347797b93bSToshiaki Makita 	kfree(priv->rq);
8357797b93bSToshiaki Makita }
8367797b93bSToshiaki Makita 
837e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev)
838e314dbdcSPavel Emelyanov {
8397797b93bSToshiaki Makita 	int err;
8407797b93bSToshiaki Makita 
84114d73416SLi RongQing 	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
84214d73416SLi RongQing 	if (!dev->lstats)
843e314dbdcSPavel Emelyanov 		return -ENOMEM;
8447797b93bSToshiaki Makita 
8457797b93bSToshiaki Makita 	err = veth_alloc_queues(dev);
8467797b93bSToshiaki Makita 	if (err) {
84714d73416SLi RongQing 		free_percpu(dev->lstats);
8487797b93bSToshiaki Makita 		return err;
8497797b93bSToshiaki Makita 	}
8507797b93bSToshiaki Makita 
851e314dbdcSPavel Emelyanov 	return 0;
852e314dbdcSPavel Emelyanov }
853e314dbdcSPavel Emelyanov 
85411687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev)
85511687a10SDavid S. Miller {
8567797b93bSToshiaki Makita 	veth_free_queues(dev);
85714d73416SLi RongQing 	free_percpu(dev->lstats);
85811687a10SDavid S. Miller }
85911687a10SDavid S. Miller 
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller: intentionally empty.
 *
 * A veth device only receives a frame when its peer transmits one, and that
 * has nothing to do with disabling irqs, so there is never pending data to
 * pick up when netpoll polls us.
 *
 * The hook must exist nevertheless: it is what makes netpoll treat veth as
 * an interface that supports polling, which lets bridge devices in virt
 * setups keep using netconsole.
 */
static void veth_poll_controller(struct net_device *dev)
{
}
#endif	/* CONFIG_NET_POLL_CONTROLLER */
874bb446c19SWANG Cong 
875a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev)
876a45253bfSNicolas Dichtel {
877a45253bfSNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
878a45253bfSNicolas Dichtel 	struct net_device *peer;
879a45253bfSNicolas Dichtel 	int iflink;
880a45253bfSNicolas Dichtel 
881a45253bfSNicolas Dichtel 	rcu_read_lock();
882a45253bfSNicolas Dichtel 	peer = rcu_dereference(priv->peer);
883a45253bfSNicolas Dichtel 	iflink = peer ? peer->ifindex : 0;
884a45253bfSNicolas Dichtel 	rcu_read_unlock();
885a45253bfSNicolas Dichtel 
886a45253bfSNicolas Dichtel 	return iflink;
887a45253bfSNicolas Dichtel }
888a45253bfSNicolas Dichtel 
889dc224822SToshiaki Makita static netdev_features_t veth_fix_features(struct net_device *dev,
890dc224822SToshiaki Makita 					   netdev_features_t features)
891dc224822SToshiaki Makita {
892dc224822SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
893dc224822SToshiaki Makita 	struct net_device *peer;
894dc224822SToshiaki Makita 
895dc224822SToshiaki Makita 	peer = rtnl_dereference(priv->peer);
896dc224822SToshiaki Makita 	if (peer) {
897dc224822SToshiaki Makita 		struct veth_priv *peer_priv = netdev_priv(peer);
898dc224822SToshiaki Makita 
899dc224822SToshiaki Makita 		if (peer_priv->_xdp_prog)
900dc224822SToshiaki Makita 			features &= ~NETIF_F_GSO_SOFTWARE;
901dc224822SToshiaki Makita 	}
902dc224822SToshiaki Makita 
903dc224822SToshiaki Makita 	return features;
904dc224822SToshiaki Makita }
905dc224822SToshiaki Makita 
906163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
907163e5292SPaolo Abeni {
908163e5292SPaolo Abeni 	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
909163e5292SPaolo Abeni 	struct net_device *peer;
910163e5292SPaolo Abeni 
911163e5292SPaolo Abeni 	if (new_hr < 0)
912163e5292SPaolo Abeni 		new_hr = 0;
913163e5292SPaolo Abeni 
914163e5292SPaolo Abeni 	rcu_read_lock();
915163e5292SPaolo Abeni 	peer = rcu_dereference(priv->peer);
916163e5292SPaolo Abeni 	if (unlikely(!peer))
917163e5292SPaolo Abeni 		goto out;
918163e5292SPaolo Abeni 
919163e5292SPaolo Abeni 	peer_priv = netdev_priv(peer);
920163e5292SPaolo Abeni 	priv->requested_headroom = new_hr;
921163e5292SPaolo Abeni 	new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
922163e5292SPaolo Abeni 	dev->needed_headroom = new_hr;
923163e5292SPaolo Abeni 	peer->needed_headroom = new_hr;
924163e5292SPaolo Abeni 
925163e5292SPaolo Abeni out:
926163e5292SPaolo Abeni 	rcu_read_unlock();
927163e5292SPaolo Abeni }
928163e5292SPaolo Abeni 
/*
 * Attach, replace or detach the XDP program of @dev.
 *
 * @dev:    device whose program is being changed
 * @prog:   new program, or NULL to detach
 * @extack: receives a message when attaching is refused
 *
 * Attaching requires a peer, a peer MTU that fits in one page after the XDP
 * headroom, the peer's hard header and the skb_shared_info tail, and at
 * least as many rx queues on @dev as the peer has tx queues.  If @dev is up,
 * XDP is enabled immediately.  On the first attach NETIF_F_GSO_SOFTWARE is
 * removed from the peer's hw_features and its max_mtu is capped; on the
 * final detach both are restored.  Peer features are re-evaluated whenever
 * the "program present" state flips.
 *
 * Returns 0 on success; -ENOTCONN, -ERANGE, -ENOSPC or the error from
 * veth_enable_xdp() on failure, in which case the previous program stays
 * installed.
 */
929948d4f21SToshiaki Makita static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
930948d4f21SToshiaki Makita 			struct netlink_ext_ack *extack)
931948d4f21SToshiaki Makita {
932948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
933948d4f21SToshiaki Makita 	struct bpf_prog *old_prog;
934948d4f21SToshiaki Makita 	struct net_device *peer;
935dc224822SToshiaki Makita 	unsigned int max_mtu;
936948d4f21SToshiaki Makita 	int err;
937948d4f21SToshiaki Makita 
	/* Install the new program up front: veth_enable_xdp() publishes
	 * priv->_xdp_prog to the queues.  The err path puts old_prog back.
	 */
938948d4f21SToshiaki Makita 	old_prog = priv->_xdp_prog;
939948d4f21SToshiaki Makita 	priv->_xdp_prog = prog;
940948d4f21SToshiaki Makita 	peer = rtnl_dereference(priv->peer);
941948d4f21SToshiaki Makita 
942948d4f21SToshiaki Makita 	if (prog) {
943948d4f21SToshiaki Makita 		if (!peer) {
944948d4f21SToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
945948d4f21SToshiaki Makita 			err = -ENOTCONN;
946948d4f21SToshiaki Makita 			goto err;
947948d4f21SToshiaki Makita 		}
948948d4f21SToshiaki Makita 
		/* Largest peer MTU whose frame still fits in a single page */
949dc224822SToshiaki Makita 		max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
950dc224822SToshiaki Makita 			  peer->hard_header_len -
951dc224822SToshiaki Makita 			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
952dc224822SToshiaki Makita 		if (peer->mtu > max_mtu) {
953dc224822SToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
954dc224822SToshiaki Makita 			err = -ERANGE;
955dc224822SToshiaki Makita 			goto err;
956dc224822SToshiaki Makita 		}
957dc224822SToshiaki Makita 
958638264dcSToshiaki Makita 		if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
959638264dcSToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
960638264dcSToshiaki Makita 			err = -ENOSPC;
961638264dcSToshiaki Makita 			goto err;
962638264dcSToshiaki Makita 		}
963638264dcSToshiaki Makita 
		/* A device that is down gets XDP enabled later by veth_open() */
964948d4f21SToshiaki Makita 		if (dev->flags & IFF_UP) {
965948d4f21SToshiaki Makita 			err = veth_enable_xdp(dev);
966948d4f21SToshiaki Makita 			if (err) {
967948d4f21SToshiaki Makita 				NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
968948d4f21SToshiaki Makita 				goto err;
969948d4f21SToshiaki Makita 			}
970948d4f21SToshiaki Makita 		}
971dc224822SToshiaki Makita 
		/* First attach only: restrict what the peer may send us */
972dc224822SToshiaki Makita 		if (!old_prog) {
973dc224822SToshiaki Makita 			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
974dc224822SToshiaki Makita 			peer->max_mtu = max_mtu;
975dc224822SToshiaki Makita 		}
976948d4f21SToshiaki Makita 	}
977948d4f21SToshiaki Makita 
978948d4f21SToshiaki Makita 	if (old_prog) {
		/* Final detach: tear XDP down and lift the peer restrictions */
979dc224822SToshiaki Makita 		if (!prog) {
980dc224822SToshiaki Makita 			if (dev->flags & IFF_UP)
981948d4f21SToshiaki Makita 				veth_disable_xdp(dev);
982dc224822SToshiaki Makita 
983dc224822SToshiaki Makita 			if (peer) {
984dc224822SToshiaki Makita 				peer->hw_features |= NETIF_F_GSO_SOFTWARE;
985dc224822SToshiaki Makita 				peer->max_mtu = ETH_MAX_MTU;
986dc224822SToshiaki Makita 			}
987dc224822SToshiaki Makita 		}
988948d4f21SToshiaki Makita 		bpf_prog_put(old_prog);
989948d4f21SToshiaki Makita 	}
990948d4f21SToshiaki Makita 
	/* Re-evaluate peer features only on a none <-> some transition */
991dc224822SToshiaki Makita 	if ((!!old_prog ^ !!prog) && peer)
992dc224822SToshiaki Makita 		netdev_update_features(peer);
993dc224822SToshiaki Makita 
994948d4f21SToshiaki Makita 	return 0;
995948d4f21SToshiaki Makita err:
996948d4f21SToshiaki Makita 	priv->_xdp_prog = old_prog;
997948d4f21SToshiaki Makita 
998948d4f21SToshiaki Makita 	return err;
999948d4f21SToshiaki Makita }
1000948d4f21SToshiaki Makita 
1001948d4f21SToshiaki Makita static u32 veth_xdp_query(struct net_device *dev)
1002948d4f21SToshiaki Makita {
1003948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1004948d4f21SToshiaki Makita 	const struct bpf_prog *xdp_prog;
1005948d4f21SToshiaki Makita 
1006948d4f21SToshiaki Makita 	xdp_prog = priv->_xdp_prog;
1007948d4f21SToshiaki Makita 	if (xdp_prog)
1008948d4f21SToshiaki Makita 		return xdp_prog->aux->id;
1009948d4f21SToshiaki Makita 
1010948d4f21SToshiaki Makita 	return 0;
1011948d4f21SToshiaki Makita }
1012948d4f21SToshiaki Makita 
1013948d4f21SToshiaki Makita static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1014948d4f21SToshiaki Makita {
1015948d4f21SToshiaki Makita 	switch (xdp->command) {
1016948d4f21SToshiaki Makita 	case XDP_SETUP_PROG:
1017948d4f21SToshiaki Makita 		return veth_xdp_set(dev, xdp->prog, xdp->extack);
1018948d4f21SToshiaki Makita 	case XDP_QUERY_PROG:
1019948d4f21SToshiaki Makita 		xdp->prog_id = veth_xdp_query(dev);
1020948d4f21SToshiaki Makita 		return 0;
1021948d4f21SToshiaki Makita 	default:
1022948d4f21SToshiaki Makita 		return -EINVAL;
1023948d4f21SToshiaki Makita 	}
1024948d4f21SToshiaki Makita }
1025948d4f21SToshiaki Makita 
10264456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = {
10274456e7bdSStephen Hemminger 	.ndo_init            = veth_dev_init,
10284456e7bdSStephen Hemminger 	.ndo_open            = veth_open,
10292cf48a10SEric W. Biederman 	.ndo_stop            = veth_close,
103000829823SStephen Hemminger 	.ndo_start_xmit      = veth_xmit,
10316311cc44Sstephen hemminger 	.ndo_get_stats64     = veth_get_stats64,
10325c70ef85SGao feng 	.ndo_set_rx_mode     = veth_set_multicast_list,
1033ee923623SDaniel Lezcano 	.ndo_set_mac_address = eth_mac_addr,
1034bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER
1035bb446c19SWANG Cong 	.ndo_poll_controller	= veth_poll_controller,
1036bb446c19SWANG Cong #endif
1037a45253bfSNicolas Dichtel 	.ndo_get_iflink		= veth_get_iflink,
1038dc224822SToshiaki Makita 	.ndo_fix_features	= veth_fix_features,
10391a04a821SToshiaki Makita 	.ndo_features_check	= passthru_features_check,
1040163e5292SPaolo Abeni 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
1041948d4f21SToshiaki Makita 	.ndo_bpf		= veth_xdp,
1042af87a3aaSToshiaki Makita 	.ndo_xdp_xmit		= veth_xdp_xmit,
10434456e7bdSStephen Hemminger };
10444456e7bdSStephen Hemminger 
/* Feature bits veth_setup() assigns to features, hw_features and
 * hw_enc_features of every veth device.
 */
#define VETH_FEATURES						\
	(NETIF_F_SG | NETIF_F_FRAGLIST |			\
	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SCTP_CRC |	\
	 NETIF_F_HIGHDMA |					\
	 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |	\
	 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |	\
	 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX)
10508093315aSEric Dumazet 
1051e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev)
1052e314dbdcSPavel Emelyanov {
1053e314dbdcSPavel Emelyanov 	ether_setup(dev);
1054e314dbdcSPavel Emelyanov 
1055550fd08cSNeil Horman 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
105623ea5a96SHannes Frederic Sowa 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
105702f01ec1SPhil Sutter 	dev->priv_flags |= IFF_NO_QUEUE;
1058163e5292SPaolo Abeni 	dev->priv_flags |= IFF_PHONY_HEADROOM;
1059550fd08cSNeil Horman 
10604456e7bdSStephen Hemminger 	dev->netdev_ops = &veth_netdev_ops;
1061e314dbdcSPavel Emelyanov 	dev->ethtool_ops = &veth_ethtool_ops;
1062e314dbdcSPavel Emelyanov 	dev->features |= NETIF_F_LLTX;
10638093315aSEric Dumazet 	dev->features |= VETH_FEATURES;
10648d0d21f4SToshiaki Makita 	dev->vlan_features = dev->features &
10653f8c707bSVlad Yasevich 			     ~(NETIF_F_HW_VLAN_CTAG_TX |
10663f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_TX |
10673f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_CTAG_RX |
10683f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_RX);
1069cf124db5SDavid S. Miller 	dev->needs_free_netdev = true;
1070cf124db5SDavid S. Miller 	dev->priv_destructor = veth_dev_free;
107191572088SJarod Wilson 	dev->max_mtu = ETH_MAX_MTU;
1072a2c725faSMichał Mirosław 
10738093315aSEric Dumazet 	dev->hw_features = VETH_FEATURES;
107482d81898SEric Dumazet 	dev->hw_enc_features = VETH_FEATURES;
1075607fca9aSDavid Ahern 	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
1076e314dbdcSPavel Emelyanov }
1077e314dbdcSPavel Emelyanov 
1078e314dbdcSPavel Emelyanov /*
1079e314dbdcSPavel Emelyanov  * netlink interface
1080e314dbdcSPavel Emelyanov  */
1081e314dbdcSPavel Emelyanov 
1082a8b8a889SMatthias Schiffer static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
1083a8b8a889SMatthias Schiffer 			 struct netlink_ext_ack *extack)
1084e314dbdcSPavel Emelyanov {
1085e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS]) {
1086e314dbdcSPavel Emelyanov 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1087e314dbdcSPavel Emelyanov 			return -EINVAL;
1088e314dbdcSPavel Emelyanov 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1089e314dbdcSPavel Emelyanov 			return -EADDRNOTAVAIL;
1090e314dbdcSPavel Emelyanov 	}
109138d40815SEric Biederman 	if (tb[IFLA_MTU]) {
109238d40815SEric Biederman 		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
109338d40815SEric Biederman 			return -EINVAL;
109438d40815SEric Biederman 	}
1095e314dbdcSPavel Emelyanov 	return 0;
1096e314dbdcSPavel Emelyanov }
1097e314dbdcSPavel Emelyanov 
/* Forward declaration: veth_newlink() passes these ops to rtnl_create_link()
 * before the full definition appears further down.
 */
1098e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops;
1099e314dbdcSPavel Emelyanov 
/*
 * rtnl newlink handler: create a veth pair.
 *
 * @src_net: namespace the request came from
 * @dev:     already allocated first end of the pair
 * @tb:      link attributes for @dev
 * @data:    veth specific attributes; VETH_INFO_PEER carries the peer's
 *           ifinfomsg followed by its own link attributes
 * @extack:  extended ack, handed to veth_validate() for the peer
 *
 * The peer is created, registered and configured first; @dev is registered
 * last and the two ends are then tied together through priv->peer.
 *
 * Returns 0 on success or a negative errno.  On failure the peer is freed
 * (if it was never registered) or unregistered again.
 */
110081adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev,
11017a3f4a18SMatthias Schiffer 			struct nlattr *tb[], struct nlattr *data[],
11027a3f4a18SMatthias Schiffer 			struct netlink_ext_ack *extack)
1103e314dbdcSPavel Emelyanov {
11047797b93bSToshiaki Makita 	int err;
1105e314dbdcSPavel Emelyanov 	struct net_device *peer;
1106e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
1107e314dbdcSPavel Emelyanov 	char ifname[IFNAMSIZ];
1108e314dbdcSPavel Emelyanov 	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
11095517750fSTom Gundersen 	unsigned char name_assign_type;
11103729d502SPatrick McHardy 	struct ifinfomsg *ifmp;
111181adee47SEric W. Biederman 	struct net *net;
1112e314dbdcSPavel Emelyanov 
1113e314dbdcSPavel Emelyanov 	/*
1114e314dbdcSPavel Emelyanov 	 * create and register peer first
1115e314dbdcSPavel Emelyanov 	 */
1116e314dbdcSPavel Emelyanov 	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
1117e314dbdcSPavel Emelyanov 		struct nlattr *nla_peer;
1118e314dbdcSPavel Emelyanov 
		/* Payload layout: struct ifinfomsg, then the peer's attributes */
1119e314dbdcSPavel Emelyanov 		nla_peer = data[VETH_INFO_PEER];
11203729d502SPatrick McHardy 		ifmp = nla_data(nla_peer);
1121f7b12606SJiri Pirko 		err = rtnl_nla_parse_ifla(peer_tb,
1122e314dbdcSPavel Emelyanov 					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
1123fceb6435SJohannes Berg 					  nla_len(nla_peer) - sizeof(struct ifinfomsg),
1124fceb6435SJohannes Berg 					  NULL);
1125e314dbdcSPavel Emelyanov 		if (err < 0)
1126e314dbdcSPavel Emelyanov 			return err;
1127e314dbdcSPavel Emelyanov 
1128a8b8a889SMatthias Schiffer 		err = veth_validate(peer_tb, NULL, extack);
1129e314dbdcSPavel Emelyanov 		if (err < 0)
1130e314dbdcSPavel Emelyanov 			return err;
1131e314dbdcSPavel Emelyanov 
1132e314dbdcSPavel Emelyanov 		tbp = peer_tb;
11333729d502SPatrick McHardy 	} else {
11343729d502SPatrick McHardy 		ifmp = NULL;
1135e314dbdcSPavel Emelyanov 		tbp = tb;
11363729d502SPatrick McHardy 	}
1137e314dbdcSPavel Emelyanov 
	/* Without VETH_INFO_PEER (ifmp == NULL) tbp is @dev's own attribute
	 * table, so its name is not reused for the peer.
	 */
1138191cdb38SSerhey Popovych 	if (ifmp && tbp[IFLA_IFNAME]) {
1139e314dbdcSPavel Emelyanov 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
11405517750fSTom Gundersen 		name_assign_type = NET_NAME_USER;
11415517750fSTom Gundersen 	} else {
1142e314dbdcSPavel Emelyanov 		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
11435517750fSTom Gundersen 		name_assign_type = NET_NAME_ENUM;
11445517750fSTom Gundersen 	}
1145e314dbdcSPavel Emelyanov 
114681adee47SEric W. Biederman 	net = rtnl_link_get_net(src_net, tbp);
114781adee47SEric W. Biederman 	if (IS_ERR(net))
114881adee47SEric W. Biederman 		return PTR_ERR(net);
114981adee47SEric W. Biederman 
11505517750fSTom Gundersen 	peer = rtnl_create_link(net, ifname, name_assign_type,
11515517750fSTom Gundersen 				&veth_link_ops, tbp);
115281adee47SEric W. Biederman 	if (IS_ERR(peer)) {
115381adee47SEric W. Biederman 		put_net(net);
1154e314dbdcSPavel Emelyanov 		return PTR_ERR(peer);
115581adee47SEric W. Biederman 	}
1156e314dbdcSPavel Emelyanov 
	/* Same rule as for the name: only a peer-specific address is honoured */
1157191cdb38SSerhey Popovych 	if (!ifmp || !tbp[IFLA_ADDRESS])
1158f2cedb63SDanny Kukawka 		eth_hw_addr_random(peer);
1159e314dbdcSPavel Emelyanov 
1160e6f8f1a7SPavel Emelyanov 	if (ifmp && (dev->ifindex != 0))
1161e6f8f1a7SPavel Emelyanov 		peer->ifindex = ifmp->ifi_index;
1162e6f8f1a7SPavel Emelyanov 
116372d24955SStephen Hemminger 	peer->gso_max_size = dev->gso_max_size;
116472d24955SStephen Hemminger 	peer->gso_max_segs = dev->gso_max_segs;
116572d24955SStephen Hemminger 
	/* The namespace reference is dropped right after registration,
	 * whether or not it succeeded.
	 */
1166e314dbdcSPavel Emelyanov 	err = register_netdevice(peer);
116781adee47SEric W. Biederman 	put_net(net);
116881adee47SEric W. Biederman 	net = NULL;
1169e314dbdcSPavel Emelyanov 	if (err < 0)
1170e314dbdcSPavel Emelyanov 		goto err_register_peer;
1171e314dbdcSPavel Emelyanov 
1172e314dbdcSPavel Emelyanov 	netif_carrier_off(peer);
1173e314dbdcSPavel Emelyanov 
11743729d502SPatrick McHardy 	err = rtnl_configure_link(peer, ifmp);
11753729d502SPatrick McHardy 	if (err < 0)
11763729d502SPatrick McHardy 		goto err_configure_peer;
11773729d502SPatrick McHardy 
1178e314dbdcSPavel Emelyanov 	/*
1179e314dbdcSPavel Emelyanov 	 * register dev last
1180e314dbdcSPavel Emelyanov 	 *
1181e314dbdcSPavel Emelyanov 	 * note, that since we've registered new device the dev's name
1182e314dbdcSPavel Emelyanov 	 * should be re-allocated
1183e314dbdcSPavel Emelyanov 	 */
1184e314dbdcSPavel Emelyanov 
1185e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS] == NULL)
1186f2cedb63SDanny Kukawka 		eth_hw_addr_random(dev);
1187e314dbdcSPavel Emelyanov 
11886c8c4446SJiri Pirko 	if (tb[IFLA_IFNAME])
11896c8c4446SJiri Pirko 		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
11906c8c4446SJiri Pirko 	else
11916c8c4446SJiri Pirko 		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
11926c8c4446SJiri Pirko 
1193e314dbdcSPavel Emelyanov 	err = register_netdevice(dev);
1194e314dbdcSPavel Emelyanov 	if (err < 0)
1195e314dbdcSPavel Emelyanov 		goto err_register_dev;
1196e314dbdcSPavel Emelyanov 
1197e314dbdcSPavel Emelyanov 	netif_carrier_off(dev);
1198e314dbdcSPavel Emelyanov 
1199e314dbdcSPavel Emelyanov 	/*
1200e314dbdcSPavel Emelyanov 	 * tie the devices together
1201e314dbdcSPavel Emelyanov 	 */
1202e314dbdcSPavel Emelyanov 
1203e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
1204d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, peer);
1205e314dbdcSPavel Emelyanov 
1206e314dbdcSPavel Emelyanov 	priv = netdev_priv(peer);
1207d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, dev);
1208948d4f21SToshiaki Makita 
1209e314dbdcSPavel Emelyanov 	return 0;
1210e314dbdcSPavel Emelyanov 
1211e314dbdcSPavel Emelyanov err_register_dev:
1212e314dbdcSPavel Emelyanov 	/* nothing to do */
12133729d502SPatrick McHardy err_configure_peer:
1214e314dbdcSPavel Emelyanov 	unregister_netdevice(peer);
1215e314dbdcSPavel Emelyanov 	return err;
1216e314dbdcSPavel Emelyanov 
	/* The peer never got registered, so it is freed directly */
1217e314dbdcSPavel Emelyanov err_register_peer:
1218e314dbdcSPavel Emelyanov 	free_netdev(peer);
1219e314dbdcSPavel Emelyanov 	return err;
1220e314dbdcSPavel Emelyanov }
1221e314dbdcSPavel Emelyanov 
122223289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head)
1223e314dbdcSPavel Emelyanov {
1224e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
1225e314dbdcSPavel Emelyanov 	struct net_device *peer;
1226e314dbdcSPavel Emelyanov 
1227e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
1228d0e2c55eSEric Dumazet 	peer = rtnl_dereference(priv->peer);
1229d0e2c55eSEric Dumazet 
1230d0e2c55eSEric Dumazet 	/* Note : dellink() is called from default_device_exit_batch(),
1231d0e2c55eSEric Dumazet 	 * before a rcu_synchronize() point. The devices are guaranteed
1232d0e2c55eSEric Dumazet 	 * not being freed before one RCU grace period.
1233d0e2c55eSEric Dumazet 	 */
1234d0e2c55eSEric Dumazet 	RCU_INIT_POINTER(priv->peer, NULL);
1235f45a5c26SEric Dumazet 	unregister_netdevice_queue(dev, head);
1236d0e2c55eSEric Dumazet 
1237f45a5c26SEric Dumazet 	if (peer) {
1238d0e2c55eSEric Dumazet 		priv = netdev_priv(peer);
1239d0e2c55eSEric Dumazet 		RCU_INIT_POINTER(priv->peer, NULL);
124024540535SEric Dumazet 		unregister_netdevice_queue(peer, head);
1241e314dbdcSPavel Emelyanov 	}
1242f45a5c26SEric Dumazet }
1243e314dbdcSPavel Emelyanov 
124423711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
124523711438SThomas Graf 	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
124623711438SThomas Graf };
1247e314dbdcSPavel Emelyanov 
1248e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev)
1249e5f4e7b9SNicolas Dichtel {
1250e5f4e7b9SNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
1251e5f4e7b9SNicolas Dichtel 	struct net_device *peer = rtnl_dereference(priv->peer);
1252e5f4e7b9SNicolas Dichtel 
1253e5f4e7b9SNicolas Dichtel 	return peer ? dev_net(peer) : dev_net(dev);
1254e5f4e7b9SNicolas Dichtel }
1255e5f4e7b9SNicolas Dichtel 
1256e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = {
1257e314dbdcSPavel Emelyanov 	.kind		= DRV_NAME,
1258e314dbdcSPavel Emelyanov 	.priv_size	= sizeof(struct veth_priv),
1259e314dbdcSPavel Emelyanov 	.setup		= veth_setup,
1260e314dbdcSPavel Emelyanov 	.validate	= veth_validate,
1261e314dbdcSPavel Emelyanov 	.newlink	= veth_newlink,
1262e314dbdcSPavel Emelyanov 	.dellink	= veth_dellink,
1263e314dbdcSPavel Emelyanov 	.policy		= veth_policy,
1264e314dbdcSPavel Emelyanov 	.maxtype	= VETH_INFO_MAX,
1265e5f4e7b9SNicolas Dichtel 	.get_link_net	= veth_get_link_net,
1266e314dbdcSPavel Emelyanov };
1267e314dbdcSPavel Emelyanov 
1268e314dbdcSPavel Emelyanov /*
1269e314dbdcSPavel Emelyanov  * init/fini
1270e314dbdcSPavel Emelyanov  */
1271e314dbdcSPavel Emelyanov 
1272e314dbdcSPavel Emelyanov static __init int veth_init(void)
1273e314dbdcSPavel Emelyanov {
1274e314dbdcSPavel Emelyanov 	return rtnl_link_register(&veth_link_ops);
1275e314dbdcSPavel Emelyanov }
1276e314dbdcSPavel Emelyanov 
1277e314dbdcSPavel Emelyanov static __exit void veth_exit(void)
1278e314dbdcSPavel Emelyanov {
127968365458SPatrick McHardy 	rtnl_link_unregister(&veth_link_ops);
1280e314dbdcSPavel Emelyanov }
1281e314dbdcSPavel Emelyanov 
1282e314dbdcSPavel Emelyanov module_init(veth_init);
1283e314dbdcSPavel Emelyanov module_exit(veth_exit);
1284e314dbdcSPavel Emelyanov 
1285e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
1286e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2");
1287e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1288