xref: /openbmc/linux/drivers/net/veth.c (revision fccca038)
109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2e314dbdcSPavel Emelyanov /*
3e314dbdcSPavel Emelyanov  *  drivers/net/veth.c
4e314dbdcSPavel Emelyanov  *
5e314dbdcSPavel Emelyanov  *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
6e314dbdcSPavel Emelyanov  *
7e314dbdcSPavel Emelyanov  * Author: Pavel Emelianov <xemul@openvz.org>
8e314dbdcSPavel Emelyanov  * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
9e314dbdcSPavel Emelyanov  *
10e314dbdcSPavel Emelyanov  */
11e314dbdcSPavel Emelyanov 
12e314dbdcSPavel Emelyanov #include <linux/netdevice.h>
135a0e3ad6STejun Heo #include <linux/slab.h>
14e314dbdcSPavel Emelyanov #include <linux/ethtool.h>
15e314dbdcSPavel Emelyanov #include <linux/etherdevice.h>
16cf05c700SEric Dumazet #include <linux/u64_stats_sync.h>
17e314dbdcSPavel Emelyanov 
18f7b12606SJiri Pirko #include <net/rtnetlink.h>
19e314dbdcSPavel Emelyanov #include <net/dst.h>
20e314dbdcSPavel Emelyanov #include <net/xfrm.h>
21af87a3aaSToshiaki Makita #include <net/xdp.h>
22ecef969eSStephen Hemminger #include <linux/veth.h>
239d9779e7SPaul Gortmaker #include <linux/module.h>
24948d4f21SToshiaki Makita #include <linux/bpf.h>
25948d4f21SToshiaki Makita #include <linux/filter.h>
26948d4f21SToshiaki Makita #include <linux/ptr_ring.h>
27948d4f21SToshiaki Makita #include <linux/bpf_trace.h>
28aa4e689eSMichael Walle #include <linux/net_tstamp.h>
29e314dbdcSPavel Emelyanov 
30e314dbdcSPavel Emelyanov #define DRV_NAME	"veth"
31e314dbdcSPavel Emelyanov #define DRV_VERSION	"1.0"
32e314dbdcSPavel Emelyanov 
339fc8d518SToshiaki Makita #define VETH_XDP_FLAG		BIT(0)
34948d4f21SToshiaki Makita #define VETH_RING_SIZE		256
35948d4f21SToshiaki Makita #define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)
36948d4f21SToshiaki Makita 
379cda7807SToshiaki Makita #define VETH_XDP_TX_BULK_SIZE	16
3865e6dcf7SLorenzo Bianconi #define VETH_XDP_BATCH		16
399cda7807SToshiaki Makita 
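/* Per-queue counters. Each rx queue keeps its own veth_stats instance,
 * wrapped in a u64_stats_sync so readers (ethtool and ndo_get_stats64) can
 * fetch a consistent snapshot even on 32-bit hosts. The peer_tq_xdp_xmit*
 * fields are bumped on the receiving device's queue for frames sent via the
 * peer's ndo_xdp_xmit() and are reported back as that peer's tx-queue stats.
 */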
4065780c56SLorenzo Bianconi struct veth_stats {
411c5b82e5SLorenzo Bianconi 	u64	rx_drops;
421c5b82e5SLorenzo Bianconi 	/* xdp */
434195e54aSToshiaki Makita 	u64	xdp_packets;
444195e54aSToshiaki Makita 	u64	xdp_bytes;
451c5b82e5SLorenzo Bianconi 	u64	xdp_redirect;
464195e54aSToshiaki Makita 	u64	xdp_drops;
471c5b82e5SLorenzo Bianconi 	u64	xdp_tx;
489152cff0SLorenzo Bianconi 	u64	xdp_tx_err;
495fe6e567SLorenzo Bianconi 	u64	peer_tq_xdp_xmit;
505fe6e567SLorenzo Bianconi 	u64	peer_tq_xdp_xmit_err;
5165780c56SLorenzo Bianconi };
5265780c56SLorenzo Bianconi 
5365780c56SLorenzo Bianconi struct veth_rq_stats {
5465780c56SLorenzo Bianconi 	struct veth_stats	vs;
554195e54aSToshiaki Makita 	struct u64_stats_sync	syncp;
564195e54aSToshiaki Makita };
574195e54aSToshiaki Makita 
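/* Per rx-queue state. Each queue carries its own NAPI context, an
 * RCU-protected copy of the device's XDP program, the ptr_ring that the peer
 * produces skbs/xdp_frames into, and the counters above. veth_priv below
 * holds the RCU-protected pointer to the peer device, a tx drop counter and
 * the array of these per-queue structures.
 */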
58638264dcSToshiaki Makita struct veth_rq {
59948d4f21SToshiaki Makita 	struct napi_struct	xdp_napi;
60d3256efdSPaolo Abeni 	struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
61948d4f21SToshiaki Makita 	struct net_device	*dev;
62948d4f21SToshiaki Makita 	struct bpf_prog __rcu	*xdp_prog;
63d1396004SToshiaki Makita 	struct xdp_mem_info	xdp_mem;
644195e54aSToshiaki Makita 	struct veth_rq_stats	stats;
65948d4f21SToshiaki Makita 	bool			rx_notify_masked;
66948d4f21SToshiaki Makita 	struct ptr_ring		xdp_ring;
67948d4f21SToshiaki Makita 	struct xdp_rxq_info	xdp_rxq;
68e314dbdcSPavel Emelyanov };
69e314dbdcSPavel Emelyanov 
70638264dcSToshiaki Makita struct veth_priv {
71638264dcSToshiaki Makita 	struct net_device __rcu	*peer;
72638264dcSToshiaki Makita 	atomic64_t		dropped;
73638264dcSToshiaki Makita 	struct bpf_prog		*_xdp_prog;
74638264dcSToshiaki Makita 	struct veth_rq		*rq;
75638264dcSToshiaki Makita 	unsigned int		requested_headroom;
76638264dcSToshiaki Makita };
77638264dcSToshiaki Makita 
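/* Bulk queue for XDP_TX: frames produced by the local XDP program are
 * collected here, up to VETH_XDP_TX_BULK_SIZE at a time, and flushed to the
 * peer in a single veth_xdp_xmit() call from veth_xdp_flush_bq().
 */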
789cda7807SToshiaki Makita struct veth_xdp_tx_bq {
799cda7807SToshiaki Makita 	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
809cda7807SToshiaki Makita 	unsigned int count;
819cda7807SToshiaki Makita };
829cda7807SToshiaki Makita 
83e314dbdcSPavel Emelyanov /*
84e314dbdcSPavel Emelyanov  * ethtool interface
85e314dbdcSPavel Emelyanov  */
86e314dbdcSPavel Emelyanov 
87d397b968SToshiaki Makita struct veth_q_stat_desc {
88d397b968SToshiaki Makita 	char	desc[ETH_GSTRING_LEN];
89d397b968SToshiaki Makita 	size_t	offset;
90d397b968SToshiaki Makita };
91d397b968SToshiaki Makita 
9265780c56SLorenzo Bianconi #define VETH_RQ_STAT(m)	offsetof(struct veth_stats, m)
93d397b968SToshiaki Makita 
94d397b968SToshiaki Makita static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
95d397b968SToshiaki Makita 	{ "xdp_packets",	VETH_RQ_STAT(xdp_packets) },
96d397b968SToshiaki Makita 	{ "xdp_bytes",		VETH_RQ_STAT(xdp_bytes) },
975fe6e567SLorenzo Bianconi 	{ "drops",		VETH_RQ_STAT(rx_drops) },
985fe6e567SLorenzo Bianconi 	{ "xdp_redirect",	VETH_RQ_STAT(xdp_redirect) },
995fe6e567SLorenzo Bianconi 	{ "xdp_drops",		VETH_RQ_STAT(xdp_drops) },
1005fe6e567SLorenzo Bianconi 	{ "xdp_tx",		VETH_RQ_STAT(xdp_tx) },
1015fe6e567SLorenzo Bianconi 	{ "xdp_tx_errors",	VETH_RQ_STAT(xdp_tx_err) },
102d397b968SToshiaki Makita };
103d397b968SToshiaki Makita 
104d397b968SToshiaki Makita #define VETH_RQ_STATS_LEN	ARRAY_SIZE(veth_rq_stats_desc)
105d397b968SToshiaki Makita 
1065fe6e567SLorenzo Bianconi static const struct veth_q_stat_desc veth_tq_stats_desc[] = {
1075fe6e567SLorenzo Bianconi 	{ "xdp_xmit",		VETH_RQ_STAT(peer_tq_xdp_xmit) },
1085fe6e567SLorenzo Bianconi 	{ "xdp_xmit_errors",	VETH_RQ_STAT(peer_tq_xdp_xmit_err) },
1095fe6e567SLorenzo Bianconi };
1105fe6e567SLorenzo Bianconi 
1115fe6e567SLorenzo Bianconi #define VETH_TQ_STATS_LEN	ARRAY_SIZE(veth_tq_stats_desc)
1125fe6e567SLorenzo Bianconi 
113e314dbdcSPavel Emelyanov static struct {
114e314dbdcSPavel Emelyanov 	const char string[ETH_GSTRING_LEN];
115e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = {
116e314dbdcSPavel Emelyanov 	{ "peer_ifindex" },
117e314dbdcSPavel Emelyanov };
118e314dbdcSPavel Emelyanov 
119fefb695aSStanislav Fomichev struct veth_xdp_buff {
120fefb695aSStanislav Fomichev 	struct xdp_buff xdp;
121306531f0SStanislav Fomichev 	struct sk_buff *skb;
122fefb695aSStanislav Fomichev };
123fefb695aSStanislav Fomichev 
12456607b98SPhilippe Reynes static int veth_get_link_ksettings(struct net_device *dev,
12556607b98SPhilippe Reynes 				   struct ethtool_link_ksettings *cmd)
126e314dbdcSPavel Emelyanov {
12756607b98SPhilippe Reynes 	cmd->base.speed		= SPEED_10000;
12856607b98SPhilippe Reynes 	cmd->base.duplex	= DUPLEX_FULL;
12956607b98SPhilippe Reynes 	cmd->base.port		= PORT_TP;
13056607b98SPhilippe Reynes 	cmd->base.autoneg	= AUTONEG_DISABLE;
131e314dbdcSPavel Emelyanov 	return 0;
132e314dbdcSPavel Emelyanov }
133e314dbdcSPavel Emelyanov 
134e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
135e314dbdcSPavel Emelyanov {
136fb3ceec1SWolfram Sang 	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
137fb3ceec1SWolfram Sang 	strscpy(info->version, DRV_VERSION, sizeof(info->version));
138e314dbdcSPavel Emelyanov }
139e314dbdcSPavel Emelyanov 
140e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
141e314dbdcSPavel Emelyanov {
142a0341b73STonghao Zhang 	u8 *p = buf;
143d397b968SToshiaki Makita 	int i, j;
144d397b968SToshiaki Makita 
145e314dbdcSPavel Emelyanov 	switch (stringset) {
146e314dbdcSPavel Emelyanov 	case ETH_SS_STATS:
147d397b968SToshiaki Makita 		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
148d397b968SToshiaki Makita 		p += sizeof(ethtool_stats_keys);
149a0341b73STonghao Zhang 		for (i = 0; i < dev->real_num_rx_queues; i++)
150a0341b73STonghao Zhang 			for (j = 0; j < VETH_RQ_STATS_LEN; j++)
151a0341b73STonghao Zhang 				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
152d397b968SToshiaki Makita 						i, veth_rq_stats_desc[j].desc);
153a0341b73STonghao Zhang 
154a0341b73STonghao Zhang 		for (i = 0; i < dev->real_num_tx_queues; i++)
155a0341b73STonghao Zhang 			for (j = 0; j < VETH_TQ_STATS_LEN; j++)
156a0341b73STonghao Zhang 				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
1575fe6e567SLorenzo Bianconi 						i, veth_tq_stats_desc[j].desc);
158e314dbdcSPavel Emelyanov 		break;
159e314dbdcSPavel Emelyanov 	}
160e314dbdcSPavel Emelyanov }
161e314dbdcSPavel Emelyanov 
162b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset)
163e314dbdcSPavel Emelyanov {
164b9f2c044SJeff Garzik 	switch (sset) {
165b9f2c044SJeff Garzik 	case ETH_SS_STATS:
166d397b968SToshiaki Makita 		return ARRAY_SIZE(ethtool_stats_keys) +
1675fe6e567SLorenzo Bianconi 		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
1685fe6e567SLorenzo Bianconi 		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
169b9f2c044SJeff Garzik 	default:
170b9f2c044SJeff Garzik 		return -EOPNOTSUPP;
171b9f2c044SJeff Garzik 	}
172e314dbdcSPavel Emelyanov }
173e314dbdcSPavel Emelyanov 
174e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev,
175e314dbdcSPavel Emelyanov 		struct ethtool_stats *stats, u64 *data)
176e314dbdcSPavel Emelyanov {
1775fe6e567SLorenzo Bianconi 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
178d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
179d397b968SToshiaki Makita 	int i, j, idx;
180e314dbdcSPavel Emelyanov 
181d0e2c55eSEric Dumazet 	data[0] = peer ? peer->ifindex : 0;
182d397b968SToshiaki Makita 	idx = 1;
183d397b968SToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++) {
184d397b968SToshiaki Makita 		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
18565780c56SLorenzo Bianconi 		const void *stats_base = (void *)&rq_stats->vs;
186d397b968SToshiaki Makita 		unsigned int start;
187d397b968SToshiaki Makita 		size_t offset;
188d397b968SToshiaki Makita 
189d397b968SToshiaki Makita 		do {
190068c38adSThomas Gleixner 			start = u64_stats_fetch_begin(&rq_stats->syncp);
191d397b968SToshiaki Makita 			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
192d397b968SToshiaki Makita 				offset = veth_rq_stats_desc[j].offset;
193d397b968SToshiaki Makita 				data[idx + j] = *(u64 *)(stats_base + offset);
194d397b968SToshiaki Makita 			}
195068c38adSThomas Gleixner 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
196d397b968SToshiaki Makita 		idx += VETH_RQ_STATS_LEN;
197d397b968SToshiaki Makita 	}
1985fe6e567SLorenzo Bianconi 
1995fe6e567SLorenzo Bianconi 	if (!peer)
2005fe6e567SLorenzo Bianconi 		return;
2015fe6e567SLorenzo Bianconi 
2025fe6e567SLorenzo Bianconi 	rcv_priv = netdev_priv(peer);
2035fe6e567SLorenzo Bianconi 	for (i = 0; i < peer->real_num_rx_queues; i++) {
2045fe6e567SLorenzo Bianconi 		const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats;
2055fe6e567SLorenzo Bianconi 		const void *base = (void *)&rq_stats->vs;
2065fe6e567SLorenzo Bianconi 		unsigned int start, tx_idx = idx;
2075fe6e567SLorenzo Bianconi 		size_t offset;
2085fe6e567SLorenzo Bianconi 
2095fe6e567SLorenzo Bianconi 		tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN;
2105fe6e567SLorenzo Bianconi 		do {
211068c38adSThomas Gleixner 			start = u64_stats_fetch_begin(&rq_stats->syncp);
2125fe6e567SLorenzo Bianconi 			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
2135fe6e567SLorenzo Bianconi 				offset = veth_tq_stats_desc[j].offset;
2145fe6e567SLorenzo Bianconi 				data[tx_idx + j] += *(u64 *)(base + offset);
2155fe6e567SLorenzo Bianconi 			}
216068c38adSThomas Gleixner 		} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
2175fe6e567SLorenzo Bianconi 	}
218e314dbdcSPavel Emelyanov }
219e314dbdcSPavel Emelyanov 
22034829eecSMaciej Fijalkowski static void veth_get_channels(struct net_device *dev,
22134829eecSMaciej Fijalkowski 			      struct ethtool_channels *channels)
22234829eecSMaciej Fijalkowski {
22334829eecSMaciej Fijalkowski 	channels->tx_count = dev->real_num_tx_queues;
22434829eecSMaciej Fijalkowski 	channels->rx_count = dev->real_num_rx_queues;
2254752eeb3SPaolo Abeni 	channels->max_tx = dev->num_tx_queues;
2264752eeb3SPaolo Abeni 	channels->max_rx = dev->num_rx_queues;
22734829eecSMaciej Fijalkowski }
22834829eecSMaciej Fijalkowski 
2294752eeb3SPaolo Abeni static int veth_set_channels(struct net_device *dev,
2304752eeb3SPaolo Abeni 			     struct ethtool_channels *ch);
2314752eeb3SPaolo Abeni 
2320fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = {
233e314dbdcSPavel Emelyanov 	.get_drvinfo		= veth_get_drvinfo,
234e314dbdcSPavel Emelyanov 	.get_link		= ethtool_op_get_link,
235e314dbdcSPavel Emelyanov 	.get_strings		= veth_get_strings,
236b9f2c044SJeff Garzik 	.get_sset_count		= veth_get_sset_count,
237e314dbdcSPavel Emelyanov 	.get_ethtool_stats	= veth_get_ethtool_stats,
23856607b98SPhilippe Reynes 	.get_link_ksettings	= veth_get_link_ksettings,
239056b21fbSJulian Wiedmann 	.get_ts_info		= ethtool_op_get_ts_info,
24034829eecSMaciej Fijalkowski 	.get_channels		= veth_get_channels,
2414752eeb3SPaolo Abeni 	.set_channels		= veth_set_channels,
242e314dbdcSPavel Emelyanov };
243e314dbdcSPavel Emelyanov 
244948d4f21SToshiaki Makita /* general routines */
245948d4f21SToshiaki Makita 
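/* Entries in the per-queue xdp_ring are either sk_buff or xdp_frame
 * pointers. xdp_frame pointers are stored with VETH_XDP_FLAG (bit 0) set so
 * the consumer can tell the two apart; a rough sketch of the round trip:
 *
 *	ptr = veth_xdp_to_ptr(frame);		// producer tags the frame
 *	...
 *	if (veth_is_xdp_frame(ptr))
 *		frame = veth_ptr_to_xdp(ptr);	// consumer clears the tag
 *	else
 *		skb = ptr;			// skbs are stored untagged
 */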
2469fc8d518SToshiaki Makita static bool veth_is_xdp_frame(void *ptr)
2479fc8d518SToshiaki Makita {
2489fc8d518SToshiaki Makita 	return (unsigned long)ptr & VETH_XDP_FLAG;
2499fc8d518SToshiaki Makita }
2509fc8d518SToshiaki Makita 
251defcffebSMaciej Żenczykowski static struct xdp_frame *veth_ptr_to_xdp(void *ptr)
2529fc8d518SToshiaki Makita {
2539fc8d518SToshiaki Makita 	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
2549fc8d518SToshiaki Makita }
2559fc8d518SToshiaki Makita 
256defcffebSMaciej Żenczykowski static void *veth_xdp_to_ptr(struct xdp_frame *xdp)
257af87a3aaSToshiaki Makita {
258defcffebSMaciej Żenczykowski 	return (void *)((unsigned long)xdp | VETH_XDP_FLAG);
259af87a3aaSToshiaki Makita }
260af87a3aaSToshiaki Makita 
2619fc8d518SToshiaki Makita static void veth_ptr_free(void *ptr)
2629fc8d518SToshiaki Makita {
2639fc8d518SToshiaki Makita 	if (veth_is_xdp_frame(ptr))
2649fc8d518SToshiaki Makita 		xdp_return_frame(veth_ptr_to_xdp(ptr));
2659fc8d518SToshiaki Makita 	else
2669fc8d518SToshiaki Makita 		kfree_skb(ptr);
2679fc8d518SToshiaki Makita }
2689fc8d518SToshiaki Makita 
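/* __veth_xdp_flush() kicks the consumer NAPI after new entries have been
 * produced into the ring, but only if it is not already scheduled
 * (rx_notify_masked). The smp_mb() here pairs with the smp_store_mb() in
 * veth_poll(): either the poller sees the new ring entries, or this producer
 * sees rx_notify_masked cleared and reschedules NAPI. veth_xdp_rx() is the
 * skb producer used by veth_xmit(); it drops the packet if the ring is full.
 */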
269638264dcSToshiaki Makita static void __veth_xdp_flush(struct veth_rq *rq)
270948d4f21SToshiaki Makita {
271948d4f21SToshiaki Makita 	/* Write ptr_ring before reading rx_notify_masked */
272948d4f21SToshiaki Makita 	smp_mb();
27368468d8cSEric Dumazet 	if (!READ_ONCE(rq->rx_notify_masked) &&
27468468d8cSEric Dumazet 	    napi_schedule_prep(&rq->xdp_napi)) {
27568468d8cSEric Dumazet 		WRITE_ONCE(rq->rx_notify_masked, true);
27668468d8cSEric Dumazet 		__napi_schedule(&rq->xdp_napi);
277948d4f21SToshiaki Makita 	}
278948d4f21SToshiaki Makita }
279948d4f21SToshiaki Makita 
280638264dcSToshiaki Makita static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
281948d4f21SToshiaki Makita {
282638264dcSToshiaki Makita 	if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
283948d4f21SToshiaki Makita 		dev_kfree_skb_any(skb);
284948d4f21SToshiaki Makita 		return NET_RX_DROP;
285948d4f21SToshiaki Makita 	}
286948d4f21SToshiaki Makita 
287948d4f21SToshiaki Makita 	return NET_RX_SUCCESS;
288948d4f21SToshiaki Makita }
289948d4f21SToshiaki Makita 
290638264dcSToshiaki Makita static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
291638264dcSToshiaki Makita 			    struct veth_rq *rq, bool xdp)
292e314dbdcSPavel Emelyanov {
293948d4f21SToshiaki Makita 	return __dev_forward_skb(dev, skb) ?: xdp ?
294638264dcSToshiaki Makita 		veth_xdp_rx(rq, skb) :
295baebdf48SSebastian Andrzej Siewior 		__netif_rx(skb);
296948d4f21SToshiaki Makita }
297948d4f21SToshiaki Makita 
29847e550e0SPaolo Abeni /* Return true if the specified skb has a chance of GRO aggregation.
29947e550e0SPaolo Abeni  * Don't strive for accuracy, but try to avoid GRO overhead in the most
30047e550e0SPaolo Abeni  * common scenarios.
30147e550e0SPaolo Abeni  * When XDP is enabled, all traffic is considered eligible, as the xmit
30247e550e0SPaolo Abeni  * device has TSO off.
30347e550e0SPaolo Abeni  * When TSO is enabled on the xmit device, we are likely interested only
30447e550e0SPaolo Abeni  * in UDP aggregation; explicitly check for that if the skb is suspected
30547e550e0SPaolo Abeni  * to belong to locally generated UDP traffic (the sock_wfree destructor
30647e550e0SPaolo Abeni  * is used by UDP, ICMP and XDP sockets).
30747e550e0SPaolo Abeni  */
30847e550e0SPaolo Abeni static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
30947e550e0SPaolo Abeni 					 const struct net_device *rcv,
31047e550e0SPaolo Abeni 					 const struct sk_buff *skb)
31147e550e0SPaolo Abeni {
31247e550e0SPaolo Abeni 	return !(dev->features & NETIF_F_ALL_TSO) ||
31347e550e0SPaolo Abeni 		(skb->destructor == sock_wfree &&
31447e550e0SPaolo Abeni 		 rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD));
31547e550e0SPaolo Abeni }
31647e550e0SPaolo Abeni 
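/* Transmit path: look up the peer under RCU, pick the peer rx queue from the
 * skb's queue mapping and either hand the skb to that queue's NAPI/XDP ring
 * (when a napi context is active and the skb looks worth aggregating) or
 * push it straight to the stack with __netif_rx(). tx byte/packet counters
 * are only bumped here for the non-NAPI case; NAPI-delivered traffic is
 * accounted on the peer's rx queue and folded back in veth_get_stats64().
 */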
317948d4f21SToshiaki Makita static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
318948d4f21SToshiaki Makita {
319948d4f21SToshiaki Makita 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
320638264dcSToshiaki Makita 	struct veth_rq *rq = NULL;
321d0e2c55eSEric Dumazet 	struct net_device *rcv;
3222681128fSEric Dumazet 	int length = skb->len;
323d3256efdSPaolo Abeni 	bool use_napi = false;
324638264dcSToshiaki Makita 	int rxq;
325e314dbdcSPavel Emelyanov 
326d0e2c55eSEric Dumazet 	rcu_read_lock();
327d0e2c55eSEric Dumazet 	rcv = rcu_dereference(priv->peer);
328726e2c59SGuillaume Nault 	if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) {
329d0e2c55eSEric Dumazet 		kfree_skb(skb);
330d0e2c55eSEric Dumazet 		goto drop;
331d0e2c55eSEric Dumazet 	}
332e314dbdcSPavel Emelyanov 
333948d4f21SToshiaki Makita 	rcv_priv = netdev_priv(rcv);
334638264dcSToshiaki Makita 	rxq = skb_get_queue_mapping(skb);
335638264dcSToshiaki Makita 	if (rxq < rcv->real_num_rx_queues) {
336638264dcSToshiaki Makita 		rq = &rcv_priv->rq[rxq];
337d3256efdSPaolo Abeni 
338d3256efdSPaolo Abeni 		/* The napi pointer is available when an XDP program is
339d3256efdSPaolo Abeni 		 * attached or when GRO is enabled.
34047e550e0SPaolo Abeni 		 * Don't bother with napi/GRO if the skb can't be aggregated.
341d3256efdSPaolo Abeni 		 */
34247e550e0SPaolo Abeni 		use_napi = rcu_access_pointer(rq->napi) &&
34347e550e0SPaolo Abeni 			   veth_skb_is_eligible_for_gro(dev, rcv, skb);
344638264dcSToshiaki Makita 	}
345948d4f21SToshiaki Makita 
346aa4e689eSMichael Walle 	skb_tx_timestamp(skb);
347d3256efdSPaolo Abeni 	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
348d3256efdSPaolo Abeni 		if (!use_napi)
349b4fba476SEric Dumazet 			dev_lstats_add(dev, length);
3502681128fSEric Dumazet 	} else {
351d0e2c55eSEric Dumazet drop:
3522681128fSEric Dumazet 		atomic64_inc(&priv->dropped);
3532681128fSEric Dumazet 	}
354948d4f21SToshiaki Makita 
355d3256efdSPaolo Abeni 	if (use_napi)
356638264dcSToshiaki Makita 		__veth_xdp_flush(rq);
357948d4f21SToshiaki Makita 
358d0e2c55eSEric Dumazet 	rcu_read_unlock();
359948d4f21SToshiaki Makita 
3606ed10654SPatrick McHardy 	return NETDEV_TX_OK;
361e314dbdcSPavel Emelyanov }
362e314dbdcSPavel Emelyanov 
363b4fba476SEric Dumazet static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
364e314dbdcSPavel Emelyanov {
365cf05c700SEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
36611687a10SDavid S. Miller 
367b4fba476SEric Dumazet 	dev_lstats_read(dev, packets, bytes);
3682681128fSEric Dumazet 	return atomic64_read(&priv->dropped);
3692681128fSEric Dumazet }
3702681128fSEric Dumazet 
37165780c56SLorenzo Bianconi static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
3724195e54aSToshiaki Makita {
3734195e54aSToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
3744195e54aSToshiaki Makita 	int i;
3754195e54aSToshiaki Makita 
3765fe6e567SLorenzo Bianconi 	result->peer_tq_xdp_xmit_err = 0;
3774195e54aSToshiaki Makita 	result->xdp_packets = 0;
378d99a7c2fSLorenzo Bianconi 	result->xdp_tx_err = 0;
3794195e54aSToshiaki Makita 	result->xdp_bytes = 0;
38066fe4a07SLorenzo Bianconi 	result->rx_drops = 0;
3814195e54aSToshiaki Makita 	for (i = 0; i < dev->num_rx_queues; i++) {
3825fe6e567SLorenzo Bianconi 		u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err;
3834195e54aSToshiaki Makita 		struct veth_rq_stats *stats = &priv->rq[i].stats;
3844195e54aSToshiaki Makita 		unsigned int start;
3854195e54aSToshiaki Makita 
3864195e54aSToshiaki Makita 		do {
387068c38adSThomas Gleixner 			start = u64_stats_fetch_begin(&stats->syncp);
3885fe6e567SLorenzo Bianconi 			peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err;
389d99a7c2fSLorenzo Bianconi 			xdp_tx_err = stats->vs.xdp_tx_err;
39065780c56SLorenzo Bianconi 			packets = stats->vs.xdp_packets;
39165780c56SLorenzo Bianconi 			bytes = stats->vs.xdp_bytes;
39266fe4a07SLorenzo Bianconi 			drops = stats->vs.rx_drops;
393068c38adSThomas Gleixner 		} while (u64_stats_fetch_retry(&stats->syncp, start));
3945fe6e567SLorenzo Bianconi 		result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err;
395d99a7c2fSLorenzo Bianconi 		result->xdp_tx_err += xdp_tx_err;
3964195e54aSToshiaki Makita 		result->xdp_packets += packets;
3974195e54aSToshiaki Makita 		result->xdp_bytes += bytes;
39866fe4a07SLorenzo Bianconi 		result->rx_drops += drops;
3994195e54aSToshiaki Makita 	}
4004195e54aSToshiaki Makita }
4014195e54aSToshiaki Makita 
402bc1f4470Sstephen hemminger static void veth_get_stats64(struct net_device *dev,
4032681128fSEric Dumazet 			     struct rtnl_link_stats64 *tot)
4042681128fSEric Dumazet {
4052681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
406d0e2c55eSEric Dumazet 	struct net_device *peer;
40765780c56SLorenzo Bianconi 	struct veth_stats rx;
408b4fba476SEric Dumazet 	u64 packets, bytes;
4092681128fSEric Dumazet 
410b4fba476SEric Dumazet 	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
411b4fba476SEric Dumazet 	tot->tx_bytes = bytes;
412b4fba476SEric Dumazet 	tot->tx_packets = packets;
4134195e54aSToshiaki Makita 
4144195e54aSToshiaki Makita 	veth_stats_rx(&rx, dev);
4155fe6e567SLorenzo Bianconi 	tot->tx_dropped += rx.xdp_tx_err;
4165fe6e567SLorenzo Bianconi 	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
4174195e54aSToshiaki Makita 	tot->rx_bytes = rx.xdp_bytes;
4184195e54aSToshiaki Makita 	tot->rx_packets = rx.xdp_packets;
4192681128fSEric Dumazet 
420d0e2c55eSEric Dumazet 	rcu_read_lock();
421d0e2c55eSEric Dumazet 	peer = rcu_dereference(priv->peer);
422d0e2c55eSEric Dumazet 	if (peer) {
423e25d5dbcSJiang Lidong 		veth_stats_tx(peer, &packets, &bytes);
424b4fba476SEric Dumazet 		tot->rx_bytes += bytes;
425b4fba476SEric Dumazet 		tot->rx_packets += packets;
4264195e54aSToshiaki Makita 
4274195e54aSToshiaki Makita 		veth_stats_rx(&rx, peer);
4285fe6e567SLorenzo Bianconi 		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
4295fe6e567SLorenzo Bianconi 		tot->rx_dropped += rx.xdp_tx_err;
4304195e54aSToshiaki Makita 		tot->tx_bytes += rx.xdp_bytes;
4314195e54aSToshiaki Makita 		tot->tx_packets += rx.xdp_packets;
432d0e2c55eSEric Dumazet 	}
433d0e2c55eSEric Dumazet 	rcu_read_unlock();
434e314dbdcSPavel Emelyanov }
435e314dbdcSPavel Emelyanov 
4365c70ef85SGao feng /* fake multicast ability */
4375c70ef85SGao feng static void veth_set_multicast_list(struct net_device *dev)
4385c70ef85SGao feng {
4395c70ef85SGao feng }
4405c70ef85SGao feng 
441638264dcSToshiaki Makita static int veth_select_rxq(struct net_device *dev)
442638264dcSToshiaki Makita {
443638264dcSToshiaki Makita 	return smp_processor_id() % dev->real_num_rx_queues;
444638264dcSToshiaki Makita }
445638264dcSToshiaki Makita 
4469aa1206eSDaniel Borkmann static struct net_device *veth_peer_dev(struct net_device *dev)
4479aa1206eSDaniel Borkmann {
4489aa1206eSDaniel Borkmann 	struct veth_priv *priv = netdev_priv(dev);
4499aa1206eSDaniel Borkmann 
4509aa1206eSDaniel Borkmann 	/* Callers must be under RCU read side. */
4519aa1206eSDaniel Borkmann 	return rcu_dereference(priv->peer);
4529aa1206eSDaniel Borkmann }
4539aa1206eSDaniel Borkmann 
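/* Common xmit helper for XDP frames, used both for flushing the peer's
 * XDP_TX bulk queue (ndo_xmit == false) and for ndo_xdp_xmit
 * (ndo_xmit == true). Frames are tagged and produced directly into one of
 * the peer's rx rings (picked by the current CPU); frames larger than the
 * peer's MTU-derived limit or not fitting in the ring are left for the
 * caller to free. Returns the number of frames actually queued, or a
 * negative error if the peer is gone or its queue has no active NAPI.
 */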
454af87a3aaSToshiaki Makita static int veth_xdp_xmit(struct net_device *dev, int n,
4559152cff0SLorenzo Bianconi 			 struct xdp_frame **frames,
4569152cff0SLorenzo Bianconi 			 u32 flags, bool ndo_xmit)
457af87a3aaSToshiaki Makita {
458af87a3aaSToshiaki Makita 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
459fdc13979SLorenzo Bianconi 	int i, ret = -ENXIO, nxmit = 0;
460af87a3aaSToshiaki Makita 	struct net_device *rcv;
4615fe6e567SLorenzo Bianconi 	unsigned int max_len;
462638264dcSToshiaki Makita 	struct veth_rq *rq;
463af87a3aaSToshiaki Makita 
4645fe6e567SLorenzo Bianconi 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
465d99a7c2fSLorenzo Bianconi 		return -EINVAL;
466af87a3aaSToshiaki Makita 
4675fe6e567SLorenzo Bianconi 	rcu_read_lock();
468af87a3aaSToshiaki Makita 	rcv = rcu_dereference(priv->peer);
4695fe6e567SLorenzo Bianconi 	if (unlikely(!rcv))
4705fe6e567SLorenzo Bianconi 		goto out;
471af87a3aaSToshiaki Makita 
472af87a3aaSToshiaki Makita 	rcv_priv = netdev_priv(rcv);
4735fe6e567SLorenzo Bianconi 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
4740e672f30SToke Høiland-Jørgensen 	/* The napi pointer is set if NAPI is enabled, which ensures that
4750e672f30SToke Høiland-Jørgensen 	 * xdp_ring is initialized on receive side and the peer device is up.
476af87a3aaSToshiaki Makita 	 */
4770e672f30SToke Høiland-Jørgensen 	if (!rcu_access_pointer(rq->napi))
4785fe6e567SLorenzo Bianconi 		goto out;
479af87a3aaSToshiaki Makita 
480af87a3aaSToshiaki Makita 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
481af87a3aaSToshiaki Makita 
482638264dcSToshiaki Makita 	spin_lock(&rq->xdp_ring.producer_lock);
483af87a3aaSToshiaki Makita 	for (i = 0; i < n; i++) {
484af87a3aaSToshiaki Makita 		struct xdp_frame *frame = frames[i];
485af87a3aaSToshiaki Makita 		void *ptr = veth_xdp_to_ptr(frame);
486af87a3aaSToshiaki Makita 
4875142239aSLorenzo Bianconi 		if (unlikely(xdp_get_frame_len(frame) > max_len ||
488fdc13979SLorenzo Bianconi 			     __ptr_ring_produce(&rq->xdp_ring, ptr)))
489fdc13979SLorenzo Bianconi 			break;
490fdc13979SLorenzo Bianconi 		nxmit++;
491af87a3aaSToshiaki Makita 	}
492638264dcSToshiaki Makita 	spin_unlock(&rq->xdp_ring.producer_lock);
493af87a3aaSToshiaki Makita 
494af87a3aaSToshiaki Makita 	if (flags & XDP_XMIT_FLUSH)
495638264dcSToshiaki Makita 		__veth_xdp_flush(rq);
496af87a3aaSToshiaki Makita 
497fdc13979SLorenzo Bianconi 	ret = nxmit;
4989152cff0SLorenzo Bianconi 	if (ndo_xmit) {
4995fe6e567SLorenzo Bianconi 		u64_stats_update_begin(&rq->stats.syncp);
500fdc13979SLorenzo Bianconi 		rq->stats.vs.peer_tq_xdp_xmit += nxmit;
501fdc13979SLorenzo Bianconi 		rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit;
5029152cff0SLorenzo Bianconi 		u64_stats_update_end(&rq->stats.syncp);
5035fe6e567SLorenzo Bianconi 	}
5049152cff0SLorenzo Bianconi 
5055fe6e567SLorenzo Bianconi out:
506b23bfa56SJohn Fastabend 	rcu_read_unlock();
5072131479dSToshiaki Makita 
5082131479dSToshiaki Makita 	return ret;
509af87a3aaSToshiaki Makita }
510af87a3aaSToshiaki Makita 
5119152cff0SLorenzo Bianconi static int veth_ndo_xdp_xmit(struct net_device *dev, int n,
5129152cff0SLorenzo Bianconi 			     struct xdp_frame **frames, u32 flags)
5139152cff0SLorenzo Bianconi {
5145fe6e567SLorenzo Bianconi 	int err;
5155fe6e567SLorenzo Bianconi 
5165fe6e567SLorenzo Bianconi 	err = veth_xdp_xmit(dev, n, frames, flags, true);
5175fe6e567SLorenzo Bianconi 	if (err < 0) {
5185fe6e567SLorenzo Bianconi 		struct veth_priv *priv = netdev_priv(dev);
5195fe6e567SLorenzo Bianconi 
5205fe6e567SLorenzo Bianconi 		atomic64_add(n, &priv->dropped);
5215fe6e567SLorenzo Bianconi 	}
5225fe6e567SLorenzo Bianconi 
5235fe6e567SLorenzo Bianconi 	return err;
5249152cff0SLorenzo Bianconi }
5259152cff0SLorenzo Bianconi 
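/* Flush the XDP_TX bulk queue towards the peer. Frames that could not be
 * queued are returned to their memory allocator here, and the outcome is
 * accounted as xdp_tx/xdp_tx_err on the local rx queue.
 */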
526bd32aa1fSLorenzo Bianconi static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
5279cda7807SToshiaki Makita {
528fdc13979SLorenzo Bianconi 	int sent, i, err = 0, drops;
5299cda7807SToshiaki Makita 
530bd32aa1fSLorenzo Bianconi 	sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false);
5319cda7807SToshiaki Makita 	if (sent < 0) {
5329cda7807SToshiaki Makita 		err = sent;
5339cda7807SToshiaki Makita 		sent = 0;
5349cda7807SToshiaki Makita 	}
535fdc13979SLorenzo Bianconi 
536fdc13979SLorenzo Bianconi 	for (i = sent; unlikely(i < bq->count); i++)
537fdc13979SLorenzo Bianconi 		xdp_return_frame(bq->q[i]);
538fdc13979SLorenzo Bianconi 
539fdc13979SLorenzo Bianconi 	drops = bq->count - sent;
540fdc13979SLorenzo Bianconi 	trace_xdp_bulk_tx(rq->dev, sent, drops, err);
5419cda7807SToshiaki Makita 
5425fe6e567SLorenzo Bianconi 	u64_stats_update_begin(&rq->stats.syncp);
5435fe6e567SLorenzo Bianconi 	rq->stats.vs.xdp_tx += sent;
544fdc13979SLorenzo Bianconi 	rq->stats.vs.xdp_tx_err += drops;
5455fe6e567SLorenzo Bianconi 	u64_stats_update_end(&rq->stats.syncp);
5465fe6e567SLorenzo Bianconi 
5479cda7807SToshiaki Makita 	bq->count = 0;
5489cda7807SToshiaki Makita }
5499cda7807SToshiaki Makita 
550bd32aa1fSLorenzo Bianconi static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
551d1396004SToshiaki Makita {
552bd32aa1fSLorenzo Bianconi 	struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev);
553d1396004SToshiaki Makita 	struct net_device *rcv;
554bd32aa1fSLorenzo Bianconi 	struct veth_rq *rcv_rq;
555d1396004SToshiaki Makita 
556d1396004SToshiaki Makita 	rcu_read_lock();
557bd32aa1fSLorenzo Bianconi 	veth_xdp_flush_bq(rq, bq);
558d1396004SToshiaki Makita 	rcv = rcu_dereference(priv->peer);
559d1396004SToshiaki Makita 	if (unlikely(!rcv))
560d1396004SToshiaki Makita 		goto out;
561d1396004SToshiaki Makita 
562d1396004SToshiaki Makita 	rcv_priv = netdev_priv(rcv);
563bd32aa1fSLorenzo Bianconi 	rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)];
564d1396004SToshiaki Makita 	/* The xdp_prog check verifies that xdp_ring is initialized on the receive side */
565bd32aa1fSLorenzo Bianconi 	if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog)))
566d1396004SToshiaki Makita 		goto out;
567d1396004SToshiaki Makita 
568bd32aa1fSLorenzo Bianconi 	__veth_xdp_flush(rcv_rq);
569d1396004SToshiaki Makita out:
570d1396004SToshiaki Makita 	rcu_read_unlock();
571d1396004SToshiaki Makita }
572d1396004SToshiaki Makita 
573bd32aa1fSLorenzo Bianconi static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
5749cda7807SToshiaki Makita 		       struct veth_xdp_tx_bq *bq)
575d1396004SToshiaki Makita {
5761b698fa5SLorenzo Bianconi 	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
577d1396004SToshiaki Makita 
578d1396004SToshiaki Makita 	if (unlikely(!frame))
579d1396004SToshiaki Makita 		return -EOVERFLOW;
580d1396004SToshiaki Makita 
5819cda7807SToshiaki Makita 	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
582bd32aa1fSLorenzo Bianconi 		veth_xdp_flush_bq(rq, bq);
5839cda7807SToshiaki Makita 
5849cda7807SToshiaki Makita 	bq->q[bq->count++] = frame;
5859cda7807SToshiaki Makita 
5869cda7807SToshiaki Makita 	return 0;
587d1396004SToshiaki Makita }
588d1396004SToshiaki Makita 
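/* Run the XDP program on one xdp_frame that arrived via the peer's
 * ndo_xdp_xmit(). On XDP_PASS the (possibly adjusted) frame is returned to
 * the caller for bulk conversion into an skb; XDP_TX and XDP_REDIRECT
 * consume the frame, everything else drops it.
 */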
58965e6dcf7SLorenzo Bianconi static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
590d1396004SToshiaki Makita 					  struct xdp_frame *frame,
5911c5b82e5SLorenzo Bianconi 					  struct veth_xdp_tx_bq *bq,
5921c5b82e5SLorenzo Bianconi 					  struct veth_stats *stats)
5939fc8d518SToshiaki Makita {
594d1396004SToshiaki Makita 	struct xdp_frame orig_frame;
5959fc8d518SToshiaki Makita 	struct bpf_prog *xdp_prog;
5969fc8d518SToshiaki Makita 
5979fc8d518SToshiaki Makita 	rcu_read_lock();
598638264dcSToshiaki Makita 	xdp_prog = rcu_dereference(rq->xdp_prog);
5999fc8d518SToshiaki Makita 	if (likely(xdp_prog)) {
600fefb695aSStanislav Fomichev 		struct veth_xdp_buff vxbuf;
601fefb695aSStanislav Fomichev 		struct xdp_buff *xdp = &vxbuf.xdp;
6029fc8d518SToshiaki Makita 		u32 act;
6039fc8d518SToshiaki Makita 
604fefb695aSStanislav Fomichev 		xdp_convert_frame_to_buff(frame, xdp);
605fefb695aSStanislav Fomichev 		xdp->rxq = &rq->xdp_rxq;
606306531f0SStanislav Fomichev 		vxbuf.skb = NULL;
6079fc8d518SToshiaki Makita 
608fefb695aSStanislav Fomichev 		act = bpf_prog_run_xdp(xdp_prog, xdp);
6099fc8d518SToshiaki Makita 
6109fc8d518SToshiaki Makita 		switch (act) {
6119fc8d518SToshiaki Makita 		case XDP_PASS:
612fefb695aSStanislav Fomichev 			if (xdp_update_frame_from_buff(xdp, frame))
61389f479f0SLorenzo Bianconi 				goto err_xdp;
6149fc8d518SToshiaki Makita 			break;
615d1396004SToshiaki Makita 		case XDP_TX:
616d1396004SToshiaki Makita 			orig_frame = *frame;
617fefb695aSStanislav Fomichev 			xdp->rxq->mem = frame->mem;
618fefb695aSStanislav Fomichev 			if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
619638264dcSToshiaki Makita 				trace_xdp_exception(rq->dev, xdp_prog, act);
620d1396004SToshiaki Makita 				frame = &orig_frame;
6211c5b82e5SLorenzo Bianconi 				stats->rx_drops++;
622d1396004SToshiaki Makita 				goto err_xdp;
623d1396004SToshiaki Makita 			}
6241c5b82e5SLorenzo Bianconi 			stats->xdp_tx++;
625d1396004SToshiaki Makita 			rcu_read_unlock();
626d1396004SToshiaki Makita 			goto xdp_xmit;
627d1396004SToshiaki Makita 		case XDP_REDIRECT:
628d1396004SToshiaki Makita 			orig_frame = *frame;
629fefb695aSStanislav Fomichev 			xdp->rxq->mem = frame->mem;
630fefb695aSStanislav Fomichev 			if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
631d1396004SToshiaki Makita 				frame = &orig_frame;
6321c5b82e5SLorenzo Bianconi 				stats->rx_drops++;
633d1396004SToshiaki Makita 				goto err_xdp;
634d1396004SToshiaki Makita 			}
6351c5b82e5SLorenzo Bianconi 			stats->xdp_redirect++;
636d1396004SToshiaki Makita 			rcu_read_unlock();
637d1396004SToshiaki Makita 			goto xdp_xmit;
6389fc8d518SToshiaki Makita 		default:
639c8064e5bSPaolo Abeni 			bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
640df561f66SGustavo A. R. Silva 			fallthrough;
6419fc8d518SToshiaki Makita 		case XDP_ABORTED:
642638264dcSToshiaki Makita 			trace_xdp_exception(rq->dev, xdp_prog, act);
643df561f66SGustavo A. R. Silva 			fallthrough;
6449fc8d518SToshiaki Makita 		case XDP_DROP:
6451c5b82e5SLorenzo Bianconi 			stats->xdp_drops++;
6469fc8d518SToshiaki Makita 			goto err_xdp;
6479fc8d518SToshiaki Makita 		}
6489fc8d518SToshiaki Makita 	}
6499fc8d518SToshiaki Makita 	rcu_read_unlock();
6509fc8d518SToshiaki Makita 
65165e6dcf7SLorenzo Bianconi 	return frame;
6529fc8d518SToshiaki Makita err_xdp:
6539fc8d518SToshiaki Makita 	rcu_read_unlock();
6549fc8d518SToshiaki Makita 	xdp_return_frame(frame);
655d1396004SToshiaki Makita xdp_xmit:
6569fc8d518SToshiaki Makita 	return NULL;
6579fc8d518SToshiaki Makita }
6589fc8d518SToshiaki Makita 
65965e6dcf7SLorenzo Bianconi /* frames array contains at most VETH_XDP_BATCH entries */
66065e6dcf7SLorenzo Bianconi static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
66165e6dcf7SLorenzo Bianconi 				  int n_xdpf, struct veth_xdp_tx_bq *bq,
66265e6dcf7SLorenzo Bianconi 				  struct veth_stats *stats)
66365e6dcf7SLorenzo Bianconi {
66465e6dcf7SLorenzo Bianconi 	void *skbs[VETH_XDP_BATCH];
66565e6dcf7SLorenzo Bianconi 	int i;
66665e6dcf7SLorenzo Bianconi 
66765e6dcf7SLorenzo Bianconi 	if (xdp_alloc_skb_bulk(skbs, n_xdpf,
66865e6dcf7SLorenzo Bianconi 			       GFP_ATOMIC | __GFP_ZERO) < 0) {
66965e6dcf7SLorenzo Bianconi 		for (i = 0; i < n_xdpf; i++)
67065e6dcf7SLorenzo Bianconi 			xdp_return_frame(frames[i]);
67165e6dcf7SLorenzo Bianconi 		stats->rx_drops += n_xdpf;
67265e6dcf7SLorenzo Bianconi 
67365e6dcf7SLorenzo Bianconi 		return;
67465e6dcf7SLorenzo Bianconi 	}
67565e6dcf7SLorenzo Bianconi 
67665e6dcf7SLorenzo Bianconi 	for (i = 0; i < n_xdpf; i++) {
67765e6dcf7SLorenzo Bianconi 		struct sk_buff *skb = skbs[i];
67865e6dcf7SLorenzo Bianconi 
67965e6dcf7SLorenzo Bianconi 		skb = __xdp_build_skb_from_frame(frames[i], skb,
68065e6dcf7SLorenzo Bianconi 						 rq->dev);
68165e6dcf7SLorenzo Bianconi 		if (!skb) {
68265e6dcf7SLorenzo Bianconi 			xdp_return_frame(frames[i]);
68365e6dcf7SLorenzo Bianconi 			stats->rx_drops++;
68465e6dcf7SLorenzo Bianconi 			continue;
68565e6dcf7SLorenzo Bianconi 		}
68665e6dcf7SLorenzo Bianconi 		napi_gro_receive(&rq->xdp_napi, skb);
68765e6dcf7SLorenzo Bianconi 	}
68865e6dcf7SLorenzo Bianconi }
68965e6dcf7SLorenzo Bianconi 
690718a18a0SLorenzo Bianconi static void veth_xdp_get(struct xdp_buff *xdp)
691718a18a0SLorenzo Bianconi {
692718a18a0SLorenzo Bianconi 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
693718a18a0SLorenzo Bianconi 	int i;
694718a18a0SLorenzo Bianconi 
695718a18a0SLorenzo Bianconi 	get_page(virt_to_page(xdp->data));
696718a18a0SLorenzo Bianconi 	if (likely(!xdp_buff_has_frags(xdp)))
697718a18a0SLorenzo Bianconi 		return;
698718a18a0SLorenzo Bianconi 
699718a18a0SLorenzo Bianconi 	for (i = 0; i < sinfo->nr_frags; i++)
700718a18a0SLorenzo Bianconi 		__skb_frag_ref(&sinfo->frags[i]);
701718a18a0SLorenzo Bianconi }
702718a18a0SLorenzo Bianconi 
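/* Build an xdp_buff on top of an skb before running the XDP program on it.
 * Shared skbs, skbs with a locked head or with page frags are copied into
 * freshly allocated order-0 pages (linear head plus page fragments) with
 * VETH_XDP_HEADROOM reserved; otherwise the head is only expanded when it
 * lacks XDP_PACKET_HEADROOM. *pskb is updated to the (possibly new) skb.
 */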
703718a18a0SLorenzo Bianconi static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
704718a18a0SLorenzo Bianconi 					struct xdp_buff *xdp,
705718a18a0SLorenzo Bianconi 					struct sk_buff **pskb)
706718a18a0SLorenzo Bianconi {
707718a18a0SLorenzo Bianconi 	struct sk_buff *skb = *pskb;
708718a18a0SLorenzo Bianconi 	u32 frame_sz;
709718a18a0SLorenzo Bianconi 
710718a18a0SLorenzo Bianconi 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
711718a18a0SLorenzo Bianconi 	    skb_shinfo(skb)->nr_frags) {
712718a18a0SLorenzo Bianconi 		u32 size, len, max_head_size, off;
713718a18a0SLorenzo Bianconi 		struct sk_buff *nskb;
714718a18a0SLorenzo Bianconi 		struct page *page;
715718a18a0SLorenzo Bianconi 		int i, head_off;
716718a18a0SLorenzo Bianconi 
717718a18a0SLorenzo Bianconi 		/* We need a private copy of the skb and data buffers since
718718a18a0SLorenzo Bianconi 		 * the eBPF program can modify it. We segment the original skb
719718a18a0SLorenzo Bianconi 		 * into order-0 pages without linearizing it.
720718a18a0SLorenzo Bianconi 		 *
721718a18a0SLorenzo Bianconi 		 * Make sure we have enough space for linear and paged area
722718a18a0SLorenzo Bianconi 		 */
723718a18a0SLorenzo Bianconi 		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
724718a18a0SLorenzo Bianconi 						  VETH_XDP_HEADROOM);
725718a18a0SLorenzo Bianconi 		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
726718a18a0SLorenzo Bianconi 			goto drop;
727718a18a0SLorenzo Bianconi 
728718a18a0SLorenzo Bianconi 		/* Allocate skb head */
729718a18a0SLorenzo Bianconi 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
730718a18a0SLorenzo Bianconi 		if (!page)
731718a18a0SLorenzo Bianconi 			goto drop;
732718a18a0SLorenzo Bianconi 
733718a18a0SLorenzo Bianconi 		nskb = build_skb(page_address(page), PAGE_SIZE);
734718a18a0SLorenzo Bianconi 		if (!nskb) {
735718a18a0SLorenzo Bianconi 			put_page(page);
736718a18a0SLorenzo Bianconi 			goto drop;
737718a18a0SLorenzo Bianconi 		}
738718a18a0SLorenzo Bianconi 
739718a18a0SLorenzo Bianconi 		skb_reserve(nskb, VETH_XDP_HEADROOM);
740718a18a0SLorenzo Bianconi 		size = min_t(u32, skb->len, max_head_size);
741718a18a0SLorenzo Bianconi 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
742718a18a0SLorenzo Bianconi 			consume_skb(nskb);
743718a18a0SLorenzo Bianconi 			goto drop;
744718a18a0SLorenzo Bianconi 		}
745718a18a0SLorenzo Bianconi 		skb_put(nskb, size);
746718a18a0SLorenzo Bianconi 
747718a18a0SLorenzo Bianconi 		skb_copy_header(nskb, skb);
748718a18a0SLorenzo Bianconi 		head_off = skb_headroom(nskb) - skb_headroom(skb);
749718a18a0SLorenzo Bianconi 		skb_headers_offset_update(nskb, head_off);
750718a18a0SLorenzo Bianconi 
751718a18a0SLorenzo Bianconi 		/* Allocate paged area of new skb */
752718a18a0SLorenzo Bianconi 		off = size;
753718a18a0SLorenzo Bianconi 		len = skb->len - off;
754718a18a0SLorenzo Bianconi 
755718a18a0SLorenzo Bianconi 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
756718a18a0SLorenzo Bianconi 			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
757718a18a0SLorenzo Bianconi 			if (!page) {
758718a18a0SLorenzo Bianconi 				consume_skb(nskb);
759718a18a0SLorenzo Bianconi 				goto drop;
760718a18a0SLorenzo Bianconi 			}
761718a18a0SLorenzo Bianconi 
762718a18a0SLorenzo Bianconi 			size = min_t(u32, len, PAGE_SIZE);
763718a18a0SLorenzo Bianconi 			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
764718a18a0SLorenzo Bianconi 			if (skb_copy_bits(skb, off, page_address(page),
765718a18a0SLorenzo Bianconi 					  size)) {
766718a18a0SLorenzo Bianconi 				consume_skb(nskb);
767718a18a0SLorenzo Bianconi 				goto drop;
768718a18a0SLorenzo Bianconi 			}
769718a18a0SLorenzo Bianconi 
770718a18a0SLorenzo Bianconi 			len -= size;
771718a18a0SLorenzo Bianconi 			off += size;
772718a18a0SLorenzo Bianconi 		}
773718a18a0SLorenzo Bianconi 
774718a18a0SLorenzo Bianconi 		consume_skb(skb);
775718a18a0SLorenzo Bianconi 		skb = nskb;
776718a18a0SLorenzo Bianconi 	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
777718a18a0SLorenzo Bianconi 		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
778718a18a0SLorenzo Bianconi 		goto drop;
779718a18a0SLorenzo Bianconi 	}
780718a18a0SLorenzo Bianconi 
781718a18a0SLorenzo Bianconi 	/* SKB "head" area always has tailroom for skb_shared_info */
782718a18a0SLorenzo Bianconi 	frame_sz = skb_end_pointer(skb) - skb->head;
783718a18a0SLorenzo Bianconi 	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
784718a18a0SLorenzo Bianconi 	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
785718a18a0SLorenzo Bianconi 	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
786718a18a0SLorenzo Bianconi 			 skb_headlen(skb), true);
787718a18a0SLorenzo Bianconi 
788718a18a0SLorenzo Bianconi 	if (skb_is_nonlinear(skb)) {
789718a18a0SLorenzo Bianconi 		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
790718a18a0SLorenzo Bianconi 		xdp_buff_set_frags_flag(xdp);
791718a18a0SLorenzo Bianconi 	} else {
792718a18a0SLorenzo Bianconi 		xdp_buff_clear_frags_flag(xdp);
793718a18a0SLorenzo Bianconi 	}
794718a18a0SLorenzo Bianconi 	*pskb = skb;
795718a18a0SLorenzo Bianconi 
796718a18a0SLorenzo Bianconi 	return 0;
797718a18a0SLorenzo Bianconi drop:
798718a18a0SLorenzo Bianconi 	consume_skb(skb);
799718a18a0SLorenzo Bianconi 	*pskb = NULL;
800718a18a0SLorenzo Bianconi 
801718a18a0SLorenzo Bianconi 	return -ENOMEM;
802718a18a0SLorenzo Bianconi }
803718a18a0SLorenzo Bianconi 
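/* Run the XDP program on an skb queued by veth_xmit(). The skb is first
 * turned into an xdp_buff (see above); on XDP_PASS the skb geometry and
 * metadata are re-synced with whatever bpf_xdp_adjust_head()/tail() did
 * before the skb is handed to the stack. XDP_TX and XDP_REDIRECT take over
 * the buffer and consume the original skb.
 */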
8041c5b82e5SLorenzo Bianconi static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
8051c5b82e5SLorenzo Bianconi 					struct sk_buff *skb,
8061c5b82e5SLorenzo Bianconi 					struct veth_xdp_tx_bq *bq,
8071c5b82e5SLorenzo Bianconi 					struct veth_stats *stats)
808948d4f21SToshiaki Makita {
809948d4f21SToshiaki Makita 	void *orig_data, *orig_data_end;
810948d4f21SToshiaki Makita 	struct bpf_prog *xdp_prog;
811fefb695aSStanislav Fomichev 	struct veth_xdp_buff vxbuf;
812fefb695aSStanislav Fomichev 	struct xdp_buff *xdp = &vxbuf.xdp;
813718a18a0SLorenzo Bianconi 	u32 act, metalen;
814718a18a0SLorenzo Bianconi 	int off;
815948d4f21SToshiaki Makita 
816d504fff0SPaolo Abeni 	skb_prepare_for_gro(skb);
8174bf9ffa0SToshiaki Makita 
818948d4f21SToshiaki Makita 	rcu_read_lock();
819638264dcSToshiaki Makita 	xdp_prog = rcu_dereference(rq->xdp_prog);
820948d4f21SToshiaki Makita 	if (unlikely(!xdp_prog)) {
821948d4f21SToshiaki Makita 		rcu_read_unlock();
822948d4f21SToshiaki Makita 		goto out;
823948d4f21SToshiaki Makita 	}
824948d4f21SToshiaki Makita 
825718a18a0SLorenzo Bianconi 	__skb_push(skb, skb->data - skb_mac_header(skb));
826fefb695aSStanislav Fomichev 	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
827948d4f21SToshiaki Makita 		goto drop;
828306531f0SStanislav Fomichev 	vxbuf.skb = skb;
829948d4f21SToshiaki Makita 
830fefb695aSStanislav Fomichev 	orig_data = xdp->data;
831fefb695aSStanislav Fomichev 	orig_data_end = xdp->data_end;
832948d4f21SToshiaki Makita 
833fefb695aSStanislav Fomichev 	act = bpf_prog_run_xdp(xdp_prog, xdp);
834948d4f21SToshiaki Makita 
835948d4f21SToshiaki Makita 	switch (act) {
836948d4f21SToshiaki Makita 	case XDP_PASS:
837948d4f21SToshiaki Makita 		break;
838d1396004SToshiaki Makita 	case XDP_TX:
839fefb695aSStanislav Fomichev 		veth_xdp_get(xdp);
840d1396004SToshiaki Makita 		consume_skb(skb);
841fefb695aSStanislav Fomichev 		xdp->rxq->mem = rq->xdp_mem;
842fefb695aSStanislav Fomichev 		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
843638264dcSToshiaki Makita 			trace_xdp_exception(rq->dev, xdp_prog, act);
8441c5b82e5SLorenzo Bianconi 			stats->rx_drops++;
845d1396004SToshiaki Makita 			goto err_xdp;
846d1396004SToshiaki Makita 		}
8471c5b82e5SLorenzo Bianconi 		stats->xdp_tx++;
848d1396004SToshiaki Makita 		rcu_read_unlock();
849d1396004SToshiaki Makita 		goto xdp_xmit;
850d1396004SToshiaki Makita 	case XDP_REDIRECT:
851fefb695aSStanislav Fomichev 		veth_xdp_get(xdp);
852d1396004SToshiaki Makita 		consume_skb(skb);
853fefb695aSStanislav Fomichev 		xdp->rxq->mem = rq->xdp_mem;
854fefb695aSStanislav Fomichev 		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
8551c5b82e5SLorenzo Bianconi 			stats->rx_drops++;
856d1396004SToshiaki Makita 			goto err_xdp;
8571c5b82e5SLorenzo Bianconi 		}
8581c5b82e5SLorenzo Bianconi 		stats->xdp_redirect++;
859d1396004SToshiaki Makita 		rcu_read_unlock();
860d1396004SToshiaki Makita 		goto xdp_xmit;
861948d4f21SToshiaki Makita 	default:
862c8064e5bSPaolo Abeni 		bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act);
863df561f66SGustavo A. R. Silva 		fallthrough;
864948d4f21SToshiaki Makita 	case XDP_ABORTED:
865638264dcSToshiaki Makita 		trace_xdp_exception(rq->dev, xdp_prog, act);
866df561f66SGustavo A. R. Silva 		fallthrough;
867948d4f21SToshiaki Makita 	case XDP_DROP:
8681c5b82e5SLorenzo Bianconi 		stats->xdp_drops++;
8691c5b82e5SLorenzo Bianconi 		goto xdp_drop;
870948d4f21SToshiaki Makita 	}
871948d4f21SToshiaki Makita 	rcu_read_unlock();
872948d4f21SToshiaki Makita 
87345a9e6d8SJesper Dangaard Brouer 	/* check if bpf_xdp_adjust_head was used */
874fefb695aSStanislav Fomichev 	off = orig_data - xdp->data;
875948d4f21SToshiaki Makita 	if (off > 0)
876948d4f21SToshiaki Makita 		__skb_push(skb, off);
877948d4f21SToshiaki Makita 	else if (off < 0)
878948d4f21SToshiaki Makita 		__skb_pull(skb, -off);
879718a18a0SLorenzo Bianconi 
880718a18a0SLorenzo Bianconi 	skb_reset_mac_header(skb);
88145a9e6d8SJesper Dangaard Brouer 
88245a9e6d8SJesper Dangaard Brouer 	/* check if bpf_xdp_adjust_tail was used */
883fefb695aSStanislav Fomichev 	off = xdp->data_end - orig_data_end;
884948d4f21SToshiaki Makita 	if (off != 0)
88545a9e6d8SJesper Dangaard Brouer 		__skb_put(skb, off); /* positive on grow, negative on shrink */
886718a18a0SLorenzo Bianconi 
887718a18a0SLorenzo Bianconi 	/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
888718a18a0SLorenzo Bianconi 	 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
889718a18a0SLorenzo Bianconi 	 */
890fefb695aSStanislav Fomichev 	if (xdp_buff_has_frags(xdp))
891718a18a0SLorenzo Bianconi 		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
892718a18a0SLorenzo Bianconi 	else
893718a18a0SLorenzo Bianconi 		skb->data_len = 0;
894718a18a0SLorenzo Bianconi 
895638264dcSToshiaki Makita 	skb->protocol = eth_type_trans(skb, rq->dev);
896948d4f21SToshiaki Makita 
897fefb695aSStanislav Fomichev 	metalen = xdp->data - xdp->data_meta;
898948d4f21SToshiaki Makita 	if (metalen)
899948d4f21SToshiaki Makita 		skb_metadata_set(skb, metalen);
900948d4f21SToshiaki Makita out:
901948d4f21SToshiaki Makita 	return skb;
902948d4f21SToshiaki Makita drop:
9031c5b82e5SLorenzo Bianconi 	stats->rx_drops++;
9041c5b82e5SLorenzo Bianconi xdp_drop:
905948d4f21SToshiaki Makita 	rcu_read_unlock();
906948d4f21SToshiaki Makita 	kfree_skb(skb);
907948d4f21SToshiaki Makita 	return NULL;
908d1396004SToshiaki Makita err_xdp:
909d1396004SToshiaki Makita 	rcu_read_unlock();
910fefb695aSStanislav Fomichev 	xdp_return_buff(xdp);
911d1396004SToshiaki Makita xdp_xmit:
912d1396004SToshiaki Makita 	return NULL;
913948d4f21SToshiaki Makita }
914948d4f21SToshiaki Makita 
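/* NAPI receive loop: consume up to @budget entries from the rx ring,
 * dispatching xdp_frames to veth_xdp_rcv_one() (batched into skbs in groups
 * of VETH_XDP_BATCH) and skbs to veth_xdp_rcv_skb(), then fold the collected
 * veth_stats into the per-queue counters.
 */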
9151c5b82e5SLorenzo Bianconi static int veth_xdp_rcv(struct veth_rq *rq, int budget,
9161c5b82e5SLorenzo Bianconi 			struct veth_xdp_tx_bq *bq,
9171c5b82e5SLorenzo Bianconi 			struct veth_stats *stats)
918948d4f21SToshiaki Makita {
91965e6dcf7SLorenzo Bianconi 	int i, done = 0, n_xdpf = 0;
92065e6dcf7SLorenzo Bianconi 	void *xdpf[VETH_XDP_BATCH];
921948d4f21SToshiaki Makita 
922948d4f21SToshiaki Makita 	for (i = 0; i < budget; i++) {
923638264dcSToshiaki Makita 		void *ptr = __ptr_ring_consume(&rq->xdp_ring);
924948d4f21SToshiaki Makita 
9259fc8d518SToshiaki Makita 		if (!ptr)
926948d4f21SToshiaki Makita 			break;
927948d4f21SToshiaki Makita 
928d1396004SToshiaki Makita 		if (veth_is_xdp_frame(ptr)) {
92965e6dcf7SLorenzo Bianconi 			/* ndo_xdp_xmit */
9304195e54aSToshiaki Makita 			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
9314195e54aSToshiaki Makita 
9325142239aSLorenzo Bianconi 			stats->xdp_bytes += xdp_get_frame_len(frame);
93365e6dcf7SLorenzo Bianconi 			frame = veth_xdp_rcv_one(rq, frame, bq, stats);
93465e6dcf7SLorenzo Bianconi 			if (frame) {
93565e6dcf7SLorenzo Bianconi 				/* XDP_PASS */
93665e6dcf7SLorenzo Bianconi 				xdpf[n_xdpf++] = frame;
93765e6dcf7SLorenzo Bianconi 				if (n_xdpf == VETH_XDP_BATCH) {
93865e6dcf7SLorenzo Bianconi 					veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf,
93965e6dcf7SLorenzo Bianconi 							      bq, stats);
94065e6dcf7SLorenzo Bianconi 					n_xdpf = 0;
94165e6dcf7SLorenzo Bianconi 				}
94265e6dcf7SLorenzo Bianconi 			}
943d1396004SToshiaki Makita 		} else {
94465e6dcf7SLorenzo Bianconi 			/* ndo_start_xmit */
94565e6dcf7SLorenzo Bianconi 			struct sk_buff *skb = ptr;
94665e6dcf7SLorenzo Bianconi 
9471c5b82e5SLorenzo Bianconi 			stats->xdp_bytes += skb->len;
9481c5b82e5SLorenzo Bianconi 			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
9499695b7deSPaolo Abeni 			if (skb) {
9509695b7deSPaolo Abeni 				if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC))
9519695b7deSPaolo Abeni 					netif_receive_skb(skb);
9529695b7deSPaolo Abeni 				else
953638264dcSToshiaki Makita 					napi_gro_receive(&rq->xdp_napi, skb);
95465e6dcf7SLorenzo Bianconi 			}
9559695b7deSPaolo Abeni 		}
956948d4f21SToshiaki Makita 		done++;
957948d4f21SToshiaki Makita 	}
958948d4f21SToshiaki Makita 
95965e6dcf7SLorenzo Bianconi 	if (n_xdpf)
96065e6dcf7SLorenzo Bianconi 		veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats);
96165e6dcf7SLorenzo Bianconi 
9624195e54aSToshiaki Makita 	u64_stats_update_begin(&rq->stats.syncp);
9639152cff0SLorenzo Bianconi 	rq->stats.vs.xdp_redirect += stats->xdp_redirect;
9641c5b82e5SLorenzo Bianconi 	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
96566fe4a07SLorenzo Bianconi 	rq->stats.vs.xdp_drops += stats->xdp_drops;
96666fe4a07SLorenzo Bianconi 	rq->stats.vs.rx_drops += stats->rx_drops;
96765780c56SLorenzo Bianconi 	rq->stats.vs.xdp_packets += done;
9684195e54aSToshiaki Makita 	u64_stats_update_end(&rq->stats.syncp);
9694195e54aSToshiaki Makita 
970948d4f21SToshiaki Makita 	return done;
971948d4f21SToshiaki Makita }
972948d4f21SToshiaki Makita 
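/* NAPI poll handler. Once the ring has been drained, pending XDP redirects
 * and any XDP_TX frames collected in the bulk queue are flushed. The
 * smp_store_mb() below pairs with the barrier in __veth_xdp_flush() so that
 * a concurrent producer cannot be missed when NAPI completes.
 */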
973948d4f21SToshiaki Makita static int veth_poll(struct napi_struct *napi, int budget)
974948d4f21SToshiaki Makita {
975638264dcSToshiaki Makita 	struct veth_rq *rq =
976638264dcSToshiaki Makita 		container_of(napi, struct veth_rq, xdp_napi);
9771c5b82e5SLorenzo Bianconi 	struct veth_stats stats = {};
9789cda7807SToshiaki Makita 	struct veth_xdp_tx_bq bq;
979948d4f21SToshiaki Makita 	int done;
980948d4f21SToshiaki Makita 
9819cda7807SToshiaki Makita 	bq.count = 0;
9829cda7807SToshiaki Makita 
983d1396004SToshiaki Makita 	xdp_set_return_frame_no_direct();
9841c5b82e5SLorenzo Bianconi 	done = veth_xdp_rcv(rq, budget, &bq, &stats);
985948d4f21SToshiaki Makita 
986fa349e39SShawn Bohrer 	if (stats.xdp_redirect > 0)
987fa349e39SShawn Bohrer 		xdp_do_flush();
988fa349e39SShawn Bohrer 
989948d4f21SToshiaki Makita 	if (done < budget && napi_complete_done(napi, done)) {
990948d4f21SToshiaki Makita 		/* Write rx_notify_masked before reading ptr_ring */
991638264dcSToshiaki Makita 		smp_store_mb(rq->rx_notify_masked, false);
992638264dcSToshiaki Makita 		if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
99368468d8cSEric Dumazet 			if (napi_schedule_prep(&rq->xdp_napi)) {
99468468d8cSEric Dumazet 				WRITE_ONCE(rq->rx_notify_masked, true);
99568468d8cSEric Dumazet 				__napi_schedule(&rq->xdp_napi);
99668468d8cSEric Dumazet 			}
997948d4f21SToshiaki Makita 		}
998948d4f21SToshiaki Makita 	}
999948d4f21SToshiaki Makita 
10001c5b82e5SLorenzo Bianconi 	if (stats.xdp_tx > 0)
1001bd32aa1fSLorenzo Bianconi 		veth_xdp_flush(rq, &bq);
1002d1396004SToshiaki Makita 	xdp_clear_return_frame_no_direct();
1003d1396004SToshiaki Makita 
1004948d4f21SToshiaki Makita 	return done;
1005948d4f21SToshiaki Makita }
1006948d4f21SToshiaki Makita 
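/* NAPI setup/teardown helpers for a range of rx queues: allocate the
 * per-queue ptr_rings, enable NAPI and publish the napi pointer used by
 * veth_xmit() (and the reverse on teardown). The *_range variants let
 * callers such as veth_set_channels() operate on a subset of the queues.
 */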
1007dedd53c5SPaolo Abeni static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
1008948d4f21SToshiaki Makita {
1009948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1010638264dcSToshiaki Makita 	int err, i;
1011948d4f21SToshiaki Makita 
1012dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1013638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
1014638264dcSToshiaki Makita 
1015638264dcSToshiaki Makita 		err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
1016948d4f21SToshiaki Makita 		if (err)
1017638264dcSToshiaki Makita 			goto err_xdp_ring;
1018638264dcSToshiaki Makita 	}
1019948d4f21SToshiaki Makita 
1020dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1021638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
1022638264dcSToshiaki Makita 
1023638264dcSToshiaki Makita 		napi_enable(&rq->xdp_napi);
1024d3256efdSPaolo Abeni 		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
1025638264dcSToshiaki Makita 	}
1026948d4f21SToshiaki Makita 
1027948d4f21SToshiaki Makita 	return 0;
1028dedd53c5SPaolo Abeni 
1029638264dcSToshiaki Makita err_xdp_ring:
1030dedd53c5SPaolo Abeni 	for (i--; i >= start; i--)
1031638264dcSToshiaki Makita 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
1032638264dcSToshiaki Makita 
1033638264dcSToshiaki Makita 	return err;
1034948d4f21SToshiaki Makita }
1035948d4f21SToshiaki Makita 
1036dedd53c5SPaolo Abeni static int __veth_napi_enable(struct net_device *dev)
1037dedd53c5SPaolo Abeni {
1038dedd53c5SPaolo Abeni 	return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
1039dedd53c5SPaolo Abeni }
1040dedd53c5SPaolo Abeni 
1041dedd53c5SPaolo Abeni static void veth_napi_del_range(struct net_device *dev, int start, int end)
1042948d4f21SToshiaki Makita {
1043948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1044638264dcSToshiaki Makita 	int i;
1045948d4f21SToshiaki Makita 
1046dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1047638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
1048638264dcSToshiaki Makita 
1049d3256efdSPaolo Abeni 		rcu_assign_pointer(priv->rq[i].napi, NULL);
1050638264dcSToshiaki Makita 		napi_disable(&rq->xdp_napi);
10515198d545SJakub Kicinski 		__netif_napi_del(&rq->xdp_napi);
1052638264dcSToshiaki Makita 	}
1053638264dcSToshiaki Makita 	synchronize_net();
1054638264dcSToshiaki Makita 
1055dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1056638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
1057638264dcSToshiaki Makita 
1058638264dcSToshiaki Makita 		rq->rx_notify_masked = false;
1059638264dcSToshiaki Makita 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
1060638264dcSToshiaki Makita 	}
1061948d4f21SToshiaki Makita }
1062948d4f21SToshiaki Makita 
1063dedd53c5SPaolo Abeni static void veth_napi_del(struct net_device *dev)
1064dedd53c5SPaolo Abeni {
1065dedd53c5SPaolo Abeni 	veth_napi_del_range(dev, 0, dev->real_num_rx_queues);
1066dedd53c5SPaolo Abeni }
1067dedd53c5SPaolo Abeni 
1068d3256efdSPaolo Abeni static bool veth_gro_requested(const struct net_device *dev)
1069d3256efdSPaolo Abeni {
1070d3256efdSPaolo Abeni 	return !!(dev->wanted_features & NETIF_F_GRO);
1071d3256efdSPaolo Abeni }
1072d3256efdSPaolo Abeni 
1073dedd53c5SPaolo Abeni static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
1074dedd53c5SPaolo Abeni 				 bool napi_already_on)
1075948d4f21SToshiaki Makita {
1076948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1077638264dcSToshiaki Makita 	int err, i;
1078948d4f21SToshiaki Makita 
1079dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1080638264dcSToshiaki Makita 		struct veth_rq *rq = &priv->rq[i];
1081948d4f21SToshiaki Makita 
1082d3256efdSPaolo Abeni 		if (!napi_already_on)
1083b48b89f9SJakub Kicinski 			netif_napi_add(dev, &rq->xdp_napi, veth_poll);
1084b02e5a0eSBjörn Töpel 		err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
1085948d4f21SToshiaki Makita 		if (err < 0)
1086638264dcSToshiaki Makita 			goto err_rxq_reg;
1087638264dcSToshiaki Makita 
1088638264dcSToshiaki Makita 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
1089638264dcSToshiaki Makita 						 MEM_TYPE_PAGE_SHARED,
1090638264dcSToshiaki Makita 						 NULL);
1091638264dcSToshiaki Makita 		if (err < 0)
1092638264dcSToshiaki Makita 			goto err_reg_mem;
1093638264dcSToshiaki Makita 
1094638264dcSToshiaki Makita 		/* Save original mem info as it can be overwritten */
1095638264dcSToshiaki Makita 		rq->xdp_mem = rq->xdp_rxq.mem;
1096638264dcSToshiaki Makita 	}
1097dedd53c5SPaolo Abeni 	return 0;
1098dedd53c5SPaolo Abeni 
1099dedd53c5SPaolo Abeni err_reg_mem:
1100dedd53c5SPaolo Abeni 	xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
1101dedd53c5SPaolo Abeni err_rxq_reg:
1102dedd53c5SPaolo Abeni 	for (i--; i >= start; i--) {
1103dedd53c5SPaolo Abeni 		struct veth_rq *rq = &priv->rq[i];
1104dedd53c5SPaolo Abeni 
1105dedd53c5SPaolo Abeni 		xdp_rxq_info_unreg(&rq->xdp_rxq);
1106dedd53c5SPaolo Abeni 		if (!napi_already_on)
1107dedd53c5SPaolo Abeni 			netif_napi_del(&rq->xdp_napi);
1108dedd53c5SPaolo Abeni 	}
1109dedd53c5SPaolo Abeni 
1110dedd53c5SPaolo Abeni 	return err;
1111dedd53c5SPaolo Abeni }
1112dedd53c5SPaolo Abeni 
1113dedd53c5SPaolo Abeni static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
1114dedd53c5SPaolo Abeni 				   bool delete_napi)
1115dedd53c5SPaolo Abeni {
1116dedd53c5SPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
1117dedd53c5SPaolo Abeni 	int i;
1118dedd53c5SPaolo Abeni 
1119dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1120dedd53c5SPaolo Abeni 		struct veth_rq *rq = &priv->rq[i];
1121dedd53c5SPaolo Abeni 
1122dedd53c5SPaolo Abeni 		rq->xdp_rxq.mem = rq->xdp_mem;
1123dedd53c5SPaolo Abeni 		xdp_rxq_info_unreg(&rq->xdp_rxq);
1124dedd53c5SPaolo Abeni 
1125dedd53c5SPaolo Abeni 		if (delete_napi)
1126dedd53c5SPaolo Abeni 			netif_napi_del(&rq->xdp_napi);
1127dedd53c5SPaolo Abeni 	}
1128dedd53c5SPaolo Abeni }
1129dedd53c5SPaolo Abeni 
1130dedd53c5SPaolo Abeni static int veth_enable_xdp(struct net_device *dev)
1131dedd53c5SPaolo Abeni {
11325e8d3dc7SHeng Qi 	bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
1133dedd53c5SPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
1134dedd53c5SPaolo Abeni 	int err, i;
1135dedd53c5SPaolo Abeni 
1136dedd53c5SPaolo Abeni 	if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
1137dedd53c5SPaolo Abeni 		err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
1138dedd53c5SPaolo Abeni 		if (err)
1139dedd53c5SPaolo Abeni 			return err;
1140948d4f21SToshiaki Makita 
1141d3256efdSPaolo Abeni 		if (!napi_already_on) {
1142d3256efdSPaolo Abeni 			err = __veth_napi_enable(dev);
1143dedd53c5SPaolo Abeni 			if (err) {
1144dedd53c5SPaolo Abeni 				veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
1145dedd53c5SPaolo Abeni 				return err;
1146dedd53c5SPaolo Abeni 			}
1147d3256efdSPaolo Abeni 
1148d3256efdSPaolo Abeni 			if (!veth_gro_requested(dev)) {
1149d3256efdSPaolo Abeni 				/* user-space did not require GRO, but adding XDP
1150d3256efdSPaolo Abeni 				 * is supposed to get GRO working
1151d3256efdSPaolo Abeni 				 */
1152d3256efdSPaolo Abeni 				dev->features |= NETIF_F_GRO;
1153d3256efdSPaolo Abeni 				netdev_features_change(dev);
1154d3256efdSPaolo Abeni 			}
1155d3256efdSPaolo Abeni 		}
1156948d4f21SToshiaki Makita 	}
1157948d4f21SToshiaki Makita 
1158d3256efdSPaolo Abeni 	for (i = 0; i < dev->real_num_rx_queues; i++) {
1159638264dcSToshiaki Makita 		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
1160d3256efdSPaolo Abeni 		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
1161d3256efdSPaolo Abeni 	}
1162948d4f21SToshiaki Makita 
1163948d4f21SToshiaki Makita 	return 0;
1164948d4f21SToshiaki Makita }
1165948d4f21SToshiaki Makita 
1166948d4f21SToshiaki Makita static void veth_disable_xdp(struct net_device *dev)
1167948d4f21SToshiaki Makita {
1168948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1169638264dcSToshiaki Makita 	int i;
1170948d4f21SToshiaki Makita 
1171638264dcSToshiaki Makita 	for (i = 0; i < dev->real_num_rx_queues; i++)
1172638264dcSToshiaki Makita 		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
1173d3256efdSPaolo Abeni 
1174d3256efdSPaolo Abeni 	if (!netif_running(dev) || !veth_gro_requested(dev)) {
1175948d4f21SToshiaki Makita 		veth_napi_del(dev);
1176d3256efdSPaolo Abeni 
1177d3256efdSPaolo Abeni 		/* if user-space did not request GRO, clear it now: it was
1178d3256efdSPaolo Abeni 		 * only enabled because adding XDP required it
1179d3256efdSPaolo Abeni 		 */
1180d3256efdSPaolo Abeni 		if (!veth_gro_requested(dev) && netif_running(dev)) {
1181d3256efdSPaolo Abeni 			dev->features &= ~NETIF_F_GRO;
1182d3256efdSPaolo Abeni 			netdev_features_change(dev);
1183d3256efdSPaolo Abeni 		}
1184d3256efdSPaolo Abeni 	}
1185d3256efdSPaolo Abeni 
1186dedd53c5SPaolo Abeni 	veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
1187948d4f21SToshiaki Makita }
1188948d4f21SToshiaki Makita 
1189dedd53c5SPaolo Abeni static int veth_napi_enable_range(struct net_device *dev, int start, int end)
1190d3256efdSPaolo Abeni {
1191d3256efdSPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
1192d3256efdSPaolo Abeni 	int err, i;
1193d3256efdSPaolo Abeni 
1194dedd53c5SPaolo Abeni 	for (i = start; i < end; i++) {
1195d3256efdSPaolo Abeni 		struct veth_rq *rq = &priv->rq[i];
1196d3256efdSPaolo Abeni 
1197b48b89f9SJakub Kicinski 		netif_napi_add(dev, &rq->xdp_napi, veth_poll);
1198d3256efdSPaolo Abeni 	}
1199d3256efdSPaolo Abeni 
1200dedd53c5SPaolo Abeni 	err = __veth_napi_enable_range(dev, start, end);
1201d3256efdSPaolo Abeni 	if (err) {
1202dedd53c5SPaolo Abeni 		for (i = start; i < end; i++) {
1203d3256efdSPaolo Abeni 			struct veth_rq *rq = &priv->rq[i];
1204d3256efdSPaolo Abeni 
1205d3256efdSPaolo Abeni 			netif_napi_del(&rq->xdp_napi);
1206d3256efdSPaolo Abeni 		}
1207d3256efdSPaolo Abeni 		return err;
1208d3256efdSPaolo Abeni 	}
1209d3256efdSPaolo Abeni 	return err;
1210d3256efdSPaolo Abeni }
1211d3256efdSPaolo Abeni 
1212dedd53c5SPaolo Abeni static int veth_napi_enable(struct net_device *dev)
1213dedd53c5SPaolo Abeni {
1214dedd53c5SPaolo Abeni 	return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
1215dedd53c5SPaolo Abeni }
1216dedd53c5SPaolo Abeni 
12174752eeb3SPaolo Abeni static void veth_disable_range_safe(struct net_device *dev, int start, int end)
12184752eeb3SPaolo Abeni {
12194752eeb3SPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
12204752eeb3SPaolo Abeni 
12214752eeb3SPaolo Abeni 	if (start >= end)
12224752eeb3SPaolo Abeni 		return;
12234752eeb3SPaolo Abeni 
12244752eeb3SPaolo Abeni 	if (priv->_xdp_prog) {
12254752eeb3SPaolo Abeni 		veth_napi_del_range(dev, start, end);
12264752eeb3SPaolo Abeni 		veth_disable_xdp_range(dev, start, end, false);
12274752eeb3SPaolo Abeni 	} else if (veth_gro_requested(dev)) {
12284752eeb3SPaolo Abeni 		veth_napi_del_range(dev, start, end);
12294752eeb3SPaolo Abeni 	}
12304752eeb3SPaolo Abeni }
12314752eeb3SPaolo Abeni 
12324752eeb3SPaolo Abeni static int veth_enable_range_safe(struct net_device *dev, int start, int end)
12334752eeb3SPaolo Abeni {
12344752eeb3SPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
12354752eeb3SPaolo Abeni 	int err;
12364752eeb3SPaolo Abeni 
12374752eeb3SPaolo Abeni 	if (start >= end)
12384752eeb3SPaolo Abeni 		return 0;
12394752eeb3SPaolo Abeni 
12404752eeb3SPaolo Abeni 	if (priv->_xdp_prog) {
12414752eeb3SPaolo Abeni 		/* these channels are freshly initialized, NAPI is not yet
12424752eeb3SPaolo Abeni 		 * enabled on them even when GRO is requested
12434752eeb3SPaolo Abeni 		 */
12444752eeb3SPaolo Abeni 		err = veth_enable_xdp_range(dev, start, end, false);
12454752eeb3SPaolo Abeni 		if (err)
12464752eeb3SPaolo Abeni 			return err;
12474752eeb3SPaolo Abeni 
12484752eeb3SPaolo Abeni 		err = __veth_napi_enable_range(dev, start, end);
12494752eeb3SPaolo Abeni 		if (err) {
12504752eeb3SPaolo Abeni 			/* on error always delete the newly added napis */
12514752eeb3SPaolo Abeni 			veth_disable_xdp_range(dev, start, end, true);
12524752eeb3SPaolo Abeni 			return err;
12534752eeb3SPaolo Abeni 		}
12544752eeb3SPaolo Abeni 	} else if (veth_gro_requested(dev)) {
12554752eeb3SPaolo Abeni 		return veth_napi_enable_range(dev, start, end);
12564752eeb3SPaolo Abeni 	}
12574752eeb3SPaolo Abeni 	return 0;
12584752eeb3SPaolo Abeni }
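
/* The *_range_safe() helpers above are used by veth_set_channels() so that an
 * ethtool channel update only brings up or tears down the rx queues whose
 * count actually changes, instead of recreating every queue.
 */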
12594752eeb3SPaolo Abeni 
1260*fccca038SLorenzo Bianconi static void veth_set_xdp_features(struct net_device *dev)
1261*fccca038SLorenzo Bianconi {
1262*fccca038SLorenzo Bianconi 	struct veth_priv *priv = netdev_priv(dev);
1263*fccca038SLorenzo Bianconi 	struct net_device *peer;
1264*fccca038SLorenzo Bianconi 
1265*fccca038SLorenzo Bianconi 	peer = rcu_dereference(priv->peer);
1266*fccca038SLorenzo Bianconi 	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
1267*fccca038SLorenzo Bianconi 		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
1268*fccca038SLorenzo Bianconi 				     NETDEV_XDP_ACT_REDIRECT |
1269*fccca038SLorenzo Bianconi 				     NETDEV_XDP_ACT_RX_SG;
1270*fccca038SLorenzo Bianconi 
1271*fccca038SLorenzo Bianconi 		if (priv->_xdp_prog || veth_gro_requested(dev))
1272*fccca038SLorenzo Bianconi 			val |= NETDEV_XDP_ACT_NDO_XMIT |
1273*fccca038SLorenzo Bianconi 			       NETDEV_XDP_ACT_NDO_XMIT_SG;
1274*fccca038SLorenzo Bianconi 		xdp_set_features_flag(dev, val);
1275*fccca038SLorenzo Bianconi 	} else {
1276*fccca038SLorenzo Bianconi 		xdp_clear_features_flag(dev);
1277*fccca038SLorenzo Bianconi 	}
1278*fccca038SLorenzo Bianconi }
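
/* veth_set_xdp_features() advertises XDP support only while the peer's real
 * tx queue count does not exceed this device's rx queue count; the NDO_XMIT
 * and NDO_XMIT_SG bits are additionally gated on an attached XDP program or
 * GRO being requested on the device.
 */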
1279*fccca038SLorenzo Bianconi 
12804752eeb3SPaolo Abeni static int veth_set_channels(struct net_device *dev,
12814752eeb3SPaolo Abeni 			     struct ethtool_channels *ch)
12824752eeb3SPaolo Abeni {
12834752eeb3SPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
12844752eeb3SPaolo Abeni 	unsigned int old_rx_count, new_rx_count;
12854752eeb3SPaolo Abeni 	struct veth_priv *peer_priv;
12864752eeb3SPaolo Abeni 	struct net_device *peer;
12874752eeb3SPaolo Abeni 	int err;
12884752eeb3SPaolo Abeni 
12894752eeb3SPaolo Abeni 	/* sanity check. Upper bounds are already enforced by the caller */
12904752eeb3SPaolo Abeni 	if (!ch->rx_count || !ch->tx_count)
12914752eeb3SPaolo Abeni 		return -EINVAL;
12924752eeb3SPaolo Abeni 
12934752eeb3SPaolo Abeni 	/* avoid breaking XDP, if that is enabled */
12944752eeb3SPaolo Abeni 	peer = rtnl_dereference(priv->peer);
12954752eeb3SPaolo Abeni 	peer_priv = peer ? netdev_priv(peer) : NULL;
12964752eeb3SPaolo Abeni 	if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
12974752eeb3SPaolo Abeni 		return -EINVAL;
12984752eeb3SPaolo Abeni 
12994752eeb3SPaolo Abeni 	if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
13004752eeb3SPaolo Abeni 		return -EINVAL;
13014752eeb3SPaolo Abeni 
13024752eeb3SPaolo Abeni 	old_rx_count = dev->real_num_rx_queues;
13034752eeb3SPaolo Abeni 	new_rx_count = ch->rx_count;
13044752eeb3SPaolo Abeni 	if (netif_running(dev)) {
13054752eeb3SPaolo Abeni 		/* turn device off */
13064752eeb3SPaolo Abeni 		netif_carrier_off(dev);
13074752eeb3SPaolo Abeni 		if (peer)
13084752eeb3SPaolo Abeni 			netif_carrier_off(peer);
13094752eeb3SPaolo Abeni 
13104752eeb3SPaolo Abeni 		/* try to allocate new resources, as needed */
13114752eeb3SPaolo Abeni 		err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
13124752eeb3SPaolo Abeni 		if (err)
13134752eeb3SPaolo Abeni 			goto out;
13144752eeb3SPaolo Abeni 	}
13154752eeb3SPaolo Abeni 
13164752eeb3SPaolo Abeni 	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
13174752eeb3SPaolo Abeni 	if (err)
13184752eeb3SPaolo Abeni 		goto revert;
13194752eeb3SPaolo Abeni 
13204752eeb3SPaolo Abeni 	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
13214752eeb3SPaolo Abeni 	if (err) {
13224752eeb3SPaolo Abeni 		int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);
13234752eeb3SPaolo Abeni 
13244752eeb3SPaolo Abeni 		/* this error condition could happen only if rx and tx change
13254752eeb3SPaolo Abeni 		 * in opposite directions (e.g. the tx count rises while the rx count drops)
13264752eeb3SPaolo Abeni 		 * and we can't do anything to fully restore the original
13274752eeb3SPaolo Abeni 		 * status
13284752eeb3SPaolo Abeni 		 */
13294752eeb3SPaolo Abeni 		if (err2)
13304752eeb3SPaolo Abeni 			pr_warn("Can't restore rx queues config %d -> %d %d\n",
13314752eeb3SPaolo Abeni 				new_rx_count, old_rx_count, err2);
13324752eeb3SPaolo Abeni 		else
13334752eeb3SPaolo Abeni 			goto revert;
13344752eeb3SPaolo Abeni 	}
13354752eeb3SPaolo Abeni 
13364752eeb3SPaolo Abeni out:
13374752eeb3SPaolo Abeni 	if (netif_running(dev)) {
13384752eeb3SPaolo Abeni 		/* note that we need to swap the arguments WRT the enable part
13394752eeb3SPaolo Abeni 		 * to identify the range we have to disable
13404752eeb3SPaolo Abeni 		 */
13414752eeb3SPaolo Abeni 		veth_disable_range_safe(dev, new_rx_count, old_rx_count);
13424752eeb3SPaolo Abeni 		netif_carrier_on(dev);
13434752eeb3SPaolo Abeni 		if (peer)
13444752eeb3SPaolo Abeni 			netif_carrier_on(peer);
13454752eeb3SPaolo Abeni 	}
1346*fccca038SLorenzo Bianconi 
1347*fccca038SLorenzo Bianconi 	/* update XDP supported features */
1348*fccca038SLorenzo Bianconi 	veth_set_xdp_features(dev);
1349*fccca038SLorenzo Bianconi 	if (peer)
1350*fccca038SLorenzo Bianconi 		veth_set_xdp_features(peer);
1351*fccca038SLorenzo Bianconi 
13524752eeb3SPaolo Abeni 	return err;
13534752eeb3SPaolo Abeni 
13544752eeb3SPaolo Abeni revert:
13554752eeb3SPaolo Abeni 	new_rx_count = old_rx_count;
13564752eeb3SPaolo Abeni 	old_rx_count = ch->rx_count;
13574752eeb3SPaolo Abeni 	goto out;
13584752eeb3SPaolo Abeni }
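
/* For reference, the channel counts handled by veth_set_channels() are
 * normally changed from user space through the ethtool channels API, e.g.
 * (interface name is illustrative):
 *
 *	ethtool -L veth0 rx 4 tx 4
 *	ethtool -l veth0
 */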
13594752eeb3SPaolo Abeni 
1360e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev)
1361e314dbdcSPavel Emelyanov {
13625e8d3dc7SHeng Qi 	struct veth_priv *priv = netdev_priv(dev);
1363d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
1364948d4f21SToshiaki Makita 	int err;
1365e314dbdcSPavel Emelyanov 
1366d0e2c55eSEric Dumazet 	if (!peer)
1367e314dbdcSPavel Emelyanov 		return -ENOTCONN;
1368e314dbdcSPavel Emelyanov 
1369948d4f21SToshiaki Makita 	if (priv->_xdp_prog) {
1370948d4f21SToshiaki Makita 		err = veth_enable_xdp(dev);
1371948d4f21SToshiaki Makita 		if (err)
1372948d4f21SToshiaki Makita 			return err;
13735e8d3dc7SHeng Qi 	} else if (veth_gro_requested(dev)) {
1374d3256efdSPaolo Abeni 		err = veth_napi_enable(dev);
1375d3256efdSPaolo Abeni 		if (err)
1376d3256efdSPaolo Abeni 			return err;
1377948d4f21SToshiaki Makita 	}
1378948d4f21SToshiaki Makita 
1379d0e2c55eSEric Dumazet 	if (peer->flags & IFF_UP) {
1380e314dbdcSPavel Emelyanov 		netif_carrier_on(dev);
1381d0e2c55eSEric Dumazet 		netif_carrier_on(peer);
1382e314dbdcSPavel Emelyanov 	}
1383948d4f21SToshiaki Makita 
1384e314dbdcSPavel Emelyanov 	return 0;
1385e314dbdcSPavel Emelyanov }
1386e314dbdcSPavel Emelyanov 
13872cf48a10SEric W. Biederman static int veth_close(struct net_device *dev)
13882cf48a10SEric W. Biederman {
13895e8d3dc7SHeng Qi 	struct veth_priv *priv = netdev_priv(dev);
13902efd32eeSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
13912cf48a10SEric W. Biederman 
13922cf48a10SEric W. Biederman 	netif_carrier_off(dev);
13932efd32eeSEric Dumazet 	if (peer)
13942efd32eeSEric Dumazet 		netif_carrier_off(peer);
13952cf48a10SEric W. Biederman 
13965e8d3dc7SHeng Qi 	if (priv->_xdp_prog)
13975e8d3dc7SHeng Qi 		veth_disable_xdp(dev);
13985e8d3dc7SHeng Qi 	else if (veth_gro_requested(dev))
13995e8d3dc7SHeng Qi 		veth_napi_del(dev);
14005e8d3dc7SHeng Qi 
14012cf48a10SEric W. Biederman 	return 0;
14022cf48a10SEric W. Biederman }
14032cf48a10SEric W. Biederman 
140491572088SJarod Wilson static int is_valid_veth_mtu(int mtu)
140538d40815SEric Biederman {
140691572088SJarod Wilson 	return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
140738d40815SEric Biederman }
140838d40815SEric Biederman 
14097797b93bSToshiaki Makita static int veth_alloc_queues(struct net_device *dev)
14107797b93bSToshiaki Makita {
14117797b93bSToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
14127797b93bSToshiaki Makita 	int i;
14137797b93bSToshiaki Makita 
1414961c6136SVasily Averin 	priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
14157797b93bSToshiaki Makita 	if (!priv->rq)
14167797b93bSToshiaki Makita 		return -ENOMEM;
14177797b93bSToshiaki Makita 
14184195e54aSToshiaki Makita 	for (i = 0; i < dev->num_rx_queues; i++) {
14197797b93bSToshiaki Makita 		priv->rq[i].dev = dev;
14204195e54aSToshiaki Makita 		u64_stats_init(&priv->rq[i].stats.syncp);
14214195e54aSToshiaki Makita 	}
14227797b93bSToshiaki Makita 
14237797b93bSToshiaki Makita 	return 0;
14247797b93bSToshiaki Makita }
14257797b93bSToshiaki Makita 
14267797b93bSToshiaki Makita static void veth_free_queues(struct net_device *dev)
14277797b93bSToshiaki Makita {
14287797b93bSToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
14297797b93bSToshiaki Makita 
14307797b93bSToshiaki Makita 	kfree(priv->rq);
14317797b93bSToshiaki Makita }
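
/* Note that veth_alloc_queues() sizes priv->rq for dev->num_rx_queues (the
 * maximum configured at link creation), while the enable/disable paths above
 * only operate on the first real_num_rx_queues entries.
 */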
14327797b93bSToshiaki Makita 
1433e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev)
1434e314dbdcSPavel Emelyanov {
14357797b93bSToshiaki Makita 	int err;
14367797b93bSToshiaki Makita 
143714d73416SLi RongQing 	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
143814d73416SLi RongQing 	if (!dev->lstats)
1439e314dbdcSPavel Emelyanov 		return -ENOMEM;
14407797b93bSToshiaki Makita 
14417797b93bSToshiaki Makita 	err = veth_alloc_queues(dev);
14427797b93bSToshiaki Makita 	if (err) {
144314d73416SLi RongQing 		free_percpu(dev->lstats);
14447797b93bSToshiaki Makita 		return err;
14457797b93bSToshiaki Makita 	}
14467797b93bSToshiaki Makita 
1447e314dbdcSPavel Emelyanov 	return 0;
1448e314dbdcSPavel Emelyanov }
1449e314dbdcSPavel Emelyanov 
145011687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev)
145111687a10SDavid S. Miller {
14527797b93bSToshiaki Makita 	veth_free_queues(dev);
145314d73416SLi RongQing 	free_percpu(dev->lstats);
145411687a10SDavid S. Miller }
145511687a10SDavid S. Miller 
1456bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER
1457bb446c19SWANG Cong static void veth_poll_controller(struct net_device *dev)
1458bb446c19SWANG Cong {
1459bb446c19SWANG Cong 	/* veth only receives frames when its peer sends one.
1460948d4f21SToshiaki Makita 	 * Since reception does not involve disabling IRQs, we are guaranteed
1461bb446c19SWANG Cong 	 * never to have pending data when netpoll polls us, so
1462bb446c19SWANG Cong 	 * there is nothing to do here.
1463bb446c19SWANG Cong 	 *
1464bb446c19SWANG Cong 	 * We need this though so netpoll recognizes us as an interface that
1465bb446c19SWANG Cong 	 * supports polling, which enables bridge devices in virt setups to
1466bb446c19SWANG Cong 	 * still use netconsole
1467bb446c19SWANG Cong 	 */
1468bb446c19SWANG Cong }
1469bb446c19SWANG Cong #endif	/* CONFIG_NET_POLL_CONTROLLER */
1470bb446c19SWANG Cong 
1471a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev)
1472a45253bfSNicolas Dichtel {
1473a45253bfSNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
1474a45253bfSNicolas Dichtel 	struct net_device *peer;
1475a45253bfSNicolas Dichtel 	int iflink;
1476a45253bfSNicolas Dichtel 
1477a45253bfSNicolas Dichtel 	rcu_read_lock();
1478a45253bfSNicolas Dichtel 	peer = rcu_dereference(priv->peer);
1479a45253bfSNicolas Dichtel 	iflink = peer ? peer->ifindex : 0;
1480a45253bfSNicolas Dichtel 	rcu_read_unlock();
1481a45253bfSNicolas Dichtel 
1482a45253bfSNicolas Dichtel 	return iflink;
1483a45253bfSNicolas Dichtel }
1484a45253bfSNicolas Dichtel 
1485dc224822SToshiaki Makita static netdev_features_t veth_fix_features(struct net_device *dev,
1486dc224822SToshiaki Makita 					   netdev_features_t features)
1487dc224822SToshiaki Makita {
1488dc224822SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1489dc224822SToshiaki Makita 	struct net_device *peer;
1490dc224822SToshiaki Makita 
1491dc224822SToshiaki Makita 	peer = rtnl_dereference(priv->peer);
1492dc224822SToshiaki Makita 	if (peer) {
1493dc224822SToshiaki Makita 		struct veth_priv *peer_priv = netdev_priv(peer);
1494dc224822SToshiaki Makita 
1495dc224822SToshiaki Makita 		if (peer_priv->_xdp_prog)
1496dc224822SToshiaki Makita 			features &= ~NETIF_F_GSO_SOFTWARE;
1497dc224822SToshiaki Makita 	}
1498d3256efdSPaolo Abeni 	if (priv->_xdp_prog)
1499d3256efdSPaolo Abeni 		features |= NETIF_F_GRO;
1500dc224822SToshiaki Makita 
1501dc224822SToshiaki Makita 	return features;
1502dc224822SToshiaki Makita }
1503dc224822SToshiaki Makita 
1504d3256efdSPaolo Abeni static int veth_set_features(struct net_device *dev,
1505d3256efdSPaolo Abeni 			     netdev_features_t features)
1506d3256efdSPaolo Abeni {
1507d3256efdSPaolo Abeni 	netdev_features_t changed = features ^ dev->features;
1508d3256efdSPaolo Abeni 	struct veth_priv *priv = netdev_priv(dev);
1509d3256efdSPaolo Abeni 	int err;
1510d3256efdSPaolo Abeni 
1511d3256efdSPaolo Abeni 	if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
1512d3256efdSPaolo Abeni 		return 0;
1513d3256efdSPaolo Abeni 
1514d3256efdSPaolo Abeni 	if (features & NETIF_F_GRO) {
1515d3256efdSPaolo Abeni 		err = veth_napi_enable(dev);
1516d3256efdSPaolo Abeni 		if (err)
1517d3256efdSPaolo Abeni 			return err;
1518*fccca038SLorenzo Bianconi 
1519*fccca038SLorenzo Bianconi 		xdp_features_set_redirect_target(dev, true);
1520d3256efdSPaolo Abeni 	} else {
1521*fccca038SLorenzo Bianconi 		xdp_features_clear_redirect_target(dev);
1522d3256efdSPaolo Abeni 		veth_napi_del(dev);
1523d3256efdSPaolo Abeni 	}
1524d3256efdSPaolo Abeni 	return 0;
1525d3256efdSPaolo Abeni }
1526d3256efdSPaolo Abeni 
1527163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
1528163e5292SPaolo Abeni {
1529163e5292SPaolo Abeni 	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
1530163e5292SPaolo Abeni 	struct net_device *peer;
1531163e5292SPaolo Abeni 
1532163e5292SPaolo Abeni 	if (new_hr < 0)
1533163e5292SPaolo Abeni 		new_hr = 0;
1534163e5292SPaolo Abeni 
1535163e5292SPaolo Abeni 	rcu_read_lock();
1536163e5292SPaolo Abeni 	peer = rcu_dereference(priv->peer);
1537163e5292SPaolo Abeni 	if (unlikely(!peer))
1538163e5292SPaolo Abeni 		goto out;
1539163e5292SPaolo Abeni 
1540163e5292SPaolo Abeni 	peer_priv = netdev_priv(peer);
1541163e5292SPaolo Abeni 	priv->requested_headroom = new_hr;
1542163e5292SPaolo Abeni 	new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
1543163e5292SPaolo Abeni 	dev->needed_headroom = new_hr;
1544163e5292SPaolo Abeni 	peer->needed_headroom = new_hr;
1545163e5292SPaolo Abeni 
1546163e5292SPaolo Abeni out:
1547163e5292SPaolo Abeni 	rcu_read_unlock();
1548163e5292SPaolo Abeni }
1549163e5292SPaolo Abeni 
1550948d4f21SToshiaki Makita static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1551948d4f21SToshiaki Makita 			struct netlink_ext_ack *extack)
1552948d4f21SToshiaki Makita {
1553948d4f21SToshiaki Makita 	struct veth_priv *priv = netdev_priv(dev);
1554948d4f21SToshiaki Makita 	struct bpf_prog *old_prog;
1555948d4f21SToshiaki Makita 	struct net_device *peer;
1556dc224822SToshiaki Makita 	unsigned int max_mtu;
1557948d4f21SToshiaki Makita 	int err;
1558948d4f21SToshiaki Makita 
1559948d4f21SToshiaki Makita 	old_prog = priv->_xdp_prog;
1560948d4f21SToshiaki Makita 	priv->_xdp_prog = prog;
1561948d4f21SToshiaki Makita 	peer = rtnl_dereference(priv->peer);
1562948d4f21SToshiaki Makita 
1563948d4f21SToshiaki Makita 	if (prog) {
1564948d4f21SToshiaki Makita 		if (!peer) {
1565948d4f21SToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
1566948d4f21SToshiaki Makita 			err = -ENOTCONN;
1567948d4f21SToshiaki Makita 			goto err;
1568948d4f21SToshiaki Makita 		}
1569948d4f21SToshiaki Makita 
15707cda76d8SLorenzo Bianconi 		max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) -
15717cda76d8SLorenzo Bianconi 			  peer->hard_header_len;
15727cda76d8SLorenzo Bianconi 		/* Allow increasing the max_mtu if the program supports
15737cda76d8SLorenzo Bianconi 		 * XDP fragments.
15747cda76d8SLorenzo Bianconi 		 */
15757cda76d8SLorenzo Bianconi 		if (prog->aux->xdp_has_frags)
15767cda76d8SLorenzo Bianconi 			max_mtu += PAGE_SIZE * MAX_SKB_FRAGS;
15777cda76d8SLorenzo Bianconi 
1578dc224822SToshiaki Makita 		if (peer->mtu > max_mtu) {
1579dc224822SToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
1580dc224822SToshiaki Makita 			err = -ERANGE;
1581dc224822SToshiaki Makita 			goto err;
1582dc224822SToshiaki Makita 		}
1583dc224822SToshiaki Makita 
1584638264dcSToshiaki Makita 		if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
1585638264dcSToshiaki Makita 			NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
1586638264dcSToshiaki Makita 			err = -ENOSPC;
1587638264dcSToshiaki Makita 			goto err;
1588638264dcSToshiaki Makita 		}
1589638264dcSToshiaki Makita 
1590948d4f21SToshiaki Makita 		if (dev->flags & IFF_UP) {
1591948d4f21SToshiaki Makita 			err = veth_enable_xdp(dev);
1592948d4f21SToshiaki Makita 			if (err) {
1593948d4f21SToshiaki Makita 				NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
1594948d4f21SToshiaki Makita 				goto err;
1595948d4f21SToshiaki Makita 			}
1596948d4f21SToshiaki Makita 		}
1597dc224822SToshiaki Makita 
1598dc224822SToshiaki Makita 		if (!old_prog) {
1599dc224822SToshiaki Makita 			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
1600dc224822SToshiaki Makita 			peer->max_mtu = max_mtu;
1601dc224822SToshiaki Makita 		}
1602*fccca038SLorenzo Bianconi 
1603*fccca038SLorenzo Bianconi 		xdp_features_set_redirect_target(dev, true);
1604948d4f21SToshiaki Makita 	}
1605948d4f21SToshiaki Makita 
1606948d4f21SToshiaki Makita 	if (old_prog) {
1607dc224822SToshiaki Makita 		if (!prog) {
1608*fccca038SLorenzo Bianconi 			if (!veth_gro_requested(dev))
1609*fccca038SLorenzo Bianconi 				xdp_features_clear_redirect_target(dev);
1610*fccca038SLorenzo Bianconi 
1611dc224822SToshiaki Makita 			if (dev->flags & IFF_UP)
1612948d4f21SToshiaki Makita 				veth_disable_xdp(dev);
1613dc224822SToshiaki Makita 
1614dc224822SToshiaki Makita 			if (peer) {
1615dc224822SToshiaki Makita 				peer->hw_features |= NETIF_F_GSO_SOFTWARE;
1616dc224822SToshiaki Makita 				peer->max_mtu = ETH_MAX_MTU;
1617dc224822SToshiaki Makita 			}
1618dc224822SToshiaki Makita 		}
1619948d4f21SToshiaki Makita 		bpf_prog_put(old_prog);
1620948d4f21SToshiaki Makita 	}
1621948d4f21SToshiaki Makita 
1622dc224822SToshiaki Makita 	if ((!!old_prog ^ !!prog) && peer)
1623dc224822SToshiaki Makita 		netdev_update_features(peer);
1624dc224822SToshiaki Makita 
1625948d4f21SToshiaki Makita 	return 0;
1626948d4f21SToshiaki Makita err:
1627948d4f21SToshiaki Makita 	priv->_xdp_prog = old_prog;
1628948d4f21SToshiaki Makita 
1629948d4f21SToshiaki Makita 	return err;
1630948d4f21SToshiaki Makita }
1631948d4f21SToshiaki Makita 
1632948d4f21SToshiaki Makita static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1633948d4f21SToshiaki Makita {
1634948d4f21SToshiaki Makita 	switch (xdp->command) {
1635948d4f21SToshiaki Makita 	case XDP_SETUP_PROG:
1636948d4f21SToshiaki Makita 		return veth_xdp_set(dev, xdp->prog, xdp->extack);
1637948d4f21SToshiaki Makita 	default:
1638948d4f21SToshiaki Makita 		return -EINVAL;
1639948d4f21SToshiaki Makita 	}
1640948d4f21SToshiaki Makita }
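
/* For reference, veth_xdp() handles the ndo_bpf XDP_SETUP_PROG command; a
 * program is typically attached from user space with iproute2, e.g. (object
 * file, section and interface names are illustrative):
 *
 *	ip link set dev veth0 xdp obj xdp_prog.o sec xdp
 */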
1641948d4f21SToshiaki Makita 
1642306531f0SStanislav Fomichev static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
1643306531f0SStanislav Fomichev {
1644306531f0SStanislav Fomichev 	struct veth_xdp_buff *_ctx = (void *)ctx;
1645306531f0SStanislav Fomichev 
1646306531f0SStanislav Fomichev 	if (!_ctx->skb)
1647306531f0SStanislav Fomichev 		return -EOPNOTSUPP;
1648306531f0SStanislav Fomichev 
1649306531f0SStanislav Fomichev 	*timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
1650306531f0SStanislav Fomichev 	return 0;
1651306531f0SStanislav Fomichev }
1652306531f0SStanislav Fomichev 
1653306531f0SStanislav Fomichev static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
1654306531f0SStanislav Fomichev {
1655306531f0SStanislav Fomichev 	struct veth_xdp_buff *_ctx = (void *)ctx;
1656306531f0SStanislav Fomichev 
1657306531f0SStanislav Fomichev 	if (!_ctx->skb)
1658306531f0SStanislav Fomichev 		return -EOPNOTSUPP;
1659306531f0SStanislav Fomichev 
1660306531f0SStanislav Fomichev 	*hash = skb_get_hash(_ctx->skb);
1661306531f0SStanislav Fomichev 	return 0;
1662306531f0SStanislav Fomichev }
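
/* veth_xdp_rx_timestamp() and veth_xdp_rx_hash() back the XDP RX metadata
 * kfuncs (bpf_xdp_metadata_rx_timestamp() and bpf_xdp_metadata_rx_hash());
 * they only succeed for skb-backed frames, as native XDP frames carry no skb
 * to read the hardware timestamp or hash from.
 */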
1663306531f0SStanislav Fomichev 
16644456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = {
16654456e7bdSStephen Hemminger 	.ndo_init            = veth_dev_init,
16664456e7bdSStephen Hemminger 	.ndo_open            = veth_open,
16672cf48a10SEric W. Biederman 	.ndo_stop            = veth_close,
166800829823SStephen Hemminger 	.ndo_start_xmit      = veth_xmit,
16696311cc44Sstephen hemminger 	.ndo_get_stats64     = veth_get_stats64,
16705c70ef85SGao feng 	.ndo_set_rx_mode     = veth_set_multicast_list,
1671ee923623SDaniel Lezcano 	.ndo_set_mac_address = eth_mac_addr,
1672bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER
1673bb446c19SWANG Cong 	.ndo_poll_controller	= veth_poll_controller,
1674bb446c19SWANG Cong #endif
1675a45253bfSNicolas Dichtel 	.ndo_get_iflink		= veth_get_iflink,
1676dc224822SToshiaki Makita 	.ndo_fix_features	= veth_fix_features,
1677d3256efdSPaolo Abeni 	.ndo_set_features	= veth_set_features,
16781a04a821SToshiaki Makita 	.ndo_features_check	= passthru_features_check,
1679163e5292SPaolo Abeni 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
1680948d4f21SToshiaki Makita 	.ndo_bpf		= veth_xdp,
16819152cff0SLorenzo Bianconi 	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
16829aa1206eSDaniel Borkmann 	.ndo_get_peer_dev	= veth_peer_dev,
16834456e7bdSStephen Hemminger };
16844456e7bdSStephen Hemminger 
1685306531f0SStanislav Fomichev static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
1686306531f0SStanislav Fomichev 	.xmo_rx_timestamp		= veth_xdp_rx_timestamp,
1687306531f0SStanislav Fomichev 	.xmo_rx_hash			= veth_xdp_rx_hash,
1688306531f0SStanislav Fomichev };
1689306531f0SStanislav Fomichev 
1690732912d7SAlexander Duyck #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
1691c80fafbbSXin Long 		       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
1692732912d7SAlexander Duyck 		       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
169328d2b136SPatrick McHardy 		       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
169428d2b136SPatrick McHardy 		       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
16958093315aSEric Dumazet 
1696e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev)
1697e314dbdcSPavel Emelyanov {
1698e314dbdcSPavel Emelyanov 	ether_setup(dev);
1699e314dbdcSPavel Emelyanov 
1700550fd08cSNeil Horman 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
170123ea5a96SHannes Frederic Sowa 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
170202f01ec1SPhil Sutter 	dev->priv_flags |= IFF_NO_QUEUE;
1703163e5292SPaolo Abeni 	dev->priv_flags |= IFF_PHONY_HEADROOM;
1704550fd08cSNeil Horman 
17054456e7bdSStephen Hemminger 	dev->netdev_ops = &veth_netdev_ops;
1706306531f0SStanislav Fomichev 	dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
1707e314dbdcSPavel Emelyanov 	dev->ethtool_ops = &veth_ethtool_ops;
1708e314dbdcSPavel Emelyanov 	dev->features |= NETIF_F_LLTX;
17098093315aSEric Dumazet 	dev->features |= VETH_FEATURES;
17108d0d21f4SToshiaki Makita 	dev->vlan_features = dev->features &
17113f8c707bSVlad Yasevich 			     ~(NETIF_F_HW_VLAN_CTAG_TX |
17123f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_TX |
17133f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_CTAG_RX |
17143f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_RX);
1715cf124db5SDavid S. Miller 	dev->needs_free_netdev = true;
1716cf124db5SDavid S. Miller 	dev->priv_destructor = veth_dev_free;
171791572088SJarod Wilson 	dev->max_mtu = ETH_MAX_MTU;
1718a2c725faSMichał Mirosław 
17198093315aSEric Dumazet 	dev->hw_features = VETH_FEATURES;
172082d81898SEric Dumazet 	dev->hw_enc_features = VETH_FEATURES;
1721607fca9aSDavid Ahern 	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
1722d406099dSEric Dumazet 	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
1723e314dbdcSPavel Emelyanov }
1724e314dbdcSPavel Emelyanov 
1725e314dbdcSPavel Emelyanov /*
1726e314dbdcSPavel Emelyanov  * netlink interface
1727e314dbdcSPavel Emelyanov  */
1728e314dbdcSPavel Emelyanov 
1729a8b8a889SMatthias Schiffer static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
1730a8b8a889SMatthias Schiffer 			 struct netlink_ext_ack *extack)
1731e314dbdcSPavel Emelyanov {
1732e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS]) {
1733e314dbdcSPavel Emelyanov 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1734e314dbdcSPavel Emelyanov 			return -EINVAL;
1735e314dbdcSPavel Emelyanov 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1736e314dbdcSPavel Emelyanov 			return -EADDRNOTAVAIL;
1737e314dbdcSPavel Emelyanov 	}
173838d40815SEric Biederman 	if (tb[IFLA_MTU]) {
173938d40815SEric Biederman 		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
174038d40815SEric Biederman 			return -EINVAL;
174138d40815SEric Biederman 	}
1742e314dbdcSPavel Emelyanov 	return 0;
1743e314dbdcSPavel Emelyanov }
1744e314dbdcSPavel Emelyanov 
1745e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops;
1746e314dbdcSPavel Emelyanov 
1747d3256efdSPaolo Abeni static void veth_disable_gro(struct net_device *dev)
1748d3256efdSPaolo Abeni {
1749d3256efdSPaolo Abeni 	dev->features &= ~NETIF_F_GRO;
1750d3256efdSPaolo Abeni 	dev->wanted_features &= ~NETIF_F_GRO;
1751d3256efdSPaolo Abeni 	netdev_update_features(dev);
1752d3256efdSPaolo Abeni }
1753d3256efdSPaolo Abeni 
17549d3684c2SPaolo Abeni static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
17559d3684c2SPaolo Abeni {
17569d3684c2SPaolo Abeni 	int err;
17579d3684c2SPaolo Abeni 
17589d3684c2SPaolo Abeni 	if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) {
17599d3684c2SPaolo Abeni 		err = netif_set_real_num_tx_queues(dev, 1);
17609d3684c2SPaolo Abeni 		if (err)
17619d3684c2SPaolo Abeni 			return err;
17629d3684c2SPaolo Abeni 	}
17639d3684c2SPaolo Abeni 	if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) {
17649d3684c2SPaolo Abeni 		err = netif_set_real_num_rx_queues(dev, 1);
17659d3684c2SPaolo Abeni 		if (err)
17669d3684c2SPaolo Abeni 			return err;
17679d3684c2SPaolo Abeni 	}
17689d3684c2SPaolo Abeni 	return 0;
17699d3684c2SPaolo Abeni }
17709d3684c2SPaolo Abeni 
177181adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev,
17727a3f4a18SMatthias Schiffer 			struct nlattr *tb[], struct nlattr *data[],
17737a3f4a18SMatthias Schiffer 			struct netlink_ext_ack *extack)
1774e314dbdcSPavel Emelyanov {
17757797b93bSToshiaki Makita 	int err;
1776e314dbdcSPavel Emelyanov 	struct net_device *peer;
1777e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
1778e314dbdcSPavel Emelyanov 	char ifname[IFNAMSIZ];
1779e314dbdcSPavel Emelyanov 	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
17805517750fSTom Gundersen 	unsigned char name_assign_type;
17813729d502SPatrick McHardy 	struct ifinfomsg *ifmp;
178281adee47SEric W. Biederman 	struct net *net;
1783e314dbdcSPavel Emelyanov 
1784e314dbdcSPavel Emelyanov 	/*
1785e314dbdcSPavel Emelyanov 	 * create and register peer first
1786e314dbdcSPavel Emelyanov 	 */
1787e314dbdcSPavel Emelyanov 	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
1788e314dbdcSPavel Emelyanov 		struct nlattr *nla_peer;
1789e314dbdcSPavel Emelyanov 
1790e314dbdcSPavel Emelyanov 		nla_peer = data[VETH_INFO_PEER];
17913729d502SPatrick McHardy 		ifmp = nla_data(nla_peer);
1792f7b12606SJiri Pirko 		err = rtnl_nla_parse_ifla(peer_tb,
1793e314dbdcSPavel Emelyanov 					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
1794fceb6435SJohannes Berg 					  nla_len(nla_peer) - sizeof(struct ifinfomsg),
1795fceb6435SJohannes Berg 					  NULL);
1796e314dbdcSPavel Emelyanov 		if (err < 0)
1797e314dbdcSPavel Emelyanov 			return err;
1798e314dbdcSPavel Emelyanov 
1799a8b8a889SMatthias Schiffer 		err = veth_validate(peer_tb, NULL, extack);
1800e314dbdcSPavel Emelyanov 		if (err < 0)
1801e314dbdcSPavel Emelyanov 			return err;
1802e314dbdcSPavel Emelyanov 
1803e314dbdcSPavel Emelyanov 		tbp = peer_tb;
18043729d502SPatrick McHardy 	} else {
18053729d502SPatrick McHardy 		ifmp = NULL;
1806e314dbdcSPavel Emelyanov 		tbp = tb;
18073729d502SPatrick McHardy 	}
1808e314dbdcSPavel Emelyanov 
1809191cdb38SSerhey Popovych 	if (ifmp && tbp[IFLA_IFNAME]) {
1810872f6903SFrancis Laniel 		nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
18115517750fSTom Gundersen 		name_assign_type = NET_NAME_USER;
18125517750fSTom Gundersen 	} else {
1813e314dbdcSPavel Emelyanov 		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
18145517750fSTom Gundersen 		name_assign_type = NET_NAME_ENUM;
18155517750fSTom Gundersen 	}
1816e314dbdcSPavel Emelyanov 
181781adee47SEric W. Biederman 	net = rtnl_link_get_net(src_net, tbp);
181881adee47SEric W. Biederman 	if (IS_ERR(net))
181981adee47SEric W. Biederman 		return PTR_ERR(net);
182081adee47SEric W. Biederman 
18215517750fSTom Gundersen 	peer = rtnl_create_link(net, ifname, name_assign_type,
1822d0522f1cSDavid Ahern 				&veth_link_ops, tbp, extack);
182381adee47SEric W. Biederman 	if (IS_ERR(peer)) {
182481adee47SEric W. Biederman 		put_net(net);
1825e314dbdcSPavel Emelyanov 		return PTR_ERR(peer);
182681adee47SEric W. Biederman 	}
1827e314dbdcSPavel Emelyanov 
1828191cdb38SSerhey Popovych 	if (!ifmp || !tbp[IFLA_ADDRESS])
1829f2cedb63SDanny Kukawka 		eth_hw_addr_random(peer);
1830e314dbdcSPavel Emelyanov 
1831e6f8f1a7SPavel Emelyanov 	if (ifmp && (dev->ifindex != 0))
1832e6f8f1a7SPavel Emelyanov 		peer->ifindex = ifmp->ifi_index;
1833e6f8f1a7SPavel Emelyanov 
18346df6398fSJakub Kicinski 	netif_inherit_tso_max(peer, dev);
183572d24955SStephen Hemminger 
1836e314dbdcSPavel Emelyanov 	err = register_netdevice(peer);
183781adee47SEric W. Biederman 	put_net(net);
183881adee47SEric W. Biederman 	net = NULL;
1839e314dbdcSPavel Emelyanov 	if (err < 0)
1840e314dbdcSPavel Emelyanov 		goto err_register_peer;
1841e314dbdcSPavel Emelyanov 
1842d3256efdSPaolo Abeni 	/* keep GRO disabled by default to be consistent with the established
1843d3256efdSPaolo Abeni 	 * veth behavior
1844d3256efdSPaolo Abeni 	 */
1845d3256efdSPaolo Abeni 	veth_disable_gro(peer);
1846e314dbdcSPavel Emelyanov 	netif_carrier_off(peer);
1847e314dbdcSPavel Emelyanov 
18481d997f10SHangbin Liu 	err = rtnl_configure_link(peer, ifmp, 0, NULL);
18493729d502SPatrick McHardy 	if (err < 0)
18503729d502SPatrick McHardy 		goto err_configure_peer;
18513729d502SPatrick McHardy 
1852e314dbdcSPavel Emelyanov 	/*
1853e314dbdcSPavel Emelyanov 	 * register dev last
1854e314dbdcSPavel Emelyanov 	 *
1855e314dbdcSPavel Emelyanov 	 * note that, since we've registered a new device, the dev's name
1856e314dbdcSPavel Emelyanov 	 * should be re-allocated
1857e314dbdcSPavel Emelyanov 	 */
1858e314dbdcSPavel Emelyanov 
1859e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS] == NULL)
1860f2cedb63SDanny Kukawka 		eth_hw_addr_random(dev);
1861e314dbdcSPavel Emelyanov 
18626c8c4446SJiri Pirko 	if (tb[IFLA_IFNAME])
1863872f6903SFrancis Laniel 		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
18646c8c4446SJiri Pirko 	else
18656c8c4446SJiri Pirko 		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
18666c8c4446SJiri Pirko 
1867e314dbdcSPavel Emelyanov 	err = register_netdevice(dev);
1868e314dbdcSPavel Emelyanov 	if (err < 0)
1869e314dbdcSPavel Emelyanov 		goto err_register_dev;
1870e314dbdcSPavel Emelyanov 
1871e314dbdcSPavel Emelyanov 	netif_carrier_off(dev);
1872e314dbdcSPavel Emelyanov 
1873e314dbdcSPavel Emelyanov 	/*
1874e314dbdcSPavel Emelyanov 	 * tie the devices together
1875e314dbdcSPavel Emelyanov 	 */
1876e314dbdcSPavel Emelyanov 
1877e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
1878d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, peer);
18799d3684c2SPaolo Abeni 	err = veth_init_queues(dev, tb);
18809d3684c2SPaolo Abeni 	if (err)
18819d3684c2SPaolo Abeni 		goto err_queues;
1882e314dbdcSPavel Emelyanov 
1883e314dbdcSPavel Emelyanov 	priv = netdev_priv(peer);
1884d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, dev);
18859d3684c2SPaolo Abeni 	err = veth_init_queues(peer, tb);
18869d3684c2SPaolo Abeni 	if (err)
18879d3684c2SPaolo Abeni 		goto err_queues;
1888948d4f21SToshiaki Makita 
1889d3256efdSPaolo Abeni 	veth_disable_gro(dev);
1890*fccca038SLorenzo Bianconi 	/* update XDP supported features */
1891*fccca038SLorenzo Bianconi 	veth_set_xdp_features(dev);
1892*fccca038SLorenzo Bianconi 	veth_set_xdp_features(peer);
1893*fccca038SLorenzo Bianconi 
1894e314dbdcSPavel Emelyanov 	return 0;
1895e314dbdcSPavel Emelyanov 
18969d3684c2SPaolo Abeni err_queues:
18979d3684c2SPaolo Abeni 	unregister_netdevice(dev);
1898e314dbdcSPavel Emelyanov err_register_dev:
1899e314dbdcSPavel Emelyanov 	/* nothing to do */
19003729d502SPatrick McHardy err_configure_peer:
1901e314dbdcSPavel Emelyanov 	unregister_netdevice(peer);
1902e314dbdcSPavel Emelyanov 	return err;
1903e314dbdcSPavel Emelyanov 
1904e314dbdcSPavel Emelyanov err_register_peer:
1905e314dbdcSPavel Emelyanov 	free_netdev(peer);
1906e314dbdcSPavel Emelyanov 	return err;
1907e314dbdcSPavel Emelyanov }
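
/* For reference, veth_newlink() is normally exercised from user space via
 * rtnetlink, e.g. (interface names are illustrative):
 *
 *	ip link add veth0 type veth peer name veth1
 */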
1908e314dbdcSPavel Emelyanov 
190923289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head)
1910e314dbdcSPavel Emelyanov {
1911e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
1912e314dbdcSPavel Emelyanov 	struct net_device *peer;
1913e314dbdcSPavel Emelyanov 
1914e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
1915d0e2c55eSEric Dumazet 	peer = rtnl_dereference(priv->peer);
1916d0e2c55eSEric Dumazet 
1917d0e2c55eSEric Dumazet 	/* Note : dellink() is called from default_device_exit_batch(),
1918d0e2c55eSEric Dumazet 	 * before a rcu_synchronize() point. The devices are guaranteed
1919d0e2c55eSEric Dumazet 	 * not to be freed before one RCU grace period.
1920d0e2c55eSEric Dumazet 	 */
1921d0e2c55eSEric Dumazet 	RCU_INIT_POINTER(priv->peer, NULL);
1922f45a5c26SEric Dumazet 	unregister_netdevice_queue(dev, head);
1923d0e2c55eSEric Dumazet 
1924f45a5c26SEric Dumazet 	if (peer) {
1925d0e2c55eSEric Dumazet 		priv = netdev_priv(peer);
1926d0e2c55eSEric Dumazet 		RCU_INIT_POINTER(priv->peer, NULL);
192724540535SEric Dumazet 		unregister_netdevice_queue(peer, head);
1928e314dbdcSPavel Emelyanov 	}
1929f45a5c26SEric Dumazet }
1930e314dbdcSPavel Emelyanov 
193123711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
193223711438SThomas Graf 	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
193323711438SThomas Graf };
1934e314dbdcSPavel Emelyanov 
1935e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev)
1936e5f4e7b9SNicolas Dichtel {
1937e5f4e7b9SNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
1938e5f4e7b9SNicolas Dichtel 	struct net_device *peer = rtnl_dereference(priv->peer);
1939e5f4e7b9SNicolas Dichtel 
1940e5f4e7b9SNicolas Dichtel 	return peer ? dev_net(peer) : dev_net(dev);
1941e5f4e7b9SNicolas Dichtel }
1942e5f4e7b9SNicolas Dichtel 
19439d3684c2SPaolo Abeni static unsigned int veth_get_num_queues(void)
19449d3684c2SPaolo Abeni {
19459d3684c2SPaolo Abeni 	/* enforce the same queue limit as rtnl_create_link */
19469d3684c2SPaolo Abeni 	int queues = num_possible_cpus();
19479d3684c2SPaolo Abeni 
19489d3684c2SPaolo Abeni 	if (queues > 4096)
19499d3684c2SPaolo Abeni 		queues = 4096;
19509d3684c2SPaolo Abeni 	return queues;
19519d3684c2SPaolo Abeni }
19529d3684c2SPaolo Abeni 
1953e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = {
1954e314dbdcSPavel Emelyanov 	.kind		= DRV_NAME,
1955e314dbdcSPavel Emelyanov 	.priv_size	= sizeof(struct veth_priv),
1956e314dbdcSPavel Emelyanov 	.setup		= veth_setup,
1957e314dbdcSPavel Emelyanov 	.validate	= veth_validate,
1958e314dbdcSPavel Emelyanov 	.newlink	= veth_newlink,
1959e314dbdcSPavel Emelyanov 	.dellink	= veth_dellink,
1960e314dbdcSPavel Emelyanov 	.policy		= veth_policy,
1961e314dbdcSPavel Emelyanov 	.maxtype	= VETH_INFO_MAX,
1962e5f4e7b9SNicolas Dichtel 	.get_link_net	= veth_get_link_net,
19639d3684c2SPaolo Abeni 	.get_num_tx_queues	= veth_get_num_queues,
19649d3684c2SPaolo Abeni 	.get_num_rx_queues	= veth_get_num_queues,
1965e314dbdcSPavel Emelyanov };
1966e314dbdcSPavel Emelyanov 
1967e314dbdcSPavel Emelyanov /*
1968e314dbdcSPavel Emelyanov  * init/fini
1969e314dbdcSPavel Emelyanov  */
1970e314dbdcSPavel Emelyanov 
1971e314dbdcSPavel Emelyanov static __init int veth_init(void)
1972e314dbdcSPavel Emelyanov {
1973e314dbdcSPavel Emelyanov 	return rtnl_link_register(&veth_link_ops);
1974e314dbdcSPavel Emelyanov }
1975e314dbdcSPavel Emelyanov 
1976e314dbdcSPavel Emelyanov static __exit void veth_exit(void)
1977e314dbdcSPavel Emelyanov {
197868365458SPatrick McHardy 	rtnl_link_unregister(&veth_link_ops);
1979e314dbdcSPavel Emelyanov }
1980e314dbdcSPavel Emelyanov 
1981e314dbdcSPavel Emelyanov module_init(veth_init);
1982e314dbdcSPavel Emelyanov module_exit(veth_exit);
1983e314dbdcSPavel Emelyanov 
1984e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
1985e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2");
1986e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1987