xref: /openbmc/linux/drivers/net/veth.c (revision 732912d7)
1e314dbdcSPavel Emelyanov /*
2e314dbdcSPavel Emelyanov  *  drivers/net/veth.c
3e314dbdcSPavel Emelyanov  *
4e314dbdcSPavel Emelyanov  *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
5e314dbdcSPavel Emelyanov  *
6e314dbdcSPavel Emelyanov  * Author: Pavel Emelianov <xemul@openvz.org>
7e314dbdcSPavel Emelyanov  * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
8e314dbdcSPavel Emelyanov  *
9e314dbdcSPavel Emelyanov  */
10e314dbdcSPavel Emelyanov 
11e314dbdcSPavel Emelyanov #include <linux/netdevice.h>
125a0e3ad6STejun Heo #include <linux/slab.h>
13e314dbdcSPavel Emelyanov #include <linux/ethtool.h>
14e314dbdcSPavel Emelyanov #include <linux/etherdevice.h>
15cf05c700SEric Dumazet #include <linux/u64_stats_sync.h>
16e314dbdcSPavel Emelyanov 
17f7b12606SJiri Pirko #include <net/rtnetlink.h>
18e314dbdcSPavel Emelyanov #include <net/dst.h>
19e314dbdcSPavel Emelyanov #include <net/xfrm.h>
20ecef969eSStephen Hemminger #include <linux/veth.h>
219d9779e7SPaul Gortmaker #include <linux/module.h>
22e314dbdcSPavel Emelyanov 
/* Driver name and version strings (reported via ethtool drvinfo). */
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

/* Inclusive bounds accepted for the L3 MTU; the upper bound is arbitrary. */
#define MIN_MTU 68
#define MAX_MTU 65535
2838d40815SEric Biederman 
292681128fSEric Dumazet struct pcpu_vstats {
302681128fSEric Dumazet 	u64			packets;
312681128fSEric Dumazet 	u64			bytes;
32cf05c700SEric Dumazet 	struct u64_stats_sync	syncp;
33e314dbdcSPavel Emelyanov };
34e314dbdcSPavel Emelyanov 
35e314dbdcSPavel Emelyanov struct veth_priv {
36d0e2c55eSEric Dumazet 	struct net_device __rcu	*peer;
372681128fSEric Dumazet 	atomic64_t		dropped;
38163e5292SPaolo Abeni 	unsigned		requested_headroom;
39e314dbdcSPavel Emelyanov };
40e314dbdcSPavel Emelyanov 
41e314dbdcSPavel Emelyanov /*
42e314dbdcSPavel Emelyanov  * ethtool interface
43e314dbdcSPavel Emelyanov  */
44e314dbdcSPavel Emelyanov 
45e314dbdcSPavel Emelyanov static struct {
46e314dbdcSPavel Emelyanov 	const char string[ETH_GSTRING_LEN];
47e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = {
48e314dbdcSPavel Emelyanov 	{ "peer_ifindex" },
49e314dbdcSPavel Emelyanov };
50e314dbdcSPavel Emelyanov 
51e314dbdcSPavel Emelyanov static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
52e314dbdcSPavel Emelyanov {
53e314dbdcSPavel Emelyanov 	cmd->supported		= 0;
54e314dbdcSPavel Emelyanov 	cmd->advertising	= 0;
5570739497SDavid Decotigny 	ethtool_cmd_speed_set(cmd, SPEED_10000);
56e314dbdcSPavel Emelyanov 	cmd->duplex		= DUPLEX_FULL;
57e314dbdcSPavel Emelyanov 	cmd->port		= PORT_TP;
58e314dbdcSPavel Emelyanov 	cmd->phy_address	= 0;
59e314dbdcSPavel Emelyanov 	cmd->transceiver	= XCVR_INTERNAL;
60e314dbdcSPavel Emelyanov 	cmd->autoneg		= AUTONEG_DISABLE;
61e314dbdcSPavel Emelyanov 	cmd->maxtxpkt		= 0;
62e314dbdcSPavel Emelyanov 	cmd->maxrxpkt		= 0;
63e314dbdcSPavel Emelyanov 	return 0;
64e314dbdcSPavel Emelyanov }
65e314dbdcSPavel Emelyanov 
66e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
67e314dbdcSPavel Emelyanov {
6833a5ba14SRick Jones 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
6933a5ba14SRick Jones 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
70e314dbdcSPavel Emelyanov }
71e314dbdcSPavel Emelyanov 
72e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
73e314dbdcSPavel Emelyanov {
74e314dbdcSPavel Emelyanov 	switch(stringset) {
75e314dbdcSPavel Emelyanov 	case ETH_SS_STATS:
76e314dbdcSPavel Emelyanov 		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
77e314dbdcSPavel Emelyanov 		break;
78e314dbdcSPavel Emelyanov 	}
79e314dbdcSPavel Emelyanov }
80e314dbdcSPavel Emelyanov 
81b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset)
82e314dbdcSPavel Emelyanov {
83b9f2c044SJeff Garzik 	switch (sset) {
84b9f2c044SJeff Garzik 	case ETH_SS_STATS:
85e314dbdcSPavel Emelyanov 		return ARRAY_SIZE(ethtool_stats_keys);
86b9f2c044SJeff Garzik 	default:
87b9f2c044SJeff Garzik 		return -EOPNOTSUPP;
88b9f2c044SJeff Garzik 	}
89e314dbdcSPavel Emelyanov }
90e314dbdcSPavel Emelyanov 
91e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev,
92e314dbdcSPavel Emelyanov 		struct ethtool_stats *stats, u64 *data)
93e314dbdcSPavel Emelyanov {
94d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
95d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
96e314dbdcSPavel Emelyanov 
97d0e2c55eSEric Dumazet 	data[0] = peer ? peer->ifindex : 0;
98e314dbdcSPavel Emelyanov }
99e314dbdcSPavel Emelyanov 
1000fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = {
101e314dbdcSPavel Emelyanov 	.get_settings		= veth_get_settings,
102e314dbdcSPavel Emelyanov 	.get_drvinfo		= veth_get_drvinfo,
103e314dbdcSPavel Emelyanov 	.get_link		= ethtool_op_get_link,
104e314dbdcSPavel Emelyanov 	.get_strings		= veth_get_strings,
105b9f2c044SJeff Garzik 	.get_sset_count		= veth_get_sset_count,
106e314dbdcSPavel Emelyanov 	.get_ethtool_stats	= veth_get_ethtool_stats,
107e314dbdcSPavel Emelyanov };
108e314dbdcSPavel Emelyanov 
109424efe9cSStephen Hemminger static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
110e314dbdcSPavel Emelyanov {
1112681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
112d0e2c55eSEric Dumazet 	struct net_device *rcv;
1132681128fSEric Dumazet 	int length = skb->len;
114e314dbdcSPavel Emelyanov 
115d0e2c55eSEric Dumazet 	rcu_read_lock();
116d0e2c55eSEric Dumazet 	rcv = rcu_dereference(priv->peer);
117d0e2c55eSEric Dumazet 	if (unlikely(!rcv)) {
118d0e2c55eSEric Dumazet 		kfree_skb(skb);
119d0e2c55eSEric Dumazet 		goto drop;
120d0e2c55eSEric Dumazet 	}
121e314dbdcSPavel Emelyanov 
1222681128fSEric Dumazet 	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
1232681128fSEric Dumazet 		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
124e314dbdcSPavel Emelyanov 
125cf05c700SEric Dumazet 		u64_stats_update_begin(&stats->syncp);
1262681128fSEric Dumazet 		stats->bytes += length;
1272681128fSEric Dumazet 		stats->packets++;
128cf05c700SEric Dumazet 		u64_stats_update_end(&stats->syncp);
1292681128fSEric Dumazet 	} else {
130d0e2c55eSEric Dumazet drop:
1312681128fSEric Dumazet 		atomic64_inc(&priv->dropped);
1322681128fSEric Dumazet 	}
133d0e2c55eSEric Dumazet 	rcu_read_unlock();
1346ed10654SPatrick McHardy 	return NETDEV_TX_OK;
135e314dbdcSPavel Emelyanov }
136e314dbdcSPavel Emelyanov 
137e314dbdcSPavel Emelyanov /*
138e314dbdcSPavel Emelyanov  * general routines
139e314dbdcSPavel Emelyanov  */
140e314dbdcSPavel Emelyanov 
1412681128fSEric Dumazet static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
142e314dbdcSPavel Emelyanov {
143cf05c700SEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
14411687a10SDavid S. Miller 	int cpu;
14511687a10SDavid S. Miller 
1462681128fSEric Dumazet 	result->packets = 0;
1472681128fSEric Dumazet 	result->bytes = 0;
1482b1c8b0fSEric Dumazet 	for_each_possible_cpu(cpu) {
1492681128fSEric Dumazet 		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
1502681128fSEric Dumazet 		u64 packets, bytes;
151cf05c700SEric Dumazet 		unsigned int start;
152e314dbdcSPavel Emelyanov 
153cf05c700SEric Dumazet 		do {
15457a7744eSEric W. Biederman 			start = u64_stats_fetch_begin_irq(&stats->syncp);
1552681128fSEric Dumazet 			packets = stats->packets;
1562681128fSEric Dumazet 			bytes = stats->bytes;
15757a7744eSEric W. Biederman 		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
1582681128fSEric Dumazet 		result->packets += packets;
1592681128fSEric Dumazet 		result->bytes += bytes;
160e314dbdcSPavel Emelyanov 	}
1612681128fSEric Dumazet 	return atomic64_read(&priv->dropped);
1622681128fSEric Dumazet }
1632681128fSEric Dumazet 
1642681128fSEric Dumazet static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
1652681128fSEric Dumazet 						  struct rtnl_link_stats64 *tot)
1662681128fSEric Dumazet {
1672681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
168d0e2c55eSEric Dumazet 	struct net_device *peer;
1692681128fSEric Dumazet 	struct pcpu_vstats one;
1702681128fSEric Dumazet 
1712681128fSEric Dumazet 	tot->tx_dropped = veth_stats_one(&one, dev);
1722681128fSEric Dumazet 	tot->tx_bytes = one.bytes;
1732681128fSEric Dumazet 	tot->tx_packets = one.packets;
1742681128fSEric Dumazet 
175d0e2c55eSEric Dumazet 	rcu_read_lock();
176d0e2c55eSEric Dumazet 	peer = rcu_dereference(priv->peer);
177d0e2c55eSEric Dumazet 	if (peer) {
178d0e2c55eSEric Dumazet 		tot->rx_dropped = veth_stats_one(&one, peer);
1792681128fSEric Dumazet 		tot->rx_bytes = one.bytes;
1802681128fSEric Dumazet 		tot->rx_packets = one.packets;
181d0e2c55eSEric Dumazet 	}
182d0e2c55eSEric Dumazet 	rcu_read_unlock();
183e314dbdcSPavel Emelyanov 
1846311cc44Sstephen hemminger 	return tot;
185e314dbdcSPavel Emelyanov }
186e314dbdcSPavel Emelyanov 
/*
 * ndo_set_rx_mode: intentionally empty.  Providing the callback is enough
 * to fake multicast ability.
 */
static void veth_set_multicast_list(struct net_device *dev)
{
	/* nothing to program */
}
1915c70ef85SGao feng 
192e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev)
193e314dbdcSPavel Emelyanov {
194d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
195d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
196e314dbdcSPavel Emelyanov 
197d0e2c55eSEric Dumazet 	if (!peer)
198e314dbdcSPavel Emelyanov 		return -ENOTCONN;
199e314dbdcSPavel Emelyanov 
200d0e2c55eSEric Dumazet 	if (peer->flags & IFF_UP) {
201e314dbdcSPavel Emelyanov 		netif_carrier_on(dev);
202d0e2c55eSEric Dumazet 		netif_carrier_on(peer);
203e314dbdcSPavel Emelyanov 	}
204e314dbdcSPavel Emelyanov 	return 0;
205e314dbdcSPavel Emelyanov }
206e314dbdcSPavel Emelyanov 
2072cf48a10SEric W. Biederman static int veth_close(struct net_device *dev)
2082cf48a10SEric W. Biederman {
2092cf48a10SEric W. Biederman 	struct veth_priv *priv = netdev_priv(dev);
2102efd32eeSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
2112cf48a10SEric W. Biederman 
2122cf48a10SEric W. Biederman 	netif_carrier_off(dev);
2132efd32eeSEric Dumazet 	if (peer)
2142efd32eeSEric Dumazet 		netif_carrier_off(peer);
2152cf48a10SEric W. Biederman 
2162cf48a10SEric W. Biederman 	return 0;
2172cf48a10SEric W. Biederman }
2182cf48a10SEric W. Biederman 
21938d40815SEric Biederman static int is_valid_veth_mtu(int new_mtu)
22038d40815SEric Biederman {
221807540baSEric Dumazet 	return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
22238d40815SEric Biederman }
22338d40815SEric Biederman 
22438d40815SEric Biederman static int veth_change_mtu(struct net_device *dev, int new_mtu)
22538d40815SEric Biederman {
22638d40815SEric Biederman 	if (!is_valid_veth_mtu(new_mtu))
22738d40815SEric Biederman 		return -EINVAL;
22838d40815SEric Biederman 	dev->mtu = new_mtu;
22938d40815SEric Biederman 	return 0;
23038d40815SEric Biederman }
23138d40815SEric Biederman 
232e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev)
233e314dbdcSPavel Emelyanov {
2341c213bd2SWANG Cong 	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
2352681128fSEric Dumazet 	if (!dev->vstats)
236e314dbdcSPavel Emelyanov 		return -ENOMEM;
237e314dbdcSPavel Emelyanov 	return 0;
238e314dbdcSPavel Emelyanov }
239e314dbdcSPavel Emelyanov 
24011687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev)
24111687a10SDavid S. Miller {
2422681128fSEric Dumazet 	free_percpu(dev->vstats);
24311687a10SDavid S. Miller 	free_netdev(dev);
24411687a10SDavid S. Miller }
24511687a10SDavid S. Miller 
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller: deliberately a no-op.
 *
 * A veth device only receives a frame when its peer transmits one, and
 * that hand-over is synchronous, so there can never be pending data to
 * poll for.
 *
 * The callback still has to exist so that netpoll treats veth as an
 * interface that supports polling; this lets bridge devices in virt
 * setups keep using netconsole.
 */
static void veth_poll_controller(struct net_device *dev)
{
}
#endif	/* CONFIG_NET_POLL_CONTROLLER */
260bb446c19SWANG Cong 
261a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev)
262a45253bfSNicolas Dichtel {
263a45253bfSNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
264a45253bfSNicolas Dichtel 	struct net_device *peer;
265a45253bfSNicolas Dichtel 	int iflink;
266a45253bfSNicolas Dichtel 
267a45253bfSNicolas Dichtel 	rcu_read_lock();
268a45253bfSNicolas Dichtel 	peer = rcu_dereference(priv->peer);
269a45253bfSNicolas Dichtel 	iflink = peer ? peer->ifindex : 0;
270a45253bfSNicolas Dichtel 	rcu_read_unlock();
271a45253bfSNicolas Dichtel 
272a45253bfSNicolas Dichtel 	return iflink;
273a45253bfSNicolas Dichtel }
274a45253bfSNicolas Dichtel 
275163e5292SPaolo Abeni static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
276163e5292SPaolo Abeni {
277163e5292SPaolo Abeni 	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
278163e5292SPaolo Abeni 	struct net_device *peer;
279163e5292SPaolo Abeni 
280163e5292SPaolo Abeni 	if (new_hr < 0)
281163e5292SPaolo Abeni 		new_hr = 0;
282163e5292SPaolo Abeni 
283163e5292SPaolo Abeni 	rcu_read_lock();
284163e5292SPaolo Abeni 	peer = rcu_dereference(priv->peer);
285163e5292SPaolo Abeni 	if (unlikely(!peer))
286163e5292SPaolo Abeni 		goto out;
287163e5292SPaolo Abeni 
288163e5292SPaolo Abeni 	peer_priv = netdev_priv(peer);
289163e5292SPaolo Abeni 	priv->requested_headroom = new_hr;
290163e5292SPaolo Abeni 	new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
291163e5292SPaolo Abeni 	dev->needed_headroom = new_hr;
292163e5292SPaolo Abeni 	peer->needed_headroom = new_hr;
293163e5292SPaolo Abeni 
294163e5292SPaolo Abeni out:
295163e5292SPaolo Abeni 	rcu_read_unlock();
296163e5292SPaolo Abeni }
297163e5292SPaolo Abeni 
2984456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = {
2994456e7bdSStephen Hemminger 	.ndo_init            = veth_dev_init,
3004456e7bdSStephen Hemminger 	.ndo_open            = veth_open,
3012cf48a10SEric W. Biederman 	.ndo_stop            = veth_close,
30200829823SStephen Hemminger 	.ndo_start_xmit      = veth_xmit,
30338d40815SEric Biederman 	.ndo_change_mtu      = veth_change_mtu,
3046311cc44Sstephen hemminger 	.ndo_get_stats64     = veth_get_stats64,
3055c70ef85SGao feng 	.ndo_set_rx_mode     = veth_set_multicast_list,
306ee923623SDaniel Lezcano 	.ndo_set_mac_address = eth_mac_addr,
307bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER
308bb446c19SWANG Cong 	.ndo_poll_controller	= veth_poll_controller,
309bb446c19SWANG Cong #endif
310a45253bfSNicolas Dichtel 	.ndo_get_iflink		= veth_get_iflink,
3111a04a821SToshiaki Makita 	.ndo_features_check	= passthru_features_check,
312163e5292SPaolo Abeni 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
3134456e7bdSStephen Hemminger };
3144456e7bdSStephen Hemminger 
/*
 * Feature flags veth_setup() puts into features, hw_features and
 * hw_enc_features: scatter/gather, checksum, DMA, GSO and VLAN tag
 * handling (CTAG and STAG, both directions).
 */
#define VETH_FEATURES							\
	(NETIF_F_SG | NETIF_F_FRAGLIST |				\
	 NETIF_F_HW_CSUM | NETIF_F_RXCSUM |				\
	 NETIF_F_HIGHDMA |						\
	 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL |			\
	 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |		\
	 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX)
3208093315aSEric Dumazet 
321e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev)
322e314dbdcSPavel Emelyanov {
323e314dbdcSPavel Emelyanov 	ether_setup(dev);
324e314dbdcSPavel Emelyanov 
325550fd08cSNeil Horman 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
32623ea5a96SHannes Frederic Sowa 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
32702f01ec1SPhil Sutter 	dev->priv_flags |= IFF_NO_QUEUE;
328163e5292SPaolo Abeni 	dev->priv_flags |= IFF_PHONY_HEADROOM;
329550fd08cSNeil Horman 
3304456e7bdSStephen Hemminger 	dev->netdev_ops = &veth_netdev_ops;
331e314dbdcSPavel Emelyanov 	dev->ethtool_ops = &veth_ethtool_ops;
332e314dbdcSPavel Emelyanov 	dev->features |= NETIF_F_LLTX;
3338093315aSEric Dumazet 	dev->features |= VETH_FEATURES;
3348d0d21f4SToshiaki Makita 	dev->vlan_features = dev->features &
3353f8c707bSVlad Yasevich 			     ~(NETIF_F_HW_VLAN_CTAG_TX |
3363f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_TX |
3373f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_CTAG_RX |
3383f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_RX);
33911687a10SDavid S. Miller 	dev->destructor = veth_dev_free;
340a2c725faSMichał Mirosław 
3418093315aSEric Dumazet 	dev->hw_features = VETH_FEATURES;
34282d81898SEric Dumazet 	dev->hw_enc_features = VETH_FEATURES;
343e314dbdcSPavel Emelyanov }
344e314dbdcSPavel Emelyanov 
345e314dbdcSPavel Emelyanov /*
346e314dbdcSPavel Emelyanov  * netlink interface
347e314dbdcSPavel Emelyanov  */
348e314dbdcSPavel Emelyanov 
349e314dbdcSPavel Emelyanov static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
350e314dbdcSPavel Emelyanov {
351e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS]) {
352e314dbdcSPavel Emelyanov 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
353e314dbdcSPavel Emelyanov 			return -EINVAL;
354e314dbdcSPavel Emelyanov 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
355e314dbdcSPavel Emelyanov 			return -EADDRNOTAVAIL;
356e314dbdcSPavel Emelyanov 	}
35738d40815SEric Biederman 	if (tb[IFLA_MTU]) {
35838d40815SEric Biederman 		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
35938d40815SEric Biederman 			return -EINVAL;
36038d40815SEric Biederman 	}
361e314dbdcSPavel Emelyanov 	return 0;
362e314dbdcSPavel Emelyanov }
363e314dbdcSPavel Emelyanov 
364e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops;
365e314dbdcSPavel Emelyanov 
36681adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev,
367e314dbdcSPavel Emelyanov 			 struct nlattr *tb[], struct nlattr *data[])
368e314dbdcSPavel Emelyanov {
369e314dbdcSPavel Emelyanov 	int err;
370e314dbdcSPavel Emelyanov 	struct net_device *peer;
371e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
372e314dbdcSPavel Emelyanov 	char ifname[IFNAMSIZ];
373e314dbdcSPavel Emelyanov 	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
3745517750fSTom Gundersen 	unsigned char name_assign_type;
3753729d502SPatrick McHardy 	struct ifinfomsg *ifmp;
37681adee47SEric W. Biederman 	struct net *net;
377e314dbdcSPavel Emelyanov 
378e314dbdcSPavel Emelyanov 	/*
379e314dbdcSPavel Emelyanov 	 * create and register peer first
380e314dbdcSPavel Emelyanov 	 */
381e314dbdcSPavel Emelyanov 	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
382e314dbdcSPavel Emelyanov 		struct nlattr *nla_peer;
383e314dbdcSPavel Emelyanov 
384e314dbdcSPavel Emelyanov 		nla_peer = data[VETH_INFO_PEER];
3853729d502SPatrick McHardy 		ifmp = nla_data(nla_peer);
386f7b12606SJiri Pirko 		err = rtnl_nla_parse_ifla(peer_tb,
387e314dbdcSPavel Emelyanov 					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
388f7b12606SJiri Pirko 					  nla_len(nla_peer) - sizeof(struct ifinfomsg));
389e314dbdcSPavel Emelyanov 		if (err < 0)
390e314dbdcSPavel Emelyanov 			return err;
391e314dbdcSPavel Emelyanov 
392e314dbdcSPavel Emelyanov 		err = veth_validate(peer_tb, NULL);
393e314dbdcSPavel Emelyanov 		if (err < 0)
394e314dbdcSPavel Emelyanov 			return err;
395e314dbdcSPavel Emelyanov 
396e314dbdcSPavel Emelyanov 		tbp = peer_tb;
3973729d502SPatrick McHardy 	} else {
3983729d502SPatrick McHardy 		ifmp = NULL;
399e314dbdcSPavel Emelyanov 		tbp = tb;
4003729d502SPatrick McHardy 	}
401e314dbdcSPavel Emelyanov 
4025517750fSTom Gundersen 	if (tbp[IFLA_IFNAME]) {
403e314dbdcSPavel Emelyanov 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
4045517750fSTom Gundersen 		name_assign_type = NET_NAME_USER;
4055517750fSTom Gundersen 	} else {
406e314dbdcSPavel Emelyanov 		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
4075517750fSTom Gundersen 		name_assign_type = NET_NAME_ENUM;
4085517750fSTom Gundersen 	}
409e314dbdcSPavel Emelyanov 
41081adee47SEric W. Biederman 	net = rtnl_link_get_net(src_net, tbp);
41181adee47SEric W. Biederman 	if (IS_ERR(net))
41281adee47SEric W. Biederman 		return PTR_ERR(net);
41381adee47SEric W. Biederman 
4145517750fSTom Gundersen 	peer = rtnl_create_link(net, ifname, name_assign_type,
4155517750fSTom Gundersen 				&veth_link_ops, tbp);
41681adee47SEric W. Biederman 	if (IS_ERR(peer)) {
41781adee47SEric W. Biederman 		put_net(net);
418e314dbdcSPavel Emelyanov 		return PTR_ERR(peer);
41981adee47SEric W. Biederman 	}
420e314dbdcSPavel Emelyanov 
421e314dbdcSPavel Emelyanov 	if (tbp[IFLA_ADDRESS] == NULL)
422f2cedb63SDanny Kukawka 		eth_hw_addr_random(peer);
423e314dbdcSPavel Emelyanov 
424e6f8f1a7SPavel Emelyanov 	if (ifmp && (dev->ifindex != 0))
425e6f8f1a7SPavel Emelyanov 		peer->ifindex = ifmp->ifi_index;
426e6f8f1a7SPavel Emelyanov 
427e314dbdcSPavel Emelyanov 	err = register_netdevice(peer);
42881adee47SEric W. Biederman 	put_net(net);
42981adee47SEric W. Biederman 	net = NULL;
430e314dbdcSPavel Emelyanov 	if (err < 0)
431e314dbdcSPavel Emelyanov 		goto err_register_peer;
432e314dbdcSPavel Emelyanov 
433e314dbdcSPavel Emelyanov 	netif_carrier_off(peer);
434e314dbdcSPavel Emelyanov 
4353729d502SPatrick McHardy 	err = rtnl_configure_link(peer, ifmp);
4363729d502SPatrick McHardy 	if (err < 0)
4373729d502SPatrick McHardy 		goto err_configure_peer;
4383729d502SPatrick McHardy 
439e314dbdcSPavel Emelyanov 	/*
440e314dbdcSPavel Emelyanov 	 * register dev last
441e314dbdcSPavel Emelyanov 	 *
442e314dbdcSPavel Emelyanov 	 * note, that since we've registered new device the dev's name
443e314dbdcSPavel Emelyanov 	 * should be re-allocated
444e314dbdcSPavel Emelyanov 	 */
445e314dbdcSPavel Emelyanov 
446e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS] == NULL)
447f2cedb63SDanny Kukawka 		eth_hw_addr_random(dev);
448e314dbdcSPavel Emelyanov 
4496c8c4446SJiri Pirko 	if (tb[IFLA_IFNAME])
4506c8c4446SJiri Pirko 		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
4516c8c4446SJiri Pirko 	else
4526c8c4446SJiri Pirko 		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
4536c8c4446SJiri Pirko 
454e314dbdcSPavel Emelyanov 	err = register_netdevice(dev);
455e314dbdcSPavel Emelyanov 	if (err < 0)
456e314dbdcSPavel Emelyanov 		goto err_register_dev;
457e314dbdcSPavel Emelyanov 
458e314dbdcSPavel Emelyanov 	netif_carrier_off(dev);
459e314dbdcSPavel Emelyanov 
460e314dbdcSPavel Emelyanov 	/*
461e314dbdcSPavel Emelyanov 	 * tie the deviced together
462e314dbdcSPavel Emelyanov 	 */
463e314dbdcSPavel Emelyanov 
464e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
465d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, peer);
466e314dbdcSPavel Emelyanov 
467e314dbdcSPavel Emelyanov 	priv = netdev_priv(peer);
468d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, dev);
469e314dbdcSPavel Emelyanov 	return 0;
470e314dbdcSPavel Emelyanov 
471e314dbdcSPavel Emelyanov err_register_dev:
472e314dbdcSPavel Emelyanov 	/* nothing to do */
4733729d502SPatrick McHardy err_configure_peer:
474e314dbdcSPavel Emelyanov 	unregister_netdevice(peer);
475e314dbdcSPavel Emelyanov 	return err;
476e314dbdcSPavel Emelyanov 
477e314dbdcSPavel Emelyanov err_register_peer:
478e314dbdcSPavel Emelyanov 	free_netdev(peer);
479e314dbdcSPavel Emelyanov 	return err;
480e314dbdcSPavel Emelyanov }
481e314dbdcSPavel Emelyanov 
48223289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head)
483e314dbdcSPavel Emelyanov {
484e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
485e314dbdcSPavel Emelyanov 	struct net_device *peer;
486e314dbdcSPavel Emelyanov 
487e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
488d0e2c55eSEric Dumazet 	peer = rtnl_dereference(priv->peer);
489d0e2c55eSEric Dumazet 
490d0e2c55eSEric Dumazet 	/* Note : dellink() is called from default_device_exit_batch(),
491d0e2c55eSEric Dumazet 	 * before a rcu_synchronize() point. The devices are guaranteed
492d0e2c55eSEric Dumazet 	 * not being freed before one RCU grace period.
493d0e2c55eSEric Dumazet 	 */
494d0e2c55eSEric Dumazet 	RCU_INIT_POINTER(priv->peer, NULL);
495f45a5c26SEric Dumazet 	unregister_netdevice_queue(dev, head);
496d0e2c55eSEric Dumazet 
497f45a5c26SEric Dumazet 	if (peer) {
498d0e2c55eSEric Dumazet 		priv = netdev_priv(peer);
499d0e2c55eSEric Dumazet 		RCU_INIT_POINTER(priv->peer, NULL);
50024540535SEric Dumazet 		unregister_netdevice_queue(peer, head);
501e314dbdcSPavel Emelyanov 	}
502f45a5c26SEric Dumazet }
503e314dbdcSPavel Emelyanov 
50423711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
50523711438SThomas Graf 	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
50623711438SThomas Graf };
507e314dbdcSPavel Emelyanov 
508e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev)
509e5f4e7b9SNicolas Dichtel {
510e5f4e7b9SNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
511e5f4e7b9SNicolas Dichtel 	struct net_device *peer = rtnl_dereference(priv->peer);
512e5f4e7b9SNicolas Dichtel 
513e5f4e7b9SNicolas Dichtel 	return peer ? dev_net(peer) : dev_net(dev);
514e5f4e7b9SNicolas Dichtel }
515e5f4e7b9SNicolas Dichtel 
516e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = {
517e314dbdcSPavel Emelyanov 	.kind		= DRV_NAME,
518e314dbdcSPavel Emelyanov 	.priv_size	= sizeof(struct veth_priv),
519e314dbdcSPavel Emelyanov 	.setup		= veth_setup,
520e314dbdcSPavel Emelyanov 	.validate	= veth_validate,
521e314dbdcSPavel Emelyanov 	.newlink	= veth_newlink,
522e314dbdcSPavel Emelyanov 	.dellink	= veth_dellink,
523e314dbdcSPavel Emelyanov 	.policy		= veth_policy,
524e314dbdcSPavel Emelyanov 	.maxtype	= VETH_INFO_MAX,
525e5f4e7b9SNicolas Dichtel 	.get_link_net	= veth_get_link_net,
526e314dbdcSPavel Emelyanov };
527e314dbdcSPavel Emelyanov 
528e314dbdcSPavel Emelyanov /*
529e314dbdcSPavel Emelyanov  * init/fini
530e314dbdcSPavel Emelyanov  */
531e314dbdcSPavel Emelyanov 
532e314dbdcSPavel Emelyanov static __init int veth_init(void)
533e314dbdcSPavel Emelyanov {
534e314dbdcSPavel Emelyanov 	return rtnl_link_register(&veth_link_ops);
535e314dbdcSPavel Emelyanov }
536e314dbdcSPavel Emelyanov 
537e314dbdcSPavel Emelyanov static __exit void veth_exit(void)
538e314dbdcSPavel Emelyanov {
53968365458SPatrick McHardy 	rtnl_link_unregister(&veth_link_ops);
540e314dbdcSPavel Emelyanov }
541e314dbdcSPavel Emelyanov 
542e314dbdcSPavel Emelyanov module_init(veth_init);
543e314dbdcSPavel Emelyanov module_exit(veth_exit);
544e314dbdcSPavel Emelyanov 
545e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
546e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2");
547e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME);
548