xref: /openbmc/linux/drivers/net/veth.c (revision 02f01ec1)
1e314dbdcSPavel Emelyanov /*
2e314dbdcSPavel Emelyanov  *  drivers/net/veth.c
3e314dbdcSPavel Emelyanov  *
4e314dbdcSPavel Emelyanov  *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
5e314dbdcSPavel Emelyanov  *
6e314dbdcSPavel Emelyanov  * Author: Pavel Emelianov <xemul@openvz.org>
7e314dbdcSPavel Emelyanov  * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
8e314dbdcSPavel Emelyanov  *
9e314dbdcSPavel Emelyanov  */
10e314dbdcSPavel Emelyanov 
11e314dbdcSPavel Emelyanov #include <linux/netdevice.h>
125a0e3ad6STejun Heo #include <linux/slab.h>
13e314dbdcSPavel Emelyanov #include <linux/ethtool.h>
14e314dbdcSPavel Emelyanov #include <linux/etherdevice.h>
15cf05c700SEric Dumazet #include <linux/u64_stats_sync.h>
16e314dbdcSPavel Emelyanov 
17f7b12606SJiri Pirko #include <net/rtnetlink.h>
18e314dbdcSPavel Emelyanov #include <net/dst.h>
19e314dbdcSPavel Emelyanov #include <net/xfrm.h>
20ecef969eSStephen Hemminger #include <linux/veth.h>
219d9779e7SPaul Gortmaker #include <linux/module.h>
22e314dbdcSPavel Emelyanov 
/* Driver name and version strings. */
#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

/* Inclusive bounds accepted by is_valid_veth_mtu(). */
#define MIN_MTU		68	/* Min L3 MTU */
#define MAX_MTU		65535	/* Max L3 MTU (arbitrary) */
2838d40815SEric Biederman 
292681128fSEric Dumazet struct pcpu_vstats {
302681128fSEric Dumazet 	u64			packets;
312681128fSEric Dumazet 	u64			bytes;
32cf05c700SEric Dumazet 	struct u64_stats_sync	syncp;
33e314dbdcSPavel Emelyanov };
34e314dbdcSPavel Emelyanov 
35e314dbdcSPavel Emelyanov struct veth_priv {
36d0e2c55eSEric Dumazet 	struct net_device __rcu	*peer;
372681128fSEric Dumazet 	atomic64_t		dropped;
38e314dbdcSPavel Emelyanov };
39e314dbdcSPavel Emelyanov 
40e314dbdcSPavel Emelyanov /*
41e314dbdcSPavel Emelyanov  * ethtool interface
42e314dbdcSPavel Emelyanov  */
43e314dbdcSPavel Emelyanov 
44e314dbdcSPavel Emelyanov static struct {
45e314dbdcSPavel Emelyanov 	const char string[ETH_GSTRING_LEN];
46e314dbdcSPavel Emelyanov } ethtool_stats_keys[] = {
47e314dbdcSPavel Emelyanov 	{ "peer_ifindex" },
48e314dbdcSPavel Emelyanov };
49e314dbdcSPavel Emelyanov 
50e314dbdcSPavel Emelyanov static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
51e314dbdcSPavel Emelyanov {
52e314dbdcSPavel Emelyanov 	cmd->supported		= 0;
53e314dbdcSPavel Emelyanov 	cmd->advertising	= 0;
5470739497SDavid Decotigny 	ethtool_cmd_speed_set(cmd, SPEED_10000);
55e314dbdcSPavel Emelyanov 	cmd->duplex		= DUPLEX_FULL;
56e314dbdcSPavel Emelyanov 	cmd->port		= PORT_TP;
57e314dbdcSPavel Emelyanov 	cmd->phy_address	= 0;
58e314dbdcSPavel Emelyanov 	cmd->transceiver	= XCVR_INTERNAL;
59e314dbdcSPavel Emelyanov 	cmd->autoneg		= AUTONEG_DISABLE;
60e314dbdcSPavel Emelyanov 	cmd->maxtxpkt		= 0;
61e314dbdcSPavel Emelyanov 	cmd->maxrxpkt		= 0;
62e314dbdcSPavel Emelyanov 	return 0;
63e314dbdcSPavel Emelyanov }
64e314dbdcSPavel Emelyanov 
65e314dbdcSPavel Emelyanov static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
66e314dbdcSPavel Emelyanov {
6733a5ba14SRick Jones 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
6833a5ba14SRick Jones 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
69e314dbdcSPavel Emelyanov }
70e314dbdcSPavel Emelyanov 
71e314dbdcSPavel Emelyanov static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
72e314dbdcSPavel Emelyanov {
73e314dbdcSPavel Emelyanov 	switch(stringset) {
74e314dbdcSPavel Emelyanov 	case ETH_SS_STATS:
75e314dbdcSPavel Emelyanov 		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
76e314dbdcSPavel Emelyanov 		break;
77e314dbdcSPavel Emelyanov 	}
78e314dbdcSPavel Emelyanov }
79e314dbdcSPavel Emelyanov 
80b9f2c044SJeff Garzik static int veth_get_sset_count(struct net_device *dev, int sset)
81e314dbdcSPavel Emelyanov {
82b9f2c044SJeff Garzik 	switch (sset) {
83b9f2c044SJeff Garzik 	case ETH_SS_STATS:
84e314dbdcSPavel Emelyanov 		return ARRAY_SIZE(ethtool_stats_keys);
85b9f2c044SJeff Garzik 	default:
86b9f2c044SJeff Garzik 		return -EOPNOTSUPP;
87b9f2c044SJeff Garzik 	}
88e314dbdcSPavel Emelyanov }
89e314dbdcSPavel Emelyanov 
90e314dbdcSPavel Emelyanov static void veth_get_ethtool_stats(struct net_device *dev,
91e314dbdcSPavel Emelyanov 		struct ethtool_stats *stats, u64 *data)
92e314dbdcSPavel Emelyanov {
93d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
94d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
95e314dbdcSPavel Emelyanov 
96d0e2c55eSEric Dumazet 	data[0] = peer ? peer->ifindex : 0;
97e314dbdcSPavel Emelyanov }
98e314dbdcSPavel Emelyanov 
990fc0b732SStephen Hemminger static const struct ethtool_ops veth_ethtool_ops = {
100e314dbdcSPavel Emelyanov 	.get_settings		= veth_get_settings,
101e314dbdcSPavel Emelyanov 	.get_drvinfo		= veth_get_drvinfo,
102e314dbdcSPavel Emelyanov 	.get_link		= ethtool_op_get_link,
103e314dbdcSPavel Emelyanov 	.get_strings		= veth_get_strings,
104b9f2c044SJeff Garzik 	.get_sset_count		= veth_get_sset_count,
105e314dbdcSPavel Emelyanov 	.get_ethtool_stats	= veth_get_ethtool_stats,
106e314dbdcSPavel Emelyanov };
107e314dbdcSPavel Emelyanov 
108424efe9cSStephen Hemminger static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
109e314dbdcSPavel Emelyanov {
1102681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
111d0e2c55eSEric Dumazet 	struct net_device *rcv;
1122681128fSEric Dumazet 	int length = skb->len;
113e314dbdcSPavel Emelyanov 
114d0e2c55eSEric Dumazet 	rcu_read_lock();
115d0e2c55eSEric Dumazet 	rcv = rcu_dereference(priv->peer);
116d0e2c55eSEric Dumazet 	if (unlikely(!rcv)) {
117d0e2c55eSEric Dumazet 		kfree_skb(skb);
118d0e2c55eSEric Dumazet 		goto drop;
119d0e2c55eSEric Dumazet 	}
1200b796750SMichał Mirosław 	/* don't change ip_summed == CHECKSUM_PARTIAL, as that
1212681128fSEric Dumazet 	 * will cause bad checksum on forwarded packets
1222681128fSEric Dumazet 	 */
123a2c725faSMichał Mirosław 	if (skb->ip_summed == CHECKSUM_NONE &&
124a2c725faSMichał Mirosław 	    rcv->features & NETIF_F_RXCSUM)
125a2c725faSMichał Mirosław 		skb->ip_summed = CHECKSUM_UNNECESSARY;
126e314dbdcSPavel Emelyanov 
1272681128fSEric Dumazet 	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
1282681128fSEric Dumazet 		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
129e314dbdcSPavel Emelyanov 
130cf05c700SEric Dumazet 		u64_stats_update_begin(&stats->syncp);
1312681128fSEric Dumazet 		stats->bytes += length;
1322681128fSEric Dumazet 		stats->packets++;
133cf05c700SEric Dumazet 		u64_stats_update_end(&stats->syncp);
1342681128fSEric Dumazet 	} else {
135d0e2c55eSEric Dumazet drop:
1362681128fSEric Dumazet 		atomic64_inc(&priv->dropped);
1372681128fSEric Dumazet 	}
138d0e2c55eSEric Dumazet 	rcu_read_unlock();
1396ed10654SPatrick McHardy 	return NETDEV_TX_OK;
140e314dbdcSPavel Emelyanov }
141e314dbdcSPavel Emelyanov 
142e314dbdcSPavel Emelyanov /*
143e314dbdcSPavel Emelyanov  * general routines
144e314dbdcSPavel Emelyanov  */
145e314dbdcSPavel Emelyanov 
1462681128fSEric Dumazet static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
147e314dbdcSPavel Emelyanov {
148cf05c700SEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
14911687a10SDavid S. Miller 	int cpu;
15011687a10SDavid S. Miller 
1512681128fSEric Dumazet 	result->packets = 0;
1522681128fSEric Dumazet 	result->bytes = 0;
1532b1c8b0fSEric Dumazet 	for_each_possible_cpu(cpu) {
1542681128fSEric Dumazet 		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
1552681128fSEric Dumazet 		u64 packets, bytes;
156cf05c700SEric Dumazet 		unsigned int start;
157e314dbdcSPavel Emelyanov 
158cf05c700SEric Dumazet 		do {
15957a7744eSEric W. Biederman 			start = u64_stats_fetch_begin_irq(&stats->syncp);
1602681128fSEric Dumazet 			packets = stats->packets;
1612681128fSEric Dumazet 			bytes = stats->bytes;
16257a7744eSEric W. Biederman 		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
1632681128fSEric Dumazet 		result->packets += packets;
1642681128fSEric Dumazet 		result->bytes += bytes;
165e314dbdcSPavel Emelyanov 	}
1662681128fSEric Dumazet 	return atomic64_read(&priv->dropped);
1672681128fSEric Dumazet }
1682681128fSEric Dumazet 
1692681128fSEric Dumazet static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
1702681128fSEric Dumazet 						  struct rtnl_link_stats64 *tot)
1712681128fSEric Dumazet {
1722681128fSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
173d0e2c55eSEric Dumazet 	struct net_device *peer;
1742681128fSEric Dumazet 	struct pcpu_vstats one;
1752681128fSEric Dumazet 
1762681128fSEric Dumazet 	tot->tx_dropped = veth_stats_one(&one, dev);
1772681128fSEric Dumazet 	tot->tx_bytes = one.bytes;
1782681128fSEric Dumazet 	tot->tx_packets = one.packets;
1792681128fSEric Dumazet 
180d0e2c55eSEric Dumazet 	rcu_read_lock();
181d0e2c55eSEric Dumazet 	peer = rcu_dereference(priv->peer);
182d0e2c55eSEric Dumazet 	if (peer) {
183d0e2c55eSEric Dumazet 		tot->rx_dropped = veth_stats_one(&one, peer);
1842681128fSEric Dumazet 		tot->rx_bytes = one.bytes;
1852681128fSEric Dumazet 		tot->rx_packets = one.packets;
186d0e2c55eSEric Dumazet 	}
187d0e2c55eSEric Dumazet 	rcu_read_unlock();
188e314dbdcSPavel Emelyanov 
1896311cc44Sstephen hemminger 	return tot;
190e314dbdcSPavel Emelyanov }
191e314dbdcSPavel Emelyanov 
/*
 * fake multicast ability
 *
 * Installed as .ndo_set_rx_mode; there is nothing to program, so the
 * handler does nothing.
 */
static void veth_set_multicast_list(struct net_device *dev)
{
	/* intentionally empty */
}
1965c70ef85SGao feng 
197e314dbdcSPavel Emelyanov static int veth_open(struct net_device *dev)
198e314dbdcSPavel Emelyanov {
199d0e2c55eSEric Dumazet 	struct veth_priv *priv = netdev_priv(dev);
200d0e2c55eSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
201e314dbdcSPavel Emelyanov 
202d0e2c55eSEric Dumazet 	if (!peer)
203e314dbdcSPavel Emelyanov 		return -ENOTCONN;
204e314dbdcSPavel Emelyanov 
205d0e2c55eSEric Dumazet 	if (peer->flags & IFF_UP) {
206e314dbdcSPavel Emelyanov 		netif_carrier_on(dev);
207d0e2c55eSEric Dumazet 		netif_carrier_on(peer);
208e314dbdcSPavel Emelyanov 	}
209e314dbdcSPavel Emelyanov 	return 0;
210e314dbdcSPavel Emelyanov }
211e314dbdcSPavel Emelyanov 
2122cf48a10SEric W. Biederman static int veth_close(struct net_device *dev)
2132cf48a10SEric W. Biederman {
2142cf48a10SEric W. Biederman 	struct veth_priv *priv = netdev_priv(dev);
2152efd32eeSEric Dumazet 	struct net_device *peer = rtnl_dereference(priv->peer);
2162cf48a10SEric W. Biederman 
2172cf48a10SEric W. Biederman 	netif_carrier_off(dev);
2182efd32eeSEric Dumazet 	if (peer)
2192efd32eeSEric Dumazet 		netif_carrier_off(peer);
2202cf48a10SEric W. Biederman 
2212cf48a10SEric W. Biederman 	return 0;
2222cf48a10SEric W. Biederman }
2232cf48a10SEric W. Biederman 
22438d40815SEric Biederman static int is_valid_veth_mtu(int new_mtu)
22538d40815SEric Biederman {
226807540baSEric Dumazet 	return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
22738d40815SEric Biederman }
22838d40815SEric Biederman 
22938d40815SEric Biederman static int veth_change_mtu(struct net_device *dev, int new_mtu)
23038d40815SEric Biederman {
23138d40815SEric Biederman 	if (!is_valid_veth_mtu(new_mtu))
23238d40815SEric Biederman 		return -EINVAL;
23338d40815SEric Biederman 	dev->mtu = new_mtu;
23438d40815SEric Biederman 	return 0;
23538d40815SEric Biederman }
23638d40815SEric Biederman 
237e314dbdcSPavel Emelyanov static int veth_dev_init(struct net_device *dev)
238e314dbdcSPavel Emelyanov {
2391c213bd2SWANG Cong 	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
2402681128fSEric Dumazet 	if (!dev->vstats)
241e314dbdcSPavel Emelyanov 		return -ENOMEM;
242e314dbdcSPavel Emelyanov 	return 0;
243e314dbdcSPavel Emelyanov }
244e314dbdcSPavel Emelyanov 
24511687a10SDavid S. Miller static void veth_dev_free(struct net_device *dev)
24611687a10SDavid S. Miller {
2472681128fSEric Dumazet 	free_percpu(dev->vstats);
24811687a10SDavid S. Miller 	free_netdev(dev);
24911687a10SDavid S. Miller }
25011687a10SDavid S. Miller 
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * .ndo_poll_controller handler - deliberately a no-op.
 *
 * veth only receives frames when its peer sends one.  Since that is a
 * synchronous operation, there can never be pending data when we are
 * polled, so there is nothing to do here.
 *
 * The hook is still needed so that netpoll recognizes us as an
 * interface that supports polling, which enables bridge devices in
 * virt setups to still use netconsole.
 */
static void veth_poll_controller(struct net_device *dev)
{
}
#endif	/* CONFIG_NET_POLL_CONTROLLER */
265bb446c19SWANG Cong 
266a45253bfSNicolas Dichtel static int veth_get_iflink(const struct net_device *dev)
267a45253bfSNicolas Dichtel {
268a45253bfSNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
269a45253bfSNicolas Dichtel 	struct net_device *peer;
270a45253bfSNicolas Dichtel 	int iflink;
271a45253bfSNicolas Dichtel 
272a45253bfSNicolas Dichtel 	rcu_read_lock();
273a45253bfSNicolas Dichtel 	peer = rcu_dereference(priv->peer);
274a45253bfSNicolas Dichtel 	iflink = peer ? peer->ifindex : 0;
275a45253bfSNicolas Dichtel 	rcu_read_unlock();
276a45253bfSNicolas Dichtel 
277a45253bfSNicolas Dichtel 	return iflink;
278a45253bfSNicolas Dichtel }
279a45253bfSNicolas Dichtel 
2804456e7bdSStephen Hemminger static const struct net_device_ops veth_netdev_ops = {
2814456e7bdSStephen Hemminger 	.ndo_init            = veth_dev_init,
2824456e7bdSStephen Hemminger 	.ndo_open            = veth_open,
2832cf48a10SEric W. Biederman 	.ndo_stop            = veth_close,
28400829823SStephen Hemminger 	.ndo_start_xmit      = veth_xmit,
28538d40815SEric Biederman 	.ndo_change_mtu      = veth_change_mtu,
2866311cc44Sstephen hemminger 	.ndo_get_stats64     = veth_get_stats64,
2875c70ef85SGao feng 	.ndo_set_rx_mode     = veth_set_multicast_list,
288ee923623SDaniel Lezcano 	.ndo_set_mac_address = eth_mac_addr,
289bb446c19SWANG Cong #ifdef CONFIG_NET_POLL_CONTROLLER
290bb446c19SWANG Cong 	.ndo_poll_controller	= veth_poll_controller,
291bb446c19SWANG Cong #endif
292a45253bfSNicolas Dichtel 	.ndo_get_iflink		= veth_get_iflink,
2931a04a821SToshiaki Makita 	.ndo_features_check	= passthru_features_check,
2944456e7bdSStephen Hemminger };
2954456e7bdSStephen Hemminger 
/*
 * Feature flags used by veth_setup() for dev->features, dev->hw_features
 * and dev->hw_enc_features.
 */
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |	\
		       NETIF_F_HW_CSUM | NETIF_F_RXCSUM |			\
		       NETIF_F_ALL_TSO | NETIF_F_UFO |				\
		       NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |		\
		       NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT |			\
		       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |	\
		       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX)
3028093315aSEric Dumazet 
303e314dbdcSPavel Emelyanov static void veth_setup(struct net_device *dev)
304e314dbdcSPavel Emelyanov {
305e314dbdcSPavel Emelyanov 	ether_setup(dev);
306e314dbdcSPavel Emelyanov 
307550fd08cSNeil Horman 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
30823ea5a96SHannes Frederic Sowa 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
30902f01ec1SPhil Sutter 	dev->priv_flags |= IFF_NO_QUEUE;
310550fd08cSNeil Horman 
3114456e7bdSStephen Hemminger 	dev->netdev_ops = &veth_netdev_ops;
312e314dbdcSPavel Emelyanov 	dev->ethtool_ops = &veth_ethtool_ops;
313e314dbdcSPavel Emelyanov 	dev->features |= NETIF_F_LLTX;
3148093315aSEric Dumazet 	dev->features |= VETH_FEATURES;
3158d0d21f4SToshiaki Makita 	dev->vlan_features = dev->features &
3163f8c707bSVlad Yasevich 			     ~(NETIF_F_HW_VLAN_CTAG_TX |
3173f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_TX |
3183f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_CTAG_RX |
3193f8c707bSVlad Yasevich 			       NETIF_F_HW_VLAN_STAG_RX);
32011687a10SDavid S. Miller 	dev->destructor = veth_dev_free;
321a2c725faSMichał Mirosław 
3228093315aSEric Dumazet 	dev->hw_features = VETH_FEATURES;
32382d81898SEric Dumazet 	dev->hw_enc_features = VETH_FEATURES;
324e314dbdcSPavel Emelyanov }
325e314dbdcSPavel Emelyanov 
326e314dbdcSPavel Emelyanov /*
327e314dbdcSPavel Emelyanov  * netlink interface
328e314dbdcSPavel Emelyanov  */
329e314dbdcSPavel Emelyanov 
330e314dbdcSPavel Emelyanov static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
331e314dbdcSPavel Emelyanov {
332e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS]) {
333e314dbdcSPavel Emelyanov 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
334e314dbdcSPavel Emelyanov 			return -EINVAL;
335e314dbdcSPavel Emelyanov 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
336e314dbdcSPavel Emelyanov 			return -EADDRNOTAVAIL;
337e314dbdcSPavel Emelyanov 	}
33838d40815SEric Biederman 	if (tb[IFLA_MTU]) {
33938d40815SEric Biederman 		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
34038d40815SEric Biederman 			return -EINVAL;
34138d40815SEric Biederman 	}
342e314dbdcSPavel Emelyanov 	return 0;
343e314dbdcSPavel Emelyanov }
344e314dbdcSPavel Emelyanov 
345e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops;
346e314dbdcSPavel Emelyanov 
34781adee47SEric W. Biederman static int veth_newlink(struct net *src_net, struct net_device *dev,
348e314dbdcSPavel Emelyanov 			 struct nlattr *tb[], struct nlattr *data[])
349e314dbdcSPavel Emelyanov {
350e314dbdcSPavel Emelyanov 	int err;
351e314dbdcSPavel Emelyanov 	struct net_device *peer;
352e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
353e314dbdcSPavel Emelyanov 	char ifname[IFNAMSIZ];
354e314dbdcSPavel Emelyanov 	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
3555517750fSTom Gundersen 	unsigned char name_assign_type;
3563729d502SPatrick McHardy 	struct ifinfomsg *ifmp;
35781adee47SEric W. Biederman 	struct net *net;
358e314dbdcSPavel Emelyanov 
359e314dbdcSPavel Emelyanov 	/*
360e314dbdcSPavel Emelyanov 	 * create and register peer first
361e314dbdcSPavel Emelyanov 	 */
362e314dbdcSPavel Emelyanov 	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
363e314dbdcSPavel Emelyanov 		struct nlattr *nla_peer;
364e314dbdcSPavel Emelyanov 
365e314dbdcSPavel Emelyanov 		nla_peer = data[VETH_INFO_PEER];
3663729d502SPatrick McHardy 		ifmp = nla_data(nla_peer);
367f7b12606SJiri Pirko 		err = rtnl_nla_parse_ifla(peer_tb,
368e314dbdcSPavel Emelyanov 					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
369f7b12606SJiri Pirko 					  nla_len(nla_peer) - sizeof(struct ifinfomsg));
370e314dbdcSPavel Emelyanov 		if (err < 0)
371e314dbdcSPavel Emelyanov 			return err;
372e314dbdcSPavel Emelyanov 
373e314dbdcSPavel Emelyanov 		err = veth_validate(peer_tb, NULL);
374e314dbdcSPavel Emelyanov 		if (err < 0)
375e314dbdcSPavel Emelyanov 			return err;
376e314dbdcSPavel Emelyanov 
377e314dbdcSPavel Emelyanov 		tbp = peer_tb;
3783729d502SPatrick McHardy 	} else {
3793729d502SPatrick McHardy 		ifmp = NULL;
380e314dbdcSPavel Emelyanov 		tbp = tb;
3813729d502SPatrick McHardy 	}
382e314dbdcSPavel Emelyanov 
3835517750fSTom Gundersen 	if (tbp[IFLA_IFNAME]) {
384e314dbdcSPavel Emelyanov 		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
3855517750fSTom Gundersen 		name_assign_type = NET_NAME_USER;
3865517750fSTom Gundersen 	} else {
387e314dbdcSPavel Emelyanov 		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
3885517750fSTom Gundersen 		name_assign_type = NET_NAME_ENUM;
3895517750fSTom Gundersen 	}
390e314dbdcSPavel Emelyanov 
39181adee47SEric W. Biederman 	net = rtnl_link_get_net(src_net, tbp);
39281adee47SEric W. Biederman 	if (IS_ERR(net))
39381adee47SEric W. Biederman 		return PTR_ERR(net);
39481adee47SEric W. Biederman 
3955517750fSTom Gundersen 	peer = rtnl_create_link(net, ifname, name_assign_type,
3965517750fSTom Gundersen 				&veth_link_ops, tbp);
39781adee47SEric W. Biederman 	if (IS_ERR(peer)) {
39881adee47SEric W. Biederman 		put_net(net);
399e314dbdcSPavel Emelyanov 		return PTR_ERR(peer);
40081adee47SEric W. Biederman 	}
401e314dbdcSPavel Emelyanov 
402e314dbdcSPavel Emelyanov 	if (tbp[IFLA_ADDRESS] == NULL)
403f2cedb63SDanny Kukawka 		eth_hw_addr_random(peer);
404e314dbdcSPavel Emelyanov 
405e6f8f1a7SPavel Emelyanov 	if (ifmp && (dev->ifindex != 0))
406e6f8f1a7SPavel Emelyanov 		peer->ifindex = ifmp->ifi_index;
407e6f8f1a7SPavel Emelyanov 
408e314dbdcSPavel Emelyanov 	err = register_netdevice(peer);
40981adee47SEric W. Biederman 	put_net(net);
41081adee47SEric W. Biederman 	net = NULL;
411e314dbdcSPavel Emelyanov 	if (err < 0)
412e314dbdcSPavel Emelyanov 		goto err_register_peer;
413e314dbdcSPavel Emelyanov 
414e314dbdcSPavel Emelyanov 	netif_carrier_off(peer);
415e314dbdcSPavel Emelyanov 
4163729d502SPatrick McHardy 	err = rtnl_configure_link(peer, ifmp);
4173729d502SPatrick McHardy 	if (err < 0)
4183729d502SPatrick McHardy 		goto err_configure_peer;
4193729d502SPatrick McHardy 
420e314dbdcSPavel Emelyanov 	/*
421e314dbdcSPavel Emelyanov 	 * register dev last
422e314dbdcSPavel Emelyanov 	 *
423e314dbdcSPavel Emelyanov 	 * note, that since we've registered new device the dev's name
424e314dbdcSPavel Emelyanov 	 * should be re-allocated
425e314dbdcSPavel Emelyanov 	 */
426e314dbdcSPavel Emelyanov 
427e314dbdcSPavel Emelyanov 	if (tb[IFLA_ADDRESS] == NULL)
428f2cedb63SDanny Kukawka 		eth_hw_addr_random(dev);
429e314dbdcSPavel Emelyanov 
4306c8c4446SJiri Pirko 	if (tb[IFLA_IFNAME])
4316c8c4446SJiri Pirko 		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
4326c8c4446SJiri Pirko 	else
4336c8c4446SJiri Pirko 		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
4346c8c4446SJiri Pirko 
435e314dbdcSPavel Emelyanov 	err = register_netdevice(dev);
436e314dbdcSPavel Emelyanov 	if (err < 0)
437e314dbdcSPavel Emelyanov 		goto err_register_dev;
438e314dbdcSPavel Emelyanov 
439e314dbdcSPavel Emelyanov 	netif_carrier_off(dev);
440e314dbdcSPavel Emelyanov 
441e314dbdcSPavel Emelyanov 	/*
442e314dbdcSPavel Emelyanov 	 * tie the deviced together
443e314dbdcSPavel Emelyanov 	 */
444e314dbdcSPavel Emelyanov 
445e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
446d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, peer);
447e314dbdcSPavel Emelyanov 
448e314dbdcSPavel Emelyanov 	priv = netdev_priv(peer);
449d0e2c55eSEric Dumazet 	rcu_assign_pointer(priv->peer, dev);
450e314dbdcSPavel Emelyanov 	return 0;
451e314dbdcSPavel Emelyanov 
452e314dbdcSPavel Emelyanov err_register_dev:
453e314dbdcSPavel Emelyanov 	/* nothing to do */
4543729d502SPatrick McHardy err_configure_peer:
455e314dbdcSPavel Emelyanov 	unregister_netdevice(peer);
456e314dbdcSPavel Emelyanov 	return err;
457e314dbdcSPavel Emelyanov 
458e314dbdcSPavel Emelyanov err_register_peer:
459e314dbdcSPavel Emelyanov 	free_netdev(peer);
460e314dbdcSPavel Emelyanov 	return err;
461e314dbdcSPavel Emelyanov }
462e314dbdcSPavel Emelyanov 
46323289a37SEric Dumazet static void veth_dellink(struct net_device *dev, struct list_head *head)
464e314dbdcSPavel Emelyanov {
465e314dbdcSPavel Emelyanov 	struct veth_priv *priv;
466e314dbdcSPavel Emelyanov 	struct net_device *peer;
467e314dbdcSPavel Emelyanov 
468e314dbdcSPavel Emelyanov 	priv = netdev_priv(dev);
469d0e2c55eSEric Dumazet 	peer = rtnl_dereference(priv->peer);
470d0e2c55eSEric Dumazet 
471d0e2c55eSEric Dumazet 	/* Note : dellink() is called from default_device_exit_batch(),
472d0e2c55eSEric Dumazet 	 * before a rcu_synchronize() point. The devices are guaranteed
473d0e2c55eSEric Dumazet 	 * not being freed before one RCU grace period.
474d0e2c55eSEric Dumazet 	 */
475d0e2c55eSEric Dumazet 	RCU_INIT_POINTER(priv->peer, NULL);
476f45a5c26SEric Dumazet 	unregister_netdevice_queue(dev, head);
477d0e2c55eSEric Dumazet 
478f45a5c26SEric Dumazet 	if (peer) {
479d0e2c55eSEric Dumazet 		priv = netdev_priv(peer);
480d0e2c55eSEric Dumazet 		RCU_INIT_POINTER(priv->peer, NULL);
48124540535SEric Dumazet 		unregister_netdevice_queue(peer, head);
482e314dbdcSPavel Emelyanov 	}
483f45a5c26SEric Dumazet }
484e314dbdcSPavel Emelyanov 
48523711438SThomas Graf static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
48623711438SThomas Graf 	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
48723711438SThomas Graf };
488e314dbdcSPavel Emelyanov 
489e5f4e7b9SNicolas Dichtel static struct net *veth_get_link_net(const struct net_device *dev)
490e5f4e7b9SNicolas Dichtel {
491e5f4e7b9SNicolas Dichtel 	struct veth_priv *priv = netdev_priv(dev);
492e5f4e7b9SNicolas Dichtel 	struct net_device *peer = rtnl_dereference(priv->peer);
493e5f4e7b9SNicolas Dichtel 
494e5f4e7b9SNicolas Dichtel 	return peer ? dev_net(peer) : dev_net(dev);
495e5f4e7b9SNicolas Dichtel }
496e5f4e7b9SNicolas Dichtel 
497e314dbdcSPavel Emelyanov static struct rtnl_link_ops veth_link_ops = {
498e314dbdcSPavel Emelyanov 	.kind		= DRV_NAME,
499e314dbdcSPavel Emelyanov 	.priv_size	= sizeof(struct veth_priv),
500e314dbdcSPavel Emelyanov 	.setup		= veth_setup,
501e314dbdcSPavel Emelyanov 	.validate	= veth_validate,
502e314dbdcSPavel Emelyanov 	.newlink	= veth_newlink,
503e314dbdcSPavel Emelyanov 	.dellink	= veth_dellink,
504e314dbdcSPavel Emelyanov 	.policy		= veth_policy,
505e314dbdcSPavel Emelyanov 	.maxtype	= VETH_INFO_MAX,
506e5f4e7b9SNicolas Dichtel 	.get_link_net	= veth_get_link_net,
507e314dbdcSPavel Emelyanov };
508e314dbdcSPavel Emelyanov 
509e314dbdcSPavel Emelyanov /*
510e314dbdcSPavel Emelyanov  * init/fini
511e314dbdcSPavel Emelyanov  */
512e314dbdcSPavel Emelyanov 
513e314dbdcSPavel Emelyanov static __init int veth_init(void)
514e314dbdcSPavel Emelyanov {
515e314dbdcSPavel Emelyanov 	return rtnl_link_register(&veth_link_ops);
516e314dbdcSPavel Emelyanov }
517e314dbdcSPavel Emelyanov 
518e314dbdcSPavel Emelyanov static __exit void veth_exit(void)
519e314dbdcSPavel Emelyanov {
52068365458SPatrick McHardy 	rtnl_link_unregister(&veth_link_ops);
521e314dbdcSPavel Emelyanov }
522e314dbdcSPavel Emelyanov 
523e314dbdcSPavel Emelyanov module_init(veth_init);
524e314dbdcSPavel Emelyanov module_exit(veth_exit);
525e314dbdcSPavel Emelyanov 
526e314dbdcSPavel Emelyanov MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
527e314dbdcSPavel Emelyanov MODULE_LICENSE("GPL v2");
528e314dbdcSPavel Emelyanov MODULE_ALIAS_RTNL_LINK(DRV_NAME);
529