xref: /openbmc/linux/drivers/net/vrf.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2193125dbSDavid Ahern /*
3193125dbSDavid Ahern  * vrf.c: device driver to encapsulate a VRF space
4193125dbSDavid Ahern  *
5193125dbSDavid Ahern  * Copyright (c) 2015 Cumulus Networks. All rights reserved.
6193125dbSDavid Ahern  * Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com>
7193125dbSDavid Ahern  * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
8193125dbSDavid Ahern  *
9193125dbSDavid Ahern  * Based on dummy, team and ipvlan drivers
10193125dbSDavid Ahern  */
11193125dbSDavid Ahern 
12cc69837fSJakub Kicinski #include <linux/ethtool.h>
13193125dbSDavid Ahern #include <linux/module.h>
14193125dbSDavid Ahern #include <linux/kernel.h>
15193125dbSDavid Ahern #include <linux/netdevice.h>
16193125dbSDavid Ahern #include <linux/etherdevice.h>
17193125dbSDavid Ahern #include <linux/ip.h>
18193125dbSDavid Ahern #include <linux/init.h>
19193125dbSDavid Ahern #include <linux/moduleparam.h>
20193125dbSDavid Ahern #include <linux/netfilter.h>
21193125dbSDavid Ahern #include <linux/rtnetlink.h>
22193125dbSDavid Ahern #include <net/rtnetlink.h>
23193125dbSDavid Ahern #include <linux/u64_stats_sync.h>
24193125dbSDavid Ahern #include <linux/hashtable.h>
25c8baec38SAndrea Mayer #include <linux/spinlock_types.h>
26193125dbSDavid Ahern 
27193125dbSDavid Ahern #include <linux/inetdevice.h>
288f58336dSDavid Ahern #include <net/arp.h>
29193125dbSDavid Ahern #include <net/ip.h>
30193125dbSDavid Ahern #include <net/ip_fib.h>
3135402e31SDavid Ahern #include <net/ip6_fib.h>
32193125dbSDavid Ahern #include <net/ip6_route.h>
33193125dbSDavid Ahern #include <net/route.h>
34193125dbSDavid Ahern #include <net/addrconf.h>
35ee15ee5dSDavid Ahern #include <net/l3mdev.h>
361aa6c4f6SDavid Ahern #include <net/fib_rules.h>
37b6459415SJakub Kicinski #include <net/sch_generic.h>
38097d3c95SDavid Ahern #include <net/netns/generic.h>
398c9c296aSFlorian Westphal #include <net/netfilter/nf_conntrack.h>
40193125dbSDavid Ahern 
41193125dbSDavid Ahern #define DRV_NAME	"vrf"
42c8baec38SAndrea Mayer #define DRV_VERSION	"1.1"
43193125dbSDavid Ahern 
441aa6c4f6SDavid Ahern #define FIB_RULE_PREF  1000       /* default preference for FIB rules */
45097d3c95SDavid Ahern 
46c8baec38SAndrea Mayer #define HT_MAP_BITS	4
47c8baec38SAndrea Mayer #define HASH_INITVAL	((u32)0xcafef00d)
48c8baec38SAndrea Mayer 
/* vrf_map: per-netns map from FIB table id to the VRF devices bound to
 * that table; backs the "strict mode" accounting below.
 */
struct  vrf_map {
	DECLARE_HASHTABLE(ht, HT_MAP_BITS);
	spinlock_t vmap_lock;	/* protects ht, shared_tables and strict_mode */

	/* shared_tables:
	 * count how many distinct tables do not comply with the strict mode
	 * requirement.
	 * shared_tables value must be 0 in order to enable the strict mode.
	 *
	 * example of the evolution of shared_tables:
	 *                                                        | time
	 * add  vrf0 --> table 100        shared_tables = 0       | t0
	 * add  vrf1 --> table 101        shared_tables = 0       | t1
	 * add  vrf2 --> table 100        shared_tables = 1       | t2
	 * add  vrf3 --> table 100        shared_tables = 1       | t3
	 * add  vrf4 --> table 101        shared_tables = 2       v t4
	 *
	 * shared_tables is a "step function" (or "staircase function")
	 * and it is increased by one when the second vrf is associated to a
	 * table.
	 *
	 * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1.
	 *
	 * at t3, another dev (vrf3) is bound to the same table 100 but the
	 * value of shared_tables is still 1.
	 * This means that no matter how many new vrfs will register on the
	 * table 100, the shared_tables will not increase (considering only
	 * table 100).
	 *
	 * at t4, vrf4 is bound to table 101, and shared_tables = 2.
	 *
	 * Looking at the value of shared_tables we can immediately know if
	 * the strict_mode can or cannot be enforced. Indeed, strict_mode
	 * can be enforced iff shared_tables = 0.
	 *
	 * Conversely, shared_tables is decreased when a vrf is de-associated
	 * from a table with exactly two associated vrfs.
	 */
	u32 shared_tables;

	bool strict_mode;	/* when true, at most one VRF per table */
};
91c8baec38SAndrea Mayer 
/* One entry of the vrf_map hashtable: the set of VRFs bound to table_id. */
struct vrf_map_elem {
	struct hlist_node hnode;    /* membership in vrf_map.ht */
	struct list_head vrf_list;  /* VRFs registered to this table */

	u32 table_id;               /* FIB table this element describes */
	int users;                  /* number of VRFs currently on vrf_list */
	int ifindex;                /* ifindex of the VRF that created the entry */
};
100c8baec38SAndrea Mayer 
/* pernet id used with net_generic() to reach struct netns_vrf */
static unsigned int vrf_net_id;

/* per netns vrf data */
struct netns_vrf {
	/* protected by rtnl lock */
	bool add_fib_rules;

	struct vrf_map vmap;
	struct ctl_table_header	*ctl_hdr;	/* sysctl registration handle */
};
111c8baec38SAndrea Mayer 
/* netdev_priv() area of a VRF device */
struct net_vrf {
	struct rtable __rcu	*rth;	/* cached IPv4 dst for this VRF */
	struct rt6_info	__rcu	*rt6;	/* cached IPv6 dst for this VRF */
#if IS_ENABLED(CONFIG_IPV6)
	struct fib6_table	*fib6_table;
#endif
	u32                     tb_id;	/* FIB table bound to this VRF */

	struct list_head	me_list;   /* entry in vrf_map_elem */
	int			ifindex;
};
123ec539514SDavid Ahern 
/* Account one received packet of @len bytes in the per-CPU dstats,
 * under the u64_stats write sequence so 32-bit readers see a
 * consistent snapshot.
 */
static void vrf_rx_stats(struct net_device *dev, int len)
{
	struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

	u64_stats_update_begin(&dstats->syncp);
	dstats->rx_packets++;
	dstats->rx_bytes += len;
	u64_stats_update_end(&dstats->syncp);
}
133afe80a49SDavid Ahern 
vrf_tx_error(struct net_device * vrf_dev,struct sk_buff * skb)13457b8efa1SNikolay Aleksandrov static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
13557b8efa1SNikolay Aleksandrov {
13657b8efa1SNikolay Aleksandrov 	vrf_dev->stats.tx_errors++;
13757b8efa1SNikolay Aleksandrov 	kfree_skb(skb);
13857b8efa1SNikolay Aleksandrov }
13957b8efa1SNikolay Aleksandrov 
/* ndo_get_stats64: sum the per-CPU dstats counters of every possible CPU
 * into @stats (which the caller has already zeroed/aggregated).
 */
static void vrf_get_stats64(struct net_device *dev,
			    struct rtnl_link_stats64 *stats)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_dstats *dstats;
		u64 tbytes, tpkts, tdrops, rbytes, rpkts;
		unsigned int start;

		dstats = per_cpu_ptr(dev->dstats, i);
		do {
			/* retry the snapshot if a writer raced with us */
			start = u64_stats_fetch_begin(&dstats->syncp);
			tbytes = dstats->tx_bytes;
			tpkts = dstats->tx_packets;
			tdrops = dstats->tx_drops;
			rbytes = dstats->rx_bytes;
			rpkts = dstats->rx_packets;
		} while (u64_stats_fetch_retry(&dstats->syncp, start));
		stats->tx_bytes += tbytes;
		stats->tx_packets += tpkts;
		stats->tx_dropped += tdrops;
		stats->rx_bytes += rbytes;
		stats->rx_packets += rpkts;
	}
}
166193125dbSDavid Ahern 
netns_vrf_map(struct net * net)167c8baec38SAndrea Mayer static struct vrf_map *netns_vrf_map(struct net *net)
168c8baec38SAndrea Mayer {
169c8baec38SAndrea Mayer 	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
170c8baec38SAndrea Mayer 
171c8baec38SAndrea Mayer 	return &nn_vrf->vmap;
172c8baec38SAndrea Mayer }
173c8baec38SAndrea Mayer 
/* Convenience wrapper: the vrf_map of the netns that @dev lives in. */
static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	return netns_vrf_map(net);
}
178c8baec38SAndrea Mayer 
vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem * me)179a59a8ffdSAndrea Mayer static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me)
180a59a8ffdSAndrea Mayer {
181a59a8ffdSAndrea Mayer 	struct list_head *me_head = &me->vrf_list;
182a59a8ffdSAndrea Mayer 	struct net_vrf *vrf;
183a59a8ffdSAndrea Mayer 
184a59a8ffdSAndrea Mayer 	if (list_empty(me_head))
185a59a8ffdSAndrea Mayer 		return -ENODEV;
186a59a8ffdSAndrea Mayer 
187a59a8ffdSAndrea Mayer 	vrf = list_first_entry(me_head, struct net_vrf, me_list);
188a59a8ffdSAndrea Mayer 
189a59a8ffdSAndrea Mayer 	return vrf->ifindex;
190a59a8ffdSAndrea Mayer }
191a59a8ffdSAndrea Mayer 
/* Allocate an uninitialized map element with @flags; returns NULL on
 * allocation failure.  The caller must fill it in with
 * vrf_map_elem_init() before use.
 */
static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags)
{
	/* kmalloc() already returns NULL on failure; the previous
	 * explicit NULL check and re-return was redundant.
	 */
	return kmalloc(sizeof(struct vrf_map_elem), flags);
}
202c8baec38SAndrea Mayer 
/* Release a map element obtained from vrf_map_elem_alloc().
 * kfree(NULL) is a no-op, so callers need not check @me.
 */
static void vrf_map_elem_free(struct vrf_map_elem *me)
{
	kfree(me);
}
207c8baec38SAndrea Mayer 
vrf_map_elem_init(struct vrf_map_elem * me,int table_id,int ifindex,int users)208c8baec38SAndrea Mayer static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id,
209c8baec38SAndrea Mayer 			      int ifindex, int users)
210c8baec38SAndrea Mayer {
211c8baec38SAndrea Mayer 	me->table_id = table_id;
212c8baec38SAndrea Mayer 	me->ifindex = ifindex;
213c8baec38SAndrea Mayer 	me->users = users;
214c8baec38SAndrea Mayer 	INIT_LIST_HEAD(&me->vrf_list);
215c8baec38SAndrea Mayer }
216c8baec38SAndrea Mayer 
/* Find the element for @table_id in the map's hashtable, walking the
 * bucket chain to skip hash collisions.  Returns NULL if no VRF is
 * registered on that table.  Caller must hold vmap->vmap_lock.
 */
static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap,
						u32 table_id)
{
	struct vrf_map_elem *me;
	u32 key;

	key = jhash_1word(table_id, HASH_INITVAL);
	hash_for_each_possible(vmap->ht, me, hnode, key) {
		if (me->table_id == table_id)
			return me;
	}

	return NULL;
}
231c8baec38SAndrea Mayer 
vrf_map_add_elem(struct vrf_map * vmap,struct vrf_map_elem * me)232c8baec38SAndrea Mayer static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me)
233c8baec38SAndrea Mayer {
234c8baec38SAndrea Mayer 	u32 table_id = me->table_id;
235c8baec38SAndrea Mayer 	u32 key;
236c8baec38SAndrea Mayer 
237c8baec38SAndrea Mayer 	key = jhash_1word(table_id, HASH_INITVAL);
238c8baec38SAndrea Mayer 	hash_add(vmap->ht, &me->hnode, key);
239c8baec38SAndrea Mayer }
240c8baec38SAndrea Mayer 
/* Unlink @me from the map's hashtable; does not free it.
 * Caller must hold the owning vrf_map's vmap_lock.
 */
static void vrf_map_del_elem(struct vrf_map_elem *me)
{
	hash_del(&me->hnode);
}
245c8baec38SAndrea Mayer 
/* Take the map spinlock; the __acquires() annotation is for sparse. */
static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock)
{
	spin_lock(&vmap->vmap_lock);
}
250c8baec38SAndrea Mayer 
/* Release the map spinlock; the __releases() annotation is for sparse. */
static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock)
{
	spin_unlock(&vmap->vmap_lock);
}
255c8baec38SAndrea Mayer 
/* Register VRF device @dev in the per-netns vrf_map, binding it to its
 * FIB table.  Returns 0 on success, -ENOMEM on allocation failure, or
 * -EBUSY when strict mode forbids sharing the table with another VRF.
 * called with rtnl lock held
 */
static int
vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
	struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
	struct net_vrf *vrf = netdev_priv(dev);
	struct vrf_map_elem *new_me, *me;
	u32 table_id = vrf->tb_id;
	bool free_new_me = false;
	int users;
	int res;

	/* we pre-allocate elements used in the spin-locked section (so that we
	 * keep the spinlock as short as possible).
	 */
	new_me = vrf_map_elem_alloc(GFP_KERNEL);
	if (!new_me)
		return -ENOMEM;

	vrf_map_elem_init(new_me, table_id, dev->ifindex, 0);

	vrf_map_lock(vmap);

	me = vrf_map_lookup_elem(vmap, table_id);
	if (!me) {
		/* first VRF on this table: consume the pre-allocated element */
		me = new_me;
		vrf_map_add_elem(vmap, me);
		goto link_vrf;
	}

	/* we already have an entry in the vrf_map, so it means there is (at
	 * least) a vrf registered on the specific table.
	 */
	free_new_me = true;
	if (vmap->strict_mode) {
		/* vrfs cannot share the same table */
		NL_SET_ERR_MSG(extack, "Table is used by another VRF");
		res = -EBUSY;
		goto unlock;
	}

link_vrf:
	users = ++me->users;
	/* second VRF on a table makes it "shared"; see struct vrf_map */
	if (users == 2)
		++vmap->shared_tables;

	list_add(&vrf->me_list, &me->vrf_list);

	res = 0;

unlock:
	vrf_map_unlock(vmap);

	/* clean-up, if needed */
	if (free_new_me)
		vrf_map_elem_free(new_me);

	return res;
}
315c8baec38SAndrea Mayer 
/* Undo vrf_map_register_dev(): detach @dev from its table's map element,
 * updating the shared-tables accounting and freeing the element when it
 * loses its last user.
 * called with rtnl lock held
 */
static void vrf_map_unregister_dev(struct net_device *dev)
{
	struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
	struct net_vrf *vrf = netdev_priv(dev);
	u32 table_id = vrf->tb_id;
	struct vrf_map_elem *me;
	int users;

	vrf_map_lock(vmap);

	me = vrf_map_lookup_elem(vmap, table_id);
	if (!me)
		goto unlock;

	list_del(&vrf->me_list);

	users = --me->users;
	if (users == 1) {
		/* dropped back to one user: the table is no longer shared */
		--vmap->shared_tables;
	} else if (users == 0) {
		vrf_map_del_elem(me);

		/* no one will refer to this element anymore */
		vrf_map_elem_free(me);
	}

unlock:
	vrf_map_unlock(vmap);
}
346c8baec38SAndrea Mayer 
347a59a8ffdSAndrea Mayer /* return the vrf device index associated with the table_id */
vrf_ifindex_lookup_by_table_id(struct net * net,u32 table_id)348a59a8ffdSAndrea Mayer static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id)
349a59a8ffdSAndrea Mayer {
350a59a8ffdSAndrea Mayer 	struct vrf_map *vmap = netns_vrf_map(net);
351a59a8ffdSAndrea Mayer 	struct vrf_map_elem *me;
352a59a8ffdSAndrea Mayer 	int ifindex;
353a59a8ffdSAndrea Mayer 
354a59a8ffdSAndrea Mayer 	vrf_map_lock(vmap);
355a59a8ffdSAndrea Mayer 
356a59a8ffdSAndrea Mayer 	if (!vmap->strict_mode) {
357a59a8ffdSAndrea Mayer 		ifindex = -EPERM;
358a59a8ffdSAndrea Mayer 		goto unlock;
359a59a8ffdSAndrea Mayer 	}
360a59a8ffdSAndrea Mayer 
361a59a8ffdSAndrea Mayer 	me = vrf_map_lookup_elem(vmap, table_id);
362a59a8ffdSAndrea Mayer 	if (!me) {
363a59a8ffdSAndrea Mayer 		ifindex = -ENODEV;
364a59a8ffdSAndrea Mayer 		goto unlock;
365a59a8ffdSAndrea Mayer 	}
366a59a8ffdSAndrea Mayer 
367a59a8ffdSAndrea Mayer 	ifindex = vrf_map_elem_get_vrf_ifindex(me);
368a59a8ffdSAndrea Mayer 
369a59a8ffdSAndrea Mayer unlock:
370a59a8ffdSAndrea Mayer 	vrf_map_unlock(vmap);
371a59a8ffdSAndrea Mayer 
372a59a8ffdSAndrea Mayer 	return ifindex;
373a59a8ffdSAndrea Mayer }
374a59a8ffdSAndrea Mayer 
/* by default VRF devices do not have a qdisc and are expected
 * to be created with only a single queue.
 */
static bool qdisc_tx_is_default(const struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	if (dev->num_tx_queues > 1)
		return false;

	txq = netdev_get_tx_queue(dev, 0);
	qdisc = rcu_access_pointer(txq->qdisc);

	/* the default (noqueue) qdisc has no enqueue handler */
	return !qdisc->enqueue;
}
391dcdd43c4SDavid Ahern 
/* Local traffic destined to local address. Reinsert the packet to rx
 * path, similar to loopback handling.
 */
static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
			  struct dst_entry *dst)
{
	/* capture the length before __netif_rx() may consume the skb */
	int len = skb->len;

	skb_orphan(skb);

	skb_dst_set(skb, dst);

	/* set pkt_type to avoid skb hitting packet taps twice -
	 * once on Tx and again in Rx processing
	 */
	skb->pkt_type = PACKET_LOOPBACK;

	skb->protocol = eth_type_trans(skb, dev);

	if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
		vrf_rx_stats(dev, len);
	else
		this_cpu_inc(dev->dstats->rx_drops);

	return NETDEV_TX_OK;
}
418afe80a49SDavid Ahern 
vrf_nf_set_untracked(struct sk_buff * skb)4198c9c296aSFlorian Westphal static void vrf_nf_set_untracked(struct sk_buff *skb)
4208c9c296aSFlorian Westphal {
4218c9c296aSFlorian Westphal 	if (skb_get_nfct(skb) == 0)
4228c9c296aSFlorian Westphal 		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
4238c9c296aSFlorian Westphal }
4248c9c296aSFlorian Westphal 
/* Drop the conntrack reference only if it is the "untracked" marker set
 * by vrf_nf_set_untracked(); real conntrack state is left alone.
 */
static void vrf_nf_reset_ct(struct sk_buff *skb)
{
	if (skb_get_nfct(skb) == IP_CT_UNTRACKED)
		nf_reset_ct(skb);
}
4308c9c296aSFlorian Westphal 
43135402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
/* IPv6 local output through the VRF: run the LOCAL_OUT netfilter hook
 * and, if the packet is accepted, hand it to dst_output().
 */
static int vrf_ip6_local_out(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	int err;

	vrf_nf_reset_ct(skb);

	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
		      sk, skb, NULL, skb_dst(skb)->dev, dst_output);

	/* nf_hook() returns 1 when the hook chain accepted the packet */
	if (likely(err == 1))
		err = dst_output(net, sk, skb);

	return err;
}
4474c1feac5SDavid Ahern 
/* Transmit path for IPv6 packets handed to the VRF device: route the
 * packet in the VRF's context and either loop it back to Rx (local
 * destination) or forward it via the resolved dst.
 */
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
					   struct net_device *dev)
{
	const struct ipv6hdr *iph;
	struct net *net = dev_net(skb->dev);
	struct flowi6 fl6;
	int ret = NET_XMIT_DROP;
	struct dst_entry *dst;
	struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;

	/* ensure the IPv6 header is in the linear area before reading it */
	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
		goto err;

	iph = ipv6_hdr(skb);

	memset(&fl6, 0, sizeof(fl6));
	/* needed to match OIF rule */
	fl6.flowi6_l3mdev = dev->ifindex;
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = iph->nexthdr;

	/* a lookup resolving to the null entry means "unroutable": drop */
	dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
	if (IS_ERR(dst) || dst == dst_null)
		goto err;

	skb_dst_drop(skb);

	/* if dst.dev is the VRF device again this is locally originated traffic
	 * destined to a local address. Short circuit to Rx path.
	 */
	if (dst->dev == dev)
		return vrf_local_xmit(skb, dev, dst);

	skb_dst_set(skb, dst);

	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));

	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
	ret = vrf_ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;

	return ret;
err:
	vrf_tx_error(dev, skb);
	return NET_XMIT_DROP;
}
50235402e31SDavid Ahern #else
/* IPv6 support compiled out: any IPv6 frame reaching the VRF is dropped */
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
					   struct net_device *dev)
{
	vrf_tx_error(dev, skb);
	return NET_XMIT_DROP;
}
50935402e31SDavid Ahern #endif
510193125dbSDavid Ahern 
/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */
static int vrf_ip_local_out(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	int err;

	vrf_nf_reset_ct(skb);

	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
		      skb, NULL, skb_dst(skb)->dev, dst_output);
	/* nf_hook() returns 1 when the hook chain accepted the packet */
	if (likely(err == 1))
		err = dst_output(net, sk, skb);

	return err;
}
526ebfc102cSDavid Ahern 
/* Transmit path for IPv4 packets handed to the VRF device: route the
 * packet in the VRF's context and either loop it back to Rx (local
 * destination) or forward it via the resolved route.
 */
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					   struct net_device *vrf_dev)
{
	struct iphdr *ip4h;
	int ret = NET_XMIT_DROP;
	struct flowi4 fl4;
	struct net *net = dev_net(vrf_dev);
	struct rtable *rt;

	/* ensure the IPv4 header is in the linear area before reading it */
	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
		goto err;

	ip4h = ip_hdr(skb);

	memset(&fl4, 0, sizeof(fl4));
	/* needed to match OIF rule */
	fl4.flowi4_l3mdev = vrf_dev->ifindex;
	fl4.flowi4_iif = LOOPBACK_IFINDEX;
	fl4.flowi4_tos = RT_TOS(ip4h->tos);
	fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
	fl4.flowi4_proto = ip4h->protocol;
	fl4.daddr = ip4h->daddr;
	fl4.saddr = ip4h->saddr;

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	skb_dst_drop(skb);

	/* if dst.dev is the VRF device again this is locally originated traffic
	 * destined to a local address. Short circuit to Rx path.
	 */
	if (rt->dst.dev == vrf_dev)
		return vrf_local_xmit(skb, vrf_dev, &rt->dst);

	skb_dst_set(skb, &rt->dst);

	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));

	/* pick a source address on the output device if none was set */
	if (!ip4h->saddr) {
		ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
					       RT_SCOPE_LINK);
	}

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
		vrf_dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;

out:
	return ret;
err:
	vrf_tx_error(vrf_dev, skb);
	goto out;
}
586193125dbSDavid Ahern 
is_ip_tx_frame(struct sk_buff * skb,struct net_device * dev)587193125dbSDavid Ahern static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
588193125dbSDavid Ahern {
589193125dbSDavid Ahern 	switch (skb->protocol) {
590193125dbSDavid Ahern 	case htons(ETH_P_IP):
591193125dbSDavid Ahern 		return vrf_process_v4_outbound(skb, dev);
592193125dbSDavid Ahern 	case htons(ETH_P_IPV6):
593193125dbSDavid Ahern 		return vrf_process_v6_outbound(skb, dev);
594193125dbSDavid Ahern 	default:
59557b8efa1SNikolay Aleksandrov 		vrf_tx_error(dev, skb);
596193125dbSDavid Ahern 		return NET_XMIT_DROP;
597193125dbSDavid Ahern 	}
598193125dbSDavid Ahern }
599193125dbSDavid Ahern 
/* ndo_start_xmit for the VRF device: forward the frame and account the
 * outcome in the per-CPU dstats.
 */
static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* read the length first: is_ip_tx_frame() may consume/free the skb */
	int len = skb->len;
	netdev_tx_t ret = is_ip_tx_frame(skb, dev);

	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

		u64_stats_update_begin(&dstats->syncp);
		dstats->tx_packets++;
		dstats->tx_bytes += len;
		u64_stats_update_end(&dstats->syncp);
	} else {
		this_cpu_inc(dev->dstats->tx_drops);
	}

	return ret;
}
618193125dbSDavid Ahern 
/* Finish direct transmission through the VRF device: if packet taps are
 * attached, rebuild a temporary ethernet header so they see a complete
 * frame, deliver to the taps, then strip it again before continuing.
 */
static void vrf_finish_direct(struct sk_buff *skb)
{
	struct net_device *vrf_dev = skb->dev;

	if (!list_empty(&vrf_dev->ptype_all) &&
	    likely(skb_headroom(skb) >= ETH_HLEN)) {
		struct ethhdr *eth = skb_push(skb, ETH_HLEN);

		ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
		eth_zero_addr(eth->h_dest);
		eth->h_proto = skb->protocol;

		rcu_read_lock_bh();
		dev_queue_xmit_nit(skb, vrf_dev);
		rcu_read_unlock_bh();

		skb_pull(skb, ETH_HLEN);
	}

	vrf_nf_reset_ct(skb);
}
640dcdd43c4SDavid Ahern 
64135402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
/* modelled after ip6_finish_output2: resolve the IPv6 neighbour for the
 * route's nexthop and hand the packet to it for transmission.
 */
static int vrf_finish_output6(struct net *net, struct sock *sk,
			      struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	vrf_nf_reset_ct(skb);

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	/* neighbour lookup (and possible creation) must run under RCU */
	rcu_read_lock();
	nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock();
		return ret;
	}
	rcu_read_unlock();

	/* no neighbour could be created: count as no-route and drop */
	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
67535402e31SDavid Ahern 
/* modelled after ip6_output: run the IPv6 POST_ROUTING netfilter hook
 * (unless the packet was already rerouted) and then vrf_finish_output6().
 */
static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, skb_dst(skb)->dev,
			    vrf_finish_output6,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
68435402e31SDavid Ahern 
/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 */
static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
					    struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rt6_info *rt6;

	/* take a reference on the VRF's IPv6 dst under RCU; it may be
	 * torn down concurrently by vrf_rt6_release()
	 */
	rcu_read_lock();

	rt6 = rcu_dereference(vrf->rt6);
	if (likely(rt6)) {
		dst = &rt6->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* dst already released (device being dismantled): drop the packet */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}
7164c1feac5SDavid Ahern 
/* continuation for the direct output path: run the packet taps, then
 * continue via vrf_ip6_local_out()
 */
static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	vrf_finish_direct(skb);

	return vrf_ip6_local_out(net, sk, skb);
}
7249e2b7fa2SMartin Willi 
/* direct (no qdisc) IPv6 output: run the POST_ROUTING hook unless the
 * packet was already rerouted. A netfilter verdict of 1 means
 * "continue", in which case the packet is finished here; any other
 * value means the hook consumed the skb.
 */
static int vrf_output6_direct(struct net *net, struct sock *sk,
			      struct sk_buff *skb)
{
	int err = 1;

	skb->protocol = htons(ETH_P_IPV6);

	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
		err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb->dev, vrf_output6_direct_finish);

	if (likely(err == 1))
		vrf_finish_direct(skb);

	return err;
}
7419e2b7fa2SMartin Willi 
/* LOCAL_OUT continuation for the direct path: finish the direct output
 * and, if the skb is still live (verdict 1), hand it to local output.
 */
static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	int rc = vrf_output6_direct(net, sk, skb);

	return rc == 1 ? vrf_ip6_local_out(net, sk, skb) : rc;
}
753a9ec54d1SDavid Ahern 
/* direct IPv6 output entry: run LOCAL_OUT with the VRF device as the
 * output device, then the direct output path. The skb has been consumed
 * by the hooks unless they returned 1.
 */
static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
					  struct sock *sk,
					  struct sk_buff *skb)
{
	struct net *net = dev_net(vrf_dev);
	int err;

	skb->dev = vrf_dev;

	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
		      skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);

	if (likely(err == 1))
		err = vrf_output6_direct(net, sk, skb);

	/* return the live skb to the caller, or NULL if it was consumed */
	if (likely(err == 1))
		return skb;

	return NULL;
}
774a9ec54d1SDavid Ahern 
/* divert locally generated IPv6 traffic out through the VRF device */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	/* don't divert link scope packets */
	if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
		return skb;

	vrf_nf_set_untracked(skb);

	/* with the default qdisc (or XFRM-transformed skbs) skip the
	 * dst-based redirect and go straight out the direct path
	 */
	if (qdisc_tx_is_default(vrf_dev) ||
	    IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return vrf_ip6_out_direct(vrf_dev, sk, skb);

	return vrf_ip6_out_redirect(vrf_dev, skb);
}
791a9ec54d1SDavid Ahern 
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
	struct net *net = dev_net(dev);
	struct dst_entry *dst;

	/* clear the pointer first and wait for all RCU readers
	 * (vrf_ip6_out_redirect) to finish before releasing the dst
	 */
	RCU_INIT_POINTER(vrf->rt6, NULL);
	synchronize_rcu();

	/* move dev in dst's to loopback so this VRF device can be deleted
	 * - based on dst_ifdown
	 */
	if (rt6) {
		dst = &rt6->dst;
		netdev_ref_replace(dst->dev, net->loopback_dev,
				   &dst->dev_tracker, GFP_KERNEL);
		dst->dev = net->loopback_dev;
		dst_release(dst);
	}
}
81335402e31SDavid Ahern 
/* set up the IPv6 side of a VRF device: its FIB table and the dst used
 * to steer output packets through the device. Returns 0 on success
 * (including when IPv6 is disabled) or -ENOMEM on failure.
 */
static int vrf_rt6_create(struct net_device *dev)
{
	int flags = DST_NOPOLICY | DST_NOXFRM;
	struct net_vrf *vrf = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct rt6_info *rt6;
	int rc = -ENOMEM;

	/* IPv6 can be CONFIG enabled and then disabled runtime */
	if (!ipv6_mod_enabled())
		return 0;

	vrf->fib6_table = fib6_new_table(net, vrf->tb_id);
	if (!vrf->fib6_table)
		goto out;

	/* create a dst for routing packets out a VRF device */
	rt6 = ip6_dst_alloc(net, dev, flags);
	if (!rt6)
		goto out;

	rt6->dst.output	= vrf_output6;

	rcu_assign_pointer(vrf->rt6, rt6);

	rc = 0;
out:
	return rc;
}
84335402e31SDavid Ahern #else
/* IPv6 disabled: nothing to divert, pass the skb through unchanged */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	return skb;
}
8504c1feac5SDavid Ahern 
/* IPv6 disabled: no IPv6 dst to release */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
85435402e31SDavid Ahern 
/* IPv6 disabled: nothing to create, always succeeds */
static int vrf_rt6_create(struct net_device *dev)
{
	return 0;
}
85935402e31SDavid Ahern #endif
86035402e31SDavid Ahern 
/* modelled after ip_finish_output2: resolve the neighbour for the
 * route's gateway (IPv4 or IPv6 nexthop) and transmit through it.
 */
static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	struct neighbour *neigh;
	bool is_v6gw = false;

	vrf_nf_reset_ct(skb);

	/* Be paranoid, rather than too clever. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		/* make room for the link-layer header */
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			dev->stats.tx_errors++;
			return -ENOMEM;
		}
	}

	rcu_read_lock();

	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
	if (!IS_ERR(neigh)) {
		int ret;

		sock_confirm_neigh(skb, neigh);
		/* if crossing protocols, can not use the cached header */
		ret = neigh_output(neigh, skb, is_v6gw);
		rcu_read_unlock();
		return ret;
	}

	rcu_read_unlock();
	vrf_tx_error(skb->dev, skb);
	return -EINVAL;
}
899193125dbSDavid Ahern 
/* dst output handler for the VRF's IPv4 dst: update output stats and run
 * the POST_ROUTING hook (unless rerouted) before vrf_finish_output().
 */
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    vrf_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
914193125dbSDavid Ahern 
/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 */
static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
					   struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rtable *rth;

	/* take a reference on the VRF's IPv4 dst under RCU; it may be
	 * torn down concurrently by vrf_rtable_release()
	 */
	rcu_read_lock();

	rth = rcu_dereference(vrf->rth);
	if (likely(rth)) {
		dst = &rth->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* dst already released (device being dismantled): drop the packet */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}
946ebfc102cSDavid Ahern 
/* continuation for the direct IPv4 output path: run the packet taps,
 * then continue via vrf_ip_local_out()
 */
static int vrf_output_direct_finish(struct net *net, struct sock *sk,
				    struct sk_buff *skb)
{
	vrf_finish_direct(skb);

	return vrf_ip_local_out(net, sk, skb);
}
9549e2b7fa2SMartin Willi 
/* direct (no qdisc) IPv4 output: run the POST_ROUTING hook unless the
 * packet was already rerouted. A netfilter verdict of 1 means
 * "continue", in which case the packet is finished here; any other
 * value means the hook consumed the skb.
 */
static int vrf_output_direct(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	int err = 1;

	skb->protocol = htons(ETH_P_IP);

	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
		err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb->dev, vrf_output_direct_finish);

	if (likely(err == 1))
		vrf_finish_direct(skb);

	return err;
}
9719e2b7fa2SMartin Willi 
/* LOCAL_OUT continuation for the direct IPv4 path: finish the direct
 * output and, if the skb is still live (verdict 1), hand it to local
 * output.
 */
static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
				    struct sk_buff *skb)
{
	int rc = vrf_output_direct(net, sk, skb);

	return rc == 1 ? vrf_ip_local_out(net, sk, skb) : rc;
}
983dcdd43c4SDavid Ahern 
/* direct IPv4 output entry: run LOCAL_OUT with the VRF device as the
 * output device, then the direct output path. The skb has been consumed
 * by the hooks unless they returned 1.
 */
static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
					 struct sock *sk,
					 struct sk_buff *skb)
{
	struct net *net = dev_net(vrf_dev);
	int err;

	skb->dev = vrf_dev;

	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
		      skb, NULL, vrf_dev, vrf_ip_out_direct_finish);

	if (likely(err == 1))
		err = vrf_output_direct(net, sk, skb);

	/* return the live skb to the caller, or NULL if it was consumed */
	if (likely(err == 1))
		return skb;

	return NULL;
}
1004dcdd43c4SDavid Ahern 
/* divert locally generated IPv4 traffic out through the VRF device */
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
				  struct sock *sk,
				  struct sk_buff *skb)
{
	/* don't divert multicast or local broadcast */
	if (ipv4_is_multicast(ip_hdr(skb)->daddr) ||
	    ipv4_is_lbcast(ip_hdr(skb)->daddr))
		return skb;

	vrf_nf_set_untracked(skb);

	/* with the default qdisc (or XFRM-transformed skbs) skip the
	 * dst-based redirect and go straight out the direct path
	 */
	if (qdisc_tx_is_default(vrf_dev) ||
	    IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return vrf_ip_out_direct(vrf_dev, sk, skb);

	return vrf_ip_out_redirect(vrf_dev, skb);
}
1022dcdd43c4SDavid Ahern 
1023ebfc102cSDavid Ahern /* called with rcu lock held */
vrf_l3_out(struct net_device * vrf_dev,struct sock * sk,struct sk_buff * skb,u16 proto)1024ebfc102cSDavid Ahern static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
1025ebfc102cSDavid Ahern 				  struct sock *sk,
1026ebfc102cSDavid Ahern 				  struct sk_buff *skb,
1027ebfc102cSDavid Ahern 				  u16 proto)
1028ebfc102cSDavid Ahern {
1029ebfc102cSDavid Ahern 	switch (proto) {
1030ebfc102cSDavid Ahern 	case AF_INET:
1031ebfc102cSDavid Ahern 		return vrf_ip_out(vrf_dev, sk, skb);
10324c1feac5SDavid Ahern 	case AF_INET6:
10334c1feac5SDavid Ahern 		return vrf_ip6_out(vrf_dev, sk, skb);
1034ebfc102cSDavid Ahern 	}
1035ebfc102cSDavid Ahern 
1036ebfc102cSDavid Ahern 	return skb;
1037ebfc102cSDavid Ahern }
1038ebfc102cSDavid Ahern 
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rtable *rth = rtnl_dereference(vrf->rth);
	struct net *net = dev_net(dev);
	struct dst_entry *dst;

	/* clear the pointer first and wait for all RCU readers
	 * (vrf_ip_out_redirect) to finish before releasing the dst
	 */
	RCU_INIT_POINTER(vrf->rth, NULL);
	synchronize_rcu();

	/* move dev in dst's to loopback so this VRF device can be deleted
	 * - based on dst_ifdown
	 */
	if (rth) {
		dst = &rth->dst;
		netdev_ref_replace(dst->dev, net->loopback_dev,
				   &dst->dev_tracker, GFP_KERNEL);
		dst->dev = net->loopback_dev;
		dst_release(dst);
	}
}
1060193125dbSDavid Ahern 
/* set up the IPv4 side of a VRF device: ensure its FIB table exists and
 * create the dst used to steer output packets through the device.
 * Returns 0 on success or -ENOMEM on failure.
 */
static int vrf_rtable_create(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);
	struct rtable *rth;

	if (!fib_new_table(dev_net(dev), vrf->tb_id))
		return -ENOMEM;

	/* create a dst for routing packets out through a VRF device */
	rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1);
	if (!rth)
		return -ENOMEM;

	rth->dst.output	= vrf_output;

	rcu_assign_pointer(vrf->rth, rth);

	return 0;
}
1080193125dbSDavid Ahern 
1081193125dbSDavid Ahern /**************************** device handling ********************/
1082193125dbSDavid Ahern 
/* cycle interface to flush neighbor cache and move routes across tables */
static void cycle_netdev(struct net_device *dev,
			 struct netlink_ext_ack *extack)
{
	unsigned int flags = dev->flags;
	int ret;

	if (!netif_running(dev))
		return;

	/* take the device down, then restore its previous flags */
	ret = dev_change_flags(dev, flags & ~IFF_UP, extack);
	if (ret >= 0)
		ret = dev_change_flags(dev, flags, extack);

	if (ret < 0) {
		netdev_err(dev,
			   "Failed to cycle device %s; route tables might be wrong!\n",
			   dev->name);
	}
}
1103193125dbSDavid Ahern 
/* enslave port_dev to the VRF master device dev */
static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
			    struct netlink_ext_ack *extack)
{
	int ret;

	/* do not allow loopback device to be enslaved to a VRF.
	 * The vrf device acts as the loopback for the vrf.
	 */
	if (port_dev == dev_net(dev)->loopback_dev) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave loopback device to a VRF");
		return -EOPNOTSUPP;
	}

	/* flag is set before linking and rolled back on failure */
	port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
	ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack);
	if (ret < 0)
		goto err;

	/* flush neighbors and move the port's routes across tables */
	cycle_netdev(port_dev, extack);

	return 0;

err:
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
	return ret;
}
1131193125dbSDavid Ahern 
/* ndo_add_slave handler: validate and enslave port_dev to the VRF */
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
			 struct netlink_ext_ack *extack)
{
	if (netif_is_l3_master(port_dev)) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave an L3 master device to a VRF");
		return -EINVAL;
	}

	/* already enslaved to an L3 master device */
	if (netif_is_l3_slave(port_dev))
		return -EINVAL;

	return do_vrf_add_slave(dev, port_dev, extack);
}
1146193125dbSDavid Ahern 
/* inverse of do_vrf_add_slave */
static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	netdev_upper_dev_unlink(port_dev, dev);
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;

	/* flush neighbors and move the port's routes across tables */
	cycle_netdev(port_dev, NULL);

	return 0;
}
1157193125dbSDavid Ahern 
/* ndo_del_slave handler: release port_dev from the VRF */
static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	return do_vrf_del_slave(dev, port_dev);
}
1162193125dbSDavid Ahern 
/* ndo_uninit: drop the IPv4 and IPv6 dsts created in vrf_dev_init() */
static void vrf_dev_uninit(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	vrf_rtable_release(dev, vrf);
	vrf_rt6_release(dev, vrf);
}
1170193125dbSDavid Ahern 
/* ndo_init: set up the per-family dsts and default device state */
static int vrf_dev_init(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	/* create the default dst which points back to us */
	if (vrf_rtable_create(dev) != 0)
		goto out_nomem;

	if (vrf_rt6_create(dev) != 0)
		goto out_rth;

	dev->flags = IFF_MASTER | IFF_NOARP;

	/* similarly, oper state is irrelevant; set to up to avoid confusion */
	dev->operstate = IF_OPER_UP;
	netdev_lockdep_set_classes(dev);
	return 0;

out_rth:
	vrf_rtable_release(dev, vrf);
out_nomem:
	return -ENOMEM;
}
1194193125dbSDavid Ahern 
/* net_device_ops for the VRF master device */
static const struct net_device_ops vrf_netdev_ops = {
	.ndo_init		= vrf_dev_init,
	.ndo_uninit		= vrf_dev_uninit,
	.ndo_start_xmit		= vrf_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_get_stats64	= vrf_get_stats64,
	.ndo_add_slave		= vrf_add_slave,
	.ndo_del_slave		= vrf_del_slave,
};
1204193125dbSDavid Ahern 
vrf_fib_table(const struct net_device * dev)1205ee15ee5dSDavid Ahern static u32 vrf_fib_table(const struct net_device *dev)
1206ee15ee5dSDavid Ahern {
1207ee15ee5dSDavid Ahern 	struct net_vrf *vrf = netdev_priv(dev);
1208ee15ee5dSDavid Ahern 
1209ee15ee5dSDavid Ahern 	return vrf->tb_id;
1210ee15ee5dSDavid Ahern }
1211ee15ee5dSDavid Ahern 
/* okfn for vrf_rcv_nfhook: the skb is not reinjected, just freed */
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
121773e20b76SDavid Ahern 
/* run a netfilter hook on a received skb; returns the skb if it survived
 * the hook, or NULL if it was consumed (freeing is handled by netfilter
 * or by vrf_rcv_finish)
 */
static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
				      struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);

	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
		skb = NULL;    /* kfree_skb(skb) handled by nf code */

	return skb;
}
122973e20b76SDavid Ahern 
/* build a synthetic ethernet header addressed to the VRF device itself so
 * the packet is treated as locally destined; returns 0 on success or
 * -ENOBUFS if the skb head cannot be expanded
 */
static int vrf_prepare_mac_header(struct sk_buff *skb,
				  struct net_device *vrf_dev, u16 proto)
{
	struct ethhdr *eth;
	int err;

	/* in general, we do not know if there is enough space in the head of
	 * the packet for hosting the mac header.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev));
	if (unlikely(err))
		/* no space in the skb head */
		return -ENOBUFS;

	__skb_push(skb, ETH_HLEN);
	eth = (struct ethhdr *)skb->data;

	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	/* we set the ethernet destination and the source addresses to the
	 * address of the VRF device.
	 */
	ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);
	ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
	eth->h_proto = htons(proto);

	/* the destination address of the Ethernet frame corresponds to the
	 * address set on the VRF interface; therefore, the packet is intended
	 * to be processed locally.
	 */
	skb->protocol = eth->h_proto;
	skb->pkt_type = PACKET_HOST;

	skb_postpush_rcsum(skb, skb->data, ETH_HLEN);

	skb_pull_inline(skb, ETH_HLEN);

	return 0;
}
12709125abe7SAndrea Mayer 
12719125abe7SAndrea Mayer /* prepare and add the mac header to the packet if it was not set previously.
12729125abe7SAndrea Mayer  * In this way, packet sniffers such as tcpdump can parse the packet correctly.
12739125abe7SAndrea Mayer  * If the mac header was already set, the original mac header is left
12749125abe7SAndrea Mayer  * untouched and the function returns immediately.
12759125abe7SAndrea Mayer  */
vrf_add_mac_header_if_unset(struct sk_buff * skb,struct net_device * vrf_dev,u16 proto,struct net_device * orig_dev)12769125abe7SAndrea Mayer static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
12779125abe7SAndrea Mayer 				       struct net_device *vrf_dev,
1278012d69fbSEyal Birger 				       u16 proto, struct net_device *orig_dev)
12799125abe7SAndrea Mayer {
1280012d69fbSEyal Birger 	if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev))
12819125abe7SAndrea Mayer 		return 0;
12829125abe7SAndrea Mayer 
12839125abe7SAndrea Mayer 	return vrf_prepare_mac_header(skb, vrf_dev, proto);
12849125abe7SAndrea Mayer }
12859125abe7SAndrea Mayer 
128635402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
128774b20582SDavid Ahern /* neighbor handling is done with actual device; do not want
128874b20582SDavid Ahern  * to flip skb->dev for those ndisc packets. This really fails
128974b20582SDavid Ahern  * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
129074b20582SDavid Ahern  * a start.
129174b20582SDavid Ahern  */
ipv6_ndisc_frame(const struct sk_buff * skb)129274b20582SDavid Ahern static bool ipv6_ndisc_frame(const struct sk_buff *skb)
129374b20582SDavid Ahern {
129474b20582SDavid Ahern 	const struct ipv6hdr *iph = ipv6_hdr(skb);
129574b20582SDavid Ahern 	bool rc = false;
129674b20582SDavid Ahern 
129774b20582SDavid Ahern 	if (iph->nexthdr == NEXTHDR_ICMP) {
129874b20582SDavid Ahern 		const struct icmp6hdr *icmph;
129974b20582SDavid Ahern 		struct icmp6hdr _icmph;
130074b20582SDavid Ahern 
130174b20582SDavid Ahern 		icmph = skb_header_pointer(skb, sizeof(*iph),
130274b20582SDavid Ahern 					   sizeof(_icmph), &_icmph);
130374b20582SDavid Ahern 		if (!icmph)
130474b20582SDavid Ahern 			goto out;
130574b20582SDavid Ahern 
130674b20582SDavid Ahern 		switch (icmph->icmp6_type) {
130774b20582SDavid Ahern 		case NDISC_ROUTER_SOLICITATION:
130874b20582SDavid Ahern 		case NDISC_ROUTER_ADVERTISEMENT:
130974b20582SDavid Ahern 		case NDISC_NEIGHBOUR_SOLICITATION:
131074b20582SDavid Ahern 		case NDISC_NEIGHBOUR_ADVERTISEMENT:
131174b20582SDavid Ahern 		case NDISC_REDIRECT:
131274b20582SDavid Ahern 			rc = true;
131374b20582SDavid Ahern 			break;
131474b20582SDavid Ahern 		}
131574b20582SDavid Ahern 	}
131674b20582SDavid Ahern 
131774b20582SDavid Ahern out:
131874b20582SDavid Ahern 	return rc;
131974b20582SDavid Ahern }
132074b20582SDavid Ahern 
vrf_ip6_route_lookup(struct net * net,const struct net_device * dev,struct flowi6 * fl6,int ifindex,const struct sk_buff * skb,int flags)13219ff74384SDavid Ahern static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
13229ff74384SDavid Ahern 					     const struct net_device *dev,
13239ff74384SDavid Ahern 					     struct flowi6 *fl6,
13249ff74384SDavid Ahern 					     int ifindex,
1325b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
13269ff74384SDavid Ahern 					     int flags)
13279ff74384SDavid Ahern {
13289ff74384SDavid Ahern 	struct net_vrf *vrf = netdev_priv(dev);
13299ff74384SDavid Ahern 
133043b059a3SDavid Ahern 	return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
13319ff74384SDavid Ahern }
13329ff74384SDavid Ahern 
vrf_ip6_input_dst(struct sk_buff * skb,struct net_device * vrf_dev,int ifindex)13339ff74384SDavid Ahern static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
13349ff74384SDavid Ahern 			      int ifindex)
13359ff74384SDavid Ahern {
13369ff74384SDavid Ahern 	const struct ipv6hdr *iph = ipv6_hdr(skb);
13379ff74384SDavid Ahern 	struct flowi6 fl6 = {
1338ecf09117SArnd Bergmann 		.flowi6_iif     = ifindex,
1339ecf09117SArnd Bergmann 		.flowi6_mark    = skb->mark,
1340ecf09117SArnd Bergmann 		.flowi6_proto   = iph->nexthdr,
13419ff74384SDavid Ahern 		.daddr          = iph->daddr,
13429ff74384SDavid Ahern 		.saddr          = iph->saddr,
13439ff74384SDavid Ahern 		.flowlabel      = ip6_flowinfo(iph),
13449ff74384SDavid Ahern 	};
13459ff74384SDavid Ahern 	struct net *net = dev_net(vrf_dev);
13469ff74384SDavid Ahern 	struct rt6_info *rt6;
13479ff74384SDavid Ahern 
1348b75cc8f9SDavid Ahern 	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
13499ff74384SDavid Ahern 				   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
13509ff74384SDavid Ahern 	if (unlikely(!rt6))
13519ff74384SDavid Ahern 		return;
13529ff74384SDavid Ahern 
13539ff74384SDavid Ahern 	if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst))
13549ff74384SDavid Ahern 		return;
13559ff74384SDavid Ahern 
13569ff74384SDavid Ahern 	skb_dst_set(skb, &rt6->dst);
13579ff74384SDavid Ahern }
13589ff74384SDavid Ahern 
/* Handle an IPv6 packet received on a device enslaved to @vrf_dev:
 * switch it to the VRF device where appropriate, deliver a copy to
 * packet taps listening on the VRF device, and run the PRE_ROUTING
 * netfilter hook. Returns the skb, or NULL if netfilter consumed it.
 */
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	int orig_iif = skb->skb_iif;
	bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
	bool is_ndisc = ipv6_ndisc_frame(skb);

	/* loopback, multicast & non-ND link-local traffic; do not push through
	 * packet taps again. Reset pkt_type for upper layers to process skb.
	 * For non-loopback strict packets, determine the dst using the original
	 * ifindex.
	 */
	if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;
		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;

		if (skb->pkt_type == PACKET_LOOPBACK)
			skb->pkt_type = PACKET_HOST;
		else
			vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

		goto out;
	}

	/* if packet is NDISC then keep the ingress interface */
	if (!is_ndisc) {
		struct net_device *orig_dev = skb->dev;

		vrf_rx_stats(vrf_dev, skb->len);
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;

		/* deliver a copy to taps (e.g. tcpdump) bound to the VRF
		 * device; a mac header is synthesized first if needed so
		 * the taps can parse the frame.
		 */
		if (!list_empty(&vrf_dev->ptype_all)) {
			int err;

			err = vrf_add_mac_header_if_unset(skb, vrf_dev,
							  ETH_P_IPV6,
							  orig_dev);
			if (likely(!err)) {
				skb_push(skb, skb->mac_len);
				dev_queue_xmit_nit(skb, vrf_dev);
				skb_pull(skb, skb->mac_len);
			}
		}

		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
	}

	if (need_strict)
		vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

	skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}
141574b20582SDavid Ahern 
141674b20582SDavid Ahern #else
/* IPv6 support disabled: hand the packet back unmodified */
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	return skb;
}
142274b20582SDavid Ahern #endif
142374b20582SDavid Ahern 
/* Handle an IPv4 packet received on a device enslaved to @vrf_dev:
 * switch it to the VRF device, deliver a copy to packet taps listening
 * on the VRF device and run the PRE_ROUTING netfilter hook. Returns
 * the skb, or NULL if netfilter consumed it.
 */
static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb)
{
	struct net_device *orig_dev = skb->dev;

	skb->dev = vrf_dev;
	skb->skb_iif = vrf_dev->ifindex;
	IPCB(skb)->flags |= IPSKB_L3SLAVE;

	/* multicast traffic: skip the taps and the netfilter hook */
	if (ipv4_is_multicast(ip_hdr(skb)->daddr))
		goto out;

	/* loopback traffic; do not push through packet taps again.
	 * Reset pkt_type for upper layers to process skb
	 */
	if (skb->pkt_type == PACKET_LOOPBACK) {
		skb->pkt_type = PACKET_HOST;
		goto out;
	}

	vrf_rx_stats(vrf_dev, skb->len);

	/* deliver a copy to taps (e.g. tcpdump) bound to the VRF device;
	 * a mac header is synthesized first if needed so the taps can
	 * parse the frame.
	 */
	if (!list_empty(&vrf_dev->ptype_all)) {
		int err;

		err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP,
						  orig_dev);
		if (likely(!err)) {
			skb_push(skb, skb->mac_len);
			dev_queue_xmit_nit(skb, vrf_dev);
			skb_pull(skb, skb->mac_len);
		}
	}

	skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}
146274b20582SDavid Ahern 
146374b20582SDavid Ahern /* called with rcu lock held */
vrf_l3_rcv(struct net_device * vrf_dev,struct sk_buff * skb,u16 proto)146474b20582SDavid Ahern static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
146574b20582SDavid Ahern 				  struct sk_buff *skb,
146674b20582SDavid Ahern 				  u16 proto)
146774b20582SDavid Ahern {
146874b20582SDavid Ahern 	switch (proto) {
146974b20582SDavid Ahern 	case AF_INET:
147074b20582SDavid Ahern 		return vrf_ip_rcv(vrf_dev, skb);
147174b20582SDavid Ahern 	case AF_INET6:
147274b20582SDavid Ahern 		return vrf_ip6_rcv(vrf_dev, skb);
147374b20582SDavid Ahern 	}
147474b20582SDavid Ahern 
147574b20582SDavid Ahern 	return skb;
147674b20582SDavid Ahern }
147774b20582SDavid Ahern 
147874b20582SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
14794c1feac5SDavid Ahern /* send to link-local or multicast address via interface enslaved to
14804c1feac5SDavid Ahern  * VRF device. Force lookup to VRF table without changing flow struct
14817d9e5f42SWei Wang  * Note: Caller to this function must hold rcu_read_lock() and no refcnt
14827d9e5f42SWei Wang  * is taken on the dst by this function.
14834c1feac5SDavid Ahern  */
vrf_link_scope_lookup(const struct net_device * dev,struct flowi6 * fl6)14844c1feac5SDavid Ahern static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1485cd2a9e62SDavid Ahern 					      struct flowi6 *fl6)
148635402e31SDavid Ahern {
14879ff74384SDavid Ahern 	struct net *net = dev_net(dev);
14887d9e5f42SWei Wang 	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
14899ff74384SDavid Ahern 	struct dst_entry *dst = NULL;
1490b0e95ccdSDavid Ahern 	struct rt6_info *rt;
149135402e31SDavid Ahern 
14929ff74384SDavid Ahern 	/* VRF device does not have a link-local address and
14939ff74384SDavid Ahern 	 * sending packets to link-local or mcast addresses over
14949ff74384SDavid Ahern 	 * a VRF device does not make sense
14959ff74384SDavid Ahern 	 */
14969ff74384SDavid Ahern 	if (fl6->flowi6_oif == dev->ifindex) {
14974c1feac5SDavid Ahern 		dst = &net->ipv6.ip6_null_entry->dst;
14989ff74384SDavid Ahern 		return dst;
14999ff74384SDavid Ahern 	}
15009ff74384SDavid Ahern 
15019ff74384SDavid Ahern 	if (!ipv6_addr_any(&fl6->saddr))
15029ff74384SDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
15039ff74384SDavid Ahern 
1504b75cc8f9SDavid Ahern 	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
15059ff74384SDavid Ahern 	if (rt)
15069ff74384SDavid Ahern 		dst = &rt->dst;
15079ff74384SDavid Ahern 
1508b0e95ccdSDavid Ahern 	return dst;
150935402e31SDavid Ahern }
151035402e31SDavid Ahern #endif
151135402e31SDavid Ahern 
/* l3mdev callbacks: FIB table selection plus rx/tx packet hooks */
static const struct l3mdev_ops vrf_l3mdev_ops = {
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
	.l3mdev_l3_out		= vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
#endif
};
1520ee15ee5dSDavid Ahern 
vrf_get_drvinfo(struct net_device * dev,struct ethtool_drvinfo * info)1521193125dbSDavid Ahern static void vrf_get_drvinfo(struct net_device *dev,
1522193125dbSDavid Ahern 			    struct ethtool_drvinfo *info)
1523193125dbSDavid Ahern {
1524fb3ceec1SWolfram Sang 	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
1525fb3ceec1SWolfram Sang 	strscpy(info->version, DRV_VERSION, sizeof(info->version));
1526193125dbSDavid Ahern }
1527193125dbSDavid Ahern 
/* only drvinfo is exposed through ethtool */
static const struct ethtool_ops vrf_ethtool_ops = {
	.get_drvinfo	= vrf_get_drvinfo,
};
1531193125dbSDavid Ahern 
vrf_fib_rule_nl_size(void)15321aa6c4f6SDavid Ahern static inline size_t vrf_fib_rule_nl_size(void)
15331aa6c4f6SDavid Ahern {
15341aa6c4f6SDavid Ahern 	size_t sz;
15351aa6c4f6SDavid Ahern 
15361aa6c4f6SDavid Ahern 	sz  = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
15371aa6c4f6SDavid Ahern 	sz += nla_total_size(sizeof(u8));	/* FRA_L3MDEV */
15381aa6c4f6SDavid Ahern 	sz += nla_total_size(sizeof(u32));	/* FRA_PRIORITY */
15391b71af60SDonald Sharp 	sz += nla_total_size(sizeof(u8));       /* FRA_PROTOCOL */
15401aa6c4f6SDavid Ahern 
15411aa6c4f6SDavid Ahern 	return sz;
15421aa6c4f6SDavid Ahern }
15431aa6c4f6SDavid Ahern 
/* Add or remove the l3mdev FIB rule for @family by building a netlink
 * message locally and feeding it to fib_nl_{new,del}rule. Returns 0 on
 * success — including "already exists" on add and "not found" on delete —
 * or a negative errno.
 */
static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
{
	struct fib_rule_hdr *frh;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	int err;

	/* nothing to do for the IPv6 families when ipv6 is not enabled */
	if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
	    !ipv6_mod_enabled())
		return 0;

	skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0);
	if (!nlh)
		goto nla_put_failure;

	/* rule only needs to appear once */
	nlh->nlmsg_flags |= NLM_F_EXCL;

	frh = nlmsg_data(nlh);
	memset(frh, 0, sizeof(*frh));
	frh->family = family;
	frh->action = FR_ACT_TO_TBL;

	if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
		goto nla_put_failure;

	if (nla_put_u8(skb, FRA_L3MDEV, 1))
		goto nla_put_failure;

	if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	/* fib_nl_{new,del}rule handling looks for net from skb->sk */
	skb->sk = dev_net(dev)->rtnl;
	if (add_it) {
		err = fib_nl_newrule(skb, nlh, NULL);
		if (err == -EEXIST)
			err = 0;
	} else {
		err = fib_nl_delrule(skb, nlh, NULL);
		if (err == -ENOENT)
			err = 0;
	}
	nlmsg_free(skb);

	return err;

nla_put_failure:
	nlmsg_free(skb);

	return -EMSGSIZE;
}
16021aa6c4f6SDavid Ahern 
/* Install the l3mdev FIB rules for every supported family, removing the
 * already-installed ones if a later family fails. Returns 0 or the
 * negative errno of the failing vrf_fib_rule() call.
 */
static int vrf_add_fib_rules(const struct net_device *dev)
{
	int err;

	err = vrf_fib_rule(dev, AF_INET,  true);
	if (err < 0)
		goto out_err;

	err = vrf_fib_rule(dev, AF_INET6, true);
	if (err < 0)
		goto ipv6_err;

#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
	err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true);
	if (err < 0)
		goto ipmr_err;
#endif

#if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES)
	err = vrf_fib_rule(dev, RTNL_FAMILY_IP6MR, true);
	if (err < 0)
		goto ip6mr_err;
#endif

	return 0;

	/* error unwind: each label removes the rules added before the
	 * failing family, falling through the ladder to out_err.
	 */
#if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES)
ip6mr_err:
	vrf_fib_rule(dev, RTNL_FAMILY_IPMR,  false);
#endif

#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
ipmr_err:
	vrf_fib_rule(dev, AF_INET6,  false);
#endif

ipv6_err:
	vrf_fib_rule(dev, AF_INET,  false);

out_err:
	netdev_err(dev, "Failed to add FIB rules.\n");
	return err;
}
16461aa6c4f6SDavid Ahern 
/* rtnl_link_ops->setup: initialize a freshly allocated VRF net_device */
static void vrf_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* Initialize the device structure. */
	dev->netdev_ops = &vrf_netdev_ops;
	dev->l3mdev_ops = &vrf_l3mdev_ops;
	dev->ethtool_ops = &vrf_ethtool_ops;
	dev->needs_free_netdev = true;

	/* Fill in device structure with ethernet-generic values. */
	eth_hw_addr_random(dev);

	/* don't acquire vrf device's netif_tx_lock when transmitting */
	dev->features |= NETIF_F_LLTX;

	/* don't allow vrf devices to change network namespaces. */
	dev->features |= NETIF_F_NETNS_LOCAL;

	/* does not make sense for a VLAN to be added to a vrf device */
	dev->features   |= NETIF_F_VLAN_CHALLENGED;

	/* enable offload features */
	dev->features   |= NETIF_F_GSO_SOFTWARE;
	dev->features   |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC;
	dev->features   |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;

	dev->hw_features = dev->features;
	dev->hw_enc_features = dev->features;

	/* default to no qdisc; user can add if desired */
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_NO_RX_HANDLER;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	/* VRF devices do not care about MTU, but if the MTU is set
	 * too low then the ipv4 and ipv6 protocols are disabled
	 * which breaks networking.
	 */
	dev->min_mtu = IPV6_MIN_MTU;
	dev->max_mtu = IP6_MAX_MTU;
	dev->mtu = dev->max_mtu;

	/* use per-cpu dstats for device statistics */
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
}
1692193125dbSDavid Ahern 
vrf_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1693a8b8a889SMatthias Schiffer static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
1694a8b8a889SMatthias Schiffer 			struct netlink_ext_ack *extack)
1695193125dbSDavid Ahern {
1696193125dbSDavid Ahern 	if (tb[IFLA_ADDRESS]) {
169753b94835SDavid Ahern 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
169853b94835SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid hardware address");
1699193125dbSDavid Ahern 			return -EINVAL;
170053b94835SDavid Ahern 		}
170153b94835SDavid Ahern 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
170253b94835SDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid hardware address");
1703193125dbSDavid Ahern 			return -EADDRNOTAVAIL;
1704193125dbSDavid Ahern 		}
170553b94835SDavid Ahern 	}
1706193125dbSDavid Ahern 	return 0;
1707193125dbSDavid Ahern }
1708193125dbSDavid Ahern 
vrf_dellink(struct net_device * dev,struct list_head * head)1709193125dbSDavid Ahern static void vrf_dellink(struct net_device *dev, struct list_head *head)
1710193125dbSDavid Ahern {
1711f630c38eSNikolay Aleksandrov 	struct net_device *port_dev;
1712f630c38eSNikolay Aleksandrov 	struct list_head *iter;
1713f630c38eSNikolay Aleksandrov 
1714f630c38eSNikolay Aleksandrov 	netdev_for_each_lower_dev(dev, port_dev, iter)
1715f630c38eSNikolay Aleksandrov 		vrf_del_slave(dev, port_dev);
1716f630c38eSNikolay Aleksandrov 
1717c8baec38SAndrea Mayer 	vrf_map_unregister_dev(dev);
1718c8baec38SAndrea Mayer 
1719193125dbSDavid Ahern 	unregister_netdevice_queue(dev, head);
1720193125dbSDavid Ahern }
1721193125dbSDavid Ahern 
/* rtnl_link_ops->newlink: create a VRF device bound to the table id in
 * IFLA_VRF_TABLE. Registers the netdevice, records the table<->vrf
 * association and, on the first VRF created in the netns, installs the
 * l3mdev FIB rules.
 */
static int vrf_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[],
		       struct netlink_ext_ack *extack)
{
	struct net_vrf *vrf = netdev_priv(dev);
	struct netns_vrf *nn_vrf;
	bool *add_fib_rules;
	struct net *net;
	int err;

	if (!data || !data[IFLA_VRF_TABLE]) {
		NL_SET_ERR_MSG(extack, "VRF table id is missing");
		return -EINVAL;
	}

	vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
	if (vrf->tb_id == RT_TABLE_UNSPEC) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE],
				    "Invalid VRF table id");
		return -EINVAL;
	}

	dev->priv_flags |= IFF_L3MDEV_MASTER;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* mapping between table_id and vrf;
	 * note: such binding could not be done in the dev init function
	 * because dev->ifindex id is not available yet.
	 */
	vrf->ifindex = dev->ifindex;

	err = vrf_map_register_dev(dev, extack);
	if (err) {
		unregister_netdevice(dev);
		goto out;
	}

	net = dev_net(dev);
	nn_vrf = net_generic(net, vrf_net_id);

	add_fib_rules = &nn_vrf->add_fib_rules;
	if (*add_fib_rules) {
		err = vrf_add_fib_rules(dev);
		if (err) {
			/* unwind the map registration and the netdevice */
			vrf_map_unregister_dev(dev);
			unregister_netdevice(dev);
			goto out;
		}
		/* FIB rules only need to be installed once per netns */
		*add_fib_rules = false;
	}

out:
	return err;
}
1779193125dbSDavid Ahern 
/* rtnl_link_ops->get_size: netlink attribute space for this device */
static size_t vrf_nl_getsize(const struct net_device *dev)
{
	return nla_total_size(sizeof(u32));  /* IFLA_VRF_TABLE */
}
1784193125dbSDavid Ahern 
vrf_fillinfo(struct sk_buff * skb,const struct net_device * dev)1785193125dbSDavid Ahern static int vrf_fillinfo(struct sk_buff *skb,
1786193125dbSDavid Ahern 			const struct net_device *dev)
1787193125dbSDavid Ahern {
1788193125dbSDavid Ahern 	struct net_vrf *vrf = netdev_priv(dev);
1789193125dbSDavid Ahern 
1790193125dbSDavid Ahern 	return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id);
1791193125dbSDavid Ahern }
1792193125dbSDavid Ahern 
/* rtnl_link_ops->get_slave_size: netlink space for per-slave attrs */
static size_t vrf_get_slave_size(const struct net_device *bond_dev,
				 const struct net_device *slave_dev)
{
	return nla_total_size(sizeof(u32));  /* IFLA_VRF_PORT_TABLE */
}
179867eb0331SDavid Ahern 
vrf_fill_slave_info(struct sk_buff * skb,const struct net_device * vrf_dev,const struct net_device * slave_dev)179967eb0331SDavid Ahern static int vrf_fill_slave_info(struct sk_buff *skb,
180067eb0331SDavid Ahern 			       const struct net_device *vrf_dev,
180167eb0331SDavid Ahern 			       const struct net_device *slave_dev)
180267eb0331SDavid Ahern {
180367eb0331SDavid Ahern 	struct net_vrf *vrf = netdev_priv(vrf_dev);
180467eb0331SDavid Ahern 
180567eb0331SDavid Ahern 	if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id))
180667eb0331SDavid Ahern 		return -EMSGSIZE;
180767eb0331SDavid Ahern 
180867eb0331SDavid Ahern 	return 0;
180967eb0331SDavid Ahern }
181067eb0331SDavid Ahern 
/* netlink attribute policy for the IFLA_VRF_* attributes */
static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = {
	[IFLA_VRF_TABLE] = { .type = NLA_U32 },
};
1814193125dbSDavid Ahern 
/* rtnl_link_ops describing the "vrf" link kind */
static struct rtnl_link_ops vrf_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct net_vrf),

	.get_size	= vrf_nl_getsize,
	.policy		= vrf_nl_policy,
	.validate	= vrf_validate,
	.fill_info	= vrf_fillinfo,

	.get_slave_size  = vrf_get_slave_size,
	.fill_slave_info = vrf_fill_slave_info,

	.newlink	= vrf_newlink,
	.dellink	= vrf_dellink,
	.setup		= vrf_setup,
	.maxtype	= IFLA_VRF_MAX,
};
1832193125dbSDavid Ahern 
vrf_device_event(struct notifier_block * unused,unsigned long event,void * ptr)1833193125dbSDavid Ahern static int vrf_device_event(struct notifier_block *unused,
1834193125dbSDavid Ahern 			    unsigned long event, void *ptr)
1835193125dbSDavid Ahern {
1836193125dbSDavid Ahern 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1837193125dbSDavid Ahern 
1838193125dbSDavid Ahern 	/* only care about unregister events to drop slave references */
1839193125dbSDavid Ahern 	if (event == NETDEV_UNREGISTER) {
1840193125dbSDavid Ahern 		struct net_device *vrf_dev;
1841193125dbSDavid Ahern 
1842fee6d4c7SDavid Ahern 		if (!netif_is_l3_slave(dev))
1843193125dbSDavid Ahern 			goto out;
1844193125dbSDavid Ahern 
184558aa9087SNikolay Aleksandrov 		vrf_dev = netdev_master_upper_dev_get(dev);
1846193125dbSDavid Ahern 		vrf_del_slave(vrf_dev, dev);
1847193125dbSDavid Ahern 	}
1848193125dbSDavid Ahern out:
1849193125dbSDavid Ahern 	return NOTIFY_DONE;
1850193125dbSDavid Ahern }
1851193125dbSDavid Ahern 
/* notifier releasing slave references on device unregistration */
static struct notifier_block vrf_notifier_block __read_mostly = {
	.notifier_call = vrf_device_event,
};
1855193125dbSDavid Ahern 
vrf_map_init(struct vrf_map * vmap)1856c8baec38SAndrea Mayer static int vrf_map_init(struct vrf_map *vmap)
1857c8baec38SAndrea Mayer {
1858c8baec38SAndrea Mayer 	spin_lock_init(&vmap->vmap_lock);
1859c8baec38SAndrea Mayer 	hash_init(vmap->ht);
1860c8baec38SAndrea Mayer 
1861c8baec38SAndrea Mayer 	vmap->strict_mode = false;
1862c8baec38SAndrea Mayer 
1863c8baec38SAndrea Mayer 	return 0;
1864c8baec38SAndrea Mayer }
1865c8baec38SAndrea Mayer 
18661b6687e3SDavid Ahern #ifdef CONFIG_SYSCTL
vrf_strict_mode(struct vrf_map * vmap)18671b6687e3SDavid Ahern static bool vrf_strict_mode(struct vrf_map *vmap)
18681b6687e3SDavid Ahern {
18691b6687e3SDavid Ahern 	bool strict_mode;
18701b6687e3SDavid Ahern 
18711b6687e3SDavid Ahern 	vrf_map_lock(vmap);
18721b6687e3SDavid Ahern 	strict_mode = vmap->strict_mode;
18731b6687e3SDavid Ahern 	vrf_map_unlock(vmap);
18741b6687e3SDavid Ahern 
18751b6687e3SDavid Ahern 	return strict_mode;
18761b6687e3SDavid Ahern }
18771b6687e3SDavid Ahern 
/* Flip the per-netns strict_mode flag under the map lock.
 *
 * Disabling is always allowed.  Enabling requires that no routing table
 * is currently shared by more than one VRF, since strict mode means a
 * 1:1 association between a VRF and its table.  Returns 0 on success
 * (including a no-op write of the current value) or -EBUSY when tables
 * are still shared.
 */
static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
{
	int res = 0;

	vrf_map_lock(vmap);

	/* nothing to do if the requested mode is already in effect */
	if (vmap->strict_mode == new_mode)
		goto unlock;

	if (new_mode && vmap->shared_tables) {
		/* cannot allow strict_mode while some vrfs share one or
		 * more tables
		 */
		res = -EBUSY;
		goto unlock;
	}

	vmap->strict_mode = new_mode;

unlock:
	vrf_map_unlock(vmap);

	return res;
}
19121b6687e3SDavid Ahern 
/* proc handler for net.vrf.strict_mode.
 *
 * Reads report the current per-netns value; writes are range-checked to
 * 0/1 by proc_dointvec_minmax() through a temporary table and then
 * applied with vrf_strict_mode_change().  The owning netns is recovered
 * from table->extra1, which vrf_netns_init_sysctl() filled in.
 */
static int vrf_shared_table_handler(struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->extra1;
	struct vrf_map *vmap = netns_vrf_map(net);
	int strict = 0;
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &strict,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};
	int ret;

	if (!write)
		strict = vrf_strict_mode(vmap);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (!ret && write)
		ret = vrf_strict_mode_change(vmap, (bool)strict);

	return ret;
}
193933306f1aSAndrea Mayer 
194033306f1aSAndrea Mayer static const struct ctl_table vrf_table[] = {
194133306f1aSAndrea Mayer 	{
194233306f1aSAndrea Mayer 		.procname	= "strict_mode",
194333306f1aSAndrea Mayer 		.data		= NULL,
194433306f1aSAndrea Mayer 		.maxlen		= sizeof(int),
194533306f1aSAndrea Mayer 		.mode		= 0644,
194633306f1aSAndrea Mayer 		.proc_handler	= vrf_shared_table_handler,
194733306f1aSAndrea Mayer 		/* set by the vrf_netns_init */
194833306f1aSAndrea Mayer 		.extra1		= NULL,
194933306f1aSAndrea Mayer 	},
195033306f1aSAndrea Mayer 	{ },
195133306f1aSAndrea Mayer };
195233306f1aSAndrea Mayer 
vrf_netns_init_sysctl(struct net * net,struct netns_vrf * nn_vrf)19531b6687e3SDavid Ahern static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
1954097d3c95SDavid Ahern {
195533306f1aSAndrea Mayer 	struct ctl_table *table;
1956097d3c95SDavid Ahern 
195733306f1aSAndrea Mayer 	table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL);
195833306f1aSAndrea Mayer 	if (!table)
195933306f1aSAndrea Mayer 		return -ENOMEM;
196033306f1aSAndrea Mayer 
196133306f1aSAndrea Mayer 	/* init the extra1 parameter with the reference to current netns */
196233306f1aSAndrea Mayer 	table[0].extra1 = net;
196333306f1aSAndrea Mayer 
19643ca9aa74SJoel Granados 	nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table,
19653ca9aa74SJoel Granados 						 ARRAY_SIZE(vrf_table));
196633306f1aSAndrea Mayer 	if (!nn_vrf->ctl_hdr) {
19671b6687e3SDavid Ahern 		kfree(table);
19681b6687e3SDavid Ahern 		return -ENOMEM;
196933306f1aSAndrea Mayer 	}
197033306f1aSAndrea Mayer 
1971097d3c95SDavid Ahern 	return 0;
197233306f1aSAndrea Mayer }
197333306f1aSAndrea Mayer 
vrf_netns_exit_sysctl(struct net * net)19741b6687e3SDavid Ahern static void vrf_netns_exit_sysctl(struct net *net)
197533306f1aSAndrea Mayer {
197633306f1aSAndrea Mayer 	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
197733306f1aSAndrea Mayer 	struct ctl_table *table;
197833306f1aSAndrea Mayer 
197933306f1aSAndrea Mayer 	table = nn_vrf->ctl_hdr->ctl_table_arg;
198033306f1aSAndrea Mayer 	unregister_net_sysctl_table(nn_vrf->ctl_hdr);
198133306f1aSAndrea Mayer 	kfree(table);
1982097d3c95SDavid Ahern }
19831b6687e3SDavid Ahern #else
/* CONFIG_SYSCTL=n stub: nothing to register */
static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
{
	return 0;
}
19881b6687e3SDavid Ahern 
/* CONFIG_SYSCTL=n stub: nothing to tear down */
static void vrf_netns_exit_sysctl(struct net *net)
{
}
19921b6687e3SDavid Ahern #endif
19931b6687e3SDavid Ahern 
19941b6687e3SDavid Ahern /* Initialize per network namespace state */
vrf_netns_init(struct net * net)19951b6687e3SDavid Ahern static int __net_init vrf_netns_init(struct net *net)
19961b6687e3SDavid Ahern {
19971b6687e3SDavid Ahern 	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
19981b6687e3SDavid Ahern 
19991b6687e3SDavid Ahern 	nn_vrf->add_fib_rules = true;
20001b6687e3SDavid Ahern 	vrf_map_init(&nn_vrf->vmap);
20011b6687e3SDavid Ahern 
20021b6687e3SDavid Ahern 	return vrf_netns_init_sysctl(net, nn_vrf);
20031b6687e3SDavid Ahern }
20041b6687e3SDavid Ahern 
/* Per-netns teardown; only the sysctl registration needs undoing */
static void __net_exit vrf_netns_exit(struct net *net)
{
	vrf_netns_exit_sysctl(net);
}
2009097d3c95SDavid Ahern 
2010097d3c95SDavid Ahern static struct pernet_operations vrf_net_ops __net_initdata = {
2011097d3c95SDavid Ahern 	.init = vrf_netns_init,
201233306f1aSAndrea Mayer 	.exit = vrf_netns_exit,
2013097d3c95SDavid Ahern 	.id   = &vrf_net_id,
2014c8baec38SAndrea Mayer 	.size = sizeof(struct netns_vrf),
2015097d3c95SDavid Ahern };
2016097d3c95SDavid Ahern 
vrf_init_module(void)2017193125dbSDavid Ahern static int __init vrf_init_module(void)
2018193125dbSDavid Ahern {
2019193125dbSDavid Ahern 	int rc;
2020193125dbSDavid Ahern 
2021193125dbSDavid Ahern 	register_netdevice_notifier(&vrf_notifier_block);
2022193125dbSDavid Ahern 
2023097d3c95SDavid Ahern 	rc = register_pernet_subsys(&vrf_net_ops);
2024193125dbSDavid Ahern 	if (rc < 0)
2025193125dbSDavid Ahern 		goto error;
2026193125dbSDavid Ahern 
2027a59a8ffdSAndrea Mayer 	rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF,
2028a59a8ffdSAndrea Mayer 					  vrf_ifindex_lookup_by_table_id);
2029a59a8ffdSAndrea Mayer 	if (rc < 0)
2030a59a8ffdSAndrea Mayer 		goto unreg_pernet;
2031a59a8ffdSAndrea Mayer 
2032097d3c95SDavid Ahern 	rc = rtnl_link_register(&vrf_link_ops);
2033a59a8ffdSAndrea Mayer 	if (rc < 0)
2034a59a8ffdSAndrea Mayer 		goto table_lookup_unreg;
2035097d3c95SDavid Ahern 
2036193125dbSDavid Ahern 	return 0;
2037193125dbSDavid Ahern 
2038a59a8ffdSAndrea Mayer table_lookup_unreg:
2039a59a8ffdSAndrea Mayer 	l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF,
2040a59a8ffdSAndrea Mayer 				       vrf_ifindex_lookup_by_table_id);
2041a59a8ffdSAndrea Mayer 
2042a59a8ffdSAndrea Mayer unreg_pernet:
2043a59a8ffdSAndrea Mayer 	unregister_pernet_subsys(&vrf_net_ops);
2044a59a8ffdSAndrea Mayer 
2045193125dbSDavid Ahern error:
2046193125dbSDavid Ahern 	unregister_netdevice_notifier(&vrf_notifier_block);
2047193125dbSDavid Ahern 	return rc;
2048193125dbSDavid Ahern }
2049193125dbSDavid Ahern 
2050193125dbSDavid Ahern module_init(vrf_init_module);
2051193125dbSDavid Ahern MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern");
2052193125dbSDavid Ahern MODULE_DESCRIPTION("Device driver to instantiate VRF domains");
2053193125dbSDavid Ahern MODULE_LICENSE("GPL");
2054193125dbSDavid Ahern MODULE_ALIAS_RTNL_LINK(DRV_NAME);
2055193125dbSDavid Ahern MODULE_VERSION(DRV_VERSION);
2056