12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2193125dbSDavid Ahern /*
3193125dbSDavid Ahern * vrf.c: device driver to encapsulate a VRF space
4193125dbSDavid Ahern *
5193125dbSDavid Ahern * Copyright (c) 2015 Cumulus Networks. All rights reserved.
6193125dbSDavid Ahern * Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com>
7193125dbSDavid Ahern * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
8193125dbSDavid Ahern *
9193125dbSDavid Ahern * Based on dummy, team and ipvlan drivers
10193125dbSDavid Ahern */
11193125dbSDavid Ahern
12cc69837fSJakub Kicinski #include <linux/ethtool.h>
13193125dbSDavid Ahern #include <linux/module.h>
14193125dbSDavid Ahern #include <linux/kernel.h>
15193125dbSDavid Ahern #include <linux/netdevice.h>
16193125dbSDavid Ahern #include <linux/etherdevice.h>
17193125dbSDavid Ahern #include <linux/ip.h>
18193125dbSDavid Ahern #include <linux/init.h>
19193125dbSDavid Ahern #include <linux/moduleparam.h>
20193125dbSDavid Ahern #include <linux/netfilter.h>
21193125dbSDavid Ahern #include <linux/rtnetlink.h>
22193125dbSDavid Ahern #include <net/rtnetlink.h>
23193125dbSDavid Ahern #include <linux/u64_stats_sync.h>
24193125dbSDavid Ahern #include <linux/hashtable.h>
25c8baec38SAndrea Mayer #include <linux/spinlock_types.h>
26193125dbSDavid Ahern
27193125dbSDavid Ahern #include <linux/inetdevice.h>
288f58336dSDavid Ahern #include <net/arp.h>
29193125dbSDavid Ahern #include <net/ip.h>
30193125dbSDavid Ahern #include <net/ip_fib.h>
3135402e31SDavid Ahern #include <net/ip6_fib.h>
32193125dbSDavid Ahern #include <net/ip6_route.h>
33193125dbSDavid Ahern #include <net/route.h>
34193125dbSDavid Ahern #include <net/addrconf.h>
35ee15ee5dSDavid Ahern #include <net/l3mdev.h>
361aa6c4f6SDavid Ahern #include <net/fib_rules.h>
37b6459415SJakub Kicinski #include <net/sch_generic.h>
38097d3c95SDavid Ahern #include <net/netns/generic.h>
398c9c296aSFlorian Westphal #include <net/netfilter/nf_conntrack.h>
40193125dbSDavid Ahern
41193125dbSDavid Ahern #define DRV_NAME "vrf"
42c8baec38SAndrea Mayer #define DRV_VERSION "1.1"
43193125dbSDavid Ahern
441aa6c4f6SDavid Ahern #define FIB_RULE_PREF 1000 /* default preference for FIB rules */
45097d3c95SDavid Ahern
46c8baec38SAndrea Mayer #define HT_MAP_BITS 4
47c8baec38SAndrea Mayer #define HASH_INITVAL ((u32)0xcafef00d)
48c8baec38SAndrea Mayer
49c8baec38SAndrea Mayer struct vrf_map {
50c8baec38SAndrea Mayer DECLARE_HASHTABLE(ht, HT_MAP_BITS);
51c8baec38SAndrea Mayer spinlock_t vmap_lock;
52c8baec38SAndrea Mayer
53c8baec38SAndrea Mayer /* shared_tables:
54c8baec38SAndrea Mayer * count how many distinct tables do not comply with the strict mode
55c8baec38SAndrea Mayer * requirement.
56c8baec38SAndrea Mayer * shared_tables value must be 0 in order to enable the strict mode.
57c8baec38SAndrea Mayer *
58c8baec38SAndrea Mayer * example of the evolution of shared_tables:
59c8baec38SAndrea Mayer * | time
60c8baec38SAndrea Mayer * add vrf0 --> table 100 shared_tables = 0 | t0
61c8baec38SAndrea Mayer * add vrf1 --> table 101 shared_tables = 0 | t1
62c8baec38SAndrea Mayer * add vrf2 --> table 100 shared_tables = 1 | t2
63c8baec38SAndrea Mayer * add vrf3 --> table 100 shared_tables = 1 | t3
64c8baec38SAndrea Mayer * add vrf4 --> table 101 shared_tables = 2 v t4
65c8baec38SAndrea Mayer *
66c8baec38SAndrea Mayer * shared_tables is a "step function" (or "staircase function")
67c8baec38SAndrea Mayer * and it is increased by one when the second vrf is associated to a
68c8baec38SAndrea Mayer * table.
69c8baec38SAndrea Mayer *
70c8baec38SAndrea Mayer * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1.
71c8baec38SAndrea Mayer *
72c8baec38SAndrea Mayer * at t3, another dev (vrf3) is bound to the same table 100 but the
73c8baec38SAndrea Mayer * value of shared_tables is still 1.
74c8baec38SAndrea Mayer * This means that no matter how many new vrfs will register on the
75c8baec38SAndrea Mayer * table 100, the shared_tables will not increase (considering only
76c8baec38SAndrea Mayer * table 100).
77c8baec38SAndrea Mayer *
78c8baec38SAndrea Mayer * at t4, vrf4 is bound to table 101, and shared_tables = 2.
79c8baec38SAndrea Mayer *
80c8baec38SAndrea Mayer * Looking at the value of shared_tables we can immediately know if
81c8baec38SAndrea Mayer * the strict_mode can or cannot be enforced. Indeed, strict_mode
82c8baec38SAndrea Mayer * can be enforced iff shared_tables = 0.
83c8baec38SAndrea Mayer *
84c8baec38SAndrea Mayer * Conversely, shared_tables is decreased when a vrf is de-associated
85c8baec38SAndrea Mayer * from a table with exactly two associated vrfs.
86c8baec38SAndrea Mayer */
87c8baec38SAndrea Mayer u32 shared_tables;
88c8baec38SAndrea Mayer
89c8baec38SAndrea Mayer bool strict_mode;
90c8baec38SAndrea Mayer };
91c8baec38SAndrea Mayer
92c8baec38SAndrea Mayer struct vrf_map_elem {
93c8baec38SAndrea Mayer struct hlist_node hnode;
94c8baec38SAndrea Mayer struct list_head vrf_list; /* VRFs registered to this table */
95c8baec38SAndrea Mayer
96c8baec38SAndrea Mayer u32 table_id;
97c8baec38SAndrea Mayer int users;
98c8baec38SAndrea Mayer int ifindex;
99c8baec38SAndrea Mayer };
100c8baec38SAndrea Mayer
101097d3c95SDavid Ahern static unsigned int vrf_net_id;
1021aa6c4f6SDavid Ahern
103c8baec38SAndrea Mayer /* per netns vrf data */
104c8baec38SAndrea Mayer struct netns_vrf {
105c8baec38SAndrea Mayer /* protected by rtnl lock */
106c8baec38SAndrea Mayer bool add_fib_rules;
107c8baec38SAndrea Mayer
108c8baec38SAndrea Mayer struct vrf_map vmap;
10933306f1aSAndrea Mayer struct ctl_table_header *ctl_hdr;
110c8baec38SAndrea Mayer };
111c8baec38SAndrea Mayer
112ec539514SDavid Ahern struct net_vrf {
113b0e95ccdSDavid Ahern struct rtable __rcu *rth;
114b0e95ccdSDavid Ahern struct rt6_info __rcu *rt6;
11543b059a3SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
11643b059a3SDavid Ahern struct fib6_table *fib6_table;
11743b059a3SDavid Ahern #endif
118ec539514SDavid Ahern u32 tb_id;
119c8baec38SAndrea Mayer
120c8baec38SAndrea Mayer struct list_head me_list; /* entry in vrf_map_elem */
121c8baec38SAndrea Mayer int ifindex;
122ec539514SDavid Ahern };
123ec539514SDavid Ahern
vrf_rx_stats(struct net_device * dev,int len)124afe80a49SDavid Ahern static void vrf_rx_stats(struct net_device *dev, int len)
125afe80a49SDavid Ahern {
126afe80a49SDavid Ahern struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
127afe80a49SDavid Ahern
128afe80a49SDavid Ahern u64_stats_update_begin(&dstats->syncp);
12995f068b0SDaniel Borkmann dstats->rx_packets++;
130afe80a49SDavid Ahern dstats->rx_bytes += len;
131afe80a49SDavid Ahern u64_stats_update_end(&dstats->syncp);
132afe80a49SDavid Ahern }
133afe80a49SDavid Ahern
vrf_tx_error(struct net_device * vrf_dev,struct sk_buff * skb)13457b8efa1SNikolay Aleksandrov static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
13557b8efa1SNikolay Aleksandrov {
13657b8efa1SNikolay Aleksandrov vrf_dev->stats.tx_errors++;
13757b8efa1SNikolay Aleksandrov kfree_skb(skb);
13857b8efa1SNikolay Aleksandrov }
13957b8efa1SNikolay Aleksandrov
vrf_get_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)140bc1f4470Sstephen hemminger static void vrf_get_stats64(struct net_device *dev,
141193125dbSDavid Ahern struct rtnl_link_stats64 *stats)
142193125dbSDavid Ahern {
143193125dbSDavid Ahern int i;
144193125dbSDavid Ahern
145193125dbSDavid Ahern for_each_possible_cpu(i) {
146193125dbSDavid Ahern const struct pcpu_dstats *dstats;
147193125dbSDavid Ahern u64 tbytes, tpkts, tdrops, rbytes, rpkts;
148193125dbSDavid Ahern unsigned int start;
149193125dbSDavid Ahern
150193125dbSDavid Ahern dstats = per_cpu_ptr(dev->dstats, i);
151193125dbSDavid Ahern do {
152068c38adSThomas Gleixner start = u64_stats_fetch_begin(&dstats->syncp);
153193125dbSDavid Ahern tbytes = dstats->tx_bytes;
15495f068b0SDaniel Borkmann tpkts = dstats->tx_packets;
15595f068b0SDaniel Borkmann tdrops = dstats->tx_drops;
156193125dbSDavid Ahern rbytes = dstats->rx_bytes;
15795f068b0SDaniel Borkmann rpkts = dstats->rx_packets;
158068c38adSThomas Gleixner } while (u64_stats_fetch_retry(&dstats->syncp, start));
159193125dbSDavid Ahern stats->tx_bytes += tbytes;
160193125dbSDavid Ahern stats->tx_packets += tpkts;
161193125dbSDavid Ahern stats->tx_dropped += tdrops;
162193125dbSDavid Ahern stats->rx_bytes += rbytes;
163193125dbSDavid Ahern stats->rx_packets += rpkts;
164193125dbSDavid Ahern }
165193125dbSDavid Ahern }
166193125dbSDavid Ahern
netns_vrf_map(struct net * net)167c8baec38SAndrea Mayer static struct vrf_map *netns_vrf_map(struct net *net)
168c8baec38SAndrea Mayer {
169c8baec38SAndrea Mayer struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
170c8baec38SAndrea Mayer
171c8baec38SAndrea Mayer return &nn_vrf->vmap;
172c8baec38SAndrea Mayer }
173c8baec38SAndrea Mayer
netns_vrf_map_by_dev(struct net_device * dev)174c8baec38SAndrea Mayer static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev)
175c8baec38SAndrea Mayer {
176c8baec38SAndrea Mayer return netns_vrf_map(dev_net(dev));
177c8baec38SAndrea Mayer }
178c8baec38SAndrea Mayer
vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem * me)179a59a8ffdSAndrea Mayer static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me)
180a59a8ffdSAndrea Mayer {
181a59a8ffdSAndrea Mayer struct list_head *me_head = &me->vrf_list;
182a59a8ffdSAndrea Mayer struct net_vrf *vrf;
183a59a8ffdSAndrea Mayer
184a59a8ffdSAndrea Mayer if (list_empty(me_head))
185a59a8ffdSAndrea Mayer return -ENODEV;
186a59a8ffdSAndrea Mayer
187a59a8ffdSAndrea Mayer vrf = list_first_entry(me_head, struct net_vrf, me_list);
188a59a8ffdSAndrea Mayer
189a59a8ffdSAndrea Mayer return vrf->ifindex;
190a59a8ffdSAndrea Mayer }
191a59a8ffdSAndrea Mayer
vrf_map_elem_alloc(gfp_t flags)192c8baec38SAndrea Mayer static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags)
193c8baec38SAndrea Mayer {
194c8baec38SAndrea Mayer struct vrf_map_elem *me;
195c8baec38SAndrea Mayer
196c8baec38SAndrea Mayer me = kmalloc(sizeof(*me), flags);
197c8baec38SAndrea Mayer if (!me)
198c8baec38SAndrea Mayer return NULL;
199c8baec38SAndrea Mayer
200c8baec38SAndrea Mayer return me;
201c8baec38SAndrea Mayer }
202c8baec38SAndrea Mayer
vrf_map_elem_free(struct vrf_map_elem * me)203c8baec38SAndrea Mayer static void vrf_map_elem_free(struct vrf_map_elem *me)
204c8baec38SAndrea Mayer {
205c8baec38SAndrea Mayer kfree(me);
206c8baec38SAndrea Mayer }
207c8baec38SAndrea Mayer
vrf_map_elem_init(struct vrf_map_elem * me,int table_id,int ifindex,int users)208c8baec38SAndrea Mayer static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id,
209c8baec38SAndrea Mayer int ifindex, int users)
210c8baec38SAndrea Mayer {
211c8baec38SAndrea Mayer me->table_id = table_id;
212c8baec38SAndrea Mayer me->ifindex = ifindex;
213c8baec38SAndrea Mayer me->users = users;
214c8baec38SAndrea Mayer INIT_LIST_HEAD(&me->vrf_list);
215c8baec38SAndrea Mayer }
216c8baec38SAndrea Mayer
vrf_map_lookup_elem(struct vrf_map * vmap,u32 table_id)217c8baec38SAndrea Mayer static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap,
218c8baec38SAndrea Mayer u32 table_id)
219c8baec38SAndrea Mayer {
220c8baec38SAndrea Mayer struct vrf_map_elem *me;
221c8baec38SAndrea Mayer u32 key;
222c8baec38SAndrea Mayer
223c8baec38SAndrea Mayer key = jhash_1word(table_id, HASH_INITVAL);
224c8baec38SAndrea Mayer hash_for_each_possible(vmap->ht, me, hnode, key) {
225c8baec38SAndrea Mayer if (me->table_id == table_id)
226c8baec38SAndrea Mayer return me;
227c8baec38SAndrea Mayer }
228c8baec38SAndrea Mayer
229c8baec38SAndrea Mayer return NULL;
230c8baec38SAndrea Mayer }
231c8baec38SAndrea Mayer
vrf_map_add_elem(struct vrf_map * vmap,struct vrf_map_elem * me)232c8baec38SAndrea Mayer static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me)
233c8baec38SAndrea Mayer {
234c8baec38SAndrea Mayer u32 table_id = me->table_id;
235c8baec38SAndrea Mayer u32 key;
236c8baec38SAndrea Mayer
237c8baec38SAndrea Mayer key = jhash_1word(table_id, HASH_INITVAL);
238c8baec38SAndrea Mayer hash_add(vmap->ht, &me->hnode, key);
239c8baec38SAndrea Mayer }
240c8baec38SAndrea Mayer
vrf_map_del_elem(struct vrf_map_elem * me)241c8baec38SAndrea Mayer static void vrf_map_del_elem(struct vrf_map_elem *me)
242c8baec38SAndrea Mayer {
243c8baec38SAndrea Mayer hash_del(&me->hnode);
244c8baec38SAndrea Mayer }
245c8baec38SAndrea Mayer
vrf_map_lock(struct vrf_map * vmap)246c8baec38SAndrea Mayer static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock)
247c8baec38SAndrea Mayer {
248c8baec38SAndrea Mayer spin_lock(&vmap->vmap_lock);
249c8baec38SAndrea Mayer }
250c8baec38SAndrea Mayer
vrf_map_unlock(struct vrf_map * vmap)251c8baec38SAndrea Mayer static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock)
252c8baec38SAndrea Mayer {
253c8baec38SAndrea Mayer spin_unlock(&vmap->vmap_lock);
254c8baec38SAndrea Mayer }
255c8baec38SAndrea Mayer
256c8baec38SAndrea Mayer /* called with rtnl lock held */
257c8baec38SAndrea Mayer static int
vrf_map_register_dev(struct net_device * dev,struct netlink_ext_ack * extack)258c8baec38SAndrea Mayer vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack)
259c8baec38SAndrea Mayer {
260c8baec38SAndrea Mayer struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
261c8baec38SAndrea Mayer struct net_vrf *vrf = netdev_priv(dev);
262c8baec38SAndrea Mayer struct vrf_map_elem *new_me, *me;
263c8baec38SAndrea Mayer u32 table_id = vrf->tb_id;
264c8baec38SAndrea Mayer bool free_new_me = false;
265c8baec38SAndrea Mayer int users;
266c8baec38SAndrea Mayer int res;
267c8baec38SAndrea Mayer
268c8baec38SAndrea Mayer /* we pre-allocate elements used in the spin-locked section (so that we
269e9a0bf6dSZheng Yongjun * keep the spinlock as short as possible).
270c8baec38SAndrea Mayer */
271c8baec38SAndrea Mayer new_me = vrf_map_elem_alloc(GFP_KERNEL);
272c8baec38SAndrea Mayer if (!new_me)
273c8baec38SAndrea Mayer return -ENOMEM;
274c8baec38SAndrea Mayer
275c8baec38SAndrea Mayer vrf_map_elem_init(new_me, table_id, dev->ifindex, 0);
276c8baec38SAndrea Mayer
277c8baec38SAndrea Mayer vrf_map_lock(vmap);
278c8baec38SAndrea Mayer
279c8baec38SAndrea Mayer me = vrf_map_lookup_elem(vmap, table_id);
280c8baec38SAndrea Mayer if (!me) {
281c8baec38SAndrea Mayer me = new_me;
282c8baec38SAndrea Mayer vrf_map_add_elem(vmap, me);
283c8baec38SAndrea Mayer goto link_vrf;
284c8baec38SAndrea Mayer }
285c8baec38SAndrea Mayer
286c8baec38SAndrea Mayer /* we already have an entry in the vrf_map, so it means there is (at
287c8baec38SAndrea Mayer * least) a vrf registered on the specific table.
288c8baec38SAndrea Mayer */
289c8baec38SAndrea Mayer free_new_me = true;
290c8baec38SAndrea Mayer if (vmap->strict_mode) {
291c8baec38SAndrea Mayer /* vrfs cannot share the same table */
292c8baec38SAndrea Mayer NL_SET_ERR_MSG(extack, "Table is used by another VRF");
293c8baec38SAndrea Mayer res = -EBUSY;
294c8baec38SAndrea Mayer goto unlock;
295c8baec38SAndrea Mayer }
296c8baec38SAndrea Mayer
297c8baec38SAndrea Mayer link_vrf:
298c8baec38SAndrea Mayer users = ++me->users;
299c8baec38SAndrea Mayer if (users == 2)
300c8baec38SAndrea Mayer ++vmap->shared_tables;
301c8baec38SAndrea Mayer
302c8baec38SAndrea Mayer list_add(&vrf->me_list, &me->vrf_list);
303c8baec38SAndrea Mayer
304c8baec38SAndrea Mayer res = 0;
305c8baec38SAndrea Mayer
306c8baec38SAndrea Mayer unlock:
307c8baec38SAndrea Mayer vrf_map_unlock(vmap);
308c8baec38SAndrea Mayer
309c8baec38SAndrea Mayer /* clean-up, if needed */
310c8baec38SAndrea Mayer if (free_new_me)
311c8baec38SAndrea Mayer vrf_map_elem_free(new_me);
312c8baec38SAndrea Mayer
313c8baec38SAndrea Mayer return res;
314c8baec38SAndrea Mayer }
315c8baec38SAndrea Mayer
316c8baec38SAndrea Mayer /* called with rtnl lock held */
vrf_map_unregister_dev(struct net_device * dev)317c8baec38SAndrea Mayer static void vrf_map_unregister_dev(struct net_device *dev)
318c8baec38SAndrea Mayer {
319c8baec38SAndrea Mayer struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
320c8baec38SAndrea Mayer struct net_vrf *vrf = netdev_priv(dev);
321c8baec38SAndrea Mayer u32 table_id = vrf->tb_id;
322c8baec38SAndrea Mayer struct vrf_map_elem *me;
323c8baec38SAndrea Mayer int users;
324c8baec38SAndrea Mayer
325c8baec38SAndrea Mayer vrf_map_lock(vmap);
326c8baec38SAndrea Mayer
327c8baec38SAndrea Mayer me = vrf_map_lookup_elem(vmap, table_id);
328c8baec38SAndrea Mayer if (!me)
329c8baec38SAndrea Mayer goto unlock;
330c8baec38SAndrea Mayer
331c8baec38SAndrea Mayer list_del(&vrf->me_list);
332c8baec38SAndrea Mayer
333c8baec38SAndrea Mayer users = --me->users;
334c8baec38SAndrea Mayer if (users == 1) {
335c8baec38SAndrea Mayer --vmap->shared_tables;
336c8baec38SAndrea Mayer } else if (users == 0) {
337c8baec38SAndrea Mayer vrf_map_del_elem(me);
338c8baec38SAndrea Mayer
339c8baec38SAndrea Mayer /* no one will refer to this element anymore */
340c8baec38SAndrea Mayer vrf_map_elem_free(me);
341c8baec38SAndrea Mayer }
342c8baec38SAndrea Mayer
343c8baec38SAndrea Mayer unlock:
344c8baec38SAndrea Mayer vrf_map_unlock(vmap);
345c8baec38SAndrea Mayer }
346c8baec38SAndrea Mayer
347a59a8ffdSAndrea Mayer /* return the vrf device index associated with the table_id */
vrf_ifindex_lookup_by_table_id(struct net * net,u32 table_id)348a59a8ffdSAndrea Mayer static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id)
349a59a8ffdSAndrea Mayer {
350a59a8ffdSAndrea Mayer struct vrf_map *vmap = netns_vrf_map(net);
351a59a8ffdSAndrea Mayer struct vrf_map_elem *me;
352a59a8ffdSAndrea Mayer int ifindex;
353a59a8ffdSAndrea Mayer
354a59a8ffdSAndrea Mayer vrf_map_lock(vmap);
355a59a8ffdSAndrea Mayer
356a59a8ffdSAndrea Mayer if (!vmap->strict_mode) {
357a59a8ffdSAndrea Mayer ifindex = -EPERM;
358a59a8ffdSAndrea Mayer goto unlock;
359a59a8ffdSAndrea Mayer }
360a59a8ffdSAndrea Mayer
361a59a8ffdSAndrea Mayer me = vrf_map_lookup_elem(vmap, table_id);
362a59a8ffdSAndrea Mayer if (!me) {
363a59a8ffdSAndrea Mayer ifindex = -ENODEV;
364a59a8ffdSAndrea Mayer goto unlock;
365a59a8ffdSAndrea Mayer }
366a59a8ffdSAndrea Mayer
367a59a8ffdSAndrea Mayer ifindex = vrf_map_elem_get_vrf_ifindex(me);
368a59a8ffdSAndrea Mayer
369a59a8ffdSAndrea Mayer unlock:
370a59a8ffdSAndrea Mayer vrf_map_unlock(vmap);
371a59a8ffdSAndrea Mayer
372a59a8ffdSAndrea Mayer return ifindex;
373a59a8ffdSAndrea Mayer }
374a59a8ffdSAndrea Mayer
375dcdd43c4SDavid Ahern /* by default VRF devices do not have a qdisc and are expected
376dcdd43c4SDavid Ahern * to be created with only a single queue.
377dcdd43c4SDavid Ahern */
qdisc_tx_is_default(const struct net_device * dev)378dcdd43c4SDavid Ahern static bool qdisc_tx_is_default(const struct net_device *dev)
379dcdd43c4SDavid Ahern {
380dcdd43c4SDavid Ahern struct netdev_queue *txq;
381dcdd43c4SDavid Ahern struct Qdisc *qdisc;
382dcdd43c4SDavid Ahern
383dcdd43c4SDavid Ahern if (dev->num_tx_queues > 1)
384dcdd43c4SDavid Ahern return false;
385dcdd43c4SDavid Ahern
386dcdd43c4SDavid Ahern txq = netdev_get_tx_queue(dev, 0);
387dcdd43c4SDavid Ahern qdisc = rcu_access_pointer(txq->qdisc);
388dcdd43c4SDavid Ahern
389dcdd43c4SDavid Ahern return !qdisc->enqueue;
390dcdd43c4SDavid Ahern }
391dcdd43c4SDavid Ahern
392afe80a49SDavid Ahern /* Local traffic destined to local address. Reinsert the packet to rx
393afe80a49SDavid Ahern * path, similar to loopback handling.
394afe80a49SDavid Ahern */
vrf_local_xmit(struct sk_buff * skb,struct net_device * dev,struct dst_entry * dst)395afe80a49SDavid Ahern static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
396afe80a49SDavid Ahern struct dst_entry *dst)
397afe80a49SDavid Ahern {
398afe80a49SDavid Ahern int len = skb->len;
399afe80a49SDavid Ahern
400afe80a49SDavid Ahern skb_orphan(skb);
401afe80a49SDavid Ahern
402afe80a49SDavid Ahern skb_dst_set(skb, dst);
403afe80a49SDavid Ahern
404afe80a49SDavid Ahern /* set pkt_type to avoid skb hitting packet taps twice -
405afe80a49SDavid Ahern * once on Tx and again in Rx processing
406afe80a49SDavid Ahern */
407afe80a49SDavid Ahern skb->pkt_type = PACKET_LOOPBACK;
408afe80a49SDavid Ahern
409afe80a49SDavid Ahern skb->protocol = eth_type_trans(skb, dev);
410afe80a49SDavid Ahern
411baebdf48SSebastian Andrzej Siewior if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
412afe80a49SDavid Ahern vrf_rx_stats(dev, len);
413afe80a49SDavid Ahern else
41495f068b0SDaniel Borkmann this_cpu_inc(dev->dstats->rx_drops);
415afe80a49SDavid Ahern
416afe80a49SDavid Ahern return NETDEV_TX_OK;
417afe80a49SDavid Ahern }
418afe80a49SDavid Ahern
vrf_nf_set_untracked(struct sk_buff * skb)4198c9c296aSFlorian Westphal static void vrf_nf_set_untracked(struct sk_buff *skb)
4208c9c296aSFlorian Westphal {
4218c9c296aSFlorian Westphal if (skb_get_nfct(skb) == 0)
4228c9c296aSFlorian Westphal nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
4238c9c296aSFlorian Westphal }
4248c9c296aSFlorian Westphal
vrf_nf_reset_ct(struct sk_buff * skb)4258c9c296aSFlorian Westphal static void vrf_nf_reset_ct(struct sk_buff *skb)
4268c9c296aSFlorian Westphal {
4278c9c296aSFlorian Westphal if (skb_get_nfct(skb) == IP_CT_UNTRACKED)
4288c9c296aSFlorian Westphal nf_reset_ct(skb);
4298c9c296aSFlorian Westphal }
4308c9c296aSFlorian Westphal
43135402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
vrf_ip6_local_out(struct net * net,struct sock * sk,struct sk_buff * skb)4324c1feac5SDavid Ahern static int vrf_ip6_local_out(struct net *net, struct sock *sk,
4334c1feac5SDavid Ahern struct sk_buff *skb)
4344c1feac5SDavid Ahern {
4354c1feac5SDavid Ahern int err;
4364c1feac5SDavid Ahern
4378c9c296aSFlorian Westphal vrf_nf_reset_ct(skb);
4388c9c296aSFlorian Westphal
4394c1feac5SDavid Ahern err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
4404c1feac5SDavid Ahern sk, skb, NULL, skb_dst(skb)->dev, dst_output);
4414c1feac5SDavid Ahern
4424c1feac5SDavid Ahern if (likely(err == 1))
4434c1feac5SDavid Ahern err = dst_output(net, sk, skb);
4444c1feac5SDavid Ahern
4454c1feac5SDavid Ahern return err;
4464c1feac5SDavid Ahern }
4474c1feac5SDavid Ahern
vrf_process_v6_outbound(struct sk_buff * skb,struct net_device * dev)44835402e31SDavid Ahern static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
44935402e31SDavid Ahern struct net_device *dev)
45035402e31SDavid Ahern {
451107e47ccSPeter Kosyh const struct ipv6hdr *iph;
45235402e31SDavid Ahern struct net *net = dev_net(skb->dev);
453107e47ccSPeter Kosyh struct flowi6 fl6;
45435402e31SDavid Ahern int ret = NET_XMIT_DROP;
45535402e31SDavid Ahern struct dst_entry *dst;
45635402e31SDavid Ahern struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
45735402e31SDavid Ahern
458107e47ccSPeter Kosyh if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
459107e47ccSPeter Kosyh goto err;
460107e47ccSPeter Kosyh
461107e47ccSPeter Kosyh iph = ipv6_hdr(skb);
462107e47ccSPeter Kosyh
463107e47ccSPeter Kosyh memset(&fl6, 0, sizeof(fl6));
464107e47ccSPeter Kosyh /* needed to match OIF rule */
46540867d74SDavid Ahern fl6.flowi6_l3mdev = dev->ifindex;
466107e47ccSPeter Kosyh fl6.flowi6_iif = LOOPBACK_IFINDEX;
467107e47ccSPeter Kosyh fl6.daddr = iph->daddr;
468107e47ccSPeter Kosyh fl6.saddr = iph->saddr;
469107e47ccSPeter Kosyh fl6.flowlabel = ip6_flowinfo(iph);
470107e47ccSPeter Kosyh fl6.flowi6_mark = skb->mark;
471107e47ccSPeter Kosyh fl6.flowi6_proto = iph->nexthdr;
472107e47ccSPeter Kosyh
473a53c1028SDavid Ahern dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
474a53c1028SDavid Ahern if (IS_ERR(dst) || dst == dst_null)
47535402e31SDavid Ahern goto err;
47635402e31SDavid Ahern
47735402e31SDavid Ahern skb_dst_drop(skb);
478b4869aa2SDavid Ahern
4792e1534f3SNicolas Dichtel /* if dst.dev is the VRF device again this is locally originated traffic
4802e1534f3SNicolas Dichtel * destined to a local address. Short circuit to Rx path.
481b4869aa2SDavid Ahern */
4824f04256cSDavid Ahern if (dst->dev == dev)
4834f04256cSDavid Ahern return vrf_local_xmit(skb, dev, dst);
484b4869aa2SDavid Ahern
48535402e31SDavid Ahern skb_dst_set(skb, dst);
48635402e31SDavid Ahern
487911a66fbSDavid Ahern /* strip the ethernet header added for pass through VRF device */
488911a66fbSDavid Ahern __skb_pull(skb, skb_network_offset(skb));
489911a66fbSDavid Ahern
490ee201011SStephen Suryaputra memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
4914c1feac5SDavid Ahern ret = vrf_ip6_local_out(net, skb->sk, skb);
49235402e31SDavid Ahern if (unlikely(net_xmit_eval(ret)))
49335402e31SDavid Ahern dev->stats.tx_errors++;
49435402e31SDavid Ahern else
49535402e31SDavid Ahern ret = NET_XMIT_SUCCESS;
49635402e31SDavid Ahern
49735402e31SDavid Ahern return ret;
49835402e31SDavid Ahern err:
49935402e31SDavid Ahern vrf_tx_error(dev, skb);
50035402e31SDavid Ahern return NET_XMIT_DROP;
50135402e31SDavid Ahern }
50235402e31SDavid Ahern #else
vrf_process_v6_outbound(struct sk_buff * skb,struct net_device * dev)503193125dbSDavid Ahern static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
504193125dbSDavid Ahern struct net_device *dev)
505193125dbSDavid Ahern {
50657b8efa1SNikolay Aleksandrov vrf_tx_error(dev, skb);
50757b8efa1SNikolay Aleksandrov return NET_XMIT_DROP;
508193125dbSDavid Ahern }
50935402e31SDavid Ahern #endif
510193125dbSDavid Ahern
511ebfc102cSDavid Ahern /* based on ip_local_out; can't use it b/c the dst is switched pointing to us */
vrf_ip_local_out(struct net * net,struct sock * sk,struct sk_buff * skb)512ebfc102cSDavid Ahern static int vrf_ip_local_out(struct net *net, struct sock *sk,
513ebfc102cSDavid Ahern struct sk_buff *skb)
514ebfc102cSDavid Ahern {
515ebfc102cSDavid Ahern int err;
516ebfc102cSDavid Ahern
5178c9c296aSFlorian Westphal vrf_nf_reset_ct(skb);
5188c9c296aSFlorian Westphal
519ebfc102cSDavid Ahern err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
520ebfc102cSDavid Ahern skb, NULL, skb_dst(skb)->dev, dst_output);
521ebfc102cSDavid Ahern if (likely(err == 1))
522ebfc102cSDavid Ahern err = dst_output(net, sk, skb);
523ebfc102cSDavid Ahern
524ebfc102cSDavid Ahern return err;
525ebfc102cSDavid Ahern }
526ebfc102cSDavid Ahern
vrf_process_v4_outbound(struct sk_buff * skb,struct net_device * vrf_dev)527193125dbSDavid Ahern static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
528193125dbSDavid Ahern struct net_device *vrf_dev)
529193125dbSDavid Ahern {
530107e47ccSPeter Kosyh struct iphdr *ip4h;
531193125dbSDavid Ahern int ret = NET_XMIT_DROP;
532107e47ccSPeter Kosyh struct flowi4 fl4;
533911a66fbSDavid Ahern struct net *net = dev_net(vrf_dev);
534911a66fbSDavid Ahern struct rtable *rt;
535193125dbSDavid Ahern
536107e47ccSPeter Kosyh if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
537107e47ccSPeter Kosyh goto err;
538107e47ccSPeter Kosyh
539107e47ccSPeter Kosyh ip4h = ip_hdr(skb);
540107e47ccSPeter Kosyh
541107e47ccSPeter Kosyh memset(&fl4, 0, sizeof(fl4));
542107e47ccSPeter Kosyh /* needed to match OIF rule */
54340867d74SDavid Ahern fl4.flowi4_l3mdev = vrf_dev->ifindex;
544107e47ccSPeter Kosyh fl4.flowi4_iif = LOOPBACK_IFINDEX;
545107e47ccSPeter Kosyh fl4.flowi4_tos = RT_TOS(ip4h->tos);
54640867d74SDavid Ahern fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
547107e47ccSPeter Kosyh fl4.flowi4_proto = ip4h->protocol;
548107e47ccSPeter Kosyh fl4.daddr = ip4h->daddr;
549107e47ccSPeter Kosyh fl4.saddr = ip4h->saddr;
550107e47ccSPeter Kosyh
551911a66fbSDavid Ahern rt = ip_route_output_flow(net, &fl4, NULL);
552911a66fbSDavid Ahern if (IS_ERR(rt))
553193125dbSDavid Ahern goto err;
554193125dbSDavid Ahern
555911a66fbSDavid Ahern skb_dst_drop(skb);
556afe80a49SDavid Ahern
5572e1534f3SNicolas Dichtel /* if dst.dev is the VRF device again this is locally originated traffic
5582e1534f3SNicolas Dichtel * destined to a local address. Short circuit to Rx path.
559afe80a49SDavid Ahern */
5604f04256cSDavid Ahern if (rt->dst.dev == vrf_dev)
5614f04256cSDavid Ahern return vrf_local_xmit(skb, vrf_dev, &rt->dst);
562afe80a49SDavid Ahern
563911a66fbSDavid Ahern skb_dst_set(skb, &rt->dst);
564911a66fbSDavid Ahern
565911a66fbSDavid Ahern /* strip the ethernet header added for pass through VRF device */
566911a66fbSDavid Ahern __skb_pull(skb, skb_network_offset(skb));
567911a66fbSDavid Ahern
568193125dbSDavid Ahern if (!ip4h->saddr) {
569193125dbSDavid Ahern ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
570193125dbSDavid Ahern RT_SCOPE_LINK);
571193125dbSDavid Ahern }
572193125dbSDavid Ahern
573ee201011SStephen Suryaputra memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
574ebfc102cSDavid Ahern ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
575193125dbSDavid Ahern if (unlikely(net_xmit_eval(ret)))
576193125dbSDavid Ahern vrf_dev->stats.tx_errors++;
577193125dbSDavid Ahern else
578193125dbSDavid Ahern ret = NET_XMIT_SUCCESS;
579193125dbSDavid Ahern
580193125dbSDavid Ahern out:
581193125dbSDavid Ahern return ret;
582193125dbSDavid Ahern err:
58357b8efa1SNikolay Aleksandrov vrf_tx_error(vrf_dev, skb);
584193125dbSDavid Ahern goto out;
585193125dbSDavid Ahern }
586193125dbSDavid Ahern
is_ip_tx_frame(struct sk_buff * skb,struct net_device * dev)587193125dbSDavid Ahern static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
588193125dbSDavid Ahern {
589193125dbSDavid Ahern switch (skb->protocol) {
590193125dbSDavid Ahern case htons(ETH_P_IP):
591193125dbSDavid Ahern return vrf_process_v4_outbound(skb, dev);
592193125dbSDavid Ahern case htons(ETH_P_IPV6):
593193125dbSDavid Ahern return vrf_process_v6_outbound(skb, dev);
594193125dbSDavid Ahern default:
59557b8efa1SNikolay Aleksandrov vrf_tx_error(dev, skb);
596193125dbSDavid Ahern return NET_XMIT_DROP;
597193125dbSDavid Ahern }
598193125dbSDavid Ahern }
599193125dbSDavid Ahern
vrf_xmit(struct sk_buff * skb,struct net_device * dev)600193125dbSDavid Ahern static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
601193125dbSDavid Ahern {
602f7887d40SDavid Ahern int len = skb->len;
603193125dbSDavid Ahern netdev_tx_t ret = is_ip_tx_frame(skb, dev);
604193125dbSDavid Ahern
605193125dbSDavid Ahern if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
606193125dbSDavid Ahern struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
607193125dbSDavid Ahern
608193125dbSDavid Ahern u64_stats_update_begin(&dstats->syncp);
60995f068b0SDaniel Borkmann dstats->tx_packets++;
610f7887d40SDavid Ahern dstats->tx_bytes += len;
611193125dbSDavid Ahern u64_stats_update_end(&dstats->syncp);
612193125dbSDavid Ahern } else {
61395f068b0SDaniel Borkmann this_cpu_inc(dev->dstats->tx_drops);
614193125dbSDavid Ahern }
615193125dbSDavid Ahern
616193125dbSDavid Ahern return ret;
617193125dbSDavid Ahern }
618193125dbSDavid Ahern
vrf_finish_direct(struct sk_buff * skb)6199e2b7fa2SMartin Willi static void vrf_finish_direct(struct sk_buff *skb)
620dcdd43c4SDavid Ahern {
621dcdd43c4SDavid Ahern struct net_device *vrf_dev = skb->dev;
622dcdd43c4SDavid Ahern
623dcdd43c4SDavid Ahern if (!list_empty(&vrf_dev->ptype_all) &&
624dcdd43c4SDavid Ahern likely(skb_headroom(skb) >= ETH_HLEN)) {
625d58ff351SJohannes Berg struct ethhdr *eth = skb_push(skb, ETH_HLEN);
626dcdd43c4SDavid Ahern
627dcdd43c4SDavid Ahern ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
628dcdd43c4SDavid Ahern eth_zero_addr(eth->h_dest);
629dcdd43c4SDavid Ahern eth->h_proto = skb->protocol;
630dcdd43c4SDavid Ahern
631718a752bSWillem de Bruijn rcu_read_lock_bh();
632dcdd43c4SDavid Ahern dev_queue_xmit_nit(skb, vrf_dev);
633718a752bSWillem de Bruijn rcu_read_unlock_bh();
634dcdd43c4SDavid Ahern
635dcdd43c4SDavid Ahern skb_pull(skb, ETH_HLEN);
636dcdd43c4SDavid Ahern }
637dcdd43c4SDavid Ahern
6388c9c296aSFlorian Westphal vrf_nf_reset_ct(skb);
639dcdd43c4SDavid Ahern }
640dcdd43c4SDavid Ahern
64135402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
64235402e31SDavid Ahern /* modelled after ip6_finish_output2 */
vrf_finish_output6(struct net * net,struct sock * sk,struct sk_buff * skb)64335402e31SDavid Ahern static int vrf_finish_output6(struct net *net, struct sock *sk,
64435402e31SDavid Ahern struct sk_buff *skb)
64535402e31SDavid Ahern {
64635402e31SDavid Ahern struct dst_entry *dst = skb_dst(skb);
64735402e31SDavid Ahern struct net_device *dev = dst->dev;
6489b1c1ef1SNicolas Dichtel const struct in6_addr *nexthop;
64935402e31SDavid Ahern struct neighbour *neigh;
65035402e31SDavid Ahern int ret;
65135402e31SDavid Ahern
6528c9c296aSFlorian Westphal vrf_nf_reset_ct(skb);
653eb63ecc1SDavid Ahern
65435402e31SDavid Ahern skb->protocol = htons(ETH_P_IPV6);
65535402e31SDavid Ahern skb->dev = dev;
65635402e31SDavid Ahern
6572033ab90SIdo Schimmel rcu_read_lock();
658*797a4c1fSEric Dumazet nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr);
65935402e31SDavid Ahern neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
66035402e31SDavid Ahern if (unlikely(!neigh))
66135402e31SDavid Ahern neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
66235402e31SDavid Ahern if (!IS_ERR(neigh)) {
6634ff06203SJulian Anastasov sock_confirm_neigh(skb, neigh);
6640353f282SDavid Ahern ret = neigh_output(neigh, skb, false);
6652033ab90SIdo Schimmel rcu_read_unlock();
66635402e31SDavid Ahern return ret;
66735402e31SDavid Ahern }
6682033ab90SIdo Schimmel rcu_read_unlock();
66935402e31SDavid Ahern
67035402e31SDavid Ahern IP6_INC_STATS(dev_net(dst->dev),
67135402e31SDavid Ahern ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
67235402e31SDavid Ahern kfree_skb(skb);
67335402e31SDavid Ahern return -EINVAL;
67435402e31SDavid Ahern }
67535402e31SDavid Ahern
67635402e31SDavid Ahern /* modelled after ip6_output */
vrf_output6(struct net * net,struct sock * sk,struct sk_buff * skb)67735402e31SDavid Ahern static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
67835402e31SDavid Ahern {
67935402e31SDavid Ahern return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
68035402e31SDavid Ahern net, sk, skb, NULL, skb_dst(skb)->dev,
68135402e31SDavid Ahern vrf_finish_output6,
68235402e31SDavid Ahern !(IP6CB(skb)->flags & IP6SKB_REROUTED));
68335402e31SDavid Ahern }
68435402e31SDavid Ahern
/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 */
static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
					    struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rt6_info *rt6;

	rcu_read_lock();

	rt6 = rcu_dereference(vrf->rt6);
	if (likely(rt6)) {
		dst = &rt6->dst;
		/* take the reference while still inside the rcu read side
		 * so the dst cannot be released underneath us
		 */
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* vrf->rt6 is NULL while the device is being torn down */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}
7164c1feac5SDavid Ahern
/* okfn for the POST_ROUTING hook in vrf_output6_direct: finish the
 * direct tx work on the VRF device, then re-enter the IPv6 local-out
 * path for real transmission.
 */
static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	int ret;

	vrf_finish_direct(skb);
	ret = vrf_ip6_local_out(net, sk, skb);

	return ret;
}
7249e2b7fa2SMartin Willi
/* direct tx variant of vrf_output6: run POST_ROUTING by hand so the
 * caller can learn whether the hook consumed the skb (return != 1).
 */
static int vrf_output6_direct(struct net *net, struct sock *sk,
			      struct sk_buff *skb)
{
	int err = 1;

	skb->protocol = htons(ETH_P_IPV6);

	/* NOTE(review): this checks IPCB/IPSKB_REROUTED (the IPv4 control
	 * block) on an IPv6 skb, mirroring vrf_output_direct — verify this
	 * is intentional rather than IP6CB/IP6SKB_REROUTED.
	 */
	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
		err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb->dev, vrf_output6_direct_finish);

	/* err == 1 means the hook chain neither stole nor queued the skb */
	if (likely(err == 1))
		vrf_finish_direct(skb);

	return err;
}
7419e2b7fa2SMartin Willi
/* okfn for the LOCAL_OUT hook in vrf_ip6_out_direct: continue with the
 * direct output path and, if the skb survives, the IPv6 local-out path.
 */
static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	int ret = vrf_output6_direct(net, sk, skb);

	/* a return of 1 means the skb was not consumed by the hooks */
	if (likely(ret == 1))
		ret = vrf_ip6_local_out(net, sk, skb);

	return ret;
}
753a9ec54d1SDavid Ahern
/* send the packet straight out the VRF device: run LOCAL_OUT and the
 * direct output path by hand. Returns the skb when it is still owned by
 * the caller, NULL when a hook consumed it.
 */
static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
					  struct sock *sk,
					  struct sk_buff *skb)
{
	struct net *net = dev_net(vrf_dev);
	int err;

	skb->dev = vrf_dev;

	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
		      skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);

	if (likely(err == 1))
		err = vrf_output6_direct(net, sk, skb);

	/* reinject the packet only if it was not consumed along the way */
	if (likely(err == 1))
		return skb;

	return NULL;
}
774a9ec54d1SDavid Ahern
/* divert locally generated IPv6 traffic through the VRF device so that
 * qdisc, netfilter and packet taps on the VRF see it.
 */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	/* don't divert link scope packets */
	if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
		return skb;

	vrf_nf_set_untracked(skb);

	/* with the default qdisc (or xfrm-transformed packets) take the
	 * direct path; otherwise redirect via the VRF dst
	 */
	if (qdisc_tx_is_default(vrf_dev) ||
	    IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
		return vrf_ip6_out_direct(vrf_dev, sk, skb);

	return vrf_ip6_out_redirect(vrf_dev, skb);
}
791a9ec54d1SDavid Ahern
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
	struct net *net = dev_net(dev);
	struct dst_entry *dst;

	RCU_INIT_POINTER(vrf->rt6, NULL);
	/* wait for all rcu readers of vrf->rt6 to finish before the dst's
	 * device reference is handed to loopback below
	 */
	synchronize_rcu();

	/* move dev in dst's to loopback so this VRF device can be deleted
	 * - based on dst_ifdown
	 */
	if (rt6) {
		dst = &rt6->dst;
		netdev_ref_replace(dst->dev, net->loopback_dev,
				   &dst->dev_tracker, GFP_KERNEL);
		dst->dev = net->loopback_dev;
		dst_release(dst);
	}
}
81335402e31SDavid Ahern
/* allocate the IPv6 FIB table and output dst for this VRF device.
 * Returns 0 on success (including when IPv6 is disabled at runtime),
 * -ENOMEM otherwise.
 */
static int vrf_rt6_create(struct net_device *dev)
{
	int flags = DST_NOPOLICY | DST_NOXFRM;
	struct net_vrf *vrf = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct rt6_info *rt6;
	int rc = -ENOMEM;

	/* IPv6 can be CONFIG enabled and then disabled runtime */
	if (!ipv6_mod_enabled())
		return 0;

	vrf->fib6_table = fib6_new_table(net, vrf->tb_id);
	if (!vrf->fib6_table)
		goto out;

	/* create a dst for routing packets out a VRF device */
	rt6 = ip6_dst_alloc(net, dev, flags);
	if (!rt6)
		goto out;

	rt6->dst.output = vrf_output6;

	/* publish only after the dst is fully initialized */
	rcu_assign_pointer(vrf->rt6, rt6);

	rc = 0;
out:
	return rc;
}
84335402e31SDavid Ahern #else
/* IPv6 compiled out: nothing to divert, hand the skb straight back */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	return skb;
}
8504c1feac5SDavid Ahern
/* IPv6 compiled out: no rt6 dst exists, so nothing to release */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
85435402e31SDavid Ahern
/* IPv6 compiled out: nothing to create, always succeeds */
static int vrf_rt6_create(struct net_device *dev)
{
	return 0;
}
85935402e31SDavid Ahern #endif
86035402e31SDavid Ahern
8618f58336dSDavid Ahern /* modelled after ip_finish_output2 */
vrf_finish_output(struct net * net,struct sock * sk,struct sk_buff * skb)8620c4b51f0SEric W. Biederman static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
863193125dbSDavid Ahern {
8648f58336dSDavid Ahern struct dst_entry *dst = skb_dst(skb);
8658f58336dSDavid Ahern struct rtable *rt = (struct rtable *)dst;
8668f58336dSDavid Ahern struct net_device *dev = dst->dev;
8678f58336dSDavid Ahern unsigned int hh_len = LL_RESERVED_SPACE(dev);
8688f58336dSDavid Ahern struct neighbour *neigh;
8695c9f7c1dSDavid Ahern bool is_v6gw = false;
8708f58336dSDavid Ahern
8718c9c296aSFlorian Westphal vrf_nf_reset_ct(skb);
872eb63ecc1SDavid Ahern
8738f58336dSDavid Ahern /* Be paranoid, rather than too clever. */
8748f58336dSDavid Ahern if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
87514ee70caSVasily Averin skb = skb_expand_head(skb, hh_len);
87614ee70caSVasily Averin if (!skb) {
87706669e68SDan Carpenter dev->stats.tx_errors++;
87814ee70caSVasily Averin return -ENOMEM;
8798f58336dSDavid Ahern }
8808f58336dSDavid Ahern }
8818f58336dSDavid Ahern
8822033ab90SIdo Schimmel rcu_read_lock();
8838f58336dSDavid Ahern
8845c9f7c1dSDavid Ahern neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
8854ff06203SJulian Anastasov if (!IS_ERR(neigh)) {
88614ee70caSVasily Averin int ret;
88714ee70caSVasily Averin
8884ff06203SJulian Anastasov sock_confirm_neigh(skb, neigh);
8895c9f7c1dSDavid Ahern /* if crossing protocols, can not use the cached header */
8905c9f7c1dSDavid Ahern ret = neigh_output(neigh, skb, is_v6gw);
8912033ab90SIdo Schimmel rcu_read_unlock();
89282dd0d2aSDavid Ahern return ret;
8934ff06203SJulian Anastasov }
8948f58336dSDavid Ahern
8952033ab90SIdo Schimmel rcu_read_unlock();
8968f58336dSDavid Ahern vrf_tx_error(skb->dev, skb);
89714ee70caSVasily Averin return -EINVAL;
898193125dbSDavid Ahern }
899193125dbSDavid Ahern
vrf_output(struct net * net,struct sock * sk,struct sk_buff * skb)900ede2059dSEric W. Biederman static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
901193125dbSDavid Ahern {
902193125dbSDavid Ahern struct net_device *dev = skb_dst(skb)->dev;
903193125dbSDavid Ahern
90429a26a56SEric W. Biederman IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
905193125dbSDavid Ahern
906193125dbSDavid Ahern skb->dev = dev;
907193125dbSDavid Ahern skb->protocol = htons(ETH_P_IP);
908193125dbSDavid Ahern
90929a26a56SEric W. Biederman return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
91029a26a56SEric W. Biederman net, sk, skb, NULL, dev,
9118f58336dSDavid Ahern vrf_finish_output,
912193125dbSDavid Ahern !(IPCB(skb)->flags & IPSKB_REROUTED));
913193125dbSDavid Ahern }
914193125dbSDavid Ahern
/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 */
static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
					   struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rtable *rth;

	rcu_read_lock();

	rth = rcu_dereference(vrf->rth);
	if (likely(rth)) {
		dst = &rth->dst;
		/* take the reference while still inside the rcu read side
		 * so the dst cannot be released underneath us
		 */
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* vrf->rth is NULL while the device is being torn down */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}
946ebfc102cSDavid Ahern
/* okfn for the POST_ROUTING hook in vrf_output_direct: finish the
 * direct tx work on the VRF device, then re-enter the IPv4 local-out
 * path for real transmission.
 */
static int vrf_output_direct_finish(struct net *net, struct sock *sk,
				    struct sk_buff *skb)
{
	int ret;

	vrf_finish_direct(skb);
	ret = vrf_ip_local_out(net, sk, skb);

	return ret;
}
9549e2b7fa2SMartin Willi
/* direct tx variant of vrf_output: run POST_ROUTING by hand so the
 * caller can learn whether the hook consumed the skb (return != 1).
 */
static int vrf_output_direct(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	int err = 1;

	skb->protocol = htons(ETH_P_IP);

	if (!(IPCB(skb)->flags & IPSKB_REROUTED))
		err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
			      NULL, skb->dev, vrf_output_direct_finish);

	/* err == 1 means the hook chain neither stole nor queued the skb */
	if (likely(err == 1))
		vrf_finish_direct(skb);

	return err;
}
9719e2b7fa2SMartin Willi
/* okfn for the LOCAL_OUT hook in vrf_ip_out_direct: continue with the
 * direct output path and, if the skb survives, the IPv4 local-out path.
 */
static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
				    struct sk_buff *skb)
{
	int ret = vrf_output_direct(net, sk, skb);

	/* a return of 1 means the skb was not consumed by the hooks */
	if (likely(ret == 1))
		ret = vrf_ip_local_out(net, sk, skb);

	return ret;
}
983dcdd43c4SDavid Ahern
/* send the packet straight out the VRF device: run LOCAL_OUT and the
 * direct output path by hand. Returns the skb when it is still owned by
 * the caller, NULL when a hook consumed it.
 */
static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
					 struct sock *sk,
					 struct sk_buff *skb)
{
	struct net *net = dev_net(vrf_dev);
	int err;

	skb->dev = vrf_dev;

	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
		      skb, NULL, vrf_dev, vrf_ip_out_direct_finish);

	if (likely(err == 1))
		err = vrf_output_direct(net, sk, skb);

	/* reinject the packet only if it was not consumed along the way */
	if (likely(err == 1))
		return skb;

	return NULL;
}
1004dcdd43c4SDavid Ahern
/* divert locally generated IPv4 traffic through the VRF device so that
 * qdisc, netfilter and packet taps on the VRF see it.
 */
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
				  struct sock *sk,
				  struct sk_buff *skb)
{
	/* don't divert multicast or local broadcast */
	if (ipv4_is_multicast(ip_hdr(skb)->daddr) ||
	    ipv4_is_lbcast(ip_hdr(skb)->daddr))
		return skb;

	vrf_nf_set_untracked(skb);

	/* with the default qdisc (or xfrm-transformed packets) take the
	 * direct path; otherwise redirect via the VRF dst
	 */
	if (qdisc_tx_is_default(vrf_dev) ||
	    IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
		return vrf_ip_out_direct(vrf_dev, sk, skb);

	return vrf_ip_out_redirect(vrf_dev, skb);
}
1022dcdd43c4SDavid Ahern
1023ebfc102cSDavid Ahern /* called with rcu lock held */
vrf_l3_out(struct net_device * vrf_dev,struct sock * sk,struct sk_buff * skb,u16 proto)1024ebfc102cSDavid Ahern static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
1025ebfc102cSDavid Ahern struct sock *sk,
1026ebfc102cSDavid Ahern struct sk_buff *skb,
1027ebfc102cSDavid Ahern u16 proto)
1028ebfc102cSDavid Ahern {
1029ebfc102cSDavid Ahern switch (proto) {
1030ebfc102cSDavid Ahern case AF_INET:
1031ebfc102cSDavid Ahern return vrf_ip_out(vrf_dev, sk, skb);
10324c1feac5SDavid Ahern case AF_INET6:
10334c1feac5SDavid Ahern return vrf_ip6_out(vrf_dev, sk, skb);
1034ebfc102cSDavid Ahern }
1035ebfc102cSDavid Ahern
1036ebfc102cSDavid Ahern return skb;
1037ebfc102cSDavid Ahern }
1038ebfc102cSDavid Ahern
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rtable *rth = rtnl_dereference(vrf->rth);
	struct net *net = dev_net(dev);
	struct dst_entry *dst;

	RCU_INIT_POINTER(vrf->rth, NULL);
	/* wait for all rcu readers of vrf->rth to finish before the dst's
	 * device reference is handed to loopback below
	 */
	synchronize_rcu();

	/* move dev in dst's to loopback so this VRF device can be deleted
	 * - based on dst_ifdown
	 */
	if (rth) {
		dst = &rth->dst;
		netdev_ref_replace(dst->dev, net->loopback_dev,
				   &dst->dev_tracker, GFP_KERNEL);
		dst->dev = net->loopback_dev;
		dst_release(dst);
	}
}
1060193125dbSDavid Ahern
/* allocate the IPv4 FIB table and output dst for this VRF device.
 * Returns 0 on success, -ENOMEM otherwise.
 */
static int vrf_rtable_create(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);
	struct rtable *rth;

	if (!fib_new_table(dev_net(dev), vrf->tb_id))
		return -ENOMEM;

	/* create a dst for routing packets out through a VRF device */
	rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1);
	if (!rth)
		return -ENOMEM;

	rth->dst.output = vrf_output;

	/* publish only after the dst is fully initialized */
	rcu_assign_pointer(vrf->rth, rth);

	return 0;
}
1080193125dbSDavid Ahern
1081193125dbSDavid Ahern /**************************** device handling ********************/
1082193125dbSDavid Ahern
1083193125dbSDavid Ahern /* cycle interface to flush neighbor cache and move routes across tables */
cycle_netdev(struct net_device * dev,struct netlink_ext_ack * extack)1084dc1aea1eSPetr Machata static void cycle_netdev(struct net_device *dev,
1085dc1aea1eSPetr Machata struct netlink_ext_ack *extack)
1086193125dbSDavid Ahern {
1087193125dbSDavid Ahern unsigned int flags = dev->flags;
1088193125dbSDavid Ahern int ret;
1089193125dbSDavid Ahern
1090193125dbSDavid Ahern if (!netif_running(dev))
1091193125dbSDavid Ahern return;
1092193125dbSDavid Ahern
1093567c5e13SPetr Machata ret = dev_change_flags(dev, flags & ~IFF_UP, extack);
1094193125dbSDavid Ahern if (ret >= 0)
1095567c5e13SPetr Machata ret = dev_change_flags(dev, flags, extack);
1096193125dbSDavid Ahern
1097193125dbSDavid Ahern if (ret < 0) {
1098193125dbSDavid Ahern netdev_err(dev,
1099193125dbSDavid Ahern "Failed to cycle device %s; route tables might be wrong!\n",
1100193125dbSDavid Ahern dev->name);
1101193125dbSDavid Ahern }
1102193125dbSDavid Ahern }
1103193125dbSDavid Ahern
/* enslave port_dev to the VRF device dev; called under rtnl.
 * Returns 0 on success or a negative errno.
 */
static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
			    struct netlink_ext_ack *extack)
{
	int ret;

	/* do not allow loopback device to be enslaved to a VRF.
	 * The vrf device acts as the loopback for the vrf.
	 */
	if (port_dev == dev_net(dev)->loopback_dev) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave loopback device to a VRF");
		return -EOPNOTSUPP;
	}

	/* the slave flag is set before linking and rolled back on failure */
	port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
	ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack);
	if (ret < 0)
		goto err;

	/* flush neighbor cache and move routes into the VRF table */
	cycle_netdev(port_dev, extack);

	return 0;

err:
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
	return ret;
}
1131193125dbSDavid Ahern
/* ndo_add_slave handler: validate port_dev, then enslave it to dev */
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
			 struct netlink_ext_ack *extack)
{
	/* nesting an L3 master under a VRF is not supported */
	if (netif_is_l3_master(port_dev)) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave an L3 master device to a VRF");
		return -EINVAL;
	}

	/* a device can be a slave of at most one L3 master */
	if (netif_is_l3_slave(port_dev))
		return -EINVAL;

	return do_vrf_add_slave(dev, port_dev, extack);
}
1146193125dbSDavid Ahern
/* inverse of do_vrf_add_slave */
static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	netdev_upper_dev_unlink(port_dev, dev);
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;

	/* flush neighbor cache and move routes back to the main table */
	cycle_netdev(port_dev, NULL);

	return 0;
}
1157193125dbSDavid Ahern
/* ndo_del_slave handler: release port_dev from this VRF */
static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	return do_vrf_del_slave(dev, port_dev);
}
1162193125dbSDavid Ahern
/* ndo_uninit: drop the IPv4 and IPv6 dsts created by vrf_dev_init */
static void vrf_dev_uninit(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	vrf_rtable_release(dev, vrf);
	vrf_rt6_release(dev, vrf);
}
1170193125dbSDavid Ahern
/* ndo_init: allocate the per-VRF IPv4 and IPv6 dsts and set device
 * flags. Returns 0 on success, -ENOMEM on allocation failure.
 */
static int vrf_dev_init(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	/* create the default dst which points back to us */
	if (vrf_rtable_create(dev) != 0)
		goto out_nomem;

	if (vrf_rt6_create(dev) != 0)
		goto out_rth;

	dev->flags = IFF_MASTER | IFF_NOARP;

	/* similarly, oper state is irrelevant; set to up to avoid confusion */
	dev->operstate = IF_OPER_UP;
	netdev_lockdep_set_classes(dev);
	return 0;

out_rth:
	vrf_rtable_release(dev, vrf);
out_nomem:
	return -ENOMEM;
}
1194193125dbSDavid Ahern
/* netdev ops for the VRF master device */
static const struct net_device_ops vrf_netdev_ops = {
	.ndo_init		= vrf_dev_init,
	.ndo_uninit		= vrf_dev_uninit,
	.ndo_start_xmit		= vrf_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_get_stats64	= vrf_get_stats64,
	.ndo_add_slave		= vrf_add_slave,
	.ndo_del_slave		= vrf_del_slave,
};
1204193125dbSDavid Ahern
vrf_fib_table(const struct net_device * dev)1205ee15ee5dSDavid Ahern static u32 vrf_fib_table(const struct net_device *dev)
1206ee15ee5dSDavid Ahern {
1207ee15ee5dSDavid Ahern struct net_vrf *vrf = netdev_priv(dev);
1208ee15ee5dSDavid Ahern
1209ee15ee5dSDavid Ahern return vrf->tb_id;
1210ee15ee5dSDavid Ahern }
1211ee15ee5dSDavid Ahern
/* okfn for vrf_rcv_nfhook: the skb is simply freed here.
 * NOTE(review): presumably this is reached for skbs reinjected after
 * queueing, where the original receive path has already moved on —
 * confirm against the nf_hook callers.
 */
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
121773e20b76SDavid Ahern
/* run the given netfilter hook for a packet received on the VRF device.
 * Returns the skb when the caller still owns it, NULL when the hook
 * chain consumed it.
 */
static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
				      struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);

	/* a verdict of 1 means continue processing; for anything else the
	 * kfree_skb(skb) is handled by the nf code
	 */
	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) == 1)
		return skb;

	return NULL;
}
122973e20b76SDavid Ahern
/* build a synthetic ethernet header (src = dst = VRF mac) in front of
 * the network header so taps can parse the packet. Returns 0 on
 * success, -ENOBUFS when headroom cannot be grown.
 */
static int vrf_prepare_mac_header(struct sk_buff *skb,
				  struct net_device *vrf_dev, u16 proto)
{
	struct ethhdr *eth;
	int err;

	/* in general, we do not know if there is enough space in the head of
	 * the packet for hosting the mac header.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev));
	if (unlikely(err))
		/* no space in the skb head */
		return -ENOBUFS;

	__skb_push(skb, ETH_HLEN);
	eth = (struct ethhdr *)skb->data;

	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	/* we set the ethernet destination and the source addresses to the
	 * address of the VRF device.
	 */
	ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);
	ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
	eth->h_proto = htons(proto);

	/* the destination address of the Ethernet frame corresponds to the
	 * address set on the VRF interface; therefore, the packet is intended
	 * to be processed locally.
	 */
	skb->protocol = eth->h_proto;
	skb->pkt_type = PACKET_HOST;

	/* account for the pushed header in the checksum */
	skb_postpush_rcsum(skb, skb->data, ETH_HLEN);

	/* leave skb->data at the network header again */
	skb_pull_inline(skb, ETH_HLEN);

	return 0;
}
12709125abe7SAndrea Mayer
12719125abe7SAndrea Mayer /* prepare and add the mac header to the packet if it was not set previously.
12729125abe7SAndrea Mayer * In this way, packet sniffers such as tcpdump can parse the packet correctly.
12739125abe7SAndrea Mayer * If the mac header was already set, the original mac header is left
12749125abe7SAndrea Mayer * untouched and the function returns immediately.
12759125abe7SAndrea Mayer */
vrf_add_mac_header_if_unset(struct sk_buff * skb,struct net_device * vrf_dev,u16 proto,struct net_device * orig_dev)12769125abe7SAndrea Mayer static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
12779125abe7SAndrea Mayer struct net_device *vrf_dev,
1278012d69fbSEyal Birger u16 proto, struct net_device *orig_dev)
12799125abe7SAndrea Mayer {
1280012d69fbSEyal Birger if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev))
12819125abe7SAndrea Mayer return 0;
12829125abe7SAndrea Mayer
12839125abe7SAndrea Mayer return vrf_prepare_mac_header(skb, vrf_dev, proto);
12849125abe7SAndrea Mayer }
12859125abe7SAndrea Mayer
128635402e31SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
128774b20582SDavid Ahern /* neighbor handling is done with actual device; do not want
128874b20582SDavid Ahern * to flip skb->dev for those ndisc packets. This really fails
128974b20582SDavid Ahern * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
129074b20582SDavid Ahern * a start.
129174b20582SDavid Ahern */
ipv6_ndisc_frame(const struct sk_buff * skb)129274b20582SDavid Ahern static bool ipv6_ndisc_frame(const struct sk_buff *skb)
129374b20582SDavid Ahern {
129474b20582SDavid Ahern const struct ipv6hdr *iph = ipv6_hdr(skb);
129574b20582SDavid Ahern bool rc = false;
129674b20582SDavid Ahern
129774b20582SDavid Ahern if (iph->nexthdr == NEXTHDR_ICMP) {
129874b20582SDavid Ahern const struct icmp6hdr *icmph;
129974b20582SDavid Ahern struct icmp6hdr _icmph;
130074b20582SDavid Ahern
130174b20582SDavid Ahern icmph = skb_header_pointer(skb, sizeof(*iph),
130274b20582SDavid Ahern sizeof(_icmph), &_icmph);
130374b20582SDavid Ahern if (!icmph)
130474b20582SDavid Ahern goto out;
130574b20582SDavid Ahern
130674b20582SDavid Ahern switch (icmph->icmp6_type) {
130774b20582SDavid Ahern case NDISC_ROUTER_SOLICITATION:
130874b20582SDavid Ahern case NDISC_ROUTER_ADVERTISEMENT:
130974b20582SDavid Ahern case NDISC_NEIGHBOUR_SOLICITATION:
131074b20582SDavid Ahern case NDISC_NEIGHBOUR_ADVERTISEMENT:
131174b20582SDavid Ahern case NDISC_REDIRECT:
131274b20582SDavid Ahern rc = true;
131374b20582SDavid Ahern break;
131474b20582SDavid Ahern }
131574b20582SDavid Ahern }
131674b20582SDavid Ahern
131774b20582SDavid Ahern out:
131874b20582SDavid Ahern return rc;
131974b20582SDavid Ahern }
132074b20582SDavid Ahern
vrf_ip6_route_lookup(struct net * net,const struct net_device * dev,struct flowi6 * fl6,int ifindex,const struct sk_buff * skb,int flags)13219ff74384SDavid Ahern static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
13229ff74384SDavid Ahern const struct net_device *dev,
13239ff74384SDavid Ahern struct flowi6 *fl6,
13249ff74384SDavid Ahern int ifindex,
1325b75cc8f9SDavid Ahern const struct sk_buff *skb,
13269ff74384SDavid Ahern int flags)
13279ff74384SDavid Ahern {
13289ff74384SDavid Ahern struct net_vrf *vrf = netdev_priv(dev);
13299ff74384SDavid Ahern
133043b059a3SDavid Ahern return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
13319ff74384SDavid Ahern }
13329ff74384SDavid Ahern
vrf_ip6_input_dst(struct sk_buff * skb,struct net_device * vrf_dev,int ifindex)13339ff74384SDavid Ahern static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
13349ff74384SDavid Ahern int ifindex)
13359ff74384SDavid Ahern {
13369ff74384SDavid Ahern const struct ipv6hdr *iph = ipv6_hdr(skb);
13379ff74384SDavid Ahern struct flowi6 fl6 = {
1338ecf09117SArnd Bergmann .flowi6_iif = ifindex,
1339ecf09117SArnd Bergmann .flowi6_mark = skb->mark,
1340ecf09117SArnd Bergmann .flowi6_proto = iph->nexthdr,
13419ff74384SDavid Ahern .daddr = iph->daddr,
13429ff74384SDavid Ahern .saddr = iph->saddr,
13439ff74384SDavid Ahern .flowlabel = ip6_flowinfo(iph),
13449ff74384SDavid Ahern };
13459ff74384SDavid Ahern struct net *net = dev_net(vrf_dev);
13469ff74384SDavid Ahern struct rt6_info *rt6;
13479ff74384SDavid Ahern
1348b75cc8f9SDavid Ahern rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
13499ff74384SDavid Ahern RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
13509ff74384SDavid Ahern if (unlikely(!rt6))
13519ff74384SDavid Ahern return;
13529ff74384SDavid Ahern
13539ff74384SDavid Ahern if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst))
13549ff74384SDavid Ahern return;
13559ff74384SDavid Ahern
13569ff74384SDavid Ahern skb_dst_set(skb, &rt6->dst);
13579ff74384SDavid Ahern }
13589ff74384SDavid Ahern
/* Switch an IPv6 skb received on an enslaved device over to the VRF
 * device so sockets bound to the VRF see it. Loopback and strict
 * (link-local/multicast) non-ND traffic skips the packet taps; ndisc
 * frames keep their original ingress device for neighbor resolution.
 */
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	int orig_iif = skb->skb_iif;
	bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
	bool is_ndisc = ipv6_ndisc_frame(skb);

	/* loopback, multicast & non-ND link-local traffic; do not push through
	 * packet taps again. Reset pkt_type for upper layers to process skb.
	 * For non-loopback strict packets, determine the dst using the original
	 * ifindex.
	 */
	if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;
		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;

		if (skb->pkt_type == PACKET_LOOPBACK)
			skb->pkt_type = PACKET_HOST;
		else
			vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

		goto out;
	}

	/* if packet is NDISC then keep the ingress interface */
	if (!is_ndisc) {
		struct net_device *orig_dev = skb->dev;

		vrf_rx_stats(vrf_dev, skb->len);
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;

		/* feed a copy to taps (e.g. tcpdump) listening on the VRF;
		 * a mac header is synthesized first if the skb lacks one
		 */
		if (!list_empty(&vrf_dev->ptype_all)) {
			int err;

			err = vrf_add_mac_header_if_unset(skb, vrf_dev,
							  ETH_P_IPV6,
							  orig_dev);
			if (likely(!err)) {
				skb_push(skb, skb->mac_len);
				dev_queue_xmit_nit(skb, vrf_dev);
				skb_pull(skb, skb->mac_len);
			}
		}

		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
	}

	/* strict ndisc packets still need a dst from the original ifindex */
	if (need_strict)
		vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

	skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}
141574b20582SDavid Ahern
141674b20582SDavid Ahern #else
/* IPv6 disabled: the rcv path is a pass-through no-op */
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	return skb;
}
142274b20582SDavid Ahern #endif
142374b20582SDavid Ahern
/* Switch an IPv4 skb received on an enslaved device over to the VRF
 * device, mirror it to taps listening on the VRF, and run it through
 * the VRF PRE_ROUTING netfilter hook. Multicast and loopback traffic
 * is not pushed through the taps again.
 */
static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb)
{
	struct net_device *orig_dev = skb->dev;

	/* re-home the skb on the VRF and mark it as l3mdev-slave traffic */
	skb->dev = vrf_dev;
	skb->skb_iif = vrf_dev->ifindex;
	IPCB(skb)->flags |= IPSKB_L3SLAVE;

	if (ipv4_is_multicast(ip_hdr(skb)->daddr))
		goto out;

	/* loopback traffic; do not push through packet taps again.
	 * Reset pkt_type for upper layers to process skb
	 */
	if (skb->pkt_type == PACKET_LOOPBACK) {
		skb->pkt_type = PACKET_HOST;
		goto out;
	}

	vrf_rx_stats(vrf_dev, skb->len);

	/* feed a copy to taps (e.g. tcpdump) listening on the VRF device;
	 * a mac header is synthesized first if the skb lacks one
	 */
	if (!list_empty(&vrf_dev->ptype_all)) {
		int err;

		err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP,
						  orig_dev);
		if (likely(!err)) {
			skb_push(skb, skb->mac_len);
			dev_queue_xmit_nit(skb, vrf_dev);
			skb_pull(skb, skb->mac_len);
		}
	}

	skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}
146274b20582SDavid Ahern
146374b20582SDavid Ahern /* called with rcu lock held */
static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb,
				  u16 proto)
{
	/* dispatch to the per-family receive handler; any other family
	 * passes through untouched
	 */
	if (proto == AF_INET)
		return vrf_ip_rcv(vrf_dev, skb);
	if (proto == AF_INET6)
		return vrf_ip6_rcv(vrf_dev, skb);

	return skb;
}
147774b20582SDavid Ahern
147874b20582SDavid Ahern #if IS_ENABLED(CONFIG_IPV6)
14794c1feac5SDavid Ahern /* send to link-local or multicast address via interface enslaved to
14804c1feac5SDavid Ahern * VRF device. Force lookup to VRF table without changing flow struct
14817d9e5f42SWei Wang * Note: Caller to this function must hold rcu_read_lock() and no refcnt
14827d9e5f42SWei Wang * is taken on the dst by this function.
14834c1feac5SDavid Ahern */
vrf_link_scope_lookup(const struct net_device * dev,struct flowi6 * fl6)14844c1feac5SDavid Ahern static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
1485cd2a9e62SDavid Ahern struct flowi6 *fl6)
148635402e31SDavid Ahern {
14879ff74384SDavid Ahern struct net *net = dev_net(dev);
14887d9e5f42SWei Wang int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
14899ff74384SDavid Ahern struct dst_entry *dst = NULL;
1490b0e95ccdSDavid Ahern struct rt6_info *rt;
149135402e31SDavid Ahern
14929ff74384SDavid Ahern /* VRF device does not have a link-local address and
14939ff74384SDavid Ahern * sending packets to link-local or mcast addresses over
14949ff74384SDavid Ahern * a VRF device does not make sense
14959ff74384SDavid Ahern */
14969ff74384SDavid Ahern if (fl6->flowi6_oif == dev->ifindex) {
14974c1feac5SDavid Ahern dst = &net->ipv6.ip6_null_entry->dst;
14989ff74384SDavid Ahern return dst;
14999ff74384SDavid Ahern }
15009ff74384SDavid Ahern
15019ff74384SDavid Ahern if (!ipv6_addr_any(&fl6->saddr))
15029ff74384SDavid Ahern flags |= RT6_LOOKUP_F_HAS_SADDR;
15039ff74384SDavid Ahern
1504b75cc8f9SDavid Ahern rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
15059ff74384SDavid Ahern if (rt)
15069ff74384SDavid Ahern dst = &rt->dst;
15079ff74384SDavid Ahern
1508b0e95ccdSDavid Ahern return dst;
150935402e31SDavid Ahern }
151035402e31SDavid Ahern #endif
151135402e31SDavid Ahern
/* l3mdev hooks: FIB table selection plus rx/tx packet redirection */
static const struct l3mdev_ops vrf_l3mdev_ops = {
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
	.l3mdev_l3_out		= vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
#endif
};
1520ee15ee5dSDavid Ahern
vrf_get_drvinfo(struct net_device * dev,struct ethtool_drvinfo * info)1521193125dbSDavid Ahern static void vrf_get_drvinfo(struct net_device *dev,
1522193125dbSDavid Ahern struct ethtool_drvinfo *info)
1523193125dbSDavid Ahern {
1524fb3ceec1SWolfram Sang strscpy(info->driver, DRV_NAME, sizeof(info->driver));
1525fb3ceec1SWolfram Sang strscpy(info->version, DRV_VERSION, sizeof(info->version));
1526193125dbSDavid Ahern }
1527193125dbSDavid Ahern
/* ethtool support is limited to driver info */
static const struct ethtool_ops vrf_ethtool_ops = {
	.get_drvinfo	= vrf_get_drvinfo,
};
1531193125dbSDavid Ahern
vrf_fib_rule_nl_size(void)15321aa6c4f6SDavid Ahern static inline size_t vrf_fib_rule_nl_size(void)
15331aa6c4f6SDavid Ahern {
15341aa6c4f6SDavid Ahern size_t sz;
15351aa6c4f6SDavid Ahern
15361aa6c4f6SDavid Ahern sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
15371aa6c4f6SDavid Ahern sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */
15381aa6c4f6SDavid Ahern sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */
15391b71af60SDonald Sharp sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */
15401aa6c4f6SDavid Ahern
15411aa6c4f6SDavid Ahern return sz;
15421aa6c4f6SDavid Ahern }
15431aa6c4f6SDavid Ahern
/* Install (add_it) or remove the l3mdev FIB rule for one address family
 * by building an in-kernel netlink message and feeding it to the
 * fib_rules code. Returns 0 on success — including the already-exists /
 * already-gone cases — or a negative errno.
 */
static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
{
	struct fib_rule_hdr *frh;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	int err;

	/* IPv6 families are a no-op when the ipv6 module is disabled */
	if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
	    !ipv6_mod_enabled())
		return 0;

	skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0);
	if (!nlh)
		goto nla_put_failure;

	/* rule only needs to appear once */
	nlh->nlmsg_flags |= NLM_F_EXCL;

	frh = nlmsg_data(nlh);
	memset(frh, 0, sizeof(*frh));
	frh->family = family;
	frh->action = FR_ACT_TO_TBL;

	if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
		goto nla_put_failure;

	if (nla_put_u8(skb, FRA_L3MDEV, 1))
		goto nla_put_failure;

	if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	/* fib_nl_{new,del}rule handling looks for net from skb->sk */
	skb->sk = dev_net(dev)->rtnl;
	if (add_it) {
		err = fib_nl_newrule(skb, nlh, NULL);
		if (err == -EEXIST)
			err = 0;
	} else {
		err = fib_nl_delrule(skb, nlh, NULL);
		if (err == -ENOENT)
			err = 0;
	}
	nlmsg_free(skb);

	return err;

nla_put_failure:
	nlmsg_free(skb);

	return -EMSGSIZE;
}
16021aa6c4f6SDavid Ahern
/* Install the l3mdev FIB rules for every supported family; on failure,
 * unwind the rules added so far in reverse order (removal of a rule
 * that was never added is harmless, vrf_fib_rule maps -ENOENT to 0).
 */
static int vrf_add_fib_rules(const struct net_device *dev)
{
	int err;

	err = vrf_fib_rule(dev, AF_INET, true);
	if (err < 0)
		goto out_err;

	err = vrf_fib_rule(dev, AF_INET6, true);
	if (err < 0)
		goto ipv6_err;

#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
	err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true);
	if (err < 0)
		goto ipmr_err;
#endif

#if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES)
	err = vrf_fib_rule(dev, RTNL_FAMILY_IP6MR, true);
	if (err < 0)
		goto ip6mr_err;
#endif

	return 0;

#if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES)
ip6mr_err:
	vrf_fib_rule(dev, RTNL_FAMILY_IPMR, false);
#endif

#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
ipmr_err:
	vrf_fib_rule(dev, AF_INET6, false);
#endif

ipv6_err:
	vrf_fib_rule(dev, AF_INET, false);

out_err:
	netdev_err(dev, "Failed to add FIB rules.\n");
	return err;
}
16461aa6c4f6SDavid Ahern
vrf_setup(struct net_device * dev)1647193125dbSDavid Ahern static void vrf_setup(struct net_device *dev)
1648193125dbSDavid Ahern {
1649193125dbSDavid Ahern ether_setup(dev);
1650193125dbSDavid Ahern
1651193125dbSDavid Ahern /* Initialize the device structure. */
1652193125dbSDavid Ahern dev->netdev_ops = &vrf_netdev_ops;
1653ee15ee5dSDavid Ahern dev->l3mdev_ops = &vrf_l3mdev_ops;
1654193125dbSDavid Ahern dev->ethtool_ops = &vrf_ethtool_ops;
1655cf124db5SDavid S. Miller dev->needs_free_netdev = true;
1656193125dbSDavid Ahern
1657193125dbSDavid Ahern /* Fill in device structure with ethernet-generic values. */
1658193125dbSDavid Ahern eth_hw_addr_random(dev);
1659193125dbSDavid Ahern
1660193125dbSDavid Ahern /* don't acquire vrf device's netif_tx_lock when transmitting */
1661193125dbSDavid Ahern dev->features |= NETIF_F_LLTX;
1662193125dbSDavid Ahern
1663193125dbSDavid Ahern /* don't allow vrf devices to change network namespaces. */
1664193125dbSDavid Ahern dev->features |= NETIF_F_NETNS_LOCAL;
16657889681fSDavid Ahern
16667889681fSDavid Ahern /* does not make sense for a VLAN to be added to a vrf device */
16677889681fSDavid Ahern dev->features |= NETIF_F_VLAN_CHALLENGED;
16687889681fSDavid Ahern
16697889681fSDavid Ahern /* enable offload features */
16707889681fSDavid Ahern dev->features |= NETIF_F_GSO_SOFTWARE;
1671cb160394SDavide Caratti dev->features |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC;
16727889681fSDavid Ahern dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;
16737889681fSDavid Ahern
16747889681fSDavid Ahern dev->hw_features = dev->features;
16757889681fSDavid Ahern dev->hw_enc_features = dev->features;
16767889681fSDavid Ahern
16777889681fSDavid Ahern /* default to no qdisc; user can add if desired */
16787889681fSDavid Ahern dev->priv_flags |= IFF_NO_QUEUE;
16791017e098SSabrina Dubroca dev->priv_flags |= IFF_NO_RX_HANDLER;
16806819e3f6SMiaohe Lin dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1681ad49bc63SHangbin Liu
16825055376aSMiaohe Lin /* VRF devices do not care about MTU, but if the MTU is set
16835055376aSMiaohe Lin * too low then the ipv4 and ipv6 protocols are disabled
16845055376aSMiaohe Lin * which breaks networking.
16855055376aSMiaohe Lin */
16865055376aSMiaohe Lin dev->min_mtu = IPV6_MIN_MTU;
16879bb392f6SNicolas Dichtel dev->max_mtu = IP6_MAX_MTU;
16889bb392f6SNicolas Dichtel dev->mtu = dev->max_mtu;
16896ae7b3fcSDaniel Borkmann
16906ae7b3fcSDaniel Borkmann dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
1691193125dbSDavid Ahern }
1692193125dbSDavid Ahern
vrf_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1693a8b8a889SMatthias Schiffer static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
1694a8b8a889SMatthias Schiffer struct netlink_ext_ack *extack)
1695193125dbSDavid Ahern {
1696193125dbSDavid Ahern if (tb[IFLA_ADDRESS]) {
169753b94835SDavid Ahern if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
169853b94835SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid hardware address");
1699193125dbSDavid Ahern return -EINVAL;
170053b94835SDavid Ahern }
170153b94835SDavid Ahern if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
170253b94835SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid hardware address");
1703193125dbSDavid Ahern return -EADDRNOTAVAIL;
1704193125dbSDavid Ahern }
170553b94835SDavid Ahern }
1706193125dbSDavid Ahern return 0;
1707193125dbSDavid Ahern }
1708193125dbSDavid Ahern
vrf_dellink(struct net_device * dev,struct list_head * head)1709193125dbSDavid Ahern static void vrf_dellink(struct net_device *dev, struct list_head *head)
1710193125dbSDavid Ahern {
1711f630c38eSNikolay Aleksandrov struct net_device *port_dev;
1712f630c38eSNikolay Aleksandrov struct list_head *iter;
1713f630c38eSNikolay Aleksandrov
1714f630c38eSNikolay Aleksandrov netdev_for_each_lower_dev(dev, port_dev, iter)
1715f630c38eSNikolay Aleksandrov vrf_del_slave(dev, port_dev);
1716f630c38eSNikolay Aleksandrov
1717c8baec38SAndrea Mayer vrf_map_unregister_dev(dev);
1718c8baec38SAndrea Mayer
1719193125dbSDavid Ahern unregister_netdevice_queue(dev, head);
1720193125dbSDavid Ahern }
1721193125dbSDavid Ahern
/* rtnl_link_ops newlink handler: validate the table id, register the
 * netdevice, bind it into the per-netns table_id map, and install the
 * l3mdev FIB rules on first VRF creation in the netns.
 */
static int vrf_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[],
		       struct netlink_ext_ack *extack)
{
	struct net_vrf *vrf = netdev_priv(dev);
	struct netns_vrf *nn_vrf;
	bool *add_fib_rules;
	struct net *net;
	int err;

	/* a VRF is meaningless without a routing table id */
	if (!data || !data[IFLA_VRF_TABLE]) {
		NL_SET_ERR_MSG(extack, "VRF table id is missing");
		return -EINVAL;
	}

	vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
	if (vrf->tb_id == RT_TABLE_UNSPEC) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE],
				    "Invalid VRF table id");
		return -EINVAL;
	}

	dev->priv_flags |= IFF_L3MDEV_MASTER;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* mapping between table_id and vrf;
	 * note: such binding could not be done in the dev init function
	 * because dev->ifindex id is not available yet.
	 */
	vrf->ifindex = dev->ifindex;

	err = vrf_map_register_dev(dev, extack);
	if (err) {
		unregister_netdevice(dev);
		goto out;
	}

	net = dev_net(dev);
	nn_vrf = net_generic(net, vrf_net_id);

	/* the FIB rules are installed only once per netns, when the
	 * first VRF in that netns is created
	 */
	add_fib_rules = &nn_vrf->add_fib_rules;
	if (*add_fib_rules) {
		err = vrf_add_fib_rules(dev);
		if (err) {
			vrf_map_unregister_dev(dev);
			unregister_netdevice(dev);
			goto out;
		}
		*add_fib_rules = false;
	}

out:
	return err;
}
1779193125dbSDavid Ahern
/* netlink attribute space needed when dumping a VRF link */
static size_t vrf_nl_getsize(const struct net_device *dev)
{
	return nla_total_size(sizeof(u32)); /* IFLA_VRF_TABLE */
}
1784193125dbSDavid Ahern
vrf_fillinfo(struct sk_buff * skb,const struct net_device * dev)1785193125dbSDavid Ahern static int vrf_fillinfo(struct sk_buff *skb,
1786193125dbSDavid Ahern const struct net_device *dev)
1787193125dbSDavid Ahern {
1788193125dbSDavid Ahern struct net_vrf *vrf = netdev_priv(dev);
1789193125dbSDavid Ahern
1790193125dbSDavid Ahern return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id);
1791193125dbSDavid Ahern }
1792193125dbSDavid Ahern
/* netlink attribute space needed per enslaved port */
static size_t vrf_get_slave_size(const struct net_device *bond_dev,
				 const struct net_device *slave_dev)
{
	return nla_total_size(sizeof(u32)); /* IFLA_VRF_PORT_TABLE */
}
179867eb0331SDavid Ahern
vrf_fill_slave_info(struct sk_buff * skb,const struct net_device * vrf_dev,const struct net_device * slave_dev)179967eb0331SDavid Ahern static int vrf_fill_slave_info(struct sk_buff *skb,
180067eb0331SDavid Ahern const struct net_device *vrf_dev,
180167eb0331SDavid Ahern const struct net_device *slave_dev)
180267eb0331SDavid Ahern {
180367eb0331SDavid Ahern struct net_vrf *vrf = netdev_priv(vrf_dev);
180467eb0331SDavid Ahern
180567eb0331SDavid Ahern if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id))
180667eb0331SDavid Ahern return -EMSGSIZE;
180767eb0331SDavid Ahern
180867eb0331SDavid Ahern return 0;
180967eb0331SDavid Ahern }
181067eb0331SDavid Ahern
/* netlink policy for the IFLA_VRF_* attributes */
static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = {
	[IFLA_VRF_TABLE] = { .type = NLA_U32 },
};
1814193125dbSDavid Ahern
/* rtnl_link_ops wiring "ip link add ... type vrf table N" to this driver */
static struct rtnl_link_ops vrf_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct net_vrf),

	.get_size	= vrf_nl_getsize,
	.policy		= vrf_nl_policy,
	.validate	= vrf_validate,
	.fill_info	= vrf_fillinfo,

	.get_slave_size	= vrf_get_slave_size,
	.fill_slave_info = vrf_fill_slave_info,

	.newlink	= vrf_newlink,
	.dellink	= vrf_dellink,
	.setup		= vrf_setup,
	.maxtype	= IFLA_VRF_MAX,
};
1832193125dbSDavid Ahern
vrf_device_event(struct notifier_block * unused,unsigned long event,void * ptr)1833193125dbSDavid Ahern static int vrf_device_event(struct notifier_block *unused,
1834193125dbSDavid Ahern unsigned long event, void *ptr)
1835193125dbSDavid Ahern {
1836193125dbSDavid Ahern struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1837193125dbSDavid Ahern
1838193125dbSDavid Ahern /* only care about unregister events to drop slave references */
1839193125dbSDavid Ahern if (event == NETDEV_UNREGISTER) {
1840193125dbSDavid Ahern struct net_device *vrf_dev;
1841193125dbSDavid Ahern
1842fee6d4c7SDavid Ahern if (!netif_is_l3_slave(dev))
1843193125dbSDavid Ahern goto out;
1844193125dbSDavid Ahern
184558aa9087SNikolay Aleksandrov vrf_dev = netdev_master_upper_dev_get(dev);
1846193125dbSDavid Ahern vrf_del_slave(vrf_dev, dev);
1847193125dbSDavid Ahern }
1848193125dbSDavid Ahern out:
1849193125dbSDavid Ahern return NOTIFY_DONE;
1850193125dbSDavid Ahern }
1851193125dbSDavid Ahern
/* netdev notifier used to clean up slave references on unregister */
static struct notifier_block vrf_notifier_block __read_mostly = {
	.notifier_call = vrf_device_event,
};
1855193125dbSDavid Ahern
/* initialize a per-netns table_id -> vrf map; always succeeds */
static int vrf_map_init(struct vrf_map *vmap)
{
	spin_lock_init(&vmap->vmap_lock);
	hash_init(vmap->ht);

	/* strict mode (1:1 vrf/table) is opt-in via sysctl */
	vmap->strict_mode = false;

	return 0;
}
1865c8baec38SAndrea Mayer
18661b6687e3SDavid Ahern #ifdef CONFIG_SYSCTL
vrf_strict_mode(struct vrf_map * vmap)18671b6687e3SDavid Ahern static bool vrf_strict_mode(struct vrf_map *vmap)
18681b6687e3SDavid Ahern {
18691b6687e3SDavid Ahern bool strict_mode;
18701b6687e3SDavid Ahern
18711b6687e3SDavid Ahern vrf_map_lock(vmap);
18721b6687e3SDavid Ahern strict_mode = vmap->strict_mode;
18731b6687e3SDavid Ahern vrf_map_unlock(vmap);
18741b6687e3SDavid Ahern
18751b6687e3SDavid Ahern return strict_mode;
18761b6687e3SDavid Ahern }
18771b6687e3SDavid Ahern
static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
{
	int res = 0;

	vrf_map_lock(vmap);

	/* nothing to do when the requested mode is already active */
	if (vmap->strict_mode == new_mode)
		goto unlock;

	if (!vmap->strict_mode) {
		/* strict mode demands a 1:1 association between a vrf
		 * and its table; it cannot be enabled while any table
		 * is shared by several vrfs.
		 */
		if (vmap->shared_tables) {
			res = -EBUSY;
			goto unlock;
		}

		vmap->strict_mode = true;
	} else {
		/* disabling strict mode is always allowed */
		vmap->strict_mode = false;
	}

unlock:
	vrf_map_unlock(vmap);

	return res;
}
19121b6687e3SDavid Ahern
static int vrf_shared_table_handler(struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->extra1;
	struct vrf_map *vmap = netns_vrf_map(net);
	int strict = 0;
	/* proxy table so proc_dointvec_minmax operates on a plain int
	 * clamped to 0/1 rather than on the locked vrf_map state
	 */
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &strict,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};
	int ret;

	if (!write)
		strict = vrf_strict_mode(vmap);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (!ret && write)
		ret = vrf_strict_mode_change(vmap, (bool)strict);

	return ret;
}
193933306f1aSAndrea Mayer
194033306f1aSAndrea Mayer static const struct ctl_table vrf_table[] = {
194133306f1aSAndrea Mayer {
194233306f1aSAndrea Mayer .procname = "strict_mode",
194333306f1aSAndrea Mayer .data = NULL,
194433306f1aSAndrea Mayer .maxlen = sizeof(int),
194533306f1aSAndrea Mayer .mode = 0644,
194633306f1aSAndrea Mayer .proc_handler = vrf_shared_table_handler,
194733306f1aSAndrea Mayer /* set by the vrf_netns_init */
194833306f1aSAndrea Mayer .extra1 = NULL,
194933306f1aSAndrea Mayer },
195033306f1aSAndrea Mayer { },
195133306f1aSAndrea Mayer };
195233306f1aSAndrea Mayer
vrf_netns_init_sysctl(struct net * net,struct netns_vrf * nn_vrf)19531b6687e3SDavid Ahern static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
1954097d3c95SDavid Ahern {
195533306f1aSAndrea Mayer struct ctl_table *table;
1956097d3c95SDavid Ahern
195733306f1aSAndrea Mayer table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL);
195833306f1aSAndrea Mayer if (!table)
195933306f1aSAndrea Mayer return -ENOMEM;
196033306f1aSAndrea Mayer
196133306f1aSAndrea Mayer /* init the extra1 parameter with the reference to current netns */
196233306f1aSAndrea Mayer table[0].extra1 = net;
196333306f1aSAndrea Mayer
19643ca9aa74SJoel Granados nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table,
19653ca9aa74SJoel Granados ARRAY_SIZE(vrf_table));
196633306f1aSAndrea Mayer if (!nn_vrf->ctl_hdr) {
19671b6687e3SDavid Ahern kfree(table);
19681b6687e3SDavid Ahern return -ENOMEM;
196933306f1aSAndrea Mayer }
197033306f1aSAndrea Mayer
1971097d3c95SDavid Ahern return 0;
197233306f1aSAndrea Mayer }
197333306f1aSAndrea Mayer
vrf_netns_exit_sysctl(struct net * net)19741b6687e3SDavid Ahern static void vrf_netns_exit_sysctl(struct net *net)
197533306f1aSAndrea Mayer {
197633306f1aSAndrea Mayer struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
197733306f1aSAndrea Mayer struct ctl_table *table;
197833306f1aSAndrea Mayer
197933306f1aSAndrea Mayer table = nn_vrf->ctl_hdr->ctl_table_arg;
198033306f1aSAndrea Mayer unregister_net_sysctl_table(nn_vrf->ctl_hdr);
198133306f1aSAndrea Mayer kfree(table);
1982097d3c95SDavid Ahern }
19831b6687e3SDavid Ahern #else
vrf_netns_init_sysctl(struct net * net,struct netns_vrf * nn_vrf)19841b6687e3SDavid Ahern static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
19851b6687e3SDavid Ahern {
19861b6687e3SDavid Ahern return 0;
19871b6687e3SDavid Ahern }
19881b6687e3SDavid Ahern
vrf_netns_exit_sysctl(struct net * net)19891b6687e3SDavid Ahern static void vrf_netns_exit_sysctl(struct net *net)
19901b6687e3SDavid Ahern {
19911b6687e3SDavid Ahern }
19921b6687e3SDavid Ahern #endif
19931b6687e3SDavid Ahern
19941b6687e3SDavid Ahern /* Initialize per network namespace state */
vrf_netns_init(struct net * net)19951b6687e3SDavid Ahern static int __net_init vrf_netns_init(struct net *net)
19961b6687e3SDavid Ahern {
19971b6687e3SDavid Ahern struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
19981b6687e3SDavid Ahern
19991b6687e3SDavid Ahern nn_vrf->add_fib_rules = true;
20001b6687e3SDavid Ahern vrf_map_init(&nn_vrf->vmap);
20011b6687e3SDavid Ahern
20021b6687e3SDavid Ahern return vrf_netns_init_sysctl(net, nn_vrf);
20031b6687e3SDavid Ahern }
20041b6687e3SDavid Ahern
/* Tear down per network namespace state (currently only the sysctls) */
static void __net_exit vrf_netns_exit(struct net *net)
{
	vrf_netns_exit_sysctl(net);
}
2009097d3c95SDavid Ahern
2010097d3c95SDavid Ahern static struct pernet_operations vrf_net_ops __net_initdata = {
2011097d3c95SDavid Ahern .init = vrf_netns_init,
201233306f1aSAndrea Mayer .exit = vrf_netns_exit,
2013097d3c95SDavid Ahern .id = &vrf_net_id,
2014c8baec38SAndrea Mayer .size = sizeof(struct netns_vrf),
2015097d3c95SDavid Ahern };
2016097d3c95SDavid Ahern
vrf_init_module(void)2017193125dbSDavid Ahern static int __init vrf_init_module(void)
2018193125dbSDavid Ahern {
2019193125dbSDavid Ahern int rc;
2020193125dbSDavid Ahern
2021193125dbSDavid Ahern register_netdevice_notifier(&vrf_notifier_block);
2022193125dbSDavid Ahern
2023097d3c95SDavid Ahern rc = register_pernet_subsys(&vrf_net_ops);
2024193125dbSDavid Ahern if (rc < 0)
2025193125dbSDavid Ahern goto error;
2026193125dbSDavid Ahern
2027a59a8ffdSAndrea Mayer rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF,
2028a59a8ffdSAndrea Mayer vrf_ifindex_lookup_by_table_id);
2029a59a8ffdSAndrea Mayer if (rc < 0)
2030a59a8ffdSAndrea Mayer goto unreg_pernet;
2031a59a8ffdSAndrea Mayer
2032097d3c95SDavid Ahern rc = rtnl_link_register(&vrf_link_ops);
2033a59a8ffdSAndrea Mayer if (rc < 0)
2034a59a8ffdSAndrea Mayer goto table_lookup_unreg;
2035097d3c95SDavid Ahern
2036193125dbSDavid Ahern return 0;
2037193125dbSDavid Ahern
2038a59a8ffdSAndrea Mayer table_lookup_unreg:
2039a59a8ffdSAndrea Mayer l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF,
2040a59a8ffdSAndrea Mayer vrf_ifindex_lookup_by_table_id);
2041a59a8ffdSAndrea Mayer
2042a59a8ffdSAndrea Mayer unreg_pernet:
2043a59a8ffdSAndrea Mayer unregister_pernet_subsys(&vrf_net_ops);
2044a59a8ffdSAndrea Mayer
2045193125dbSDavid Ahern error:
2046193125dbSDavid Ahern unregister_netdevice_notifier(&vrf_notifier_block);
2047193125dbSDavid Ahern return rc;
2048193125dbSDavid Ahern }
2049193125dbSDavid Ahern
2050193125dbSDavid Ahern module_init(vrf_init_module);
2051193125dbSDavid Ahern MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern");
2052193125dbSDavid Ahern MODULE_DESCRIPTION("Device driver to instantiate VRF domains");
2053193125dbSDavid Ahern MODULE_LICENSE("GPL");
2054193125dbSDavid Ahern MODULE_ALIAS_RTNL_LINK(DRV_NAME);
2055193125dbSDavid Ahern MODULE_VERSION(DRV_VERSION);
2056