1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Vxlan vni filter for collect metadata mode
4 *
5 * Authors: Roopa Prabhu <roopa@nvidia.com>
6 *
7 */
8
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/etherdevice.h>
12 #include <linux/rhashtable.h>
13 #include <net/rtnetlink.h>
14 #include <net/net_namespace.h>
15 #include <net/sock.h>
16 #include <net/vxlan.h>
17
18 #include "vxlan_private.h"
19
vxlan_vni_cmp(struct rhashtable_compare_arg * arg,const void * ptr)20 static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
21 const void *ptr)
22 {
23 const struct vxlan_vni_node *vnode = ptr;
24 __be32 vni = *(__be32 *)arg->key;
25
26 return vnode->vni != vni;
27 }
28
29 const struct rhashtable_params vxlan_vni_rht_params = {
30 .head_offset = offsetof(struct vxlan_vni_node, vnode),
31 .key_offset = offsetof(struct vxlan_vni_node, vni),
32 .key_len = sizeof(__be32),
33 .nelem_hint = 3,
34 .max_size = VXLAN_N_VID,
35 .obj_cmpfn = vxlan_vni_cmp,
36 .automatic_shrinking = true,
37 };
38
vxlan_vs_add_del_vninode(struct vxlan_dev * vxlan,struct vxlan_vni_node * v,bool del)39 static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
40 struct vxlan_vni_node *v,
41 bool del)
42 {
43 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
44 struct vxlan_dev_node *node;
45 struct vxlan_sock *vs;
46
47 spin_lock(&vn->sock_lock);
48 if (del) {
49 if (!hlist_unhashed(&v->hlist4.hlist))
50 hlist_del_init_rcu(&v->hlist4.hlist);
51 #if IS_ENABLED(CONFIG_IPV6)
52 if (!hlist_unhashed(&v->hlist6.hlist))
53 hlist_del_init_rcu(&v->hlist6.hlist);
54 #endif
55 goto out;
56 }
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 vs = rtnl_dereference(vxlan->vn6_sock);
60 if (vs && v) {
61 node = &v->hlist6;
62 hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
63 }
64 #endif
65 vs = rtnl_dereference(vxlan->vn4_sock);
66 if (vs && v) {
67 node = &v->hlist4;
68 hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
69 }
70 out:
71 spin_unlock(&vn->sock_lock);
72 }
73
vxlan_vs_add_vnigrp(struct vxlan_dev * vxlan,struct vxlan_sock * vs,bool ipv6)74 void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
75 struct vxlan_sock *vs,
76 bool ipv6)
77 {
78 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
79 struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
80 struct vxlan_vni_node *v, *tmp;
81 struct vxlan_dev_node *node;
82
83 if (!vg)
84 return;
85
86 spin_lock(&vn->sock_lock);
87 list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
88 #if IS_ENABLED(CONFIG_IPV6)
89 if (ipv6)
90 node = &v->hlist6;
91 else
92 #endif
93 node = &v->hlist4;
94 node->vxlan = vxlan;
95 hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
96 }
97 spin_unlock(&vn->sock_lock);
98 }
99
vxlan_vs_del_vnigrp(struct vxlan_dev * vxlan)100 void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
101 {
102 struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
103 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
104 struct vxlan_vni_node *v, *tmp;
105
106 if (!vg)
107 return;
108
109 spin_lock(&vn->sock_lock);
110 list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
111 hlist_del_init_rcu(&v->hlist4.hlist);
112 #if IS_ENABLED(CONFIG_IPV6)
113 hlist_del_init_rcu(&v->hlist6.hlist);
114 #endif
115 }
116 spin_unlock(&vn->sock_lock);
117 }
118
vxlan_vnifilter_stats_get(const struct vxlan_vni_node * vninode,struct vxlan_vni_stats * dest)119 static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
120 struct vxlan_vni_stats *dest)
121 {
122 int i;
123
124 memset(dest, 0, sizeof(*dest));
125 for_each_possible_cpu(i) {
126 struct vxlan_vni_stats_pcpu *pstats;
127 struct vxlan_vni_stats temp;
128 unsigned int start;
129
130 pstats = per_cpu_ptr(vninode->stats, i);
131 do {
132 start = u64_stats_fetch_begin(&pstats->syncp);
133 memcpy(&temp, &pstats->stats, sizeof(temp));
134 } while (u64_stats_fetch_retry(&pstats->syncp, start));
135
136 dest->rx_packets += temp.rx_packets;
137 dest->rx_bytes += temp.rx_bytes;
138 dest->rx_drops += temp.rx_drops;
139 dest->rx_errors += temp.rx_errors;
140 dest->tx_packets += temp.tx_packets;
141 dest->tx_bytes += temp.tx_bytes;
142 dest->tx_drops += temp.tx_drops;
143 dest->tx_errors += temp.tx_errors;
144 }
145 }
146
vxlan_vnifilter_stats_add(struct vxlan_vni_node * vninode,int type,unsigned int len)147 static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
148 int type, unsigned int len)
149 {
150 struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);
151
152 u64_stats_update_begin(&pstats->syncp);
153 switch (type) {
154 case VXLAN_VNI_STATS_RX:
155 pstats->stats.rx_bytes += len;
156 pstats->stats.rx_packets++;
157 break;
158 case VXLAN_VNI_STATS_RX_DROPS:
159 pstats->stats.rx_drops++;
160 break;
161 case VXLAN_VNI_STATS_RX_ERRORS:
162 pstats->stats.rx_errors++;
163 break;
164 case VXLAN_VNI_STATS_TX:
165 pstats->stats.tx_bytes += len;
166 pstats->stats.tx_packets++;
167 break;
168 case VXLAN_VNI_STATS_TX_DROPS:
169 pstats->stats.tx_drops++;
170 break;
171 case VXLAN_VNI_STATS_TX_ERRORS:
172 pstats->stats.tx_errors++;
173 break;
174 }
175 u64_stats_update_end(&pstats->syncp);
176 }
177
/* Account a stats event for @vni on @vxlan. Nothing is counted unless the
 * device has VXLAN_F_VNIFILTER set. When the caller does not supply
 * @vninode, the node is looked up by @vni; if none exists the event is
 * silently ignored.
 */
void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *target = vninode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (!target)
		target = vxlan_vnifilter_lookup(vxlan, vni);

	if (target)
		vxlan_vnifilter_stats_add(target, type, len);
}
197
vnirange(struct vxlan_vni_node * vbegin,struct vxlan_vni_node * vend)198 static u32 vnirange(struct vxlan_vni_node *vbegin,
199 struct vxlan_vni_node *vend)
200 {
201 return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
202 }
203
vxlan_vnifilter_entry_nlmsg_size(void)204 static size_t vxlan_vnifilter_entry_nlmsg_size(void)
205 {
206 return NLMSG_ALIGN(sizeof(struct tunnel_msg))
207 + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
208 + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
209 + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
210 + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
211 }
212
__vnifilter_entry_fill_stats(struct sk_buff * skb,const struct vxlan_vni_node * vbegin)213 static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
214 const struct vxlan_vni_node *vbegin)
215 {
216 struct vxlan_vni_stats vstats;
217 struct nlattr *vstats_attr;
218
219 vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
220 if (!vstats_attr)
221 goto out_stats_err;
222
223 vxlan_vnifilter_stats_get(vbegin, &vstats);
224 if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
225 vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
226 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
227 vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
228 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
229 vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
230 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
231 vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
232 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
233 vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
234 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
235 vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
236 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
237 vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
238 nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
239 vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
240 goto out_stats_err;
241
242 nla_nest_end(skb, vstats_attr);
243
244 return 0;
245
246 out_stats_err:
247 nla_nest_cancel(skb, vstats_attr);
248 return -EMSGSIZE;
249 }
250
vxlan_fill_vni_filter_entry(struct sk_buff * skb,struct vxlan_vni_node * vbegin,struct vxlan_vni_node * vend,bool fill_stats)251 static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
252 struct vxlan_vni_node *vbegin,
253 struct vxlan_vni_node *vend,
254 bool fill_stats)
255 {
256 struct nlattr *ventry;
257 u32 vs = be32_to_cpu(vbegin->vni);
258 u32 ve = 0;
259
260 if (vbegin != vend)
261 ve = be32_to_cpu(vend->vni);
262
263 ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
264 if (!ventry)
265 return false;
266
267 if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
268 goto out_err;
269
270 if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
271 goto out_err;
272
273 if (!vxlan_addr_any(&vbegin->remote_ip)) {
274 if (vbegin->remote_ip.sa.sa_family == AF_INET) {
275 if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
276 vbegin->remote_ip.sin.sin_addr.s_addr))
277 goto out_err;
278 #if IS_ENABLED(CONFIG_IPV6)
279 } else {
280 if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
281 &vbegin->remote_ip.sin6.sin6_addr))
282 goto out_err;
283 #endif
284 }
285 }
286
287 if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
288 goto out_err;
289
290 nla_nest_end(skb, ventry);
291
292 return true;
293
294 out_err:
295 nla_nest_cancel(skb, ventry);
296
297 return false;
298 }
299
vxlan_vnifilter_notify(const struct vxlan_dev * vxlan,struct vxlan_vni_node * vninode,int cmd)300 static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
301 struct vxlan_vni_node *vninode, int cmd)
302 {
303 struct tunnel_msg *tmsg;
304 struct sk_buff *skb;
305 struct nlmsghdr *nlh;
306 struct net *net = dev_net(vxlan->dev);
307 int err = -ENOBUFS;
308
309 skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
310 if (!skb)
311 goto out_err;
312
313 err = -EMSGSIZE;
314 nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
315 if (!nlh)
316 goto out_err;
317 tmsg = nlmsg_data(nlh);
318 memset(tmsg, 0, sizeof(*tmsg));
319 tmsg->family = AF_BRIDGE;
320 tmsg->ifindex = vxlan->dev->ifindex;
321
322 if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
323 goto out_err;
324
325 nlmsg_end(skb, nlh);
326 rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);
327
328 return;
329
330 out_err:
331 rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);
332
333 kfree_skb(skb);
334 }
335
vxlan_vnifilter_dump_dev(const struct net_device * dev,struct sk_buff * skb,struct netlink_callback * cb)336 static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
337 struct sk_buff *skb,
338 struct netlink_callback *cb)
339 {
340 struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
341 struct vxlan_dev *vxlan = netdev_priv(dev);
342 struct tunnel_msg *new_tmsg, *tmsg;
343 int idx = 0, s_idx = cb->args[1];
344 struct vxlan_vni_group *vg;
345 struct nlmsghdr *nlh;
346 bool dump_stats;
347 int err = 0;
348
349 if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
350 return -EINVAL;
351
352 /* RCU needed because of the vni locking rules (rcu || rtnl) */
353 vg = rcu_dereference(vxlan->vnigrp);
354 if (!vg || !vg->num_vnis)
355 return 0;
356
357 tmsg = nlmsg_data(cb->nlh);
358 dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);
359
360 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
361 RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
362 if (!nlh)
363 return -EMSGSIZE;
364 new_tmsg = nlmsg_data(nlh);
365 memset(new_tmsg, 0, sizeof(*new_tmsg));
366 new_tmsg->family = PF_BRIDGE;
367 new_tmsg->ifindex = dev->ifindex;
368
369 list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
370 if (idx < s_idx) {
371 idx++;
372 continue;
373 }
374 if (!vbegin) {
375 vbegin = v;
376 vend = v;
377 continue;
378 }
379 if (!dump_stats && vnirange(vend, v) == 1 &&
380 vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
381 goto update_end;
382 } else {
383 if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
384 dump_stats)) {
385 err = -EMSGSIZE;
386 break;
387 }
388 idx += vnirange(vbegin, vend) + 1;
389 vbegin = v;
390 }
391 update_end:
392 vend = v;
393 }
394
395 if (!err && vbegin) {
396 if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
397 err = -EMSGSIZE;
398 }
399
400 cb->args[1] = err ? idx : 0;
401
402 nlmsg_end(skb, nlh);
403
404 return err;
405 }
406
vxlan_vnifilter_dump(struct sk_buff * skb,struct netlink_callback * cb)407 static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
408 {
409 int idx = 0, err = 0, s_idx = cb->args[0];
410 struct net *net = sock_net(skb->sk);
411 struct tunnel_msg *tmsg;
412 struct net_device *dev;
413
414 if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) {
415 NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
416 return -EINVAL;
417 }
418
419 tmsg = nlmsg_data(cb->nlh);
420
421 if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
422 NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
423 return -EINVAL;
424 }
425
426 rcu_read_lock();
427 if (tmsg->ifindex) {
428 dev = dev_get_by_index_rcu(net, tmsg->ifindex);
429 if (!dev) {
430 err = -ENODEV;
431 goto out_err;
432 }
433 if (!netif_is_vxlan(dev)) {
434 NL_SET_ERR_MSG(cb->extack,
435 "The device is not a vxlan device");
436 err = -EINVAL;
437 goto out_err;
438 }
439 err = vxlan_vnifilter_dump_dev(dev, skb, cb);
440 /* if the dump completed without an error we return 0 here */
441 if (err != -EMSGSIZE)
442 goto out_err;
443 } else {
444 for_each_netdev_rcu(net, dev) {
445 if (!netif_is_vxlan(dev))
446 continue;
447 if (idx < s_idx)
448 goto skip;
449 err = vxlan_vnifilter_dump_dev(dev, skb, cb);
450 if (err == -EMSGSIZE)
451 break;
452 skip:
453 idx++;
454 }
455 }
456 cb->args[0] = idx;
457 rcu_read_unlock();
458
459 return skb->len;
460
461 out_err:
462 rcu_read_unlock();
463
464 return err;
465 }
466
467 static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
468 [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
469 [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
470 [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
471 .len = sizeof_field(struct iphdr, daddr) },
472 [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
473 .len = sizeof(struct in6_addr) },
474 };
475
476 static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
477 [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
478 };
479
/* Replace the all-zeros-MAC FDB entry of @vni: add/append @remote_ip and
 * then delete @old_remote_ip. Either address is skipped when it is NULL or
 * the wildcard address. Both steps run under the FDB hash lock for that
 * entry.
 *
 * Returns 0 on success or the error from vxlan_fdb_update(); on error the
 * old entry is left untouched.
 */
static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					  union vxlan_addr *old_remote_ip,
					  union vxlan_addr *remote_ip,
					  struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	u32 bucket = fdb_head_index(vxlan, all_zeros_mac, vni);
	int err = 0;

	spin_lock_bh(&vxlan->hash_lock[bucket]);

	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err)
			goto unlock;
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip))
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);

unlock:
	spin_unlock_bh(&vxlan->hash_lock[bucket]);

	return err;
}
519
/* Point the default FDB entry of @vninode at a new remote and, when the
 * device is up, adjust multicast group membership accordingly.
 *
 * The new remote is @group if it is set and non-wildcard, otherwise the
 * device's default remote (if that is non-wildcard). If neither exists,
 * the node's previous remote is explicitly removed.
 *
 * *@changed is set to true only when the whole update succeeded. Returns 0
 * on success or when there was nothing to do, otherwise the error from the
 * FDB update or the multicast leave/join.
 */
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr prev_rip;
	int err;

	/* keep a copy: the node's remote_ip is overwritten further down */
	memcpy(&prev_rip, &vninode->remote_ip, sizeof(prev_rip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group))
		newrip = group;
	else if (!vxlan_addr_any(&dst->remote_ip))
		newrip = &dst->remote_ip;

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!newrip && !vxlan_addr_any(&prev_rip))
		oldrip = &prev_rip;

	if (!newrip && !oldrip)
		return 0;

	/* NOTE: oldrip is only ever set when newrip is NULL, so as written
	 * this "unchanged" shortcut cannot trigger.
	 */
	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	err = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (err)
		return err;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		/* leave the old multicast group unless it is still used */
		if (vxlan_addr_multicast(&prev_rip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &prev_rip,
				      vxlan->default_dst.remote_ifindex)) {
			err = vxlan_igmp_leave(vxlan, &prev_rip,
					       0);
			if (err)
				return err;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			err = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			/* already being a member is not an error */
			if (err == -EADDRINUSE)
				err = 0;
			if (err)
				return err;
		}
	}

	*changed = true;

	return 0;
}
591
vxlan_vnilist_update_group(struct vxlan_dev * vxlan,union vxlan_addr * old_remote_ip,union vxlan_addr * new_remote_ip,struct netlink_ext_ack * extack)592 int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
593 union vxlan_addr *old_remote_ip,
594 union vxlan_addr *new_remote_ip,
595 struct netlink_ext_ack *extack)
596 {
597 struct list_head *headp, *hpos;
598 struct vxlan_vni_group *vg;
599 struct vxlan_vni_node *vent;
600 int ret;
601
602 vg = rtnl_dereference(vxlan->vnigrp);
603
604 headp = &vg->vni_list;
605 list_for_each_prev(hpos, headp) {
606 vent = list_entry(hpos, struct vxlan_vni_node, vlist);
607 if (vxlan_addr_any(&vent->remote_ip)) {
608 ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
609 old_remote_ip,
610 new_remote_ip,
611 extack);
612 if (ret)
613 return ret;
614 }
615 }
616
617 return 0;
618 }
619
vxlan_vni_delete_group(struct vxlan_dev * vxlan,struct vxlan_vni_node * vninode)620 static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
621 struct vxlan_vni_node *vninode)
622 {
623 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
624 struct vxlan_rdst *dst = &vxlan->default_dst;
625
626 /* if per vni remote_ip not present, delete the
627 * default dst remote_ip previously added for this vni
628 */
629 if (!vxlan_addr_any(&vninode->remote_ip) ||
630 !vxlan_addr_any(&dst->remote_ip))
631 __vxlan_fdb_delete(vxlan, all_zeros_mac,
632 (vxlan_addr_any(&vninode->remote_ip) ?
633 dst->remote_ip : vninode->remote_ip),
634 vxlan->cfg.dst_port,
635 vninode->vni, vninode->vni,
636 dst->remote_ifindex,
637 true);
638
639 if (vxlan->dev->flags & IFF_UP) {
640 if (vxlan_addr_multicast(&vninode->remote_ip) &&
641 !vxlan_group_used(vn, vxlan, vninode->vni,
642 &vninode->remote_ip,
643 dst->remote_ifindex)) {
644 vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
645 }
646 }
647 }
648
/* Update the remote group of an already configured VNI.
 *
 * @vni is looked up in @vg; if it is not there nothing is done and 0 is
 * returned. Otherwise the group is updated through
 * vxlan_vni_update_group() and, only if that reported a change via
 * *@changed, an RTM_NEWTUNNEL notification is sent.
 *
 * @changed must point to a valid bool: vxlan_vni_update_group() writes
 * through it.
 *
 * Returns 0 on success or the error from vxlan_vni_update_group().
 */
static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	/* test the flag itself, not the (always non-NULL) pointer, so that
	 * no notification is sent when nothing changed
	 */
	if (*changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}
673
__vxlan_vni_add_list(struct vxlan_vni_group * vg,struct vxlan_vni_node * v)674 static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
675 struct vxlan_vni_node *v)
676 {
677 struct list_head *headp, *hpos;
678 struct vxlan_vni_node *vent;
679
680 headp = &vg->vni_list;
681 list_for_each_prev(hpos, headp) {
682 vent = list_entry(hpos, struct vxlan_vni_node, vlist);
683 if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
684 continue;
685 else
686 break;
687 }
688 list_add_rcu(&v->vlist, hpos);
689 vg->num_vnis++;
690 }
691
__vxlan_vni_del_list(struct vxlan_vni_group * vg,struct vxlan_vni_node * v)692 static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
693 struct vxlan_vni_node *v)
694 {
695 list_del_rcu(&v->vlist);
696 vg->num_vnis--;
697 }
698
vxlan_vni_alloc(struct vxlan_dev * vxlan,__be32 vni)699 static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
700 __be32 vni)
701 {
702 struct vxlan_vni_node *vninode;
703
704 vninode = kzalloc(sizeof(*vninode), GFP_KERNEL);
705 if (!vninode)
706 return NULL;
707 vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
708 if (!vninode->stats) {
709 kfree(vninode);
710 return NULL;
711 }
712 vninode->vni = vni;
713 vninode->hlist4.vxlan = vxlan;
714 #if IS_ENABLED(CONFIG_IPV6)
715 vninode->hlist6.vxlan = vxlan;
716 #endif
717
718 return vninode;
719 }
720
vxlan_vni_free(struct vxlan_vni_node * vninode)721 static void vxlan_vni_free(struct vxlan_vni_node *vninode)
722 {
723 free_percpu(vninode->stats);
724 kfree(vninode);
725 }
726
/* Add VNI @vni (host byte order) with optional remote @group to @vxlan.
 *
 * If the VNI is already configured on this device only its group is
 * updated. Otherwise a new node is allocated, inserted into the hash table
 * and the sorted list, hashed onto the sockets when the device is up, and
 * its default FDB entry / group membership is set up. An RTM_NEWTUNNEL
 * notification is sent when the group setup reported a change.
 *
 * Returns 0 on success, the error from vxlan_vni_in_use() when that check
 * fails, -ENOMEM on allocation failure, or the error from the hash insert
 * or the group update.
 */
static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	__be32 be_vni = cpu_to_be32(vni);
	struct vxlan_vni_node *node;
	bool changed = false;
	int err;

	/* already present on this device: just refresh the group */
	if (vxlan_vnifilter_lookup(vxlan, be_vni))
		return vxlan_vni_update(vxlan, vg, be_vni, group, &changed,
					extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, be_vni);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	node = vxlan_vni_alloc(vxlan, be_vni);
	if (!node)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &node->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		vxlan_vni_free(node);
		return err;
	}

	__vxlan_vni_add_list(vg, node);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, node, false);

	err = vxlan_vni_update_group(vxlan, node, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, node, RTM_NEWTUNNEL);

	return err;
}
771
vxlan_vni_node_rcu_free(struct rcu_head * rcu)772 static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
773 {
774 struct vxlan_vni_node *v;
775
776 v = container_of(rcu, struct vxlan_vni_node, rcu);
777 vxlan_vni_free(v);
778 }
779
/* Remove VNI @vni (host byte order) from @vxlan.
 *
 * The node's group state is torn down, the node is removed from the hash
 * table and the list, an RTM_DELTUNNEL notification is sent, the node is
 * unhashed from the sockets when the device is up, and it is finally freed
 * through call_rcu().
 *
 * Note that the @vg argument is overwritten with the device's current VNI
 * group before use.
 *
 * Returns 0 on success, -ENOENT if the VNI is not configured, or the error
 * from rhashtable_remove_fast().
 */
static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	__be32 be_vni = cpu_to_be32(vni);
	struct vxlan_vni_node *node;
	int err;

	vg = rtnl_dereference(vxlan->vnigrp);

	node = rhashtable_lookup_fast(&vg->vni_hash, &be_vni,
				      vxlan_vni_rht_params);
	if (!node)
		return -ENOENT;

	vxlan_vni_delete_group(vxlan, node);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &node->vnode,
				     vxlan_vni_rht_params);
	if (err)
		return err;

	__vxlan_vni_del_list(vg, node);

	vxlan_vnifilter_notify(vxlan, node, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, node, true);

	call_rcu(&node->rcu, vxlan_vni_node_rcu_free);

	return 0;
}
818
/* Apply @cmd to every VNI in the inclusive range [@start_vni, @end_vni],
 * in ascending order, stopping at the first failure.
 *
 * RTM_NEWTUNNEL adds (or updates) each VNI with @group, RTM_DELTUNNEL
 * deletes it. VNIs processed before a failure are not rolled back.
 *
 * Returns 0 when the whole range was processed, or when the range is empty
 * because @start_vni > @end_vni; -EOPNOTSUPP for any other @cmd; otherwise
 * the first error from vxlan_vni_add() / vxlan_vni_del().
 */
static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	int err;
	u32 v;

	vg = rtnl_dereference(vxlan->vnigrp);

	if (start_vni > end_vni)
		return 0;

	/* The counter has the same unsigned type as the bounds, and the loop
	 * ends on v == end_vni rather than on "v <= end_vni", which could
	 * never become false for end_vni == U32_MAX.
	 */
	for (v = start_vni; ; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;

		if (v == end_vni)
			break;
	}

	return 0;
}
848
vxlan_process_vni_filter(struct vxlan_dev * vxlan,struct nlattr * nlvnifilter,int cmd,struct netlink_ext_ack * extack)849 static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
850 struct nlattr *nlvnifilter,
851 int cmd, struct netlink_ext_ack *extack)
852 {
853 struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
854 u32 vni_start = 0, vni_end = 0;
855 union vxlan_addr group;
856 int err;
857
858 err = nla_parse_nested(vattrs,
859 VXLAN_VNIFILTER_ENTRY_MAX,
860 nlvnifilter, vni_filter_entry_policy,
861 extack);
862 if (err)
863 return err;
864
865 if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
866 vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
867 vni_end = vni_start;
868 }
869
870 if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
871 vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);
872
873 if (!vni_start && !vni_end) {
874 NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
875 "vni start nor end found in vni entry");
876 return -EINVAL;
877 }
878
879 if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
880 group.sin.sin_addr.s_addr =
881 nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
882 group.sa.sa_family = AF_INET;
883 } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
884 group.sin6.sin6_addr =
885 nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
886 group.sa.sa_family = AF_INET6;
887 } else {
888 memset(&group, 0, sizeof(group));
889 }
890
891 if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
892 NL_SET_ERR_MSG(extack,
893 "Local interface required for multicast remote group");
894
895 return -EINVAL;
896 }
897
898 err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
899 extack);
900 if (err)
901 return err;
902
903 return 0;
904 }
905
vxlan_vnigroup_uninit(struct vxlan_dev * vxlan)906 void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
907 {
908 struct vxlan_vni_node *v, *tmp;
909 struct vxlan_vni_group *vg;
910
911 vg = rtnl_dereference(vxlan->vnigrp);
912 list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
913 rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
914 vxlan_vni_rht_params);
915 hlist_del_init_rcu(&v->hlist4.hlist);
916 #if IS_ENABLED(CONFIG_IPV6)
917 hlist_del_init_rcu(&v->hlist6.hlist);
918 #endif
919 __vxlan_vni_del_list(vg, v);
920 vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
921 call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
922 }
923 rhashtable_destroy(&vg->vni_hash);
924 kfree(vg);
925 }
926
vxlan_vnigroup_init(struct vxlan_dev * vxlan)927 int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
928 {
929 struct vxlan_vni_group *vg;
930 int ret;
931
932 vg = kzalloc(sizeof(*vg), GFP_KERNEL);
933 if (!vg)
934 return -ENOMEM;
935 ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
936 if (ret) {
937 kfree(vg);
938 return ret;
939 }
940 INIT_LIST_HEAD(&vg->vni_list);
941 rcu_assign_pointer(vxlan->vnigrp, vg);
942
943 return 0;
944 }
945
vxlan_vnifilter_process(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)946 static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
947 struct netlink_ext_ack *extack)
948 {
949 struct net *net = sock_net(skb->sk);
950 struct tunnel_msg *tmsg;
951 struct vxlan_dev *vxlan;
952 struct net_device *dev;
953 struct nlattr *attr;
954 int err, vnis = 0;
955 int rem;
956
957 /* this should validate the header and check for remaining bytes */
958 err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
959 vni_filter_policy, extack);
960 if (err < 0)
961 return err;
962
963 tmsg = nlmsg_data(nlh);
964 dev = __dev_get_by_index(net, tmsg->ifindex);
965 if (!dev)
966 return -ENODEV;
967
968 if (!netif_is_vxlan(dev)) {
969 NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
970 return -EINVAL;
971 }
972
973 vxlan = netdev_priv(dev);
974
975 if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
976 return -EOPNOTSUPP;
977
978 nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
979 switch (nla_type(attr)) {
980 case VXLAN_VNIFILTER_ENTRY:
981 err = vxlan_process_vni_filter(vxlan, attr,
982 nlh->nlmsg_type, extack);
983 break;
984 default:
985 continue;
986 }
987 vnis++;
988 if (err)
989 break;
990 }
991
992 if (!vnis) {
993 NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
994 err = -EINVAL;
995 }
996
997 return err;
998 }
999
1000 static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = {
1001 {THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0},
1002 {THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0},
1003 {THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0},
1004 };
1005
vxlan_vnifilter_init(void)1006 int vxlan_vnifilter_init(void)
1007 {
1008 return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers);
1009 }
1010
vxlan_vnifilter_uninit(void)1011 void vxlan_vnifilter_uninit(void)
1012 {
1013 rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers);
1014 }
1015