xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision 3d40aed862874db14e1dd41fd6f12636dcfdcc3e)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <linux/genalloc.h>
22 #include <net/netevent.h>
23 #include <net/neighbour.h>
24 #include <net/arp.h>
25 #include <net/inet_dscp.h>
26 #include <net/ip_fib.h>
27 #include <net/ip6_fib.h>
28 #include <net/nexthop.h>
29 #include <net/fib_rules.h>
30 #include <net/ip_tunnels.h>
31 #include <net/l3mdev.h>
32 #include <net/addrconf.h>
33 #include <net/ndisc.h>
34 #include <net/ipv6.h>
35 #include <net/fib_notifier.h>
36 #include <net/switchdev.h>
37 
38 #include "spectrum.h"
39 #include "core.h"
40 #include "reg.h"
41 #include "spectrum_cnt.h"
42 #include "spectrum_dpipe.h"
43 #include "spectrum_ipip.h"
44 #include "spectrum_mr.h"
45 #include "spectrum_mr_tcam.h"
46 #include "spectrum_router.h"
47 #include "spectrum_span.h"
48 
49 struct mlxsw_sp_fib;
50 struct mlxsw_sp_vr;
51 struct mlxsw_sp_lpm_tree;
52 struct mlxsw_sp_rif_ops;
53 
54 struct mlxsw_sp_crif_key {
55 	struct net_device *dev;
56 };
57 
58 struct mlxsw_sp_crif {
59 	struct mlxsw_sp_crif_key key;
60 	struct rhash_head ht_node;
61 	bool can_destroy;
62 	struct list_head nexthop_list;
63 	struct mlxsw_sp_rif *rif;
64 };
65 
66 static const struct rhashtable_params mlxsw_sp_crif_ht_params = {
67 	.key_offset = offsetof(struct mlxsw_sp_crif, key),
68 	.key_len = sizeof_field(struct mlxsw_sp_crif, key),
69 	.head_offset = offsetof(struct mlxsw_sp_crif, ht_node),
70 };
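/* CRIFs are keyed by the netdevice pointer alone. A lookup builds a key on
 * the stack and queries the table with these parameters; see
 * mlxsw_sp_crif_lookup() below for the canonical usage.
 */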
71 
72 struct mlxsw_sp_rif {
73 	struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */
74 	struct list_head neigh_list;
75 	struct mlxsw_sp_fid *fid;
76 	unsigned char addr[ETH_ALEN];
77 	int mtu;
78 	u16 rif_index;
79 	u8 mac_profile_id;
80 	u8 rif_entries;
81 	u16 vr_id;
82 	const struct mlxsw_sp_rif_ops *ops;
83 	struct mlxsw_sp *mlxsw_sp;
84 
85 	unsigned int counter_ingress;
86 	bool counter_ingress_valid;
87 	unsigned int counter_egress;
88 	bool counter_egress_valid;
89 };
90 
91 static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
92 {
93 	if (!rif->crif)
94 		return NULL;
95 	return rif->crif->key.dev;
96 }
97 
98 struct mlxsw_sp_rif_params {
99 	struct net_device *dev;
100 	union {
101 		u16 system_port;
102 		u16 lag_id;
103 	};
104 	u16 vid;
105 	bool lag;
106 	bool double_entry;
107 };
108 
109 struct mlxsw_sp_rif_subport {
110 	struct mlxsw_sp_rif common;
111 	refcount_t ref_count;
112 	union {
113 		u16 system_port;
114 		u16 lag_id;
115 	};
116 	u16 vid;
117 	bool lag;
118 };
119 
120 struct mlxsw_sp_rif_ipip_lb {
121 	struct mlxsw_sp_rif common;
122 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
123 	u16 ul_vr_id;	/* Spectrum-1. */
124 	u16 ul_rif_id;	/* Spectrum-2+. */
125 };
126 
127 struct mlxsw_sp_rif_params_ipip_lb {
128 	struct mlxsw_sp_rif_params common;
129 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
130 };
131 
132 struct mlxsw_sp_rif_ops {
133 	enum mlxsw_sp_rif_type type;
134 	size_t rif_size;
135 
136 	void (*setup)(struct mlxsw_sp_rif *rif,
137 		      const struct mlxsw_sp_rif_params *params);
138 	int (*configure)(struct mlxsw_sp_rif *rif,
139 			 struct netlink_ext_ack *extack);
140 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
141 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
142 					 const struct mlxsw_sp_rif_params *params,
143 					 struct netlink_ext_ack *extack);
144 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
145 };
146 
147 struct mlxsw_sp_rif_mac_profile {
148 	unsigned char mac_prefix[ETH_ALEN];
149 	refcount_t ref_count;
150 	u8 id;
151 };
152 
153 struct mlxsw_sp_router_ops {
154 	int (*init)(struct mlxsw_sp *mlxsw_sp);
155 	int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
156 };
157 
158 static struct mlxsw_sp_rif *
159 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
160 			 const struct net_device *dev);
161 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
162 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
163 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
164 				  struct mlxsw_sp_lpm_tree *lpm_tree);
165 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
166 				     const struct mlxsw_sp_fib *fib,
167 				     u8 tree_id);
168 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
169 				       const struct mlxsw_sp_fib *fib);
170 
171 static unsigned int *
172 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
173 			   enum mlxsw_sp_rif_counter_dir dir)
174 {
175 	switch (dir) {
176 	case MLXSW_SP_RIF_COUNTER_EGRESS:
177 		return &rif->counter_egress;
178 	case MLXSW_SP_RIF_COUNTER_INGRESS:
179 		return &rif->counter_ingress;
180 	}
181 	return NULL;
182 }
183 
184 static bool
185 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
186 			       enum mlxsw_sp_rif_counter_dir dir)
187 {
188 	switch (dir) {
189 	case MLXSW_SP_RIF_COUNTER_EGRESS:
190 		return rif->counter_egress_valid;
191 	case MLXSW_SP_RIF_COUNTER_INGRESS:
192 		return rif->counter_ingress_valid;
193 	}
194 	return false;
195 }
196 
197 static void
198 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
199 			       enum mlxsw_sp_rif_counter_dir dir,
200 			       bool valid)
201 {
202 	switch (dir) {
203 	case MLXSW_SP_RIF_COUNTER_EGRESS:
204 		rif->counter_egress_valid = valid;
205 		break;
206 	case MLXSW_SP_RIF_COUNTER_INGRESS:
207 		rif->counter_ingress_valid = valid;
208 		break;
209 	}
210 }
211 
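/* Binding a counter is a read-modify-write of the RITR register: the current
 * RIF configuration is queried first, so that writing the register back only
 * changes the counter fields.
 */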
212 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
213 				     unsigned int counter_index, bool enable,
214 				     enum mlxsw_sp_rif_counter_dir dir)
215 {
216 	char ritr_pl[MLXSW_REG_RITR_LEN];
217 	bool is_egress = false;
218 	int err;
219 
220 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
221 		is_egress = true;
222 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
223 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
224 	if (err)
225 		return err;
226 
227 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
228 				    is_egress);
229 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
230 }
231 
232 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
233 				   struct mlxsw_sp_rif *rif,
234 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
235 {
236 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
237 	unsigned int *p_counter_index;
238 	bool valid;
239 	int err;
240 
241 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
242 	if (!valid)
243 		return -EINVAL;
244 
245 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
246 	if (!p_counter_index)
247 		return -EINVAL;
248 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
249 			     MLXSW_REG_RICNT_OPCODE_NOP);
250 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
251 	if (err)
252 		return err;
253 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
254 	return 0;
255 }
256 
257 struct mlxsw_sp_rif_counter_set_basic {
258 	u64 good_unicast_packets;
259 	u64 good_multicast_packets;
260 	u64 good_broadcast_packets;
261 	u64 good_unicast_bytes;
262 	u64 good_multicast_bytes;
263 	u64 good_broadcast_bytes;
264 	u64 error_packets;
265 	u64 discard_packets;
266 	u64 error_bytes;
267 	u64 discard_bytes;
268 };
269 
270 static int
271 mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
272 				 enum mlxsw_sp_rif_counter_dir dir,
273 				 struct mlxsw_sp_rif_counter_set_basic *set)
274 {
275 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
276 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
277 	unsigned int *p_counter_index;
278 	int err;
279 
280 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
281 		return -EINVAL;
282 
283 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
284 	if (!p_counter_index)
285 		return -EINVAL;
286 
287 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
288 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
289 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
290 	if (err)
291 		return err;
292 
293 	if (!set)
294 		return 0;
295 
296 #define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)				\
297 		(set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
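/* Each use below expands to a plain field copy; for example,
 * MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets) becomes
 *
 *	set->good_unicast_packets =
 *		mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
 */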
298 
299 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
300 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
301 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
302 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
303 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
304 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
305 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
306 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
307 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
308 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
309 
310 #undef MLXSW_SP_RIF_COUNTER_EXTRACT
311 
312 	return 0;
313 }
314 
315 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
316 				      unsigned int counter_index)
317 {
318 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
319 
320 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
321 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
322 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
323 }
324 
325 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
326 			       enum mlxsw_sp_rif_counter_dir dir)
327 {
328 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
329 	unsigned int *p_counter_index;
330 	int err;
331 
332 	if (mlxsw_sp_rif_counter_valid_get(rif, dir))
333 		return 0;
334 
335 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
336 	if (!p_counter_index)
337 		return -EINVAL;
338 
339 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
340 				     p_counter_index);
341 	if (err)
342 		return err;
343 
344 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
345 	if (err)
346 		goto err_counter_clear;
347 
348 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
349 					*p_counter_index, true, dir);
350 	if (err)
351 		goto err_counter_edit;
352 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
353 	return 0;
354 
355 err_counter_edit:
356 err_counter_clear:
357 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
358 			      *p_counter_index);
359 	return err;
360 }
361 
362 void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
363 			       enum mlxsw_sp_rif_counter_dir dir)
364 {
365 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
366 	unsigned int *p_counter_index;
367 
368 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
369 		return;
370 
371 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
372 	if (WARN_ON(!p_counter_index))
373 		return;
374 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
375 				  *p_counter_index, false, dir);
376 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
377 			      *p_counter_index);
378 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
379 }
380 
381 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
382 {
383 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
384 	struct devlink *devlink;
385 
386 	devlink = priv_to_devlink(mlxsw_sp->core);
387 	if (!devlink_dpipe_table_counter_enabled(devlink,
388 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
389 		return;
390 	mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
391 }
392 
393 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
394 {
395 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
396 }
397 
398 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
399 
400 struct mlxsw_sp_prefix_usage {
401 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
402 };
403 
404 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
405 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
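/* Minimal usage sketch: the iteration visits prefix lengths in ascending
 * order, so finding the longest recorded prefix length is just:
 *
 *	unsigned char prefix, longest = 0;
 *
 *	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
 *		longest = prefix;
 *
 * mlxsw_sp_lpm_tree_left_struct_set() below uses this pattern to pick the
 * root bin.
 */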
406 
407 static bool
408 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
409 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
410 {
411 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
412 }
413 
414 static void
415 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
416 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
417 {
418 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
419 }
420 
421 static void
422 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
423 			  unsigned char prefix_len)
424 {
425 	set_bit(prefix_len, prefix_usage->b);
426 }
427 
428 static void
429 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
430 			    unsigned char prefix_len)
431 {
432 	clear_bit(prefix_len, prefix_usage->b);
433 }
434 
435 struct mlxsw_sp_fib_key {
436 	unsigned char addr[sizeof(struct in6_addr)];
437 	unsigned char prefix_len;
438 };
439 
440 enum mlxsw_sp_fib_entry_type {
441 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
442 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
443 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
444 	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
445 	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
446 
447 	/* This is a special case of local delivery, where a packet should be
448 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
449 	 * because that's a type of next hop, not of FIB entry. (There can be
450 	 * several next hops in a REMOTE entry, and some of them may be
451 	 * encapsulating entries.)
452 	 */
453 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
454 	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
455 };
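/* A TRAP entry that matches a tunnel's local address can later be promoted
 * to IPIP_DECAP when the tunnel becomes offloadable, and demoted back to
 * TRAP when it is not; see mlxsw_sp_ipip_entry_promote_decap() and
 * mlxsw_sp_ipip_entry_demote_decap() below.
 */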
456 
457 struct mlxsw_sp_nexthop_group_info;
458 struct mlxsw_sp_nexthop_group;
459 struct mlxsw_sp_fib_entry;
460 
461 struct mlxsw_sp_fib_node {
462 	struct mlxsw_sp_fib_entry *fib_entry;
463 	struct list_head list;
464 	struct rhash_head ht_node;
465 	struct mlxsw_sp_fib *fib;
466 	struct mlxsw_sp_fib_key key;
467 };
468 
469 struct mlxsw_sp_fib_entry_decap {
470 	struct mlxsw_sp_ipip_entry *ipip_entry;
471 	u32 tunnel_index;
472 };
473 
474 struct mlxsw_sp_fib_entry {
475 	struct mlxsw_sp_fib_node *fib_node;
476 	enum mlxsw_sp_fib_entry_type type;
477 	struct list_head nexthop_group_node;
478 	struct mlxsw_sp_nexthop_group *nh_group;
479 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
480 };
481 
482 struct mlxsw_sp_fib4_entry {
483 	struct mlxsw_sp_fib_entry common;
484 	struct fib_info *fi;
485 	u32 tb_id;
486 	dscp_t dscp;
487 	u8 type;
488 };
489 
490 struct mlxsw_sp_fib6_entry {
491 	struct mlxsw_sp_fib_entry common;
492 	struct list_head rt6_list;
493 	unsigned int nrt6;
494 };
495 
496 struct mlxsw_sp_rt6 {
497 	struct list_head list;
498 	struct fib6_info *rt;
499 };
500 
501 struct mlxsw_sp_lpm_tree {
502 	u8 id; /* tree ID */
503 	unsigned int ref_count;
504 	enum mlxsw_sp_l3proto proto;
505 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
506 	struct mlxsw_sp_prefix_usage prefix_usage;
507 };
508 
509 struct mlxsw_sp_fib {
510 	struct rhashtable ht;
511 	struct list_head node_list;
512 	struct mlxsw_sp_vr *vr;
513 	struct mlxsw_sp_lpm_tree *lpm_tree;
514 	enum mlxsw_sp_l3proto proto;
515 };
516 
517 struct mlxsw_sp_vr {
518 	u16 id; /* virtual router ID */
519 	u32 tb_id; /* kernel fib table id */
520 	unsigned int rif_count;
521 	struct mlxsw_sp_fib *fib4;
522 	struct mlxsw_sp_fib *fib6;
523 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
524 	struct mlxsw_sp_rif *ul_rif;
525 	refcount_t ul_rif_refcnt;
526 };
527 
528 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
529 
530 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
531 						struct mlxsw_sp_vr *vr,
532 						enum mlxsw_sp_l3proto proto)
533 {
534 	struct mlxsw_sp_lpm_tree *lpm_tree;
535 	struct mlxsw_sp_fib *fib;
536 	int err;
537 
538 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
539 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
540 	if (!fib)
541 		return ERR_PTR(-ENOMEM);
542 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
543 	if (err)
544 		goto err_rhashtable_init;
545 	INIT_LIST_HEAD(&fib->node_list);
546 	fib->proto = proto;
547 	fib->vr = vr;
548 	fib->lpm_tree = lpm_tree;
549 	mlxsw_sp_lpm_tree_hold(lpm_tree);
550 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
551 	if (err)
552 		goto err_lpm_tree_bind;
553 	return fib;
554 
555 err_lpm_tree_bind:
556 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
557 err_rhashtable_init:
558 	kfree(fib);
559 	return ERR_PTR(err);
560 }
561 
562 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
563 				 struct mlxsw_sp_fib *fib)
564 {
565 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
566 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
567 	WARN_ON(!list_empty(&fib->node_list));
568 	rhashtable_destroy(&fib->ht);
569 	kfree(fib);
570 }
571 
572 static struct mlxsw_sp_lpm_tree *
573 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
574 {
575 	struct mlxsw_sp_lpm_tree *lpm_tree;
576 	int i;
577 
578 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
579 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
580 		if (lpm_tree->ref_count == 0)
581 			return lpm_tree;
582 	}
583 	return NULL;
584 }
585 
586 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
587 				   struct mlxsw_sp_lpm_tree *lpm_tree)
588 {
589 	char ralta_pl[MLXSW_REG_RALTA_LEN];
590 
591 	mlxsw_reg_ralta_pack(ralta_pl, true,
592 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
593 			     lpm_tree->id);
594 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
595 }
596 
597 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
598 				   struct mlxsw_sp_lpm_tree *lpm_tree)
599 {
600 	char ralta_pl[MLXSW_REG_RALTA_LEN];
601 
602 	mlxsw_reg_ralta_pack(ralta_pl, false,
603 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
604 			     lpm_tree->id);
605 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
606 }
607 
608 static int
609 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
610 				  struct mlxsw_sp_prefix_usage *prefix_usage,
611 				  struct mlxsw_sp_lpm_tree *lpm_tree)
612 {
613 	char ralst_pl[MLXSW_REG_RALST_LEN];
614 	u8 root_bin = 0;
615 	u8 prefix;
616 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
617 
618 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
619 		root_bin = prefix;
620 
621 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
622 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
623 		if (prefix == 0)
624 			continue;
625 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
626 					 MLXSW_REG_RALST_BIN_NO_CHILD);
627 		last_prefix = prefix;
628 	}
629 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
630 }
631 
632 static struct mlxsw_sp_lpm_tree *
633 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
634 			 struct mlxsw_sp_prefix_usage *prefix_usage,
635 			 enum mlxsw_sp_l3proto proto)
636 {
637 	struct mlxsw_sp_lpm_tree *lpm_tree;
638 	int err;
639 
640 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
641 	if (!lpm_tree)
642 		return ERR_PTR(-EBUSY);
643 	lpm_tree->proto = proto;
644 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
645 	if (err)
646 		return ERR_PTR(err);
647 
648 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
649 						lpm_tree);
650 	if (err)
651 		goto err_left_struct_set;
652 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
653 	       sizeof(lpm_tree->prefix_usage));
654 	memset(&lpm_tree->prefix_ref_count, 0,
655 	       sizeof(lpm_tree->prefix_ref_count));
656 	lpm_tree->ref_count = 1;
657 	return lpm_tree;
658 
659 err_left_struct_set:
660 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
661 	return ERR_PTR(err);
662 }
663 
664 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
665 				      struct mlxsw_sp_lpm_tree *lpm_tree)
666 {
667 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
668 }
669 
670 static struct mlxsw_sp_lpm_tree *
671 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
672 		      struct mlxsw_sp_prefix_usage *prefix_usage,
673 		      enum mlxsw_sp_l3proto proto)
674 {
675 	struct mlxsw_sp_lpm_tree *lpm_tree;
676 	int i;
677 
678 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
679 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
680 		if (lpm_tree->ref_count != 0 &&
681 		    lpm_tree->proto == proto &&
682 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
683 					     prefix_usage)) {
684 			mlxsw_sp_lpm_tree_hold(lpm_tree);
685 			return lpm_tree;
686 		}
687 	}
688 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
689 }
690 
691 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
692 {
693 	lpm_tree->ref_count++;
694 }
695 
696 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
697 				  struct mlxsw_sp_lpm_tree *lpm_tree)
698 {
699 	if (--lpm_tree->ref_count == 0)
700 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
701 }
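/* Usage sketch: mlxsw_sp_lpm_tree_get() returns a held tree, either an
 * existing one whose prefix usage matches or a freshly created one with
 * ref_count == 1. Callers drop the reference with mlxsw_sp_lpm_tree_put(),
 * which destroys the tree once the count reaches zero.
 */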
702 
703 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
704 
705 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
706 {
707 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 }};
708 	struct mlxsw_sp_lpm_tree *lpm_tree;
709 	u64 max_trees;
710 	int err, i;
711 
712 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
713 		return -EIO;
714 
715 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
716 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
717 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
718 					     sizeof(struct mlxsw_sp_lpm_tree),
719 					     GFP_KERNEL);
720 	if (!mlxsw_sp->router->lpm.trees)
721 		return -ENOMEM;
722 
723 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
724 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
725 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
726 	}
727 
728 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
729 					 MLXSW_SP_L3_PROTO_IPV4);
730 	if (IS_ERR(lpm_tree)) {
731 		err = PTR_ERR(lpm_tree);
732 		goto err_ipv4_tree_get;
733 	}
734 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
735 
736 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
737 					 MLXSW_SP_L3_PROTO_IPV6);
738 	if (IS_ERR(lpm_tree)) {
739 		err = PTR_ERR(lpm_tree);
740 		goto err_ipv6_tree_get;
741 	}
742 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
743 
744 	return 0;
745 
746 err_ipv6_tree_get:
747 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
748 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
749 err_ipv4_tree_get:
750 	kfree(mlxsw_sp->router->lpm.trees);
751 	return err;
752 }
753 
754 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
755 {
756 	struct mlxsw_sp_lpm_tree *lpm_tree;
757 
758 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
759 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
760 
761 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
762 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
763 
764 	kfree(mlxsw_sp->router->lpm.trees);
765 }
766 
767 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
768 {
769 	return !!vr->fib4 || !!vr->fib6 ||
770 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
771 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
772 }
773 
774 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
775 {
776 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
777 	struct mlxsw_sp_vr *vr;
778 	int i;
779 
780 	for (i = 0; i < max_vrs; i++) {
781 		vr = &mlxsw_sp->router->vrs[i];
782 		if (!mlxsw_sp_vr_is_used(vr))
783 			return vr;
784 	}
785 	return NULL;
786 }
787 
788 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
789 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
790 {
791 	char raltb_pl[MLXSW_REG_RALTB_LEN];
792 
793 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
794 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
795 			     tree_id);
796 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
797 }
798 
799 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
800 				       const struct mlxsw_sp_fib *fib)
801 {
802 	char raltb_pl[MLXSW_REG_RALTB_LEN];
803 
804 	/* Bind to tree 0, which is the default */
805 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
806 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
807 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
808 }
809 
810 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
811 {
812 	/* For our purposes, squash the main, default and local tables into one */
813 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
814 		tb_id = RT_TABLE_MAIN;
815 	return tb_id;
816 }
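/* For example, RT_TABLE_LOCAL (255), RT_TABLE_MAIN (254) and
 * RT_TABLE_DEFAULT (253) all map to RT_TABLE_MAIN here, so routes from the
 * three kernel tables share a single virtual router.
 */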
817 
818 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
819 					    u32 tb_id)
820 {
821 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
822 	struct mlxsw_sp_vr *vr;
823 	int i;
824 
825 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
826 
827 	for (i = 0; i < max_vrs; i++) {
828 		vr = &mlxsw_sp->router->vrs[i];
829 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
830 			return vr;
831 	}
832 	return NULL;
833 }
834 
835 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
836 				u16 *vr_id)
837 {
838 	struct mlxsw_sp_vr *vr;
839 	int err = 0;
840 
841 	mutex_lock(&mlxsw_sp->router->lock);
842 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
843 	if (!vr) {
844 		err = -ESRCH;
845 		goto out;
846 	}
847 	*vr_id = vr->id;
848 out:
849 	mutex_unlock(&mlxsw_sp->router->lock);
850 	return err;
851 }
852 
853 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
854 					    enum mlxsw_sp_l3proto proto)
855 {
856 	switch (proto) {
857 	case MLXSW_SP_L3_PROTO_IPV4:
858 		return vr->fib4;
859 	case MLXSW_SP_L3_PROTO_IPV6:
860 		return vr->fib6;
861 	}
862 	return NULL;
863 }
864 
865 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
866 					      u32 tb_id,
867 					      struct netlink_ext_ack *extack)
868 {
869 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
870 	struct mlxsw_sp_fib *fib4;
871 	struct mlxsw_sp_fib *fib6;
872 	struct mlxsw_sp_vr *vr;
873 	int err;
874 
875 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
876 	if (!vr) {
877 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
878 		return ERR_PTR(-EBUSY);
879 	}
880 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
881 	if (IS_ERR(fib4))
882 		return ERR_CAST(fib4);
883 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
884 	if (IS_ERR(fib6)) {
885 		err = PTR_ERR(fib6);
886 		goto err_fib6_create;
887 	}
888 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
889 					     MLXSW_SP_L3_PROTO_IPV4);
890 	if (IS_ERR(mr4_table)) {
891 		err = PTR_ERR(mr4_table);
892 		goto err_mr4_table_create;
893 	}
894 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
895 					     MLXSW_SP_L3_PROTO_IPV6);
896 	if (IS_ERR(mr6_table)) {
897 		err = PTR_ERR(mr6_table);
898 		goto err_mr6_table_create;
899 	}
900 
901 	vr->fib4 = fib4;
902 	vr->fib6 = fib6;
903 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
904 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
905 	vr->tb_id = tb_id;
906 	return vr;
907 
908 err_mr6_table_create:
909 	mlxsw_sp_mr_table_destroy(mr4_table);
910 err_mr4_table_create:
911 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
912 err_fib6_create:
913 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
914 	return ERR_PTR(err);
915 }
916 
917 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
918 				struct mlxsw_sp_vr *vr)
919 {
920 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
921 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
922 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
923 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
924 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
925 	vr->fib6 = NULL;
926 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
927 	vr->fib4 = NULL;
928 }
929 
930 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
931 					   struct netlink_ext_ack *extack)
932 {
933 	struct mlxsw_sp_vr *vr;
934 
935 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
936 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
937 	if (!vr)
938 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
939 	return vr;
940 }
941 
942 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
943 {
944 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
945 	    list_empty(&vr->fib6->node_list) &&
946 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
947 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
948 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
949 }
950 
951 static bool
952 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
953 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
954 {
955 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
956 
957 	if (!mlxsw_sp_vr_is_used(vr))
958 		return false;
959 	if (fib->lpm_tree->id == tree_id)
960 		return true;
961 	return false;
962 }
963 
964 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
965 					struct mlxsw_sp_fib *fib,
966 					struct mlxsw_sp_lpm_tree *new_tree)
967 {
968 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
969 	int err;
970 
971 	fib->lpm_tree = new_tree;
972 	mlxsw_sp_lpm_tree_hold(new_tree);
973 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
974 	if (err)
975 		goto err_tree_bind;
976 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
977 	return 0;
978 
979 err_tree_bind:
980 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
981 	fib->lpm_tree = old_tree;
982 	return err;
983 }
984 
985 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
986 					 struct mlxsw_sp_fib *fib,
987 					 struct mlxsw_sp_lpm_tree *new_tree)
988 {
989 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
990 	enum mlxsw_sp_l3proto proto = fib->proto;
991 	struct mlxsw_sp_lpm_tree *old_tree;
992 	u8 old_id, new_id = new_tree->id;
993 	struct mlxsw_sp_vr *vr;
994 	int i, err;
995 
996 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
997 	old_id = old_tree->id;
998 
999 	for (i = 0; i < max_vrs; i++) {
1000 		vr = &mlxsw_sp->router->vrs[i];
1001 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
1002 			continue;
1003 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1004 						   mlxsw_sp_vr_fib(vr, proto),
1005 						   new_tree);
1006 		if (err)
1007 			goto err_tree_replace;
1008 	}
1009 
1010 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
1011 	       sizeof(new_tree->prefix_ref_count));
1012 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
1013 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
1014 
1015 	return 0;
1016 
1017 err_tree_replace:
1018 	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
1019 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
1020 			continue;
1021 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
1022 					     mlxsw_sp_vr_fib(vr, proto),
1023 					     old_tree);
1024 	}
1025 	return err;
1026 }
1027 
1028 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1029 {
1030 	struct mlxsw_sp_vr *vr;
1031 	u64 max_vrs;
1032 	int i;
1033 
1034 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1035 		return -EIO;
1036 
1037 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1038 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1039 					GFP_KERNEL);
1040 	if (!mlxsw_sp->router->vrs)
1041 		return -ENOMEM;
1042 
1043 	for (i = 0; i < max_vrs; i++) {
1044 		vr = &mlxsw_sp->router->vrs[i];
1045 		vr->id = i;
1046 	}
1047 
1048 	return 0;
1049 }
1050 
1051 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1052 
1053 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1054 {
1055 	/* At this stage we're guaranteed not to have new incoming
1056 	 * FIB notifications and the work queue is free from FIBs
1057 	 * sitting on top of mlxsw netdevs. However, we can still
1058 	 * have other FIBs queued. Flush the queue before flushing
1059 	 * the device's tables. No need for locks, as we're the only
1060 	 * writer.
1061 	 */
1062 	mlxsw_core_flush_owq();
1063 	mlxsw_sp_router_fib_flush(mlxsw_sp);
1064 	kfree(mlxsw_sp->router->vrs);
1065 }
1066 
1067 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1068 {
1069 	struct net_device *d;
1070 	u32 tb_id;
1071 
1072 	rcu_read_lock();
1073 	d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1074 	if (d)
1075 		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1076 	else
1077 		tb_id = RT_TABLE_MAIN;
1078 	rcu_read_unlock();
1079 
1080 	return tb_id;
1081 }
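/* For instance, when the tunnel's underlay device is enslaved to a VRF,
 * l3mdev_fib_table() yields that VRF's table; a standalone underlay falls
 * back to RT_TABLE_MAIN.
 */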
1082 
1083 static void
1084 mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev)
1085 {
1086 	crif->key.dev = dev;
1087 	INIT_LIST_HEAD(&crif->nexthop_list);
1088 }
1089 
1090 static struct mlxsw_sp_crif *
1091 mlxsw_sp_crif_alloc(struct net_device *dev)
1092 {
1093 	struct mlxsw_sp_crif *crif;
1094 
1095 	crif = kzalloc(sizeof(*crif), GFP_KERNEL);
1096 	if (!crif)
1097 		return NULL;
1098 
1099 	mlxsw_sp_crif_init(crif, dev);
1100 	return crif;
1101 }
1102 
1103 static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif)
1104 {
1105 	if (WARN_ON(crif->rif))
1106 		return;
1107 
1108 	WARN_ON(!list_empty(&crif->nexthop_list));
1109 	kfree(crif);
1110 }
1111 
1112 static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router,
1113 				struct mlxsw_sp_crif *crif)
1114 {
1115 	return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node,
1116 				      mlxsw_sp_crif_ht_params);
1117 }
1118 
1119 static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router,
1120 				 struct mlxsw_sp_crif *crif)
1121 {
1122 	rhashtable_remove_fast(&router->crif_ht, &crif->ht_node,
1123 			       mlxsw_sp_crif_ht_params);
1124 }
1125 
1126 static struct mlxsw_sp_crif *
1127 mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router,
1128 		     const struct net_device *dev)
1129 {
1130 	struct mlxsw_sp_crif_key key = {
1131 		.dev = (struct net_device *)dev,
1132 	};
1133 
1134 	return rhashtable_lookup_fast(&router->crif_ht, &key,
1135 				      mlxsw_sp_crif_ht_params);
1136 }
1137 
1138 static struct mlxsw_sp_rif *
1139 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1140 		    const struct mlxsw_sp_rif_params *params,
1141 		    struct netlink_ext_ack *extack);
1142 
1143 static struct mlxsw_sp_rif_ipip_lb *
1144 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1145 				enum mlxsw_sp_ipip_type ipipt,
1146 				struct net_device *ol_dev,
1147 				struct netlink_ext_ack *extack)
1148 {
1149 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1150 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1151 	struct mlxsw_sp_rif *rif;
1152 
1153 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1154 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1155 		.common.dev = ol_dev,
1156 		.common.lag = false,
1157 		.common.double_entry = ipip_ops->double_rif_entry,
1158 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1159 	};
1160 
1161 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1162 	if (IS_ERR(rif))
1163 		return ERR_CAST(rif);
1164 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1165 }
1166 
1167 static struct mlxsw_sp_ipip_entry *
1168 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1169 			  enum mlxsw_sp_ipip_type ipipt,
1170 			  struct net_device *ol_dev)
1171 {
1172 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1173 	struct mlxsw_sp_ipip_entry *ipip_entry;
1174 	struct mlxsw_sp_ipip_entry *ret = NULL;
1175 	int err;
1176 
1177 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1178 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1179 	if (!ipip_entry)
1180 		return ERR_PTR(-ENOMEM);
1181 
1182 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1183 							    ol_dev, NULL);
1184 	if (IS_ERR(ipip_entry->ol_lb)) {
1185 		ret = ERR_CAST(ipip_entry->ol_lb);
1186 		goto err_ol_ipip_lb_create;
1187 	}
1188 
1189 	ipip_entry->ipipt = ipipt;
1190 	ipip_entry->ol_dev = ol_dev;
1191 	ipip_entry->parms = ipip_ops->parms_init(ol_dev);
1192 
1193 	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
1194 	if (err) {
1195 		ret = ERR_PTR(err);
1196 		goto err_rem_ip_addr_set;
1197 	}
1198 
1199 	return ipip_entry;
1200 
1201 err_rem_ip_addr_set:
1202 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1203 err_ol_ipip_lb_create:
1204 	kfree(ipip_entry);
1205 	return ret;
1206 }
1207 
1208 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1209 					struct mlxsw_sp_ipip_entry *ipip_entry)
1210 {
1211 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1212 		mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1213 
1214 	ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1215 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1216 	kfree(ipip_entry);
1217 }
1218 
1219 static bool
1220 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1221 				  const enum mlxsw_sp_l3proto ul_proto,
1222 				  union mlxsw_sp_l3addr saddr,
1223 				  u32 ul_tb_id,
1224 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1225 {
1226 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1227 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1228 	union mlxsw_sp_l3addr tun_saddr;
1229 
1230 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1231 		return false;
1232 
1233 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1234 	return tun_ul_tb_id == ul_tb_id &&
1235 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1236 }
1237 
1238 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1239 						 enum mlxsw_sp_ipip_type ipipt)
1240 {
1241 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1242 
1243 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1244 
1245 	/* Not all tunnels require increasing the default parsing depth
1246 	 * (96 bytes).
1247 	 */
1248 	if (ipip_ops->inc_parsing_depth)
1249 		return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1250 
1251 	return 0;
1252 }
1253 
1254 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1255 						  enum mlxsw_sp_ipip_type ipipt)
1256 {
1257 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1258 		mlxsw_sp->router->ipip_ops_arr[ipipt];
1259 
1260 	if (ipip_ops->inc_parsing_depth)
1261 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
1262 }
1263 
1264 static int
1265 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1266 			      struct mlxsw_sp_fib_entry *fib_entry,
1267 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1268 {
1269 	u32 tunnel_index;
1270 	int err;
1271 
1272 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1273 				  1, &tunnel_index);
1274 	if (err)
1275 		return err;
1276 
1277 	err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1278 						    ipip_entry->ipipt);
1279 	if (err)
1280 		goto err_parsing_depth_inc;
1281 
1282 	ipip_entry->decap_fib_entry = fib_entry;
1283 	fib_entry->decap.ipip_entry = ipip_entry;
1284 	fib_entry->decap.tunnel_index = tunnel_index;
1285 
1286 	return 0;
1287 
1288 err_parsing_depth_inc:
1289 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1290 			   tunnel_index);
1291 	return err;
1292 }
1293 
1294 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1295 					  struct mlxsw_sp_fib_entry *fib_entry)
1296 {
1297 	enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1298 
1299 	/* Unlink this entry from the IPIP entry whose decap entry it is. */
1300 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1301 	fib_entry->decap.ipip_entry = NULL;
1302 	mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1303 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1304 			   1, fib_entry->decap.tunnel_index);
1305 }
1306 
1307 static struct mlxsw_sp_fib_node *
1308 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1309 			 size_t addr_len, unsigned char prefix_len);
1310 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1311 				     struct mlxsw_sp_fib_entry *fib_entry);
1312 
1313 static void
1314 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1315 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1316 {
1317 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1318 
1319 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1320 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1321 
1322 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1323 }
1324 
1325 static void
1326 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1327 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1328 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1329 {
1330 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1331 					  ipip_entry))
1332 		return;
1333 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1334 
1335 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1336 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1337 }
1338 
1339 static struct mlxsw_sp_fib_entry *
1340 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1341 				     enum mlxsw_sp_l3proto proto,
1342 				     const union mlxsw_sp_l3addr *addr,
1343 				     enum mlxsw_sp_fib_entry_type type)
1344 {
1345 	struct mlxsw_sp_fib_node *fib_node;
1346 	unsigned char addr_prefix_len;
1347 	struct mlxsw_sp_fib *fib;
1348 	struct mlxsw_sp_vr *vr;
1349 	const void *addrp;
1350 	size_t addr_len;
1351 	u32 addr4;
1352 
1353 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1354 	if (!vr)
1355 		return NULL;
1356 	fib = mlxsw_sp_vr_fib(vr, proto);
1357 
1358 	switch (proto) {
1359 	case MLXSW_SP_L3_PROTO_IPV4:
1360 		addr4 = be32_to_cpu(addr->addr4);
1361 		addrp = &addr4;
1362 		addr_len = 4;
1363 		addr_prefix_len = 32;
1364 		break;
1365 	case MLXSW_SP_L3_PROTO_IPV6:
1366 		addrp = &addr->addr6;
1367 		addr_len = 16;
1368 		addr_prefix_len = 128;
1369 		break;
1370 	default:
1371 		WARN_ON(1);
1372 		return NULL;
1373 	}
1374 
1375 	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1376 					    addr_prefix_len);
1377 	if (!fib_node || fib_node->fib_entry->type != type)
1378 		return NULL;
1379 
1380 	return fib_node->fib_entry;
1381 }
1382 
1383 /* Given an IPIP entry, find the corresponding decap route. */
1384 static struct mlxsw_sp_fib_entry *
1385 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1386 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1387 {
1388 	struct mlxsw_sp_fib_node *fib_node;
1389 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1390 	unsigned char saddr_prefix_len;
1391 	union mlxsw_sp_l3addr saddr;
1392 	struct mlxsw_sp_fib *ul_fib;
1393 	struct mlxsw_sp_vr *ul_vr;
1394 	const void *saddrp;
1395 	size_t saddr_len;
1396 	u32 ul_tb_id;
1397 	u32 saddr4;
1398 
1399 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1400 
1401 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1402 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1403 	if (!ul_vr)
1404 		return NULL;
1405 
1406 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1407 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1408 					   ipip_entry->ol_dev);
1409 
1410 	switch (ipip_ops->ul_proto) {
1411 	case MLXSW_SP_L3_PROTO_IPV4:
1412 		saddr4 = be32_to_cpu(saddr.addr4);
1413 		saddrp = &saddr4;
1414 		saddr_len = 4;
1415 		saddr_prefix_len = 32;
1416 		break;
1417 	case MLXSW_SP_L3_PROTO_IPV6:
1418 		saddrp = &saddr.addr6;
1419 		saddr_len = 16;
1420 		saddr_prefix_len = 128;
1421 		break;
1422 	default:
1423 		WARN_ON(1);
1424 		return NULL;
1425 	}
1426 
1427 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1428 					    saddr_prefix_len);
1429 	if (!fib_node ||
1430 	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1431 		return NULL;
1432 
1433 	return fib_node->fib_entry;
1434 }
1435 
1436 static struct mlxsw_sp_ipip_entry *
1437 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1438 			   enum mlxsw_sp_ipip_type ipipt,
1439 			   struct net_device *ol_dev)
1440 {
1441 	struct mlxsw_sp_ipip_entry *ipip_entry;
1442 
1443 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1444 	if (IS_ERR(ipip_entry))
1445 		return ipip_entry;
1446 
1447 	list_add_tail(&ipip_entry->ipip_list_node,
1448 		      &mlxsw_sp->router->ipip_list);
1449 
1450 	return ipip_entry;
1451 }
1452 
1453 static void
1454 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1455 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1456 {
1457 	list_del(&ipip_entry->ipip_list_node);
1458 	mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1459 }
1460 
1461 static bool
1462 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1463 				  const struct net_device *ul_dev,
1464 				  enum mlxsw_sp_l3proto ul_proto,
1465 				  union mlxsw_sp_l3addr ul_dip,
1466 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1467 {
1468 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1469 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1470 
1471 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1472 		return false;
1473 
1474 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1475 						 ul_tb_id, ipip_entry);
1476 }
1477 
1478 /* Given decap parameters, find the corresponding IPIP entry. */
1479 static struct mlxsw_sp_ipip_entry *
1480 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1481 				  enum mlxsw_sp_l3proto ul_proto,
1482 				  union mlxsw_sp_l3addr ul_dip)
1483 {
1484 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1485 	struct net_device *ul_dev;
1486 
1487 	rcu_read_lock();
1488 
1489 	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1490 	if (!ul_dev)
1491 		goto out_unlock;
1492 
1493 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1494 			    ipip_list_node)
1495 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1496 						      ul_proto, ul_dip,
1497 						      ipip_entry))
1498 			goto out_unlock;
1499 
1500 	rcu_read_unlock();
1501 
1502 	return NULL;
1503 
1504 out_unlock:
1505 	rcu_read_unlock();
1506 	return ipip_entry;
1507 }
1508 
1509 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1510 				      const struct net_device *dev,
1511 				      enum mlxsw_sp_ipip_type *p_type)
1512 {
1513 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1514 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1515 	enum mlxsw_sp_ipip_type ipipt;
1516 
1517 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1518 		ipip_ops = router->ipip_ops_arr[ipipt];
1519 		if (dev->type == ipip_ops->dev_type) {
1520 			if (p_type)
1521 				*p_type = ipipt;
1522 			return true;
1523 		}
1524 	}
1525 	return false;
1526 }
1527 
1528 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1529 				       const struct net_device *dev)
1530 {
1531 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1532 }
1533 
1534 static struct mlxsw_sp_ipip_entry *
1535 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1536 				   const struct net_device *ol_dev)
1537 {
1538 	struct mlxsw_sp_ipip_entry *ipip_entry;
1539 
1540 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1541 			    ipip_list_node)
1542 		if (ipip_entry->ol_dev == ol_dev)
1543 			return ipip_entry;
1544 
1545 	return NULL;
1546 }
1547 
1548 static struct mlxsw_sp_ipip_entry *
1549 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1550 				   const struct net_device *ul_dev,
1551 				   struct mlxsw_sp_ipip_entry *start)
1552 {
1553 	struct mlxsw_sp_ipip_entry *ipip_entry;
1554 
1555 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1556 					ipip_list_node);
1557 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1558 				     ipip_list_node) {
1559 		struct net_device *ol_dev = ipip_entry->ol_dev;
1560 		struct net_device *ipip_ul_dev;
1561 
1562 		rcu_read_lock();
1563 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1564 		rcu_read_unlock();
1565 
1566 		if (ipip_ul_dev == ul_dev)
1567 			return ipip_entry;
1568 	}
1569 
1570 	return NULL;
1571 }
1572 
1573 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1574 				       const struct net_device *dev)
1575 {
1576 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1577 }
1578 
1579 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1580 						const struct net_device *ol_dev,
1581 						enum mlxsw_sp_ipip_type ipipt)
1582 {
1583 	const struct mlxsw_sp_ipip_ops *ops =
1584 		mlxsw_sp->router->ipip_ops_arr[ipipt];
1585 
1586 	return ops->can_offload(mlxsw_sp, ol_dev);
1587 }
1588 
1589 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1590 						struct net_device *ol_dev)
1591 {
1592 	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1593 	struct mlxsw_sp_ipip_entry *ipip_entry;
1594 	enum mlxsw_sp_l3proto ul_proto;
1595 	union mlxsw_sp_l3addr saddr;
1596 	u32 ul_tb_id;
1597 
1598 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1599 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1600 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1601 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1602 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1603 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1604 							  saddr, ul_tb_id,
1605 							  NULL)) {
1606 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1607 								ol_dev);
1608 			if (IS_ERR(ipip_entry))
1609 				return PTR_ERR(ipip_entry);
1610 		}
1611 	}
1612 
1613 	return 0;
1614 }
1615 
1616 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1617 						   struct net_device *ol_dev)
1618 {
1619 	struct mlxsw_sp_ipip_entry *ipip_entry;
1620 
1621 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1622 	if (ipip_entry)
1623 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1624 }
1625 
1626 static void
1627 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1628 				struct mlxsw_sp_ipip_entry *ipip_entry)
1629 {
1630 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1631 
1632 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1633 	if (decap_fib_entry)
1634 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1635 						  decap_fib_entry);
1636 }
1637 
1638 static int
1639 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1640 			u16 ul_rif_id, bool enable)
1641 {
1642 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1643 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
1644 	enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1645 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1646 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1647 	char ritr_pl[MLXSW_REG_RITR_LEN];
1648 	struct in6_addr *saddr6;
1649 	u32 saddr4;
1650 
1651 	ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1652 	switch (lb_cf.ul_protocol) {
1653 	case MLXSW_SP_L3_PROTO_IPV4:
1654 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1655 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1656 				    rif->rif_index, rif->vr_id, dev->mtu);
1657 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1658 						   ipip_options, ul_vr_id,
1659 						   ul_rif_id, saddr4,
1660 						   lb_cf.okey);
1661 		break;
1662 
1663 	case MLXSW_SP_L3_PROTO_IPV6:
1664 		saddr6 = &lb_cf.saddr.addr6;
1665 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1666 				    rif->rif_index, rif->vr_id, dev->mtu);
1667 		mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1668 						   ipip_options, ul_vr_id,
1669 						   ul_rif_id, saddr6,
1670 						   lb_cf.okey);
1671 		break;
1672 	}
1673 
1674 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1675 }
1676 
1677 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1678 						 struct net_device *ol_dev)
1679 {
1680 	struct mlxsw_sp_ipip_entry *ipip_entry;
1681 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1682 	int err = 0;
1683 
1684 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1685 	if (ipip_entry) {
1686 		lb_rif = ipip_entry->ol_lb;
1687 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1688 					      lb_rif->ul_rif_id, true);
1689 		if (err)
1690 			goto out;
1691 		lb_rif->common.mtu = ol_dev->mtu;
1692 	}
1693 
1694 out:
1695 	return err;
1696 }
1697 
1698 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1699 						struct net_device *ol_dev)
1700 {
1701 	struct mlxsw_sp_ipip_entry *ipip_entry;
1702 
1703 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1704 	if (ipip_entry)
1705 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1706 }
1707 
1708 static void
1709 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1710 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1711 {
1712 	if (ipip_entry->decap_fib_entry)
1713 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1714 }
1715 
1716 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1717 						  struct net_device *ol_dev)
1718 {
1719 	struct mlxsw_sp_ipip_entry *ipip_entry;
1720 
1721 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1722 	if (ipip_entry)
1723 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1724 }
1725 
1726 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1727 					struct mlxsw_sp_rif *rif);
1728 
1729 static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp,
1730 					 struct mlxsw_sp_rif *old_rif,
1731 					 struct mlxsw_sp_rif *new_rif,
1732 					 bool migrate_nhs)
1733 {
1734 	struct mlxsw_sp_crif *crif = old_rif->crif;
1735 	struct mlxsw_sp_crif mock_crif = {};
1736 
1737 	if (migrate_nhs)
1738 		mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
1739 
1740 	/* Plant a mock CRIF so that destroying the old RIF doesn't unoffload
1741 	 * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link.
1742 	 */
1743 	mlxsw_sp_crif_init(&mock_crif, crif->key.dev);
1744 	old_rif->crif = &mock_crif;
1745 	mock_crif.rif = old_rif;
1746 	mlxsw_sp_rif_destroy(old_rif);
1747 }
1748 
1749 static int
1750 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1751 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1752 				 bool keep_encap,
1753 				 struct netlink_ext_ack *extack)
1754 {
1755 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1756 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1757 
1758 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1759 						     ipip_entry->ipipt,
1760 						     ipip_entry->ol_dev,
1761 						     extack);
1762 	if (IS_ERR(new_lb_rif))
1763 		return PTR_ERR(new_lb_rif);
1764 	ipip_entry->ol_lb = new_lb_rif;
1765 
1766 	mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common,
1767 				     &new_lb_rif->common, keep_encap);
1768 	return 0;
1769 }
1770 
1771 /**
1772  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1773  * @mlxsw_sp: mlxsw_sp driver instance.
1774  * @ipip_entry: IPIP entry.
1775  * @recreate_loopback: Recreates the associated loopback RIF.
1776  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1777  *              relevant when recreate_loopback is true.
1778  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1779  *                   is only relevant when recreate_loopback is false.
1780  * @extack: netlink extended ack, used for error reporting.
1781  *
1782  * Return: 0 on success, or a negative error value on failure.
1783  */
1784 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1785 					struct mlxsw_sp_ipip_entry *ipip_entry,
1786 					bool recreate_loopback,
1787 					bool keep_encap,
1788 					bool update_nexthops,
1789 					struct netlink_ext_ack *extack)
1790 {
1791 	int err;
1792 
1793 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1794 	 * recreate it. That creates a window of opportunity where RALUE and
1795 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1796 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1797 	 * of RALUE, demote the decap route back.
1798 	 */
1799 	if (ipip_entry->decap_fib_entry)
1800 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1801 
1802 	if (recreate_loopback) {
1803 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1804 						       keep_encap, extack);
1805 		if (err)
1806 			return err;
1807 	} else if (update_nexthops) {
1808 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1809 					    &ipip_entry->ol_lb->common);
1810 	}
1811 
1812 	if (ipip_entry->ol_dev->flags & IFF_UP)
1813 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1814 
1815 	return 0;
1816 }
1817 
1818 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1819 						struct net_device *ol_dev,
1820 						struct netlink_ext_ack *extack)
1821 {
1822 	struct mlxsw_sp_ipip_entry *ipip_entry =
1823 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1824 
1825 	if (!ipip_entry)
1826 		return 0;
1827 
1828 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1829 						   true, false, false, extack);
1830 }
1831 
1832 static int
1833 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1834 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1835 				     struct net_device *ul_dev,
1836 				     bool *demote_this,
1837 				     struct netlink_ext_ack *extack)
1838 {
1839 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1840 	enum mlxsw_sp_l3proto ul_proto;
1841 	union mlxsw_sp_l3addr saddr;
1842 
1843 	/* Moving underlay to a different VRF might cause local address
1844 	 * conflict, and the conflicting tunnels need to be demoted.
1845 	 */
1846 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1847 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1848 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1849 						 saddr, ul_tb_id,
1850 						 ipip_entry)) {
1851 		*demote_this = true;
1852 		return 0;
1853 	}
1854 
1855 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1856 						   true, true, false, extack);
1857 }
1858 
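/* The underlay device came up. Encapsulated traffic can flow again, so
 * refresh the nexthops while keeping the current loopback RIF.
 */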
1859 static int
1860 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1861 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1862 				    struct net_device *ul_dev)
1863 {
1864 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1865 						   false, false, true, NULL);
1866 }
1867 
1868 static int
1869 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1870 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1871 				      struct net_device *ul_dev)
1872 {
1873 	/* When the underlay device goes down, encapsulated packets are no
1874 	 * longer forwarded, but decap still works. So refresh the next hops
1875 	 * without touching anything else.
1876 	 */
1877 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1878 						   false, false, true, NULL);
1879 }
1880 
1881 static int
1882 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1883 					struct net_device *ol_dev,
1884 					struct netlink_ext_ack *extack)
1885 {
1886 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1887 	struct mlxsw_sp_ipip_entry *ipip_entry;
1889 
1890 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1891 	if (!ipip_entry)
1892 		/* A change might make a tunnel eligible for offloading, but
1893 		 * that is currently not implemented. What falls to slow path
1894 		 * stays there.
1895 		 */
1896 		return 0;
1897 
1898 	/* A change might make a tunnel not eligible for offloading. */
1899 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1900 						 ipip_entry->ipipt)) {
1901 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1902 		return 0;
1903 	}
1904 
1905 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1906 	return ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1908 }
1909 
1910 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1911 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1912 {
1913 	struct net_device *ol_dev = ipip_entry->ol_dev;
1914 
1915 	if (ol_dev->flags & IFF_UP)
1916 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1917 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1918 }
1919 
1920 /* The configuration where several tunnels have the same local address in the
1921  * same underlay table needs special treatment in the HW. That is currently not
1922  * implemented in the driver. This function finds and demotes the first tunnel
1923  * with a given source address, except the one passed in the argument
1924  * 'except'.
1925  */
1926 bool
1927 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1928 				     enum mlxsw_sp_l3proto ul_proto,
1929 				     union mlxsw_sp_l3addr saddr,
1930 				     u32 ul_tb_id,
1931 				     const struct mlxsw_sp_ipip_entry *except)
1932 {
1933 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1934 
1935 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1936 				 ipip_list_node) {
1937 		if (ipip_entry != except &&
1938 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1939 						      ul_tb_id, ipip_entry)) {
1940 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1941 			return true;
1942 		}
1943 	}
1944 
1945 	return false;
1946 }
1947 
1948 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1949 						     struct net_device *ul_dev)
1950 {
1951 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1952 
1953 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1954 				 ipip_list_node) {
1955 		struct net_device *ol_dev = ipip_entry->ol_dev;
1956 		struct net_device *ipip_ul_dev;
1957 
1958 		rcu_read_lock();
1959 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1960 		rcu_read_unlock();
1961 		if (ipip_ul_dev == ul_dev)
1962 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1963 	}
1964 }
1965 
1966 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1967 					    struct net_device *ol_dev,
1968 					    unsigned long event,
1969 					    struct netdev_notifier_info *info)
1970 {
1971 	struct netdev_notifier_changeupper_info *chup;
1972 	struct netlink_ext_ack *extack;
1973 	int err = 0;
1974 
1975 	switch (event) {
1976 	case NETDEV_REGISTER:
1977 		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1978 		break;
1979 	case NETDEV_UNREGISTER:
1980 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1981 		break;
1982 	case NETDEV_UP:
1983 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1984 		break;
1985 	case NETDEV_DOWN:
1986 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1987 		break;
1988 	case NETDEV_CHANGEUPPER:
1989 		chup = container_of(info, typeof(*chup), info);
1990 		extack = info->extack;
1991 		if (netif_is_l3_master(chup->upper_dev))
1992 			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1993 								   ol_dev,
1994 								   extack);
1995 		break;
1996 	case NETDEV_CHANGE:
1997 		extack = info->extack;
1998 		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1999 							      ol_dev, extack);
2000 		break;
2001 	case NETDEV_CHANGEMTU:
2002 		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
2003 		break;
2004 	}
2005 	return err;
2006 }
2007 
2008 static int
2009 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2010 				   struct mlxsw_sp_ipip_entry *ipip_entry,
2011 				   struct net_device *ul_dev,
2012 				   bool *demote_this,
2013 				   unsigned long event,
2014 				   struct netdev_notifier_info *info)
2015 {
2016 	struct netdev_notifier_changeupper_info *chup;
2017 	struct netlink_ext_ack *extack;
2018 
2019 	switch (event) {
2020 	case NETDEV_CHANGEUPPER:
2021 		chup = container_of(info, typeof(*chup), info);
2022 		extack = info->extack;
2023 		if (netif_is_l3_master(chup->upper_dev))
2024 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
2025 								    ipip_entry,
2026 								    ul_dev,
2027 								    demote_this,
2028 								    extack);
2029 		break;
2030 
2031 	case NETDEV_UP:
2032 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
2033 							   ul_dev);
2034 	case NETDEV_DOWN:
2035 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
2036 							     ipip_entry,
2037 							     ul_dev);
2038 	}
2039 	return 0;
2040 }
2041 
2042 static int
2043 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
2044 				 struct net_device *ul_dev,
2045 				 unsigned long event,
2046 				 struct netdev_notifier_info *info)
2047 {
2048 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
2049 	int err;
2050 
2051 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
2052 								ul_dev,
2053 								ipip_entry))) {
2054 		struct mlxsw_sp_ipip_entry *prev;
2055 		bool demote_this = false;
2056 
2057 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
2058 							 ul_dev, &demote_this,
2059 							 event, info);
2060 		if (err) {
2061 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
2062 								 ul_dev);
2063 			return err;
2064 		}
2065 
2066 		if (demote_this) {
2067 			if (list_is_first(&ipip_entry->ipip_list_node,
2068 					  &mlxsw_sp->router->ipip_list))
2069 				prev = NULL;
2070 			else
2071 				/* This can't be cached from previous iteration,
2072 				 * because that entry could be gone now.
2073 				 */
2074 				prev = list_prev_entry(ipip_entry,
2075 						       ipip_list_node);
2076 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
2077 			ipip_entry = prev;
2078 		}
2079 	}
2080 
2081 	return 0;
2082 }
2083 
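/* Record the NVE decap configuration and, if a matching local route is
 * already offloaded as a trap, promote it to an NVE decap entry pointing at
 * the given tunnel index.
 */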
2084 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2085 				      enum mlxsw_sp_l3proto ul_proto,
2086 				      const union mlxsw_sp_l3addr *ul_sip,
2087 				      u32 tunnel_index)
2088 {
2089 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2090 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2091 	struct mlxsw_sp_fib_entry *fib_entry;
2092 	int err = 0;
2093 
2094 	mutex_lock(&mlxsw_sp->router->lock);
2095 
2096 	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2097 		err = -EINVAL;
2098 		goto out;
2099 	}
2100 
2101 	router->nve_decap_config.ul_tb_id = ul_tb_id;
2102 	router->nve_decap_config.tunnel_index = tunnel_index;
2103 	router->nve_decap_config.ul_proto = ul_proto;
2104 	router->nve_decap_config.ul_sip = *ul_sip;
2105 	router->nve_decap_config.valid = true;
2106 
2107 	/* It is valid to create a tunnel with a local IP and only later
2108 	 * assign this IP address to a local interface.
2109 	 */
2110 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2111 							 ul_proto, ul_sip,
2112 							 type);
2113 	if (!fib_entry)
2114 		goto out;
2115 
2116 	fib_entry->decap.tunnel_index = tunnel_index;
2117 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2118 
2119 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2120 	if (err)
2121 		goto err_fib_entry_update;
2122 
2123 	goto out;
2124 
2125 err_fib_entry_update:
2126 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2127 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2128 out:
2129 	mutex_unlock(&mlxsw_sp->router->lock);
2130 	return err;
2131 }
2132 
2133 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2134 				      enum mlxsw_sp_l3proto ul_proto,
2135 				      const union mlxsw_sp_l3addr *ul_sip)
2136 {
2137 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2138 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2139 	struct mlxsw_sp_fib_entry *fib_entry;
2140 
2141 	mutex_lock(&mlxsw_sp->router->lock);
2142 
2143 	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2144 		goto out;
2145 
2146 	router->nve_decap_config.valid = false;
2147 
2148 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2149 							 ul_proto, ul_sip,
2150 							 type);
2151 	if (!fib_entry)
2152 		goto out;
2153 
2154 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2155 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2156 out:
2157 	mutex_unlock(&mlxsw_sp->router->lock);
2158 }
2159 
2160 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2161 					 u32 ul_tb_id,
2162 					 enum mlxsw_sp_l3proto ul_proto,
2163 					 const union mlxsw_sp_l3addr *ul_sip)
2164 {
2165 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2166 
2167 	return router->nve_decap_config.valid &&
2168 	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
2169 	       router->nve_decap_config.ul_proto == ul_proto &&
2170 	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2171 		       sizeof(*ul_sip));
2172 }
2173 
2174 struct mlxsw_sp_neigh_key {
2175 	struct neighbour *n;
2176 };
2177 
2178 struct mlxsw_sp_neigh_entry {
2179 	struct list_head rif_list_node;
2180 	struct rhash_head ht_node;
2181 	struct mlxsw_sp_neigh_key key;
2182 	u16 rif;
2183 	bool connected;
2184 	unsigned char ha[ETH_ALEN];
2185 	struct list_head nexthop_list; /* list of nexthops using
2186 					* this neigh entry
2187 					*/
2188 	struct list_head nexthop_neighs_list_node;
2189 	unsigned int counter_index;
2190 	bool counter_valid;
2191 };
2192 
2193 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2194 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2195 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2196 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
2197 };
2198 
2199 struct mlxsw_sp_neigh_entry *
2200 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2201 			struct mlxsw_sp_neigh_entry *neigh_entry)
2202 {
2203 	if (!neigh_entry) {
2204 		if (list_empty(&rif->neigh_list))
2205 			return NULL;
2206 		else
2207 			return list_first_entry(&rif->neigh_list,
2208 						typeof(*neigh_entry),
2209 						rif_list_node);
2210 	}
2211 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2212 		return NULL;
2213 	return list_next_entry(neigh_entry, rif_list_node);
2214 }
2215 
2216 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2217 {
2218 	return neigh_entry->key.n->tbl->family;
2219 }
2220 
2221 unsigned char *
2222 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2223 {
2224 	return neigh_entry->ha;
2225 }
2226 
2227 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2228 {
2229 	struct neighbour *n;
2230 
2231 	n = neigh_entry->key.n;
2232 	return ntohl(*((__be32 *) n->primary_key));
2233 }
2234 
2235 struct in6_addr *
2236 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2237 {
2238 	struct neighbour *n;
2239 
2240 	n = neigh_entry->key.n;
2241 	return (struct in6_addr *) &n->primary_key;
2242 }
2243 
2244 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2245 			       struct mlxsw_sp_neigh_entry *neigh_entry,
2246 			       u64 *p_counter)
2247 {
2248 	if (!neigh_entry->counter_valid)
2249 		return -EINVAL;
2250 
2251 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2252 					 p_counter, NULL);
2253 }
2254 
2255 static struct mlxsw_sp_neigh_entry *
2256 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2257 			   u16 rif)
2258 {
2259 	struct mlxsw_sp_neigh_entry *neigh_entry;
2260 
2261 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2262 	if (!neigh_entry)
2263 		return NULL;
2264 
2265 	neigh_entry->key.n = n;
2266 	neigh_entry->rif = rif;
2267 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2268 
2269 	return neigh_entry;
2270 }
2271 
2272 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2273 {
2274 	kfree(neigh_entry);
2275 }
2276 
2277 static int
2278 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2279 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2280 {
2281 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2282 				      &neigh_entry->ht_node,
2283 				      mlxsw_sp_neigh_ht_params);
2284 }
2285 
2286 static void
2287 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2288 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2289 {
2290 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2291 			       &neigh_entry->ht_node,
2292 			       mlxsw_sp_neigh_ht_params);
2293 }
2294 
2295 static bool
2296 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2297 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2298 {
2299 	struct devlink *devlink;
2300 	const char *table_name;
2301 
2302 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2303 	case AF_INET:
2304 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2305 		break;
2306 	case AF_INET6:
2307 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2308 		break;
2309 	default:
2310 		WARN_ON(1);
2311 		return false;
2312 	}
2313 
2314 	devlink = priv_to_devlink(mlxsw_sp->core);
2315 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2316 }
2317 
2318 static void
2319 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2320 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2321 {
2322 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2323 		return;
2324 
2325 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2326 		return;
2327 
2328 	neigh_entry->counter_valid = true;
2329 }
2330 
2331 static void
2332 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2333 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2334 {
2335 	if (!neigh_entry->counter_valid)
2336 		return;
2337 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2338 				   neigh_entry->counter_index);
2339 	neigh_entry->counter_valid = false;
2340 }
2341 
2342 static struct mlxsw_sp_neigh_entry *
2343 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2344 {
2345 	struct mlxsw_sp_neigh_entry *neigh_entry;
2346 	struct mlxsw_sp_rif *rif;
2347 	int err;
2348 
2349 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2350 	if (!rif)
2351 		return ERR_PTR(-EINVAL);
2352 
2353 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2354 	if (!neigh_entry)
2355 		return ERR_PTR(-ENOMEM);
2356 
2357 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2358 	if (err)
2359 		goto err_neigh_entry_insert;
2360 
2361 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2362 	atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2363 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2364 
2365 	return neigh_entry;
2366 
2367 err_neigh_entry_insert:
2368 	mlxsw_sp_neigh_entry_free(neigh_entry);
2369 	return ERR_PTR(err);
2370 }
2371 
2372 static void
2373 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2374 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2375 {
2376 	list_del(&neigh_entry->rif_list_node);
2377 	atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2378 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2379 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2380 	mlxsw_sp_neigh_entry_free(neigh_entry);
2381 }
2382 
2383 static struct mlxsw_sp_neigh_entry *
2384 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2385 {
2386 	struct mlxsw_sp_neigh_key key;
2387 
2388 	key.n = n;
2389 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2390 				      &key, mlxsw_sp_neigh_ht_params);
2391 }
2392 
2393 static void
2394 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2395 {
2396 	unsigned long interval;
2397 
2398 #if IS_ENABLED(CONFIG_IPV6)
2399 	interval = min_t(unsigned long,
2400 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2401 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2402 #else
2403 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2404 #endif
2405 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2406 }
2407 
2408 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2409 						   char *rauhtd_pl,
2410 						   int ent_index)
2411 {
2412 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2413 	struct net_device *dev;
2414 	struct neighbour *n;
2415 	__be32 dipn;
2416 	u32 dip;
2417 	u16 rif;
2418 
2419 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2420 
2421 	if (WARN_ON_ONCE(rif >= max_rifs))
2422 		return;
2423 	if (!mlxsw_sp->router->rifs[rif]) {
2424 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2425 		return;
2426 	}
2427 
2428 	dipn = htonl(dip);
2429 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2430 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2431 	if (!n)
2432 		return;
2433 
2434 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2435 	neigh_event_send(n, NULL);
2436 	neigh_release(n);
2437 }
2438 
2439 #if IS_ENABLED(CONFIG_IPV6)
2440 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2441 						   char *rauhtd_pl,
2442 						   int rec_index)
2443 {
2444 	struct net_device *dev;
2445 	struct neighbour *n;
2446 	struct in6_addr dip;
2447 	u16 rif;
2448 
2449 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2450 					 (char *) &dip);
2451 
	/* Range-check the RIF as is done in the IPv4 path above. */
	if (WARN_ON_ONCE(rif >= MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS)))
		return;
2452 	if (!mlxsw_sp->router->rifs[rif]) {
2453 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2454 		return;
2455 	}
2456 
2457 	dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]);
2458 	n = neigh_lookup(&nd_tbl, &dip, dev);
2459 	if (!n)
2460 		return;
2461 
2462 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2463 	neigh_event_send(n, NULL);
2464 	neigh_release(n);
2465 }
2466 #else
2467 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2468 						   char *rauhtd_pl,
2469 						   int rec_index)
2470 {
2471 }
2472 #endif
2473 
2474 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2475 						   char *rauhtd_pl,
2476 						   int rec_index)
2477 {
2478 	u8 num_entries;
2479 	int i;
2480 
2481 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2482 								rec_index);
2483 	/* Hardware starts counting at 0, so add 1. */
2484 	num_entries++;
2485 
2486 	/* Each record consists of several neighbour entries. */
2487 	for (i = 0; i < num_entries; i++) {
2488 		int ent_index;
2489 
2490 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2491 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2492 						       ent_index);
2493 	}
2494 
2496 
2497 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2498 						   char *rauhtd_pl,
2499 						   int rec_index)
2500 {
2501 	/* One record contains one entry. */
2502 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2503 					       rec_index);
2504 }
2505 
2506 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2507 					      char *rauhtd_pl, int rec_index)
2508 {
2509 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2510 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2511 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2512 						       rec_index);
2513 		break;
2514 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2515 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2516 						       rec_index);
2517 		break;
2518 	}
2519 }
2520 
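/* A dump is full when it carries the maximum number of records and the last
 * record itself is full. IPv6 records always hold a single entry, so a
 * trailing IPv6 record means the dump is full; an IPv4 record is full when
 * all of its entry slots are used.
 */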
2521 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2522 {
2523 	u8 num_rec, last_rec_index, num_entries;
2524 
2525 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2526 	last_rec_index = num_rec - 1;
2527 
2528 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2529 		return false;
2530 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2531 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2532 		return true;
2533 
2534 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2535 								last_rec_index);
2536 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2537 		return true;
2538 	return false;
2539 }
2540 
2541 static int
2542 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2543 				       char *rauhtd_pl,
2544 				       enum mlxsw_reg_rauhtd_type type)
2545 {
2546 	int i, num_rec;
2547 	int err;
2548 
2549 	/* Ensure the RIF we read from the device does not change mid-dump. */
2550 	mutex_lock(&mlxsw_sp->router->lock);
2551 	do {
2552 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2553 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2554 				      rauhtd_pl);
2555 		if (err) {
2556 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2557 			break;
2558 		}
2559 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2560 		for (i = 0; i < num_rec; i++)
2561 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2562 							  i);
2563 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2564 	mutex_unlock(&mlxsw_sp->router->lock);
2565 
2566 	return err;
2567 }
2568 
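/* Dump the active neighbours from the device, IPv4 first and then IPv6, so
 * that the kernel is notified of their activity and keeps them alive.
 */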
2569 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2570 {
2571 	enum mlxsw_reg_rauhtd_type type;
2572 	char *rauhtd_pl;
2573 	int err;
2574 
2575 	if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2576 		return 0;
2577 
2578 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2579 	if (!rauhtd_pl)
2580 		return -ENOMEM;
2581 
2582 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2583 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2584 	if (err)
2585 		goto out;
2586 
2587 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2588 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2589 out:
2590 	kfree(rauhtd_pl);
2591 	return err;
2592 }
2593 
2594 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2595 {
2596 	struct mlxsw_sp_neigh_entry *neigh_entry;
2597 
2598 	mutex_lock(&mlxsw_sp->router->lock);
2599 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2600 			    nexthop_neighs_list_node)
2601 		/* If this neigh has nexthops, make the kernel think this neigh
2602 		 * is active regardless of the traffic.
2603 		 */
2604 		neigh_event_send(neigh_entry->key.n, NULL);
2605 	mutex_unlock(&mlxsw_sp->router->lock);
2606 }
2607 
2608 static void
2609 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2610 {
2611 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2612 
2613 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2614 			       msecs_to_jiffies(interval));
2615 }
2616 
2617 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2618 {
2619 	struct mlxsw_sp_router *router;
2620 	int err;
2621 
2622 	router = container_of(work, struct mlxsw_sp_router,
2623 			      neighs_update.dw.work);
2624 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2625 	if (err)
2626 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2627 
2628 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2629 
2630 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2631 }
2632 
2633 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2634 {
2635 	struct mlxsw_sp_neigh_entry *neigh_entry;
2636 	struct mlxsw_sp_router *router;
2637 
2638 	router = container_of(work, struct mlxsw_sp_router,
2639 			      nexthop_probe_dw.work);
2640 	/* Iterate over nexthop neighbours, find the unresolved ones and send
2641 	 * ARP on them. This solves the chicken-and-egg problem where a nexthop
2642 	 * is not offloaded until its neighbour is resolved, but the neighbour
2643 	 * is never resolved as long as traffic keeps flowing in hardware via a
2644 	 * different nexthop.
2645 	 */
2646 	mutex_lock(&router->lock);
2647 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2648 			    nexthop_neighs_list_node)
2649 		if (!neigh_entry->connected)
2650 			neigh_event_send(neigh_entry->key.n, NULL);
2651 	mutex_unlock(&router->lock);
2652 
2653 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2654 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2655 }
2656 
2657 static void
2658 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2659 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2660 			      bool removing, bool dead);
2661 
2662 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2663 {
2664 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2665 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2666 }
2667 
2668 static int
2669 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2670 				struct mlxsw_sp_neigh_entry *neigh_entry,
2671 				enum mlxsw_reg_rauht_op op)
2672 {
2673 	struct neighbour *n = neigh_entry->key.n;
2674 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2675 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2676 
2677 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2678 			      dip);
2679 	if (neigh_entry->counter_valid)
2680 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2681 					     neigh_entry->counter_index);
2682 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2683 }
2684 
2685 static int
2686 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2687 				struct mlxsw_sp_neigh_entry *neigh_entry,
2688 				enum mlxsw_reg_rauht_op op)
2689 {
2690 	struct neighbour *n = neigh_entry->key.n;
2691 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2692 	const char *dip = n->primary_key;
2693 
2694 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2695 			      dip);
2696 	if (neigh_entry->counter_valid)
2697 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2698 					     neigh_entry->counter_index);
2699 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2700 }
2701 
2702 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2703 {
2704 	struct neighbour *n = neigh_entry->key.n;
2705 
2706 	/* Packets with a link-local destination address are trapped
2707 	 * after LPM lookup and never reach the neighbour table, so
2708 	 * there is no need to program such neighbours to the device.
2709 	 */
2710 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2711 	    IPV6_ADDR_LINKLOCAL)
2712 		return true;
2713 	return false;
2714 }
2715 
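/* Reflect the neighbour state into the device: add or delete the RAUHT
 * entry and mark the kernel neighbour as offloaded or not accordingly.
 */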
2716 static void
2717 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2718 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2719 			    bool adding)
2720 {
2721 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2722 	int err;
2723 
2724 	if (!adding && !neigh_entry->connected)
2725 		return;
2726 	neigh_entry->connected = adding;
2727 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2728 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2729 						      op);
2730 		if (err)
2731 			return;
2732 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2733 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2734 			return;
2735 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2736 						      op);
2737 		if (err)
2738 			return;
2739 	} else {
2740 		WARN_ON_ONCE(1);
2741 		return;
2742 	}
2743 
2744 	if (adding)
2745 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2746 	else
2747 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2748 }
2749 
2750 void
2751 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2752 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2753 				    bool adding)
2754 {
2755 	if (adding)
2756 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2757 	else
2758 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2759 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2760 }
2761 
2762 struct mlxsw_sp_netevent_work {
2763 	struct work_struct work;
2764 	struct mlxsw_sp *mlxsw_sp;
2765 	struct neighbour *n;
2766 };
2767 
2768 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2769 {
2770 	struct mlxsw_sp_netevent_work *net_work =
2771 		container_of(work, struct mlxsw_sp_netevent_work, work);
2772 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2773 	struct mlxsw_sp_neigh_entry *neigh_entry;
2774 	struct neighbour *n = net_work->n;
2775 	unsigned char ha[ETH_ALEN];
2776 	bool entry_connected;
2777 	u8 nud_state, dead;
2778 
2779 	/* If these parameters are changed after we release the lock,
2780 	 * then we are guaranteed to receive another event letting us
2781 	 * know about it.
2782 	 */
2783 	read_lock_bh(&n->lock);
2784 	memcpy(ha, n->ha, ETH_ALEN);
2785 	nud_state = n->nud_state;
2786 	dead = n->dead;
2787 	read_unlock_bh(&n->lock);
2788 
2789 	mutex_lock(&mlxsw_sp->router->lock);
2790 	mlxsw_sp_span_respin(mlxsw_sp);
2791 
2792 	entry_connected = nud_state & NUD_VALID && !dead;
2793 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2794 	if (!entry_connected && !neigh_entry)
2795 		goto out;
2796 	if (!neigh_entry) {
2797 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2798 		if (IS_ERR(neigh_entry))
2799 			goto out;
2800 	}
2801 
2802 	if (neigh_entry->connected && entry_connected &&
2803 	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2804 		goto out;
2805 
2806 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2807 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2808 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2809 				      dead);
2810 
2811 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2812 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2813 
2814 out:
2815 	mutex_unlock(&mlxsw_sp->router->lock);
2816 	neigh_release(n);
2817 	kfree(net_work);
2818 }
2819 
2820 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2821 
2822 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2823 {
2824 	struct mlxsw_sp_netevent_work *net_work =
2825 		container_of(work, struct mlxsw_sp_netevent_work, work);
2826 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2827 
2828 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2829 	kfree(net_work);
2830 }
2831 
2832 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2833 
2834 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2835 {
2836 	struct mlxsw_sp_netevent_work *net_work =
2837 		container_of(work, struct mlxsw_sp_netevent_work, work);
2838 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2839 
2840 	__mlxsw_sp_router_init(mlxsw_sp);
2841 	kfree(net_work);
2842 }
2843 
2844 static int mlxsw_sp_router_schedule_work(struct net *net,
2845 					 struct mlxsw_sp_router *router,
2846 					 struct neighbour *n,
2847 					 void (*cb)(struct work_struct *))
2848 {
2849 	struct mlxsw_sp_netevent_work *net_work;
2850 
2851 	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2852 		return NOTIFY_DONE;
2853 
2854 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2855 	if (!net_work)
2856 		return NOTIFY_BAD;
2857 
2858 	INIT_WORK(&net_work->work, cb);
2859 	net_work->mlxsw_sp = router->mlxsw_sp;
2860 	net_work->n = n;
2861 	mlxsw_core_schedule_work(&net_work->work);
2862 	return NOTIFY_DONE;
2863 }
2864 
2865 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2866 {
2867 	struct mlxsw_sp_port *mlxsw_sp_port;
2868 
2869 	rcu_read_lock();
2870 	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2871 	rcu_read_unlock();
2872 	return !!mlxsw_sp_port;
2873 }
2874 
2875 static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router,
2876 					       struct neighbour *n)
2877 {
2878 	struct net *net;
2879 
2880 	net = neigh_parms_net(n->parms);
2881 
2882 	/* Take a reference to ensure the neighbour won't be destroyed until
2883 	 * we drop the reference in the delayed work.
2884 	 */
2885 	neigh_clone(n);
2886 	return mlxsw_sp_router_schedule_work(net, router, n,
2887 					     mlxsw_sp_router_neigh_event_work);
2888 }
2889 
2890 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2891 					  unsigned long event, void *ptr)
2892 {
2893 	struct mlxsw_sp_router *router;
2894 	unsigned long interval;
2895 	struct neigh_parms *p;
2896 	struct neighbour *n;
2897 
2898 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2899 
2900 	switch (event) {
2901 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2902 		p = ptr;
2903 
2904 		/* We don't care about changes in the default table. */
2905 		if (!p->dev || (p->tbl->family != AF_INET &&
2906 				p->tbl->family != AF_INET6))
2907 			return NOTIFY_DONE;
2908 
2909 		/* We are in atomic context and can't take RTNL mutex,
2910 		 * so use RCU variant to walk the device chain.
2911 		 */
2912 		if (!mlxsw_sp_dev_lower_is_port(p->dev))
2913 			return NOTIFY_DONE;
2914 
2915 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2916 		router->neighs_update.interval = interval;
2917 		break;
2918 	case NETEVENT_NEIGH_UPDATE:
2919 		n = ptr;
2920 
2921 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2922 			return NOTIFY_DONE;
2923 
2924 		if (!mlxsw_sp_dev_lower_is_port(n->dev))
2925 			return NOTIFY_DONE;
2926 
2927 		return mlxsw_sp_router_schedule_neigh_work(router, n);
2928 
2929 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2930 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2931 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2932 				mlxsw_sp_router_mp_hash_event_work);
2933 
2934 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2935 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2936 				mlxsw_sp_router_update_priority_work);
2937 	}
2938 
2939 	return NOTIFY_DONE;
2940 }
2941 
2942 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2943 {
2944 	int err;
2945 
2946 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2947 			      &mlxsw_sp_neigh_ht_params);
2948 	if (err)
2949 		return err;
2950 
2951 	/* Initialize the polling interval according to the default
2952 	 * table.
2953 	 */
2954 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2955 
2956 	/* Create the delayed works for activity update and nexthop probing */
2957 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2958 			  mlxsw_sp_router_neighs_update_work);
2959 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2960 			  mlxsw_sp_router_probe_unresolved_nexthops);
2961 	atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2962 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2963 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2964 	return 0;
2965 }
2966 
2967 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2968 {
2969 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2970 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2971 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2972 }
2973 
2974 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2975 					 struct mlxsw_sp_rif *rif)
2976 {
2977 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2978 
2979 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2980 				 rif_list_node) {
2981 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2982 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2983 	}
2984 }
2985 
2986 struct mlxsw_sp_neigh_rif_made_sync {
2987 	struct mlxsw_sp *mlxsw_sp;
2988 	struct mlxsw_sp_rif *rif;
2989 	int err;
2990 };
2991 
2992 static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
2993 {
2994 	struct mlxsw_sp_neigh_rif_made_sync *rms = data;
2995 	int rc;
2996 
2997 	if (rms->err)
2998 		return;
2999 	if (n->dev != mlxsw_sp_rif_dev(rms->rif))
3000 		return;
3001 	rc = mlxsw_sp_router_schedule_neigh_work(rms->mlxsw_sp->router, n);
3002 	if (rc != NOTIFY_DONE)
3003 		rms->err = -ENOMEM;
3004 }
3005 
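/* When a RIF is made, schedule a neighbour event work item for each kernel
 * neighbour that already exists on the underlying netdevice, so that these
 * neighbours are replayed into the device.
 */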
3006 static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
3007 					struct mlxsw_sp_rif *rif)
3008 {
3009 	struct mlxsw_sp_neigh_rif_made_sync rms = {
3010 		.mlxsw_sp = mlxsw_sp,
3011 		.rif = rif,
3012 	};
3013 
3014 	neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3015 	if (rms.err)
3016 		goto err_arp;
3017 
3018 #if IS_ENABLED(CONFIG_IPV6)
3019 	neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
3020 #endif
3021 	if (rms.err)
3022 		goto err_nd;
3023 
3024 	return 0;
3025 
3026 err_nd:
3027 err_arp:
3028 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
3029 	return rms.err;
3030 }
3031 
3032 enum mlxsw_sp_nexthop_type {
3033 	MLXSW_SP_NEXTHOP_TYPE_ETH,
3034 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
3035 };
3036 
3037 enum mlxsw_sp_nexthop_action {
3038 	/* Nexthop forwards packets to an egress RIF */
3039 	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
3040 	/* Nexthop discards packets */
3041 	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
3042 	/* Nexthop traps packets */
3043 	MLXSW_SP_NEXTHOP_ACTION_TRAP,
3044 };
3045 
3046 struct mlxsw_sp_nexthop_key {
3047 	struct fib_nh *fib_nh;
3048 };
3049 
3050 struct mlxsw_sp_nexthop {
3051 	struct list_head neigh_list_node; /* member of neigh entry list */
3052 	struct list_head crif_list_node;
3053 	struct list_head router_list_node;
3054 	struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
3055 						   * this nexthop belongs to
3056 						   */
3057 	struct rhash_head ht_node;
3058 	struct neigh_table *neigh_tbl;
3059 	struct mlxsw_sp_nexthop_key key;
3060 	unsigned char gw_addr[sizeof(struct in6_addr)];
3061 	int ifindex;
3062 	int nh_weight;
3063 	int norm_nh_weight;
3064 	int num_adj_entries;
3065 	struct mlxsw_sp_crif *crif;
3066 	u8 should_offload:1, /* set indicates this nexthop should be written
3067 			      * to the adjacency table.
3068 			      */
3069 	   offloaded:1, /* set indicates this nexthop was written to the
3070 			 * adjacency table.
3071 			 */
3072 	   update:1; /* set indicates this nexthop should be updated in the
3073 		      * adjacency table (e.g., its MAC changed).
3074 		      */
3075 	enum mlxsw_sp_nexthop_action action;
3076 	enum mlxsw_sp_nexthop_type type;
3077 	union {
3078 		struct mlxsw_sp_neigh_entry *neigh_entry;
3079 		struct mlxsw_sp_ipip_entry *ipip_entry;
3080 	};
3081 	unsigned int counter_index;
3082 	bool counter_valid;
3083 };
3084 
3085 static struct net_device *
3086 mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh)
3087 {
3088 	if (!nh->crif)
3089 		return NULL;
3090 	return nh->crif->key.dev;
3091 }
3092 
3093 enum mlxsw_sp_nexthop_group_type {
3094 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
3095 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
3096 	MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
3097 };
3098 
3099 struct mlxsw_sp_nexthop_group_info {
3100 	struct mlxsw_sp_nexthop_group *nh_grp;
3101 	u32 adj_index;
3102 	u16 ecmp_size;
3103 	u16 count;
3104 	int sum_norm_weight;
3105 	u8 adj_index_valid:1,
3106 	   gateway:1, /* routes using the group use a gateway */
3107 	   is_resilient:1;
3108 	struct list_head list; /* member in nh_res_grp_list */
3109 	struct mlxsw_sp_nexthop nexthops[];
3110 };
3111 
3112 static struct mlxsw_sp_rif *
3113 mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi)
3114 {
3115 	struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif;
3116 
3117 	if (!crif)
3118 		return NULL;
3119 	return crif->rif;
3120 }
3121 
3122 struct mlxsw_sp_nexthop_group_vr_key {
3123 	u16 vr_id;
3124 	enum mlxsw_sp_l3proto proto;
3125 };
3126 
3127 struct mlxsw_sp_nexthop_group_vr_entry {
3128 	struct list_head list; /* member in vr_list */
3129 	struct rhash_head ht_node; /* member in vr_ht */
3130 	refcount_t ref_count;
3131 	struct mlxsw_sp_nexthop_group_vr_key key;
3132 };
3133 
3134 struct mlxsw_sp_nexthop_group {
3135 	struct rhash_head ht_node;
3136 	struct list_head fib_list; /* list of fib entries that use this group */
3137 	union {
3138 		struct {
3139 			struct fib_info *fi;
3140 		} ipv4;
3141 		struct {
3142 			u32 id;
3143 		} obj;
3144 	};
3145 	struct mlxsw_sp_nexthop_group_info *nhgi;
3146 	struct list_head vr_list;
3147 	struct rhashtable vr_ht;
3148 	enum mlxsw_sp_nexthop_group_type type;
3149 	bool can_destroy;
3150 };
3151 
3152 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
3153 				    struct mlxsw_sp_nexthop *nh)
3154 {
3155 	struct devlink *devlink;
3156 
3157 	devlink = priv_to_devlink(mlxsw_sp->core);
3158 	if (!devlink_dpipe_table_counter_enabled(devlink,
3159 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
3160 		return;
3161 
3162 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
3163 		return;
3164 
3165 	nh->counter_valid = true;
3166 }
3167 
3168 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3169 				   struct mlxsw_sp_nexthop *nh)
3170 {
3171 	if (!nh->counter_valid)
3172 		return;
3173 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3174 	nh->counter_valid = false;
3175 }
3176 
3177 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3178 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3179 {
3180 	if (!nh->counter_valid)
3181 		return -EINVAL;
3182 
3183 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3184 					 p_counter, NULL);
3185 }
3186 
3187 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3188 					       struct mlxsw_sp_nexthop *nh)
3189 {
3190 	if (!nh) {
3191 		if (list_empty(&router->nexthop_list))
3192 			return NULL;
3193 		else
3194 			return list_first_entry(&router->nexthop_list,
3195 						typeof(*nh), router_list_node);
3196 	}
3197 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3198 		return NULL;
3199 	return list_next_entry(nh, router_list_node);
3200 }
3201 
3202 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3203 {
3204 	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3205 }
3206 
3207 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3208 {
3209 	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3210 	    !mlxsw_sp_nexthop_is_forward(nh))
3211 		return NULL;
3212 	return nh->neigh_entry->ha;
3213 }
3214 
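/* Return the location of the nexthop within its group's adjacency block:
 * the base adjacency index, the block size and the offset at which this
 * nexthop's entries start, skipping the offloaded nexthops before it.
 */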
3215 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3216 			     u32 *p_adj_size, u32 *p_adj_hash_index)
3217 {
3218 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3219 	u32 adj_hash_index = 0;
3220 	int i;
3221 
3222 	if (!nh->offloaded || !nhgi->adj_index_valid)
3223 		return -EINVAL;
3224 
3225 	*p_adj_index = nhgi->adj_index;
3226 	*p_adj_size = nhgi->ecmp_size;
3227 
3228 	for (i = 0; i < nhgi->count; i++) {
3229 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3230 
3231 		if (nh_iter == nh)
3232 			break;
3233 		if (nh_iter->offloaded)
3234 			adj_hash_index += nh_iter->num_adj_entries;
3235 	}
3236 
3237 	*p_adj_hash_index = adj_hash_index;
3238 	return 0;
3239 }
3240 
3241 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3242 {
3243 	if (WARN_ON(!nh->crif))
3244 		return NULL;
3245 	return nh->crif->rif;
3246 }
3247 
3248 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3249 {
3250 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3251 	int i;
3252 
3253 	for (i = 0; i < nhgi->count; i++) {
3254 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3255 
3256 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3257 			return true;
3258 	}
3259 	return false;
3260 }
3261 
3262 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3263 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3264 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3265 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3266 	.automatic_shrinking = true,
3267 };
3268 
3269 static struct mlxsw_sp_nexthop_group_vr_entry *
3270 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3271 				       const struct mlxsw_sp_fib *fib)
3272 {
3273 	struct mlxsw_sp_nexthop_group_vr_key key;
3274 
3275 	memset(&key, 0, sizeof(key));
3276 	key.vr_id = fib->vr->id;
3277 	key.proto = fib->proto;
3278 	return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3279 				      mlxsw_sp_nexthop_group_vr_ht_params);
3280 }
3281 
3282 static int
3283 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3284 				       const struct mlxsw_sp_fib *fib)
3285 {
3286 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3287 	int err;
3288 
3289 	vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3290 	if (!vr_entry)
3291 		return -ENOMEM;
3292 
3293 	vr_entry->key.vr_id = fib->vr->id;
3294 	vr_entry->key.proto = fib->proto;
3295 	refcount_set(&vr_entry->ref_count, 1);
3296 
3297 	err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3298 				     mlxsw_sp_nexthop_group_vr_ht_params);
3299 	if (err)
3300 		goto err_hashtable_insert;
3301 
3302 	list_add(&vr_entry->list, &nh_grp->vr_list);
3303 
3304 	return 0;
3305 
3306 err_hashtable_insert:
3307 	kfree(vr_entry);
3308 	return err;
3309 }
3310 
3311 static void
3312 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3313 					struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3314 {
3315 	list_del(&vr_entry->list);
3316 	rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3317 			       mlxsw_sp_nexthop_group_vr_ht_params);
3318 	kfree(vr_entry);
3319 }
3320 
3321 static int
3322 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3323 			       const struct mlxsw_sp_fib *fib)
3324 {
3325 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3326 
3327 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3328 	if (vr_entry) {
3329 		refcount_inc(&vr_entry->ref_count);
3330 		return 0;
3331 	}
3332 
3333 	return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3334 }
3335 
3336 static void
3337 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3338 				 const struct mlxsw_sp_fib *fib)
3339 {
3340 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3341 
3342 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3343 	if (WARN_ON_ONCE(!vr_entry))
3344 		return;
3345 
3346 	if (!refcount_dec_and_test(&vr_entry->ref_count))
3347 		return;
3348 
3349 	mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3350 }
3351 
3352 struct mlxsw_sp_nexthop_group_cmp_arg {
3353 	enum mlxsw_sp_nexthop_group_type type;
3354 	union {
3355 		struct fib_info *fi;
3356 		struct mlxsw_sp_fib6_entry *fib6_entry;
3357 		u32 id;
3358 	};
3359 };
3360 
3361 static bool
3362 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3363 				    const struct in6_addr *gw, int ifindex,
3364 				    int weight)
3365 {
3366 	int i;
3367 
3368 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3369 		const struct mlxsw_sp_nexthop *nh;
3370 
3371 		nh = &nh_grp->nhgi->nexthops[i];
3372 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3373 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3374 			return true;
3375 	}
3376 
3377 	return false;
3378 }
3379 
3380 static bool
3381 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3382 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
3383 {
3384 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3385 
3386 	if (nh_grp->nhgi->count != fib6_entry->nrt6)
3387 		return false;
3388 
3389 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3390 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3391 		struct in6_addr *gw;
3392 		int ifindex, weight;
3393 
3394 		ifindex = fib6_nh->fib_nh_dev->ifindex;
3395 		weight = fib6_nh->fib_nh_weight;
3396 		gw = &fib6_nh->fib_nh_gw6;
3397 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3398 							 weight))
3399 			return false;
3400 	}
3401 
3402 	return true;
3403 }
3404 
3405 static int
3406 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3407 {
3408 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3409 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3410 
3411 	if (nh_grp->type != cmp_arg->type)
3412 		return 1;
3413 
3414 	switch (cmp_arg->type) {
3415 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3416 		return cmp_arg->fi != nh_grp->ipv4.fi;
3417 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3418 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3419 						    cmp_arg->fib6_entry);
3420 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3421 		return cmp_arg->id != nh_grp->obj.id;
3422 	default:
3423 		WARN_ON(1);
3424 		return 1;
3425 	}
3426 }
3427 
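/* Hash a nexthop group object consistently with the key hash below: IPv4
 * groups hash the fib_info pointer, IPv6 groups fold the nexthops' ifindex
 * and gateway into the hash and nexthop objects hash their ID.
 */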
3428 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3429 {
3430 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
3431 	const struct mlxsw_sp_nexthop *nh;
3432 	struct fib_info *fi;
3433 	unsigned int val;
3434 	int i;
3435 
3436 	switch (nh_grp->type) {
3437 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3438 		fi = nh_grp->ipv4.fi;
3439 		return jhash(&fi, sizeof(fi), seed);
3440 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3441 		val = nh_grp->nhgi->count;
3442 		for (i = 0; i < nh_grp->nhgi->count; i++) {
3443 			nh = &nh_grp->nhgi->nexthops[i];
3444 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3445 			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3446 		}
3447 		return jhash(&val, sizeof(val), seed);
3448 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3449 		return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3450 	default:
3451 		WARN_ON(1);
3452 		return 0;
3453 	}
3454 }
3455 
3456 static u32
3457 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3458 {
3459 	unsigned int val = fib6_entry->nrt6;
3460 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3461 
3462 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3463 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3464 		struct net_device *dev = fib6_nh->fib_nh_dev;
3465 		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3466 
3467 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3468 		val ^= jhash(gw, sizeof(*gw), seed);
3469 	}
3470 
3471 	return jhash(&val, sizeof(val), seed);
3472 }
3473 
3474 static u32
3475 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3476 {
3477 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3478 
3479 	switch (cmp_arg->type) {
3480 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3481 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3482 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3483 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3484 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3485 		return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3486 	default:
3487 		WARN_ON(1);
3488 		return 0;
3489 	}
3490 }
3491 
3492 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3493 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3494 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3495 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3496 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3497 };
3498 
3499 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3500 					 struct mlxsw_sp_nexthop_group *nh_grp)
3501 {
3502 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3503 	    !nh_grp->nhgi->gateway)
3504 		return 0;
3505 
3506 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3507 				      &nh_grp->ht_node,
3508 				      mlxsw_sp_nexthop_group_ht_params);
3509 }
3510 
3511 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3512 					  struct mlxsw_sp_nexthop_group *nh_grp)
3513 {
3514 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3515 	    !nh_grp->nhgi->gateway)
3516 		return;
3517 
3518 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3519 			       &nh_grp->ht_node,
3520 			       mlxsw_sp_nexthop_group_ht_params);
3521 }
3522 
3523 static struct mlxsw_sp_nexthop_group *
3524 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3525 			       struct fib_info *fi)
3526 {
3527 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3528 
3529 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3530 	cmp_arg.fi = fi;
3531 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3532 				      &cmp_arg,
3533 				      mlxsw_sp_nexthop_group_ht_params);
3534 }
3535 
3536 static struct mlxsw_sp_nexthop_group *
3537 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3538 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3539 {
3540 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3541 
3542 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3543 	cmp_arg.fib6_entry = fib6_entry;
3544 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3545 				      &cmp_arg,
3546 				      mlxsw_sp_nexthop_group_ht_params);
3547 }
3548 
3549 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3550 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3551 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3552 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3553 };
3554 
3555 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3556 				   struct mlxsw_sp_nexthop *nh)
3557 {
3558 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3559 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3560 }
3561 
3562 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3563 				    struct mlxsw_sp_nexthop *nh)
3564 {
3565 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3566 			       mlxsw_sp_nexthop_ht_params);
3567 }
3568 
3569 static struct mlxsw_sp_nexthop *
3570 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3571 			struct mlxsw_sp_nexthop_key key)
3572 {
3573 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3574 				      mlxsw_sp_nexthop_ht_params);
3575 }
3576 
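/* The RALEU register rewrites all the FIB entries of one virtual router
 * that currently point at {adj_index, ecmp_size} so that they point at
 * {new_adj_index, new_ecmp_size} instead. The caller below applies this
 * to every virtual router using the group and unwinds on failure.
 */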
3577 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3578 					     enum mlxsw_sp_l3proto proto,
3579 					     u16 vr_id,
3580 					     u32 adj_index, u16 ecmp_size,
3581 					     u32 new_adj_index,
3582 					     u16 new_ecmp_size)
3583 {
3584 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3585 
3586 	mlxsw_reg_raleu_pack(raleu_pl,
3587 			     (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3588 			     adj_index, ecmp_size, new_adj_index,
3589 			     new_ecmp_size);
3590 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3591 }
3592 
3593 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3594 					  struct mlxsw_sp_nexthop_group *nh_grp,
3595 					  u32 old_adj_index, u16 old_ecmp_size)
3596 {
3597 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3598 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3599 	int err;
3600 
3601 	list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3602 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3603 							vr_entry->key.proto,
3604 							vr_entry->key.vr_id,
3605 							old_adj_index,
3606 							old_ecmp_size,
3607 							nhgi->adj_index,
3608 							nhgi->ecmp_size);
3609 		if (err)
3610 			goto err_mass_update_vr;
3611 	}
3612 	return 0;
3613 
3614 err_mass_update_vr:
3615 	list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3616 		mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3617 						  vr_entry->key.vr_id,
3618 						  nhgi->adj_index,
3619 						  nhgi->ecmp_size,
3620 						  old_adj_index, old_ecmp_size);
3621 	return err;
3622 }
3623 
3624 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3625 					 u32 adj_index,
3626 					 struct mlxsw_sp_nexthop *nh,
3627 					 bool force, char *ratr_pl)
3628 {
3629 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3630 	struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
3631 	enum mlxsw_reg_ratr_op op;
3632 	u16 rif_index;
3633 
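	/* A nexthop that only discards or traps packets has no egress
	 * RIF, but the adjacency entry must still carry a valid one, so
	 * fall back to the loopback RIF created during init.
	 */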
3634 	rif_index = rif ? rif->rif_index :
3635 			  mlxsw_sp->router->lb_crif->rif->rif_index;
3636 	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3637 		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3638 	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3639 			    adj_index, rif_index);
3640 	switch (nh->action) {
3641 	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3642 		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3643 		break;
3644 	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3645 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3646 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3647 		break;
3648 	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3649 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3650 					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3651 		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3652 		break;
3653 	default:
3654 		WARN_ON_ONCE(1);
3655 		return -EINVAL;
3656 	}
3657 	if (nh->counter_valid)
3658 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3659 	else
3660 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3661 
3662 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3663 }
3664 
3665 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3666 				struct mlxsw_sp_nexthop *nh, bool force,
3667 				char *ratr_pl)
3668 {
3669 	int i;
3670 
3671 	for (i = 0; i < nh->num_adj_entries; i++) {
3672 		int err;
3673 
3674 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3675 						    nh, force, ratr_pl);
3676 		if (err)
3677 			return err;
3678 	}
3679 
3680 	return 0;
3681 }
3682 
3683 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3684 					  u32 adj_index,
3685 					  struct mlxsw_sp_nexthop *nh,
3686 					  bool force, char *ratr_pl)
3687 {
3688 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3689 
3690 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3691 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3692 					force, ratr_pl);
3693 }
3694 
3695 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3696 					u32 adj_index,
3697 					struct mlxsw_sp_nexthop *nh, bool force,
3698 					char *ratr_pl)
3699 {
3700 	int i;
3701 
3702 	for (i = 0; i < nh->num_adj_entries; i++) {
3703 		int err;
3704 
3705 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3706 						     nh, force, ratr_pl);
3707 		if (err)
3708 			return err;
3709 	}
3710 
3711 	return 0;
3712 }
3713 
3714 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3715 				   struct mlxsw_sp_nexthop *nh, bool force,
3716 				   char *ratr_pl)
3717 {
3718 	/* When action is discard or trap, the nexthop must be
3719 	 * programmed as an Ethernet nexthop.
3720 	 */
3721 	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3722 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3723 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3724 		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3725 						   force, ratr_pl);
3726 	else
3727 		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3728 						    force, ratr_pl);
3729 }
3730 
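/* Write all the group's nexthops to their adjacency entries. A nexthop
 * may own several consecutive entries (see the rebalancing logic below),
 * so the running adjacency index is advanced by nh->num_adj_entries
 * after each nexthop.
 */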
3731 static int
3732 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3733 			      struct mlxsw_sp_nexthop_group_info *nhgi,
3734 			      bool reallocate)
3735 {
3736 	char ratr_pl[MLXSW_REG_RATR_LEN];
3737 	u32 adj_index = nhgi->adj_index; /* base */
3738 	struct mlxsw_sp_nexthop *nh;
3739 	int i;
3740 
3741 	for (i = 0; i < nhgi->count; i++) {
3742 		nh = &nhgi->nexthops[i];
3743 
3744 		if (!nh->should_offload) {
3745 			nh->offloaded = 0;
3746 			continue;
3747 		}
3748 
3749 		if (nh->update || reallocate) {
3750 			int err;
3751 
3752 			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3753 						      true, ratr_pl);
3754 			if (err)
3755 				return err;
3756 			nh->update = 0;
3757 			nh->offloaded = 1;
3758 		}
3759 		adj_index += nh->num_adj_entries;
3760 	}
3761 	return 0;
3762 }
3763 
3764 static int
3765 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3766 				    struct mlxsw_sp_nexthop_group *nh_grp)
3767 {
3768 	struct mlxsw_sp_fib_entry *fib_entry;
3769 	int err;
3770 
3771 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3772 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3773 		if (err)
3774 			return err;
3775 	}
3776 	return 0;
3777 }
3778 
3779 struct mlxsw_sp_adj_grp_size_range {
3780 	u16 start; /* Inclusive */
3781 	u16 end; /* Inclusive */
3782 };
3783 
3784 /* Ordered by range start value */
3785 static const struct mlxsw_sp_adj_grp_size_range
3786 mlxsw_sp1_adj_grp_size_ranges[] = {
3787 	{ .start = 1, .end = 64 },
3788 	{ .start = 512, .end = 512 },
3789 	{ .start = 1024, .end = 1024 },
3790 	{ .start = 2048, .end = 2048 },
3791 	{ .start = 4096, .end = 4096 },
3792 };
3793 
3794 /* Ordered by range start value */
3795 static const struct mlxsw_sp_adj_grp_size_range
3796 mlxsw_sp2_adj_grp_size_ranges[] = {
3797 	{ .start = 1, .end = 128 },
3798 	{ .start = 256, .end = 256 },
3799 	{ .start = 512, .end = 512 },
3800 	{ .start = 1024, .end = 1024 },
3801 	{ .start = 2048, .end = 2048 },
3802 	{ .start = 4096, .end = 4096 },
3803 };
3804 
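/* Round the requested size up to the nearest supported group size.
 * Illustrative example, using the Spectrum-1 ranges above: a request
 * for 100 entries falls in no range and is rounded up to the next
 * range end, 512; a request for 40 already falls within [1, 64] and
 * is left unchanged.
 */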
3805 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3806 					   u16 *p_adj_grp_size)
3807 {
3808 	int i;
3809 
3810 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3811 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3812 
3813 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3814 
3815 		if (*p_adj_grp_size >= size_range->start &&
3816 		    *p_adj_grp_size <= size_range->end)
3817 			return;
3818 
3819 		if (*p_adj_grp_size <= size_range->end) {
3820 			*p_adj_grp_size = size_range->end;
3821 			return;
3822 		}
3823 	}
3824 }
3825 
3826 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3827 					     u16 *p_adj_grp_size,
3828 					     unsigned int alloc_size)
3829 {
3830 	int i;
3831 
3832 	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3833 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3834 
3835 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3836 
3837 		if (alloc_size >= size_range->end) {
3838 			*p_adj_grp_size = size_range->end;
3839 			return;
3840 		}
3841 	}
3842 }
3843 
3844 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3845 				     u16 *p_adj_grp_size)
3846 {
3847 	unsigned int alloc_size;
3848 	int err;
3849 
3850 	/* Round up the requested group size to the next size supported
3851 	 * by the device and make sure the request can be satisfied.
3852 	 */
3853 	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3854 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3855 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3856 					      *p_adj_grp_size, &alloc_size);
3857 	if (err)
3858 		return err;
3859 	/* It is possible the allocation results in more allocated
3860 	 * entries than requested. Try to use as many of them as
3861 	 * possible.
3862 	 */
3863 	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3864 
3865 	return 0;
3866 }
3867 
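/* Normalize the nexthop weights by their greatest common divisor,
 * considering only nexthops that should be offloaded. Illustrative
 * example: offloaded weights {3, 6} give g = gcd(3, 6) = 3, so the
 * normalized weights are {1, 2} and sum_norm_weight = 3.
 */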
3868 static void
3869 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3870 {
3871 	int i, g = 0, sum_norm_weight = 0;
3872 	struct mlxsw_sp_nexthop *nh;
3873 
3874 	for (i = 0; i < nhgi->count; i++) {
3875 		nh = &nhgi->nexthops[i];
3876 
3877 		if (!nh->should_offload)
3878 			continue;
3879 		if (g > 0)
3880 			g = gcd(nh->nh_weight, g);
3881 		else
3882 			g = nh->nh_weight;
3883 	}
3884 
3885 	for (i = 0; i < nhgi->count; i++) {
3886 		nh = &nhgi->nexthops[i];
3887 
3888 		if (!nh->should_offload)
3889 			continue;
3890 		nh->norm_nh_weight = nh->nh_weight / g;
3891 		sum_norm_weight += nh->norm_nh_weight;
3892 	}
3893 
3894 	nhgi->sum_norm_weight = sum_norm_weight;
3895 }
3896 
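/* Distribute the ecmp_size adjacency entries among the nexthops in
 * proportion to their normalized weights. A running rounded cumulative
 * share guarantees the per-nexthop counts sum to exactly ecmp_size.
 * Illustrative example: normalized weights {1, 2}, total = 3 and
 * ecmp_size = 3 yield DIV_ROUND_CLOSEST(3 * 1, 3) - 0 = 1 entry for the
 * first nexthop and DIV_ROUND_CLOSEST(3 * 3, 3) - 1 = 2 entries for the
 * second.
 */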
3897 static void
3898 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3899 {
3900 	int i, weight = 0, lower_bound = 0;
3901 	int total = nhgi->sum_norm_weight;
3902 	u16 ecmp_size = nhgi->ecmp_size;
3903 
3904 	for (i = 0; i < nhgi->count; i++) {
3905 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3906 		int upper_bound;
3907 
3908 		if (!nh->should_offload)
3909 			continue;
3910 		weight += nh->norm_nh_weight;
3911 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3912 		nh->num_adj_entries = upper_bound - lower_bound;
3913 		lower_bound = upper_bound;
3914 	}
3915 }
3916 
3917 static struct mlxsw_sp_nexthop *
3918 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3919 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3920 
3921 static void
3922 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3923 					struct mlxsw_sp_nexthop_group *nh_grp)
3924 {
3925 	int i;
3926 
3927 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3928 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3929 
3930 		if (nh->offloaded)
3931 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3932 		else
3933 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3934 	}
3935 }
3936 
3937 static void
3938 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3939 					  struct mlxsw_sp_fib6_entry *fib6_entry)
3940 {
3941 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3942 
3943 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3944 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3945 		struct mlxsw_sp_nexthop *nh;
3946 
3947 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3948 		if (nh && nh->offloaded)
3949 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3950 		else
3951 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3952 	}
3953 }
3954 
3955 static void
3956 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3957 					struct mlxsw_sp_nexthop_group *nh_grp)
3958 {
3959 	struct mlxsw_sp_fib6_entry *fib6_entry;
3960 
3961 	/* Unfortunately, in IPv6 the route and the nexthop are described by
3962 	 * the same struct, so we need to iterate over all the routes using the
3963 	 * nexthop group and set / clear the offload indication for them.
3964 	 */
3965 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3966 			    common.nexthop_group_node)
3967 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3968 }
3969 
3970 static void
3971 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3972 					const struct mlxsw_sp_nexthop *nh,
3973 					u16 bucket_index)
3974 {
3975 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3976 	bool offload = false, trap = false;
3977 
3978 	if (nh->offloaded) {
3979 		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3980 			trap = true;
3981 		else
3982 			offload = true;
3983 	}
3984 	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3985 				    bucket_index, offload, trap);
3986 }
3987 
3988 static void
3989 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3990 					   struct mlxsw_sp_nexthop_group *nh_grp)
3991 {
3992 	int i;
3993 
3994 	/* Do not update the flags if the nexthop group is being destroyed
3995 	 * since:
3996 	 * 1. The nexthop object is being deleted, in which case the flags are
3997 	 * irrelevant.
3998 	 * 2. The nexthop group was replaced by a newer group, in which case
3999 	 * the flags of the nexthop object were already updated based on the
4000 	 * new group.
4001 	 */
4002 	if (nh_grp->can_destroy)
4003 		return;
4004 
4005 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4006 			     nh_grp->nhgi->adj_index_valid, false);
4007 
4008 	/* Update flags of individual nexthop buckets in case of a resilient
4009 	 * nexthop group.
4010 	 */
4011 	if (!nh_grp->nhgi->is_resilient)
4012 		return;
4013 
4014 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4015 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
4016 
4017 		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
4018 	}
4019 }
4020 
4021 static void
4022 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
4023 				       struct mlxsw_sp_nexthop_group *nh_grp)
4024 {
4025 	switch (nh_grp->type) {
4026 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
4027 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
4028 		break;
4029 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
4030 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
4031 		break;
4032 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
4033 		mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
4034 		break;
4035 	}
4036 }
4037 
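/* Re-program the group's adjacency entries after a nexthop change. If
 * the set of offloaded nexthops is unchanged, the existing entries are
 * rewritten in place. Otherwise the weights are normalized, a new KVDL
 * adjacency block of the fixed-up size is allocated and written, and
 * the routes are switched over: via a FIB entry update if no block was
 * previously valid, or via a mass adjacency index update if one was.
 * On any failure the group falls back to trapping packets to the CPU.
 */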
4038 static int
4039 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
4040 			       struct mlxsw_sp_nexthop_group *nh_grp)
4041 {
4042 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4043 	u16 ecmp_size, old_ecmp_size;
4044 	struct mlxsw_sp_nexthop *nh;
4045 	bool offload_change = false;
4046 	u32 adj_index;
4047 	bool old_adj_index_valid;
4048 	u32 old_adj_index;
4049 	int i, err2, err;
4050 
4051 	if (!nhgi->gateway)
4052 		return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4053 
4054 	for (i = 0; i < nhgi->count; i++) {
4055 		nh = &nhgi->nexthops[i];
4056 
4057 		if (nh->should_offload != nh->offloaded) {
4058 			offload_change = true;
4059 			if (nh->should_offload)
4060 				nh->update = 1;
4061 		}
4062 	}
4063 	if (!offload_change) {
4064 		/* Nothing was added or removed, so no need to reallocate. Just
4065 		 * update MAC on existing adjacency indexes.
4066 		 */
4067 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
4068 		if (err) {
4069 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4070 			goto set_trap;
4071 		}
4072 		/* Flags of individual nexthop buckets might need to be
4073 		 * updated.
4074 		 */
4075 		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4076 		return 0;
4077 	}
4078 	mlxsw_sp_nexthop_group_normalize(nhgi);
4079 	if (!nhgi->sum_norm_weight) {
4080 		/* No neigh of this group is connected, so we just set
4081 		 * the trap and let everything flow through the kernel.
4082 		 */
4083 		err = 0;
4084 		goto set_trap;
4085 	}
4086 
4087 	ecmp_size = nhgi->sum_norm_weight;
4088 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
4089 	if (err)
4090 		/* No valid allocation size available. */
4091 		goto set_trap;
4092 
4093 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4094 				  ecmp_size, &adj_index);
4095 	if (err) {
4096 		/* We ran out of KVD linear space, just set the
4097 		 * trap and let everything flow through kernel.
4098 		 * trap and let everything flow through the kernel.
4099 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
4100 		goto set_trap;
4101 	}
4102 	old_adj_index_valid = nhgi->adj_index_valid;
4103 	old_adj_index = nhgi->adj_index;
4104 	old_ecmp_size = nhgi->ecmp_size;
4105 	nhgi->adj_index_valid = 1;
4106 	nhgi->adj_index = adj_index;
4107 	nhgi->ecmp_size = ecmp_size;
4108 	mlxsw_sp_nexthop_group_rebalance(nhgi);
4109 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
4110 	if (err) {
4111 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
4112 		goto set_trap;
4113 	}
4114 
4115 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4116 
4117 	if (!old_adj_index_valid) {
4118 		/* The trap was set for fib entries, so we have to call
4119 		 * fib entry update to unset it and use the adjacency index.
4120 		 */
4121 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4122 		if (err) {
4123 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
4124 			goto set_trap;
4125 		}
4126 		return 0;
4127 	}
4128 
4129 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
4130 					     old_adj_index, old_ecmp_size);
4131 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4132 			   old_ecmp_size, old_adj_index);
4133 	if (err) {
4134 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
4135 		goto set_trap;
4136 	}
4137 
4138 	return 0;
4139 
4140 set_trap:
4141 	old_adj_index_valid = nhgi->adj_index_valid;
4142 	nhgi->adj_index_valid = 0;
4143 	for (i = 0; i < nhgi->count; i++) {
4144 		nh = &nhgi->nexthops[i];
4145 		nh->offloaded = 0;
4146 	}
4147 	err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
4148 	if (err2)
4149 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
4150 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
4151 	if (old_adj_index_valid)
4152 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4153 				   nhgi->ecmp_size, nhgi->adj_index);
4154 	return err;
4155 }
4156 
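/* Resilient groups have a fixed number of buckets, so a nexthop whose
 * neighbour went away cannot simply stop being offloaded; its buckets
 * remain programmed and trap packets to the CPU instead.
 */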
4157 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
4158 					    bool removing)
4159 {
4160 	if (!removing) {
4161 		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
4162 		nh->should_offload = 1;
4163 	} else if (nh->nhgi->is_resilient) {
4164 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4165 		nh->should_offload = 1;
4166 	} else {
4167 		nh->should_offload = 0;
4168 	}
4169 	nh->update = 1;
4170 }
4171 
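/* The neighbour entry is keyed by a dead neighbour. Look up (or create)
 * a live neighbour for the same address and device, re-key the entry to
 * it and move every nexthop using the entry from the dead neighbour to
 * the new one.
 */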
4172 static int
4173 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
4174 				    struct mlxsw_sp_neigh_entry *neigh_entry)
4175 {
4176 	struct neighbour *n, *old_n = neigh_entry->key.n;
4177 	struct mlxsw_sp_nexthop *nh;
4178 	struct net_device *dev;
4179 	bool entry_connected;
4180 	u8 nud_state, dead;
4181 	int err;
4182 
4183 	nh = list_first_entry(&neigh_entry->nexthop_list,
4184 			      struct mlxsw_sp_nexthop, neigh_list_node);
4185 	dev = mlxsw_sp_nexthop_dev(nh);
4186 
4187 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4188 	if (!n) {
4189 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4190 		if (IS_ERR(n))
4191 			return PTR_ERR(n);
4192 		neigh_event_send(n, NULL);
4193 	}
4194 
4195 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
4196 	neigh_entry->key.n = n;
4197 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4198 	if (err)
4199 		goto err_neigh_entry_insert;
4200 
4201 	read_lock_bh(&n->lock);
4202 	nud_state = n->nud_state;
4203 	dead = n->dead;
4204 	read_unlock_bh(&n->lock);
4205 	entry_connected = nud_state & NUD_VALID && !dead;
4206 
4207 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4208 			    neigh_list_node) {
4209 		neigh_release(old_n);
4210 		neigh_clone(n);
4211 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4212 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4213 	}
4214 
4215 	neigh_release(n);
4216 
4217 	return 0;
4218 
4219 err_neigh_entry_insert:
4220 	neigh_entry->key.n = old_n;
4221 	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4222 	neigh_release(n);
4223 	return err;
4224 }
4225 
4226 static void
4227 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4228 			      struct mlxsw_sp_neigh_entry *neigh_entry,
4229 			      bool removing, bool dead)
4230 {
4231 	struct mlxsw_sp_nexthop *nh;
4232 
4233 	if (list_empty(&neigh_entry->nexthop_list))
4234 		return;
4235 
4236 	if (dead) {
4237 		int err;
4238 
4239 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4240 							  neigh_entry);
4241 		if (err)
4242 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4243 		return;
4244 	}
4245 
4246 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4247 			    neigh_list_node) {
4248 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4249 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4250 	}
4251 }
4252 
4253 static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh,
4254 				       struct mlxsw_sp_crif *crif)
4255 {
4256 	if (nh->crif)
4257 		return;
4258 
4259 	nh->crif = crif;
4260 	list_add(&nh->crif_list_node, &crif->nexthop_list);
4261 }
4262 
4263 static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh)
4264 {
4265 	if (!nh->crif)
4266 		return;
4267 
4268 	list_del(&nh->crif_list_node);
4269 	nh->crif = NULL;
4270 }
4271 
4272 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4273 				       struct mlxsw_sp_nexthop *nh)
4274 {
4275 	struct mlxsw_sp_neigh_entry *neigh_entry;
4276 	struct net_device *dev;
4277 	struct neighbour *n;
4278 	u8 nud_state, dead;
4279 	int err;
4280 
4281 	if (WARN_ON(!nh->crif->rif))
4282 		return 0;
4283 
4284 	if (!nh->nhgi->gateway || nh->neigh_entry)
4285 		return 0;
4286 	dev = mlxsw_sp_nexthop_dev(nh);
4287 
4288 	/* Take a reference on the neighbour to ensure that it is not
4289 	 * destroyed before the nexthop entry is finished with it. The
4290 	 * reference is taken either in neigh_lookup() or, if the
4291 	 * neighbour is not found, in neigh_create().
4292 	 */
4293 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev);
4294 	if (!n) {
4295 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev);
4296 		if (IS_ERR(n))
4297 			return PTR_ERR(n);
4298 		neigh_event_send(n, NULL);
4299 	}
4300 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4301 	if (!neigh_entry) {
4302 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4303 		if (IS_ERR(neigh_entry)) {
4304 			err = -EINVAL;
4305 			goto err_neigh_entry_create;
4306 		}
4307 	}
4308 
4309 	/* If that is the first nexthop connected to that neigh, add to
4310 	 * nexthop_neighs_list
4311 	 */
4312 	if (list_empty(&neigh_entry->nexthop_list))
4313 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4314 			      &mlxsw_sp->router->nexthop_neighs_list);
4315 
4316 	nh->neigh_entry = neigh_entry;
4317 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4318 	read_lock_bh(&n->lock);
4319 	nud_state = n->nud_state;
4320 	dead = n->dead;
4321 	read_unlock_bh(&n->lock);
4322 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4323 
4324 	return 0;
4325 
4326 err_neigh_entry_create:
4327 	neigh_release(n);
4328 	return err;
4329 }
4330 
4331 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4332 					struct mlxsw_sp_nexthop *nh)
4333 {
4334 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4335 	struct neighbour *n;
4336 
4337 	if (!neigh_entry)
4338 		return;
4339 	n = neigh_entry->key.n;
4340 
4341 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4342 	list_del(&nh->neigh_list_node);
4343 	nh->neigh_entry = NULL;
4344 
4345 	/* If that is the last nexthop connected to that neigh, remove from
4346 	 * nexthop_neighs_list
4347 	 */
4348 	if (list_empty(&neigh_entry->nexthop_list))
4349 		list_del(&neigh_entry->nexthop_neighs_list_node);
4350 
4351 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4352 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4353 
4354 	neigh_release(n);
4355 }
4356 
4357 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4358 {
4359 	struct net_device *ul_dev;
4360 	bool is_up;
4361 
4362 	rcu_read_lock();
4363 	ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4364 	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4365 	rcu_read_unlock();
4366 
4367 	return is_up;
4368 }
4369 
4370 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4371 				       struct mlxsw_sp_nexthop *nh,
4372 				       struct mlxsw_sp_ipip_entry *ipip_entry)
4373 {
4374 	struct mlxsw_sp_crif *crif;
4375 	bool removing;
4376 
4377 	if (!nh->nhgi->gateway || nh->ipip_entry)
4378 		return;
4379 
4380 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev);
4381 	if (WARN_ON(!crif))
4382 		return;
4383 
4384 	nh->ipip_entry = ipip_entry;
4385 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4386 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
4387 	mlxsw_sp_nexthop_crif_init(nh, crif);
4388 }
4389 
4390 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4391 				       struct mlxsw_sp_nexthop *nh)
4392 {
4393 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4394 
4395 	if (!ipip_entry)
4396 		return;
4397 
4398 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4399 	nh->ipip_entry = NULL;
4400 }
4401 
4402 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4403 					const struct fib_nh *fib_nh,
4404 					enum mlxsw_sp_ipip_type *p_ipipt)
4405 {
4406 	struct net_device *dev = fib_nh->fib_nh_dev;
4407 
4408 	return dev &&
4409 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4410 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4411 }
4412 
4413 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4414 				      struct mlxsw_sp_nexthop *nh,
4415 				      const struct net_device *dev)
4416 {
4417 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4418 	struct mlxsw_sp_ipip_entry *ipip_entry;
4419 	struct mlxsw_sp_crif *crif;
4420 	int err;
4421 
4422 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4423 	if (ipip_entry) {
4424 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4425 		if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4426 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4427 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4428 			return 0;
4429 		}
4430 	}
4431 
4432 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4433 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev);
4434 	if (!crif)
4435 		return 0;
4436 
4437 	mlxsw_sp_nexthop_crif_init(nh, crif);
4438 
4439 	if (!crif->rif)
4440 		return 0;
4441 
4442 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4443 	if (err)
4444 		goto err_neigh_init;
4445 
4446 	return 0;
4447 
4448 err_neigh_init:
4449 	mlxsw_sp_nexthop_crif_fini(nh);
4450 	return err;
4451 }
4452 
4453 static int mlxsw_sp_nexthop_type_rif_made(struct mlxsw_sp *mlxsw_sp,
4454 					  struct mlxsw_sp_nexthop *nh)
4455 {
4456 	switch (nh->type) {
4457 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4458 		return mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4459 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4460 		break;
4461 	}
4462 
4463 	return 0;
4464 }
4465 
4466 static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp,
4467 					   struct mlxsw_sp_nexthop *nh)
4468 {
4469 	switch (nh->type) {
4470 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4471 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4472 		break;
4473 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4474 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4475 		break;
4476 	}
4477 }
4478 
4479 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4480 				       struct mlxsw_sp_nexthop *nh)
4481 {
4482 	mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4483 	mlxsw_sp_nexthop_crif_fini(nh);
4484 }
4485 
4486 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4487 				  struct mlxsw_sp_nexthop_group *nh_grp,
4488 				  struct mlxsw_sp_nexthop *nh,
4489 				  struct fib_nh *fib_nh)
4490 {
4491 	struct net_device *dev = fib_nh->fib_nh_dev;
4492 	struct in_device *in_dev;
4493 	int err;
4494 
4495 	nh->nhgi = nh_grp->nhgi;
4496 	nh->key.fib_nh = fib_nh;
4497 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4498 	nh->nh_weight = fib_nh->fib_nh_weight;
4499 #else
4500 	nh->nh_weight = 1;
4501 #endif
4502 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4503 	nh->neigh_tbl = &arp_tbl;
4504 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4505 	if (err)
4506 		return err;
4507 
4508 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4509 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4510 
4511 	if (!dev)
4512 		return 0;
4513 	nh->ifindex = dev->ifindex;
4514 
4515 	rcu_read_lock();
4516 	in_dev = __in_dev_get_rcu(dev);
4517 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4518 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4519 		rcu_read_unlock();
4520 		return 0;
4521 	}
4522 	rcu_read_unlock();
4523 
4524 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4525 	if (err)
4526 		goto err_nexthop_neigh_init;
4527 
4528 	return 0;
4529 
4530 err_nexthop_neigh_init:
4531 	list_del(&nh->router_list_node);
4532 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4533 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4534 	return err;
4535 }
4536 
4537 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4538 				   struct mlxsw_sp_nexthop *nh)
4539 {
4540 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4541 	list_del(&nh->router_list_node);
4542 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4543 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4544 }
4545 
4546 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4547 				    unsigned long event, struct fib_nh *fib_nh)
4548 {
4549 	struct mlxsw_sp_nexthop_key key;
4550 	struct mlxsw_sp_nexthop *nh;
4551 
4552 	key.fib_nh = fib_nh;
4553 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4554 	if (!nh)
4555 		return;
4556 
4557 	switch (event) {
4558 	case FIB_EVENT_NH_ADD:
4559 		mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4560 		break;
4561 	case FIB_EVENT_NH_DEL:
4562 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4563 		break;
4564 	}
4565 
4566 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4567 }
4568 
4569 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4570 					struct mlxsw_sp_rif *rif)
4571 {
4572 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
4573 	struct mlxsw_sp_nexthop *nh;
4574 	bool removing;
4575 
4576 	list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) {
4577 		switch (nh->type) {
4578 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
4579 			removing = false;
4580 			break;
4581 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4582 			removing = !mlxsw_sp_ipip_netdev_ul_up(dev);
4583 			break;
4584 		default:
4585 			WARN_ON(1);
4586 			continue;
4587 		}
4588 
4589 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4590 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4591 	}
4592 }
4593 
4594 static int mlxsw_sp_nexthop_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
4595 					  struct mlxsw_sp_rif *rif)
4596 {
4597 	struct mlxsw_sp_nexthop *nh, *tmp;
4598 	unsigned int n = 0;
4599 	int err;
4600 
4601 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4602 				 crif_list_node) {
4603 		err = mlxsw_sp_nexthop_type_rif_made(mlxsw_sp, nh);
4604 		if (err)
4605 			goto err_nexthop_type_rif;
4606 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4607 		n++;
4608 	}
4609 
4610 	return 0;
4611 
4612 err_nexthop_type_rif:
4613 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4614 				 crif_list_node) {
4615 		if (!n--)
4616 			break;
4617 		mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4618 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4619 	}
4620 	return err;
4621 }
4622 
4623 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4624 					   struct mlxsw_sp_rif *rif)
4625 {
4626 	struct mlxsw_sp_nexthop *nh, *tmp;
4627 
4628 	list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list,
4629 				 crif_list_node) {
4630 		mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh);
4631 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4632 	}
4633 }
4634 
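/* Allocate and program a single shared adjacency entry that traps
 * packets to the CPU. It is created together with the first nexthop
 * group (see mlxsw_sp_nexthop_group_inc() below) and freed together
 * with the last one.
 */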
4635 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4636 {
4637 	enum mlxsw_reg_ratr_trap_action trap_action;
4638 	char ratr_pl[MLXSW_REG_RATR_LEN];
4639 	int err;
4640 
4641 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4642 				  &mlxsw_sp->router->adj_trap_index);
4643 	if (err)
4644 		return err;
4645 
4646 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4647 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4648 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4649 			    mlxsw_sp->router->adj_trap_index,
4650 			    mlxsw_sp->router->lb_crif->rif->rif_index);
4651 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4652 	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4653 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4654 	if (err)
4655 		goto err_ratr_write;
4656 
4657 	return 0;
4658 
4659 err_ratr_write:
4660 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4661 			   mlxsw_sp->router->adj_trap_index);
4662 	return err;
4663 }
4664 
4665 static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
4666 {
4667 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4668 			   mlxsw_sp->router->adj_trap_index);
4669 }
4670 
4671 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4672 {
4673 	int err;
4674 
4675 	if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4676 		return 0;
4677 
4678 	err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4679 	if (err)
4680 		return err;
4681 
4682 	refcount_set(&mlxsw_sp->router->num_groups, 1);
4683 
4684 	return 0;
4685 }
4686 
4687 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4688 {
4689 	if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4690 		return;
4691 
4692 	mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4693 }
4694 
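/* Read the activity vector of the group's adjacency entries via the
 * RATRAD register and mark the active ones in the 'activity' bitmap.
 * The periodic work below feeds the result to
 * nexthop_res_grp_activity_update(), allowing the kernel to detect idle
 * buckets of resilient groups.
 */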
4695 static void
4696 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4697 			     const struct mlxsw_sp_nexthop_group *nh_grp,
4698 			     unsigned long *activity)
4699 {
4700 	char *ratrad_pl;
4701 	int i, err;
4702 
4703 	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4704 	if (!ratrad_pl)
4705 		return;
4706 
4707 	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4708 			      nh_grp->nhgi->count);
4709 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4710 	if (err)
4711 		goto out;
4712 
4713 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4714 		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4715 			continue;
4716 		bitmap_set(activity, i, 1);
4717 	}
4718 
4719 out:
4720 	kfree(ratrad_pl);
4721 }
4722 
4723 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4724 
4725 static void
4726 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4727 				const struct mlxsw_sp_nexthop_group *nh_grp)
4728 {
4729 	unsigned long *activity;
4730 
4731 	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4732 	if (!activity)
4733 		return;
4734 
4735 	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4736 	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4737 					nh_grp->nhgi->count, activity);
4738 
4739 	bitmap_free(activity);
4740 }
4741 
4742 static void
4743 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4744 {
4745 	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4746 
4747 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4748 			       msecs_to_jiffies(interval));
4749 }
4750 
4751 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4752 {
4753 	struct mlxsw_sp_nexthop_group_info *nhgi;
4754 	struct mlxsw_sp_router *router;
4755 	bool reschedule = false;
4756 
4757 	router = container_of(work, struct mlxsw_sp_router,
4758 			      nh_grp_activity_dw.work);
4759 
4760 	mutex_lock(&router->lock);
4761 
4762 	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4763 		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4764 		reschedule = true;
4765 	}
4766 
4767 	mutex_unlock(&router->lock);
4768 
4769 	if (!reschedule)
4770 		return;
4771 	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4772 }
4773 
4774 static int
4775 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4776 				     const struct nh_notifier_single_info *nh,
4777 				     struct netlink_ext_ack *extack)
4778 {
4779 	int err = -EINVAL;
4780 
4781 	if (nh->is_fdb)
4782 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4783 	else if (nh->has_encap)
4784 		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4785 	else
4786 		err = 0;
4787 
4788 	return err;
4789 }
4790 
4791 static int
4792 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4793 					  const struct nh_notifier_single_info *nh,
4794 					  struct netlink_ext_ack *extack)
4795 {
4796 	int err;
4797 
4798 	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4799 	if (err)
4800 		return err;
4801 
4802 	/* Device-only nexthops with an IPIP device are programmed as
4803 	 * encapsulating adjacency entries.
4804 	 */
4805 	if (!nh->gw_family && !nh->is_reject &&
4806 	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4807 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4808 		return -EINVAL;
4809 	}
4810 
4811 	return 0;
4812 }
4813 
4814 static int
4815 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4816 				    const struct nh_notifier_grp_info *nh_grp,
4817 				    struct netlink_ext_ack *extack)
4818 {
4819 	int i;
4820 
4821 	if (nh_grp->is_fdb) {
4822 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4823 		return -EINVAL;
4824 	}
4825 
4826 	for (i = 0; i < nh_grp->num_nh; i++) {
4827 		const struct nh_notifier_single_info *nh;
4828 		int err;
4829 
4830 		nh = &nh_grp->nh_entries[i].nh;
4831 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4832 								extack);
4833 		if (err)
4834 			return err;
4835 	}
4836 
4837 	return 0;
4838 }
4839 
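/* The bucket count of a resilient group must be at least 32, must fall
 * within one of the supported adjacency group size ranges above and
 * must exactly match a KVDL allocation size, since each bucket occupies
 * one adjacency entry. For example, 256 buckets lies in a Spectrum-2
 * range above, but in none of the Spectrum-1 ranges.
 */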
4840 static int
4841 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4842 					     const struct nh_notifier_res_table_info *nh_res_table,
4843 					     struct netlink_ext_ack *extack)
4844 {
4845 	unsigned int alloc_size;
4846 	bool valid_size = false;
4847 	int err, i;
4848 
4849 	if (nh_res_table->num_nh_buckets < 32) {
4850 		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4851 		return -EINVAL;
4852 	}
4853 
4854 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4855 		const struct mlxsw_sp_adj_grp_size_range *size_range;
4856 
4857 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4858 
4859 		if (nh_res_table->num_nh_buckets >= size_range->start &&
4860 		    nh_res_table->num_nh_buckets <= size_range->end) {
4861 			valid_size = true;
4862 			break;
4863 		}
4864 	}
4865 
4866 	if (!valid_size) {
4867 		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4868 		return -EINVAL;
4869 	}
4870 
4871 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4872 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4873 					      nh_res_table->num_nh_buckets,
4874 					      &alloc_size);
4875 	if (err || nh_res_table->num_nh_buckets != alloc_size) {
4876 		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
4877 		return -EINVAL;
4878 	}
4879 
4880 	return 0;
4881 }
4882 
4883 static int
4884 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4885 					const struct nh_notifier_res_table_info *nh_res_table,
4886 					struct netlink_ext_ack *extack)
4887 {
4888 	int err;
4889 	u16 i;
4890 
4891 	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4892 							   nh_res_table,
4893 							   extack);
4894 	if (err)
4895 		return err;
4896 
4897 	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4898 		const struct nh_notifier_single_info *nh;
4899 		int err;
4900 
4901 		nh = &nh_res_table->nhs[i];
4902 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4903 								extack);
4904 		if (err)
4905 			return err;
4906 	}
4907 
4908 	return 0;
4909 }
4910 
4911 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4912 					 unsigned long event,
4913 					 struct nh_notifier_info *info)
4914 {
4915 	struct nh_notifier_single_info *nh;
4916 
4917 	if (event != NEXTHOP_EVENT_REPLACE &&
4918 	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4919 	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
4920 		return 0;
4921 
4922 	switch (info->type) {
4923 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4924 		return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4925 							    info->extack);
4926 	case NH_NOTIFIER_INFO_TYPE_GRP:
4927 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4928 							   info->nh_grp,
4929 							   info->extack);
4930 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4931 		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4932 							       info->nh_res_table,
4933 							       info->extack);
4934 	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4935 		nh = &info->nh_res_bucket->new_nh;
4936 		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4937 								 info->extack);
4938 	default:
4939 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4940 		return -EOPNOTSUPP;
4941 	}
4942 }
4943 
4944 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4945 					    const struct nh_notifier_info *info)
4946 {
4947 	const struct net_device *dev;
4948 
4949 	switch (info->type) {
4950 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4951 		dev = info->nh->dev;
4952 		return info->nh->gw_family || info->nh->is_reject ||
4953 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4954 	case NH_NOTIFIER_INFO_TYPE_GRP:
4955 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4956 		/* Already validated earlier. */
4957 		return true;
4958 	default:
4959 		return false;
4960 	}
4961 }
4962 
4963 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4964 						struct mlxsw_sp_nexthop *nh)
4965 {
4966 	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4967 	nh->should_offload = 1;
4968 	/* While nexthops that discard packets do not forward packets
4969 	 * via an egress RIF, they still need to be programmed using a
4970 	 * valid RIF, so use the loopback RIF created during init.
4971 	 */
4972 	nh->crif = mlxsw_sp->router->lb_crif;
4973 }
4974 
4975 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4976 						struct mlxsw_sp_nexthop *nh)
4977 {
4978 	nh->crif = NULL;
4979 	nh->should_offload = 0;
4980 }
4981 
4982 static int
4983 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4984 			  struct mlxsw_sp_nexthop_group *nh_grp,
4985 			  struct mlxsw_sp_nexthop *nh,
4986 			  struct nh_notifier_single_info *nh_obj, int weight)
4987 {
4988 	struct net_device *dev = nh_obj->dev;
4989 	int err;
4990 
4991 	nh->nhgi = nh_grp->nhgi;
4992 	nh->nh_weight = weight;
4993 
4994 	switch (nh_obj->gw_family) {
4995 	case AF_INET:
4996 		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4997 		nh->neigh_tbl = &arp_tbl;
4998 		break;
4999 	case AF_INET6:
5000 		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
5001 #if IS_ENABLED(CONFIG_IPV6)
5002 		nh->neigh_tbl = &nd_tbl;
5003 #endif
5004 		break;
5005 	}
5006 
5007 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5008 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5009 	nh->ifindex = dev->ifindex;
5010 
5011 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
5012 	if (err)
5013 		goto err_type_init;
5014 
5015 	if (nh_obj->is_reject)
5016 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
5017 
5018 	/* In a resilient nexthop group, all the nexthops must be written to
5019 	 * the adjacency table, even if they do not have a valid neighbour or
5020 	 * RIF.
5021 	 */
5022 	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
5023 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
5024 		nh->should_offload = 1;
5025 	}
5026 
5027 	return 0;
5028 
5029 err_type_init:
5030 	list_del(&nh->router_list_node);
5031 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5032 	return err;
5033 }
5034 
5035 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
5036 				      struct mlxsw_sp_nexthop *nh)
5037 {
5038 	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
5039 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
5040 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5041 	list_del(&nh->router_list_node);
5042 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5043 	nh->should_offload = 0;
5044 }
5045 
5046 static int
5047 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
5048 				     struct mlxsw_sp_nexthop_group *nh_grp,
5049 				     struct nh_notifier_info *info)
5050 {
5051 	struct mlxsw_sp_nexthop_group_info *nhgi;
5052 	struct mlxsw_sp_nexthop *nh;
5053 	bool is_resilient = false;
5054 	unsigned int nhs;
5055 	int err, i;
5056 
5057 	switch (info->type) {
5058 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
5059 		nhs = 1;
5060 		break;
5061 	case NH_NOTIFIER_INFO_TYPE_GRP:
5062 		nhs = info->nh_grp->num_nh;
5063 		break;
5064 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5065 		nhs = info->nh_res_table->num_nh_buckets;
5066 		is_resilient = true;
5067 		break;
5068 	default:
5069 		return -EINVAL;
5070 	}
5071 
5072 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5073 	if (!nhgi)
5074 		return -ENOMEM;
5075 	nh_grp->nhgi = nhgi;
5076 	nhgi->nh_grp = nh_grp;
5077 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
5078 	nhgi->is_resilient = is_resilient;
5079 	nhgi->count = nhs;
5080 	for (i = 0; i < nhgi->count; i++) {
5081 		struct nh_notifier_single_info *nh_obj;
5082 		int weight;
5083 
5084 		nh = &nhgi->nexthops[i];
5085 		switch (info->type) {
5086 		case NH_NOTIFIER_INFO_TYPE_SINGLE:
5087 			nh_obj = info->nh;
5088 			weight = 1;
5089 			break;
5090 		case NH_NOTIFIER_INFO_TYPE_GRP:
5091 			nh_obj = &info->nh_grp->nh_entries[i].nh;
5092 			weight = info->nh_grp->nh_entries[i].weight;
5093 			break;
5094 		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
5095 			nh_obj = &info->nh_res_table->nhs[i];
5096 			weight = 1;
5097 			break;
5098 		default:
5099 			err = -EINVAL;
5100 			goto err_nexthop_obj_init;
5101 		}
5102 		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
5103 						weight);
5104 		if (err)
5105 			goto err_nexthop_obj_init;
5106 	}
5107 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5108 	if (err)
5109 		goto err_group_inc;
5110 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5111 	if (err) {
5112 		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
5113 		goto err_group_refresh;
5114 	}
5115 
5116 	/* Add resilient nexthop groups to a list so that the activity of their
5117 	 * nexthop buckets will be periodically queried and cleared.
5118 	 */
5119 	if (nhgi->is_resilient) {
5120 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5121 			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
5122 		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
5123 	}
5124 
5125 	return 0;
5126 
5127 err_group_refresh:
5128 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5129 err_group_inc:
5130 	i = nhgi->count;
5131 err_nexthop_obj_init:
5132 	for (i--; i >= 0; i--) {
5133 		nh = &nhgi->nexthops[i];
5134 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5135 	}
5136 	kfree(nhgi);
5137 	return err;
5138 }
5139 
5140 static void
5141 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5142 				     struct mlxsw_sp_nexthop_group *nh_grp)
5143 {
5144 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5145 	struct mlxsw_sp_router *router = mlxsw_sp->router;
5146 	int i;
5147 
5148 	if (nhgi->is_resilient) {
5149 		list_del(&nhgi->list);
5150 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
5151 			cancel_delayed_work(&router->nh_grp_activity_dw);
5152 	}
5153 
5154 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5155 	for (i = nhgi->count - 1; i >= 0; i--) {
5156 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5157 
5158 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5159 	}
5160 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5161 	WARN_ON_ONCE(nhgi->adj_index_valid);
5162 	kfree(nhgi);
5163 }
5164 
5165 static struct mlxsw_sp_nexthop_group *
5166 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
5167 				  struct nh_notifier_info *info)
5168 {
5169 	struct mlxsw_sp_nexthop_group *nh_grp;
5170 	int err;
5171 
5172 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5173 	if (!nh_grp)
5174 		return ERR_PTR(-ENOMEM);
5175 	INIT_LIST_HEAD(&nh_grp->vr_list);
5176 	err = rhashtable_init(&nh_grp->vr_ht,
5177 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5178 	if (err)
5179 		goto err_nexthop_group_vr_ht_init;
5180 	INIT_LIST_HEAD(&nh_grp->fib_list);
5181 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5182 	nh_grp->obj.id = info->id;
5183 
5184 	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
5185 	if (err)
5186 		goto err_nexthop_group_info_init;
5187 
5188 	nh_grp->can_destroy = false;
5189 
5190 	return nh_grp;
5191 
5192 err_nexthop_group_info_init:
5193 	rhashtable_destroy(&nh_grp->vr_ht);
5194 err_nexthop_group_vr_ht_init:
5195 	kfree(nh_grp);
5196 	return ERR_PTR(err);
5197 }
5198 
5199 static void
5200 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
5201 				   struct mlxsw_sp_nexthop_group *nh_grp)
5202 {
5203 	if (!nh_grp->can_destroy)
5204 		return;
5205 	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
5206 	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
5207 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5208 	rhashtable_destroy(&nh_grp->vr_ht);
5209 	kfree(nh_grp);
5210 }
5211 
5212 static struct mlxsw_sp_nexthop_group *
5213 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
5214 {
5215 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
5216 
5217 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5218 	cmp_arg.id = id;
5219 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5220 				      &cmp_arg,
5221 				      mlxsw_sp_nexthop_group_ht_params);
5222 }
5223 
5224 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
5225 					  struct mlxsw_sp_nexthop_group *nh_grp)
5226 {
5227 	return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5228 }
5229 
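/* Replace the contents of the old group by swapping the nexthop group
 * info (nhgi) structures between the two group shells. Routes keep
 * pointing at 'old_nh_grp', which now carries the new info, while
 * 'nh_grp' is left holding the old info and is destroyed at the end.
 */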
5230 static int
5231 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5232 				   struct mlxsw_sp_nexthop_group *nh_grp,
5233 				   struct mlxsw_sp_nexthop_group *old_nh_grp,
5234 				   struct netlink_ext_ack *extack)
5235 {
5236 	struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5237 	struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5238 	int err;
5239 
5240 	old_nh_grp->nhgi = new_nhgi;
5241 	new_nhgi->nh_grp = old_nh_grp;
5242 	nh_grp->nhgi = old_nhgi;
5243 	old_nhgi->nh_grp = nh_grp;
5244 
5245 	if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5246 		/* Both the old adjacency index and the new one are valid.
5247 		 * Routes are currently using the old one. Tell the device to
5248 		 * replace the old adjacency index with the new one.
5249 		 */
5250 		err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5251 						     old_nhgi->adj_index,
5252 						     old_nhgi->ecmp_size);
5253 		if (err) {
5254 			NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5255 			goto err_out;
5256 		}
5257 	} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5258 		/* The old adjacency index is valid, while the new one is not.
5259 		 * Iterate over all the routes using the group and change them
5260 		 * to trap packets to the CPU.
5261 		 */
5262 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5263 		if (err) {
5264 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5265 			goto err_out;
5266 		}
5267 	} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5268 		/* The old adjacency index is invalid, while the new one is.
5269 		 * Iterate over all the routes using the group and change them
5270 		 * to forward packets using the new valid index.
5271 		 */
5272 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5273 		if (err) {
5274 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5275 			goto err_out;
5276 		}
5277 	}
5278 
5279 	/* Make sure the flags are set / cleared based on the new nexthop group
5280 	 * information.
5281 	 */
5282 	mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5283 
5284 	/* At this point 'nh_grp' is just a shell that is not used by anyone
5285 	 * and its nexthop group info is the old info that was just replaced
5286 	 * with the new one. Remove it.
5287 	 */
5288 	nh_grp->can_destroy = true;
5289 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5290 
5291 	return 0;
5292 
5293 err_out:
5294 	old_nhgi->nh_grp = old_nh_grp;
5295 	nh_grp->nhgi = new_nhgi;
5296 	new_nhgi->nh_grp = nh_grp;
5297 	old_nh_grp->nhgi = old_nhgi;
5298 	return err;
5299 }
5300 
5301 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5302 				    struct nh_notifier_info *info)
5303 {
5304 	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5305 	struct netlink_ext_ack *extack = info->extack;
5306 	int err;
5307 
5308 	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5309 	if (IS_ERR(nh_grp))
5310 		return PTR_ERR(nh_grp);
5311 
5312 	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5313 	if (!old_nh_grp)
5314 		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5315 	else
5316 		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5317 							 old_nh_grp, extack);
5318 
5319 	if (err) {
5320 		nh_grp->can_destroy = true;
5321 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5322 	}
5323 
5324 	return err;
5325 }
5326 
5327 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5328 				     struct nh_notifier_info *info)
5329 {
5330 	struct mlxsw_sp_nexthop_group *nh_grp;
5331 
5332 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5333 	if (!nh_grp)
5334 		return;
5335 
5336 	nh_grp->can_destroy = true;
5337 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5338 
5339 	/* If the group still has routes using it, then defer the delete
5340 	 * operation until the last route using it is deleted.
5341 	 */
5342 	if (!list_empty(&nh_grp->fib_list))
5343 		return;
5344 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5345 }
5346 
5347 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5348 					     u32 adj_index, char *ratr_pl)
5349 {
5350 	MLXSW_REG_ZERO(ratr, ratr_pl);
5351 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5352 	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5353 	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5354 
5355 	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5356 }
5357 
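/* A non-forced bucket replacement is written with the write-on-activity
 * opcode, which the device is expected to apply only if the bucket has
 * not been active since the last activity clear. Reading the entry back
 * and comparing it with the requested payload tells whether the write
 * actually took effect.
 */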
5358 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5359 {
5360 	/* Clear the opcode and activity on both the old and new payload as
5361 	 * they are irrelevant for the comparison.
5362 	 */
5363 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5364 	mlxsw_reg_ratr_a_set(ratr_pl, 0);
5365 	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5366 	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5367 
5368 	/* If the contents of the adjacency entry are consistent with the
5369 	 * replacement request, then replacement was successful.
5370 	 */
5371 	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5372 		return 0;
5373 
5374 	return -EINVAL;
5375 }
5376 
5377 static int
5378 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5379 				       struct mlxsw_sp_nexthop *nh,
5380 				       struct nh_notifier_info *info)
5381 {
5382 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5383 	struct netlink_ext_ack *extack = info->extack;
5384 	bool force = info->nh_res_bucket->force;
5385 	char ratr_pl_new[MLXSW_REG_RATR_LEN];
5386 	char ratr_pl[MLXSW_REG_RATR_LEN];
5387 	u32 adj_index;
5388 	int err;
5389 
5390 	/* No point in trying an atomic replacement if the idle timer interval
5391 	 * is smaller than the interval in which we query and clear activity.
5392 	 */
5393 	if (!force && info->nh_res_bucket->idle_timer_ms <
5394 	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5395 		force = true;
5396 
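	/* Each bucket occupies a single adjacency entry, offset from the
	 * group's base adjacency index by the bucket index.
	 */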
5397 	adj_index = nh->nhgi->adj_index + bucket_index;
5398 	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5399 	if (err) {
5400 		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5401 		return err;
5402 	}
5403 
5404 	if (!force) {
5405 		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5406 							ratr_pl_new);
5407 		if (err) {
5408 			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5409 			return err;
5410 		}
5411 
5412 		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5413 		if (err) {
5414 			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5415 			return err;
5416 		}
5417 	}
5418 
5419 	nh->update = 0;
5420 	nh->offloaded = 1;
5421 	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5422 
5423 	return 0;
5424 }
5425 
5426 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5427 					       struct nh_notifier_info *info)
5428 {
5429 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5430 	struct netlink_ext_ack *extack = info->extack;
5431 	struct mlxsw_sp_nexthop_group_info *nhgi;
5432 	struct nh_notifier_single_info *nh_obj;
5433 	struct mlxsw_sp_nexthop_group *nh_grp;
5434 	struct mlxsw_sp_nexthop *nh;
5435 	int err;
5436 
5437 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5438 	if (!nh_grp) {
5439 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5440 		return -EINVAL;
5441 	}
5442 
5443 	nhgi = nh_grp->nhgi;
5444 
5445 	if (bucket_index >= nhgi->count) {
5446 		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5447 		return -EINVAL;
5448 	}
5449 
5450 	nh = &nhgi->nexthops[bucket_index];
5451 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5452 
5453 	nh_obj = &info->nh_res_bucket->new_nh;
5454 	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5455 	if (err) {
5456 		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5457 		goto err_nexthop_obj_init;
5458 	}
5459 
5460 	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5461 	if (err)
5462 		goto err_nexthop_obj_bucket_adj_update;
5463 
5464 	return 0;
5465 
5466 err_nexthop_obj_bucket_adj_update:
5467 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5468 err_nexthop_obj_init:
5469 	nh_obj = &info->nh_res_bucket->old_nh;
5470 	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5471 	/* The old adjacency entry was not overwritten */
5472 	nh->update = 0;
5473 	nh->offloaded = 1;
5474 	return err;
5475 }
5476 
5477 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5478 				      unsigned long event, void *ptr)
5479 {
5480 	struct nh_notifier_info *info = ptr;
5481 	struct mlxsw_sp_router *router;
5482 	int err = 0;
5483 
5484 	router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5485 	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5486 	if (err)
5487 		goto out;
5488 
5489 	mutex_lock(&router->lock);
5490 
5491 	switch (event) {
5492 	case NEXTHOP_EVENT_REPLACE:
5493 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5494 		break;
5495 	case NEXTHOP_EVENT_DEL:
5496 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5497 		break;
5498 	case NEXTHOP_EVENT_BUCKET_REPLACE:
5499 		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5500 							  info);
5501 		break;
5502 	default:
5503 		break;
5504 	}
5505 
5506 	mutex_unlock(&router->lock);
5507 
5508 out:
5509 	return notifier_from_errno(err);
5510 }
5511 
5512 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5513 				   struct fib_info *fi)
5514 {
5515 	const struct fib_nh *nh = fib_info_nh(fi, 0);
5516 
5517 	return nh->fib_nh_gw_family ||
5518 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5519 }
5520 
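/* Allocate the nexthop group info, initialize one nexthop per path of
 * the IPv4 FIB info and write the group to the device via the refresh.
 */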
5521 static int
5522 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5523 				  struct mlxsw_sp_nexthop_group *nh_grp)
5524 {
5525 	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5526 	struct mlxsw_sp_nexthop_group_info *nhgi;
5527 	struct mlxsw_sp_nexthop *nh;
5528 	int err, i;
5529 
5530 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5531 	if (!nhgi)
5532 		return -ENOMEM;
5533 	nh_grp->nhgi = nhgi;
5534 	nhgi->nh_grp = nh_grp;
5535 	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5536 	nhgi->count = nhs;
5537 	for (i = 0; i < nhgi->count; i++) {
5538 		struct fib_nh *fib_nh;
5539 
5540 		nh = &nhgi->nexthops[i];
5541 		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5542 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5543 		if (err)
5544 			goto err_nexthop4_init;
5545 	}
5546 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5547 	if (err)
5548 		goto err_group_inc;
5549 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5550 	if (err)
5551 		goto err_group_refresh;
5552 
5553 	return 0;
5554 
5555 err_group_refresh:
5556 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5557 err_group_inc:
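	/* When arriving from err_group_inc, start the unwind below from
	 * the last initialized nexthop.
	 */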
5558 	i = nhgi->count;
5559 err_nexthop4_init:
5560 	for (i--; i >= 0; i--) {
5561 		nh = &nhgi->nexthops[i];
5562 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5563 	}
5564 	kfree(nhgi);
5565 	return err;
5566 }
5567 
5568 static void
5569 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5570 				  struct mlxsw_sp_nexthop_group *nh_grp)
5571 {
5572 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5573 	int i;
5574 
5575 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5576 	for (i = nhgi->count - 1; i >= 0; i--) {
5577 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5578 
5579 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5580 	}
5581 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5582 	WARN_ON_ONCE(nhgi->adj_index_valid);
5583 	kfree(nhgi);
5584 }
5585 
5586 static struct mlxsw_sp_nexthop_group *
5587 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5588 {
5589 	struct mlxsw_sp_nexthop_group *nh_grp;
5590 	int err;
5591 
5592 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5593 	if (!nh_grp)
5594 		return ERR_PTR(-ENOMEM);
5595 	INIT_LIST_HEAD(&nh_grp->vr_list);
5596 	err = rhashtable_init(&nh_grp->vr_ht,
5597 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5598 	if (err)
5599 		goto err_nexthop_group_vr_ht_init;
5600 	INIT_LIST_HEAD(&nh_grp->fib_list);
5601 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5602 	nh_grp->ipv4.fi = fi;
5603 	fib_info_hold(fi);
5604 
5605 	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5606 	if (err)
5607 		goto err_nexthop_group_info_init;
5608 
5609 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5610 	if (err)
5611 		goto err_nexthop_group_insert;
5612 
5613 	nh_grp->can_destroy = true;
5614 
5615 	return nh_grp;
5616 
5617 err_nexthop_group_insert:
5618 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5619 err_nexthop_group_info_init:
5620 	fib_info_put(fi);
5621 	rhashtable_destroy(&nh_grp->vr_ht);
5622 err_nexthop_group_vr_ht_init:
5623 	kfree(nh_grp);
5624 	return ERR_PTR(err);
5625 }
5626 
5627 static void
5628 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5629 				struct mlxsw_sp_nexthop_group *nh_grp)
5630 {
5631 	if (!nh_grp->can_destroy)
5632 		return;
5633 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5634 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5635 	fib_info_put(nh_grp->ipv4.fi);
5636 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5637 	rhashtable_destroy(&nh_grp->vr_ht);
5638 	kfree(nh_grp);
5639 }
5640 
5641 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5642 				       struct mlxsw_sp_fib_entry *fib_entry,
5643 				       struct fib_info *fi)
5644 {
5645 	struct mlxsw_sp_nexthop_group *nh_grp;
5646 
5647 	if (fi->nh) {
5648 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5649 							   fi->nh->id);
5650 		if (WARN_ON_ONCE(!nh_grp))
5651 			return -EINVAL;
5652 		goto out;
5653 	}
5654 
5655 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5656 	if (!nh_grp) {
5657 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5658 		if (IS_ERR(nh_grp))
5659 			return PTR_ERR(nh_grp);
5660 	}
5661 out:
5662 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5663 	fib_entry->nh_group = nh_grp;
5664 	return 0;
5665 }
5666 
5667 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5668 					struct mlxsw_sp_fib_entry *fib_entry)
5669 {
5670 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5671 
5672 	list_del(&fib_entry->nexthop_group_node);
5673 	if (!list_empty(&nh_grp->fib_list))
5674 		return;
5675 
5676 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5677 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5678 		return;
5679 	}
5680 
5681 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5682 }
5683 
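/* IPv4 routes that match on a non-zero DSCP value are not offloaded. */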
5684 static bool
5685 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5686 {
5687 	struct mlxsw_sp_fib4_entry *fib4_entry;
5688 
5689 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5690 				  common);
5691 	return !fib4_entry->dscp;
5692 }
5693 
5694 static bool
5695 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5696 {
5697 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5698 
5699 	switch (fib_entry->fib_node->fib->proto) {
5700 	case MLXSW_SP_L3_PROTO_IPV4:
5701 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5702 			return false;
5703 		break;
5704 	case MLXSW_SP_L3_PROTO_IPV6:
5705 		break;
5706 	}
5707 
5708 	switch (fib_entry->type) {
5709 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5710 		return !!nh_group->nhgi->adj_index_valid;
5711 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5712 		return !!mlxsw_sp_nhgi_rif(nh_group->nhgi);
5713 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5714 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5715 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5716 		return true;
5717 	default:
5718 		return false;
5719 	}
5720 }
5721 
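/* Find the nexthop in the group that backs the given IPv6 route by
 * matching on the nexthop device and gateway address.
 */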
5722 static struct mlxsw_sp_nexthop *
5723 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5724 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5725 {
5726 	int i;
5727 
5728 	for (i = 0; i < nh_grp->nhgi->count; i++) {
5729 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5730 		struct net_device *dev = mlxsw_sp_nexthop_dev(nh);
5731 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5732 
5733 		if (dev && dev == rt->fib6_nh->fib_nh_dev &&
5734 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5735 				    &rt->fib6_nh->fib_nh_gw6))
5736 			return nh;
5737 	}
5738 
5739 	return NULL;
5740 }
5741 
5742 static void
5743 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5744 				      struct fib_entry_notifier_info *fen_info)
5745 {
5746 	u32 *p_dst = (u32 *) &fen_info->dst;
5747 	struct fib_rt_info fri;
5748 
5749 	fri.fi = fen_info->fi;
5750 	fri.tb_id = fen_info->tb_id;
5751 	fri.dst = cpu_to_be32(*p_dst);
5752 	fri.dst_len = fen_info->dst_len;
5753 	fri.dscp = fen_info->dscp;
5754 	fri.type = fen_info->type;
5755 	fri.offload = false;
5756 	fri.trap = false;
5757 	fri.offload_failed = true;
5758 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5759 }
5760 
5761 static void
5762 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5763 				 struct mlxsw_sp_fib_entry *fib_entry)
5764 {
5765 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5766 	int dst_len = fib_entry->fib_node->key.prefix_len;
5767 	struct mlxsw_sp_fib4_entry *fib4_entry;
5768 	struct fib_rt_info fri;
5769 	bool should_offload;
5770 
5771 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5772 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5773 				  common);
5774 	fri.fi = fib4_entry->fi;
5775 	fri.tb_id = fib4_entry->tb_id;
5776 	fri.dst = cpu_to_be32(*p_dst);
5777 	fri.dst_len = dst_len;
5778 	fri.dscp = fib4_entry->dscp;
5779 	fri.type = fib4_entry->type;
5780 	fri.offload = should_offload;
5781 	fri.trap = !should_offload;
5782 	fri.offload_failed = false;
5783 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5784 }
5785 
5786 static void
5787 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5788 				   struct mlxsw_sp_fib_entry *fib_entry)
5789 {
5790 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5791 	int dst_len = fib_entry->fib_node->key.prefix_len;
5792 	struct mlxsw_sp_fib4_entry *fib4_entry;
5793 	struct fib_rt_info fri;
5794 
5795 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5796 				  common);
5797 	fri.fi = fib4_entry->fi;
5798 	fri.tb_id = fib4_entry->tb_id;
5799 	fri.dst = cpu_to_be32(*p_dst);
5800 	fri.dst_len = dst_len;
5801 	fri.dscp = fib4_entry->dscp;
5802 	fri.type = fib4_entry->type;
5803 	fri.offload = false;
5804 	fri.trap = false;
5805 	fri.offload_failed = false;
5806 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5807 }
5808 
5809 #if IS_ENABLED(CONFIG_IPV6)
5810 static void
5811 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5812 				      struct fib6_info **rt_arr,
5813 				      unsigned int nrt6)
5814 {
5815 	int i;
5816 
5817 	/* In IPv6 a multipath route is represented using multiple routes, so
5818 	 * we need to set the flags on all of them.
5819 	 */
5820 	for (i = 0; i < nrt6; i++)
5821 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5822 				       false, false, true);
5823 }
5824 #else
5825 static void
5826 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5827 				      struct fib6_info **rt_arr,
5828 				      unsigned int nrt6)
5829 {
5830 }
5831 #endif
5832 
5833 #if IS_ENABLED(CONFIG_IPV6)
5834 static void
5835 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5836 				 struct mlxsw_sp_fib_entry *fib_entry)
5837 {
5838 	struct mlxsw_sp_fib6_entry *fib6_entry;
5839 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5840 	bool should_offload;
5841 
5842 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5843 
5844 	/* In IPv6 a multipath route is represented using multiple routes, so
5845 	 * we need to set the flags on all of them.
5846 	 */
5847 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5848 				  common);
5849 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5850 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5851 				       should_offload, !should_offload, false);
5852 }
5853 #else
5854 static void
5855 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5856 				 struct mlxsw_sp_fib_entry *fib_entry)
5857 {
5858 }
5859 #endif
5860 
5861 #if IS_ENABLED(CONFIG_IPV6)
5862 static void
5863 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5864 				   struct mlxsw_sp_fib_entry *fib_entry)
5865 {
5866 	struct mlxsw_sp_fib6_entry *fib6_entry;
5867 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5868 
5869 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5870 				  common);
5871 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5872 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5873 				       false, false, false);
5874 }
5875 #else
5876 static void
5877 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5878 				   struct mlxsw_sp_fib_entry *fib_entry)
5879 {
5880 }
5881 #endif
5882 
5883 static void
5884 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5885 				struct mlxsw_sp_fib_entry *fib_entry)
5886 {
5887 	switch (fib_entry->fib_node->fib->proto) {
5888 	case MLXSW_SP_L3_PROTO_IPV4:
5889 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5890 		break;
5891 	case MLXSW_SP_L3_PROTO_IPV6:
5892 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5893 		break;
5894 	}
5895 }
5896 
5897 static void
5898 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5899 				  struct mlxsw_sp_fib_entry *fib_entry)
5900 {
5901 	switch (fib_entry->fib_node->fib->proto) {
5902 	case MLXSW_SP_L3_PROTO_IPV4:
5903 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5904 		break;
5905 	case MLXSW_SP_L3_PROTO_IPV6:
5906 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5907 		break;
5908 	}
5909 }
5910 
5911 static void
5912 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5913 				    struct mlxsw_sp_fib_entry *fib_entry,
5914 				    enum mlxsw_reg_ralue_op op)
5915 {
5916 	switch (op) {
5917 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5918 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5919 		break;
5920 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5921 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5922 		break;
5923 	default:
5924 		break;
5925 	}
5926 }
5927 
5928 static void
5929 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5930 			      const struct mlxsw_sp_fib_entry *fib_entry,
5931 			      enum mlxsw_reg_ralue_op op)
5932 {
5933 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5934 	enum mlxsw_reg_ralxx_protocol proto;
5935 	u32 *p_dip;
5936 
5937 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5938 
5939 	switch (fib->proto) {
5940 	case MLXSW_SP_L3_PROTO_IPV4:
5941 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
5942 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5943 				      fib_entry->fib_node->key.prefix_len,
5944 				      *p_dip);
5945 		break;
5946 	case MLXSW_SP_L3_PROTO_IPV6:
5947 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5948 				      fib_entry->fib_node->key.prefix_len,
5949 				      fib_entry->fib_node->key.addr);
5950 		break;
5951 	}
5952 }
5953 
5954 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5955 					struct mlxsw_sp_fib_entry *fib_entry,
5956 					enum mlxsw_reg_ralue_op op)
5957 {
5958 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5959 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5960 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5961 	enum mlxsw_reg_ralue_trap_action trap_action;
5962 	u16 trap_id = 0;
5963 	u32 adjacency_index = 0;
5964 	u16 ecmp_size = 0;
5965 
5966 	/* If the nexthop group adjacency index is valid, use it with the
5967 	 * provided ECMP size. If the group only has a RIF, use the shared
5968 	 * trap adjacency index. Otherwise, trap the traffic to the kernel.
5969 	 */
5970 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5971 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5972 		adjacency_index = nhgi->adj_index;
5973 		ecmp_size = nhgi->ecmp_size;
5974 	} else if (!nhgi->adj_index_valid && nhgi->count &&
5975 		   mlxsw_sp_nhgi_rif(nhgi)) {
5976 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5977 		adjacency_index = mlxsw_sp->router->adj_trap_index;
5978 		ecmp_size = 1;
5979 	} else {
5980 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5981 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5982 	}
5983 
5984 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5985 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
5986 					adjacency_index, ecmp_size);
5987 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5988 }
5989 
5990 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5991 				       struct mlxsw_sp_fib_entry *fib_entry,
5992 				       enum mlxsw_reg_ralue_op op)
5993 {
5994 	struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi);
5995 	enum mlxsw_reg_ralue_trap_action trap_action;
5996 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5997 	u16 trap_id = 0;
5998 	u16 rif_index = 0;
5999 
6000 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
6001 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
6002 		rif_index = rif->rif_index;
6003 	} else {
6004 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6005 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
6006 	}
6007 
6008 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6009 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
6010 				       rif_index);
6011 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6012 }
6013 
6014 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
6015 				      struct mlxsw_sp_fib_entry *fib_entry,
6016 				      enum mlxsw_reg_ralue_op op)
6017 {
6018 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6019 
6020 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6021 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
6022 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6023 }
6024 
6025 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
6026 					   struct mlxsw_sp_fib_entry *fib_entry,
6027 					   enum mlxsw_reg_ralue_op op)
6028 {
6029 	enum mlxsw_reg_ralue_trap_action trap_action;
6030 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6031 
6032 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
6033 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6034 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
6035 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6036 }
6037 
6038 static int
6039 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
6040 				  struct mlxsw_sp_fib_entry *fib_entry,
6041 				  enum mlxsw_reg_ralue_op op)
6042 {
6043 	enum mlxsw_reg_ralue_trap_action trap_action;
6044 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6045 	u16 trap_id;
6046 
6047 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
6048 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
6049 
6050 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6051 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
6052 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6053 }
6054 
6055 static int
6056 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
6057 				 struct mlxsw_sp_fib_entry *fib_entry,
6058 				 enum mlxsw_reg_ralue_op op)
6059 {
6060 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
6061 	const struct mlxsw_sp_ipip_ops *ipip_ops;
6062 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6063 	int err;
6064 
6065 	if (WARN_ON(!ipip_entry))
6066 		return -EINVAL;
6067 
6068 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
6069 	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
6070 				     fib_entry->decap.tunnel_index);
6071 	if (err)
6072 		return err;
6073 
6074 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6075 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6076 					   fib_entry->decap.tunnel_index);
6077 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6078 }
6079 
6080 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
6081 					   struct mlxsw_sp_fib_entry *fib_entry,
6082 					   enum mlxsw_reg_ralue_op op)
6083 {
6084 	char ralue_pl[MLXSW_REG_RALUE_LEN];
6085 
6086 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
6087 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
6088 					   fib_entry->decap.tunnel_index);
6089 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
6090 }
6091 
6092 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6093 				   struct mlxsw_sp_fib_entry *fib_entry,
6094 				   enum mlxsw_reg_ralue_op op)
6095 {
6096 	switch (fib_entry->type) {
6097 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
6098 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
6099 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
6100 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
6101 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
6102 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
6103 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
6104 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
6105 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
6106 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
6107 							 op);
6108 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6109 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
6110 							fib_entry, op);
6111 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
6112 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
6113 	}
6114 	return -EINVAL;
6115 }
6116 
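/* Write the entry to the device and, on success, reflect the result in
 * the kernel's route offload flags.
 */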
6117 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
6118 				 struct mlxsw_sp_fib_entry *fib_entry,
6119 				 enum mlxsw_reg_ralue_op op)
6120 {
6121 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
6122 
6123 	if (err)
6124 		return err;
6125 
6126 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
6127 
6128 	return 0;
6129 }
6130 
6131 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
6132 				     struct mlxsw_sp_fib_entry *fib_entry)
6133 {
6134 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6135 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
6136 }
6137 
6138 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
6139 				  struct mlxsw_sp_fib_entry *fib_entry)
6140 {
6141 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
6142 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
6143 }
6144 
6145 static int
6146 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6147 			     const struct fib_entry_notifier_info *fen_info,
6148 			     struct mlxsw_sp_fib_entry *fib_entry)
6149 {
6150 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6151 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
6152 	struct mlxsw_sp_router *router = mlxsw_sp->router;
6153 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
6154 	int ifindex = nhgi->nexthops[0].ifindex;
6155 	struct mlxsw_sp_ipip_entry *ipip_entry;
6156 
6157 	switch (fen_info->type) {
6158 	case RTN_LOCAL:
6159 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6160 							       MLXSW_SP_L3_PROTO_IPV4, dip);
6161 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6162 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6163 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
6164 							     fib_entry,
6165 							     ipip_entry);
6166 		}
6167 		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6168 						 MLXSW_SP_L3_PROTO_IPV4,
6169 						 &dip)) {
6170 			u32 tunnel_index;
6171 
6172 			tunnel_index = router->nve_decap_config.tunnel_index;
6173 			fib_entry->decap.tunnel_index = tunnel_index;
6174 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6175 			return 0;
6176 		}
6177 		fallthrough;
6178 	case RTN_BROADCAST:
6179 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6180 		return 0;
6181 	case RTN_BLACKHOLE:
6182 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6183 		return 0;
6184 	case RTN_UNREACHABLE:
6185 	case RTN_PROHIBIT:
6186 		/* Packets hitting these routes need to be trapped, but
6187 		 * can do so with a lower priority than packets directed
6188 		 * at the host, so use action type local instead of trap.
6189 		 */
6190 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6191 		return 0;
6192 	case RTN_UNICAST:
6193 		if (nhgi->gateway)
6194 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6195 		else
6196 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6197 		return 0;
6198 	default:
6199 		return -EINVAL;
6200 	}
6201 }
6202 
6203 static void
6204 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6205 			      struct mlxsw_sp_fib_entry *fib_entry)
6206 {
6207 	switch (fib_entry->type) {
6208 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
6209 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
6210 		break;
6211 	default:
6212 		break;
6213 	}
6214 }
6215 
6216 static void
6217 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6218 			       struct mlxsw_sp_fib4_entry *fib4_entry)
6219 {
6220 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6221 }
6222 
6223 static struct mlxsw_sp_fib4_entry *
6224 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6225 			   struct mlxsw_sp_fib_node *fib_node,
6226 			   const struct fib_entry_notifier_info *fen_info)
6227 {
6228 	struct mlxsw_sp_fib4_entry *fib4_entry;
6229 	struct mlxsw_sp_fib_entry *fib_entry;
6230 	int err;
6231 
6232 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6233 	if (!fib4_entry)
6234 		return ERR_PTR(-ENOMEM);
6235 	fib_entry = &fib4_entry->common;
6236 
6237 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6238 	if (err)
6239 		goto err_nexthop4_group_get;
6240 
6241 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6242 					     fib_node->fib);
6243 	if (err)
6244 		goto err_nexthop_group_vr_link;
6245 
6246 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6247 	if (err)
6248 		goto err_fib4_entry_type_set;
6249 
6250 	fib4_entry->fi = fen_info->fi;
6251 	fib_info_hold(fib4_entry->fi);
6252 	fib4_entry->tb_id = fen_info->tb_id;
6253 	fib4_entry->type = fen_info->type;
6254 	fib4_entry->dscp = fen_info->dscp;
6255 
6256 	fib_entry->fib_node = fib_node;
6257 
6258 	return fib4_entry;
6259 
6260 err_fib4_entry_type_set:
6261 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6262 err_nexthop_group_vr_link:
6263 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6264 err_nexthop4_group_get:
6265 	kfree(fib4_entry);
6266 	return ERR_PTR(err);
6267 }
6268 
6269 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6270 					struct mlxsw_sp_fib4_entry *fib4_entry)
6271 {
6272 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6273 
6274 	fib_info_put(fib4_entry->fi);
6275 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6276 	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6277 					 fib_node->fib);
6278 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6279 	kfree(fib4_entry);
6280 }
6281 
6282 static struct mlxsw_sp_fib4_entry *
6283 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6284 			   const struct fib_entry_notifier_info *fen_info)
6285 {
6286 	struct mlxsw_sp_fib4_entry *fib4_entry;
6287 	struct mlxsw_sp_fib_node *fib_node;
6288 	struct mlxsw_sp_fib *fib;
6289 	struct mlxsw_sp_vr *vr;
6290 
6291 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6292 	if (!vr)
6293 		return NULL;
6294 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6295 
6296 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6297 					    sizeof(fen_info->dst),
6298 					    fen_info->dst_len);
6299 	if (!fib_node)
6300 		return NULL;
6301 
6302 	fib4_entry = container_of(fib_node->fib_entry,
6303 				  struct mlxsw_sp_fib4_entry, common);
6304 	if (fib4_entry->tb_id == fen_info->tb_id &&
6305 	    fib4_entry->dscp == fen_info->dscp &&
6306 	    fib4_entry->type == fen_info->type &&
6307 	    fib4_entry->fi == fen_info->fi)
6308 		return fib4_entry;
6309 
6310 	return NULL;
6311 }
6312 
6313 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6314 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6315 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6316 	.key_len = sizeof(struct mlxsw_sp_fib_key),
6317 	.automatic_shrinking = true,
6318 };
6319 
6320 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6321 				    struct mlxsw_sp_fib_node *fib_node)
6322 {
6323 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6324 				      mlxsw_sp_fib_ht_params);
6325 }
6326 
6327 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6328 				     struct mlxsw_sp_fib_node *fib_node)
6329 {
6330 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6331 			       mlxsw_sp_fib_ht_params);
6332 }
6333 
6334 static struct mlxsw_sp_fib_node *
6335 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6336 			 size_t addr_len, unsigned char prefix_len)
6337 {
6338 	struct mlxsw_sp_fib_key key;
6339 
6340 	memset(&key, 0, sizeof(key));
6341 	memcpy(key.addr, addr, addr_len);
6342 	key.prefix_len = prefix_len;
6343 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6344 }
6345 
6346 static struct mlxsw_sp_fib_node *
6347 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6348 			 size_t addr_len, unsigned char prefix_len)
6349 {
6350 	struct mlxsw_sp_fib_node *fib_node;
6351 
6352 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6353 	if (!fib_node)
6354 		return NULL;
6355 
6356 	list_add(&fib_node->list, &fib->node_list);
6357 	memcpy(fib_node->key.addr, addr, addr_len);
6358 	fib_node->key.prefix_len = prefix_len;
6359 
6360 	return fib_node;
6361 }
6362 
6363 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6364 {
6365 	list_del(&fib_node->list);
6366 	kfree(fib_node);
6367 }
6368 
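/* Account for the node's prefix length in the LPM tree bound to this
 * FIB. The first user of a prefix length requires a tree that includes
 * it, to which the virtual routers are then migrated.
 */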
6369 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6370 				      struct mlxsw_sp_fib_node *fib_node)
6371 {
6372 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6373 	struct mlxsw_sp_fib *fib = fib_node->fib;
6374 	struct mlxsw_sp_lpm_tree *lpm_tree;
6375 	int err;
6376 
6377 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6378 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6379 		goto out;
6380 
6381 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6382 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6383 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6384 					 fib->proto);
6385 	if (IS_ERR(lpm_tree))
6386 		return PTR_ERR(lpm_tree);
6387 
6388 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6389 	if (err)
6390 		goto err_lpm_tree_replace;
6391 
6392 out:
6393 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6394 	return 0;
6395 
6396 err_lpm_tree_replace:
6397 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6398 	return err;
6399 }
6400 
6401 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6402 					 struct mlxsw_sp_fib_node *fib_node)
6403 {
6404 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6405 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6406 	struct mlxsw_sp_fib *fib = fib_node->fib;
6407 	int err;
6408 
6409 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6410 		return;
6411 	/* Try to construct a new LPM tree from the current prefix usage
6412 	 * minus the now-unused prefix length. If we fail, keep the old tree.
6413 	 */
6414 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6415 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6416 				    fib_node->key.prefix_len);
6417 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6418 					 fib->proto);
6419 	if (IS_ERR(lpm_tree))
6420 		return;
6421 
6422 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6423 	if (err)
6424 		goto err_lpm_tree_replace;
6425 
6426 	return;
6427 
6428 err_lpm_tree_replace:
6429 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6430 }
6431 
6432 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6433 				  struct mlxsw_sp_fib_node *fib_node,
6434 				  struct mlxsw_sp_fib *fib)
6435 {
6436 	int err;
6437 
6438 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
6439 	if (err)
6440 		return err;
6441 	fib_node->fib = fib;
6442 
6443 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6444 	if (err)
6445 		goto err_fib_lpm_tree_link;
6446 
6447 	return 0;
6448 
6449 err_fib_lpm_tree_link:
6450 	fib_node->fib = NULL;
6451 	mlxsw_sp_fib_node_remove(fib, fib_node);
6452 	return err;
6453 }
6454 
6455 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6456 				   struct mlxsw_sp_fib_node *fib_node)
6457 {
6458 	struct mlxsw_sp_fib *fib = fib_node->fib;
6459 
6460 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6461 	fib_node->fib = NULL;
6462 	mlxsw_sp_fib_node_remove(fib, fib_node);
6463 }
6464 
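/* Look up the FIB node for the given prefix and create it if it does
 * not exist yet.
 */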
6465 static struct mlxsw_sp_fib_node *
6466 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6467 		      size_t addr_len, unsigned char prefix_len,
6468 		      enum mlxsw_sp_l3proto proto)
6469 {
6470 	struct mlxsw_sp_fib_node *fib_node;
6471 	struct mlxsw_sp_fib *fib;
6472 	struct mlxsw_sp_vr *vr;
6473 	int err;
6474 
6475 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6476 	if (IS_ERR(vr))
6477 		return ERR_CAST(vr);
6478 	fib = mlxsw_sp_vr_fib(vr, proto);
6479 
6480 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6481 	if (fib_node)
6482 		return fib_node;
6483 
6484 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6485 	if (!fib_node) {
6486 		err = -ENOMEM;
6487 		goto err_fib_node_create;
6488 	}
6489 
6490 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6491 	if (err)
6492 		goto err_fib_node_init;
6493 
6494 	return fib_node;
6495 
6496 err_fib_node_init:
6497 	mlxsw_sp_fib_node_destroy(fib_node);
6498 err_fib_node_create:
6499 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6500 	return ERR_PTR(err);
6501 }
6502 
6503 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6504 				  struct mlxsw_sp_fib_node *fib_node)
6505 {
6506 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6507 
6508 	if (fib_node->fib_entry)
6509 		return;
6510 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6511 	mlxsw_sp_fib_node_destroy(fib_node);
6512 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6513 }
6514 
6515 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6516 					struct mlxsw_sp_fib_entry *fib_entry)
6517 {
6518 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6519 	int err;
6520 
6521 	fib_node->fib_entry = fib_entry;
6522 
6523 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6524 	if (err)
6525 		goto err_fib_entry_update;
6526 
6527 	return 0;
6528 
6529 err_fib_entry_update:
6530 	fib_node->fib_entry = NULL;
6531 	return err;
6532 }
6533 
6534 static void
6535 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6536 			       struct mlxsw_sp_fib_entry *fib_entry)
6537 {
6538 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6539 
6540 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6541 	fib_node->fib_entry = NULL;
6542 }
6543 
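/* Do not allow a main-table route to replace a local-table route for
 * the same prefix, since local-table routes take precedence in the
 * kernel's FIB lookup.
 */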
6544 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6545 {
6546 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6547 	struct mlxsw_sp_fib4_entry *fib4_replaced;
6548 
6549 	if (!fib_node->fib_entry)
6550 		return true;
6551 
6552 	fib4_replaced = container_of(fib_node->fib_entry,
6553 				     struct mlxsw_sp_fib4_entry, common);
6554 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6555 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
6556 		return false;
6557 
6558 	return true;
6559 }
6560 
6561 static int
6562 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6563 			     const struct fib_entry_notifier_info *fen_info)
6564 {
6565 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6566 	struct mlxsw_sp_fib_entry *replaced;
6567 	struct mlxsw_sp_fib_node *fib_node;
6568 	int err;
6569 
6570 	if (fen_info->fi->nh &&
6571 	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6572 		return 0;
6573 
6574 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6575 					 &fen_info->dst, sizeof(fen_info->dst),
6576 					 fen_info->dst_len,
6577 					 MLXSW_SP_L3_PROTO_IPV4);
6578 	if (IS_ERR(fib_node)) {
6579 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6580 		return PTR_ERR(fib_node);
6581 	}
6582 
6583 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6584 	if (IS_ERR(fib4_entry)) {
6585 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6586 		err = PTR_ERR(fib4_entry);
6587 		goto err_fib4_entry_create;
6588 	}
6589 
6590 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6591 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6592 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6593 		return 0;
6594 	}
6595 
6596 	replaced = fib_node->fib_entry;
6597 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6598 	if (err) {
6599 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6600 		goto err_fib_node_entry_link;
6601 	}
6602 
6603 	/* Nothing to replace */
6604 	if (!replaced)
6605 		return 0;
6606 
6607 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6608 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6609 				     common);
6610 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6611 
6612 	return 0;
6613 
6614 err_fib_node_entry_link:
6615 	fib_node->fib_entry = replaced;
6616 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6617 err_fib4_entry_create:
6618 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6619 	return err;
6620 }
6621 
6622 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6623 				     struct fib_entry_notifier_info *fen_info)
6624 {
6625 	struct mlxsw_sp_fib4_entry *fib4_entry;
6626 	struct mlxsw_sp_fib_node *fib_node;
6627 
6628 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6629 	if (!fib4_entry)
6630 		return;
6631 	fib_node = fib4_entry->common.fib_node;
6632 
6633 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6634 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6635 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6636 }
6637 
6638 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6639 {
6640 	/* Multicast routes aren't supported, so ignore them. Neighbour
6641 	 * Discovery packets are specifically trapped.
6642 	 */
6643 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6644 		return true;
6645 
6646 	/* Cloned routes are irrelevant in the forwarding path. */
6647 	if (rt->fib6_flags & RTF_CACHE)
6648 		return true;
6649 
6650 	return false;
6651 }
6652 
6653 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6654 {
6655 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6656 
6657 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6658 	if (!mlxsw_sp_rt6)
6659 		return ERR_PTR(-ENOMEM);
6660 
6661 	/* In case of route replace, the replaced route is deleted with
6662 	 * no notification. Take a reference to prevent accessing freed
6663 	 * memory.
6664 	 */
6665 	mlxsw_sp_rt6->rt = rt;
6666 	fib6_info_hold(rt);
6667 
6668 	return mlxsw_sp_rt6;
6669 }
6670 
6671 #if IS_ENABLED(CONFIG_IPV6)
6672 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6673 {
6674 	fib6_info_release(rt);
6675 }
6676 #else
6677 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6678 {
6679 }
6680 #endif
6681 
6682 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6683 {
6684 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6685 
6686 	if (!mlxsw_sp_rt6->rt->nh)
6687 		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6688 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6689 	kfree(mlxsw_sp_rt6);
6690 }
6691 
6692 static struct fib6_info *
6693 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6694 {
6695 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6696 				list)->rt;
6697 }
6698 
6699 static struct mlxsw_sp_rt6 *
6700 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6701 			    const struct fib6_info *rt)
6702 {
6703 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6704 
6705 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6706 		if (mlxsw_sp_rt6->rt == rt)
6707 			return mlxsw_sp_rt6;
6708 	}
6709 
6710 	return NULL;
6711 }
6712 
6713 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6714 					const struct fib6_info *rt,
6715 					enum mlxsw_sp_ipip_type *ret)
6716 {
6717 	return rt->fib6_nh->fib_nh_dev &&
6718 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6719 }
6720 
6721 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6722 				  struct mlxsw_sp_nexthop_group *nh_grp,
6723 				  struct mlxsw_sp_nexthop *nh,
6724 				  const struct fib6_info *rt)
6725 {
6726 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6727 	int err;
6728 
6729 	nh->nhgi = nh_grp->nhgi;
6730 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6731 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6732 #if IS_ENABLED(CONFIG_IPV6)
6733 	nh->neigh_tbl = &nd_tbl;
6734 #endif
6735 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6736 
6737 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6738 
6739 	if (!dev)
6740 		return 0;
6741 	nh->ifindex = dev->ifindex;
6742 
6743 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6744 	if (err)
6745 		goto err_nexthop_type_init;
6746 
6747 	return 0;
6748 
6749 err_nexthop_type_init:
6750 	list_del(&nh->router_list_node);
6751 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6752 	return err;
6753 }
6754 
6755 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6756 				   struct mlxsw_sp_nexthop *nh)
6757 {
6758 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6759 	list_del(&nh->router_list_node);
6760 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6761 }
6762 
6763 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6764 				    const struct fib6_info *rt)
6765 {
6766 	return rt->fib6_nh->fib_nh_gw_family ||
6767 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6768 }
6769 
6770 static int
6771 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6772 				  struct mlxsw_sp_nexthop_group *nh_grp,
6773 				  struct mlxsw_sp_fib6_entry *fib6_entry)
6774 {
6775 	struct mlxsw_sp_nexthop_group_info *nhgi;
6776 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6777 	struct mlxsw_sp_nexthop *nh;
6778 	int err, i;
6779 
6780 	nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6781 		       GFP_KERNEL);
6782 	if (!nhgi)
6783 		return -ENOMEM;
6784 	nh_grp->nhgi = nhgi;
6785 	nhgi->nh_grp = nh_grp;
6786 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6787 					struct mlxsw_sp_rt6, list);
6788 	nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6789 	nhgi->count = fib6_entry->nrt6;
6790 	for (i = 0; i < nhgi->count; i++) {
6791 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
6792 
6793 		nh = &nhgi->nexthops[i];
6794 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6795 		if (err)
6796 			goto err_nexthop6_init;
6797 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6798 	}
6800 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6801 	if (err)
6802 		goto err_group_inc;
6803 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6804 	if (err)
6805 		goto err_group_refresh;
6806 
6807 	return 0;
6808 
6809 err_group_refresh:
6810 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6811 err_group_inc:
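	/* When arriving from err_group_inc, start the unwind below from
	 * the last initialized nexthop.
	 */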
6812 	i = nhgi->count;
6813 err_nexthop6_init:
6814 	for (i--; i >= 0; i--) {
6815 		nh = &nhgi->nexthops[i];
6816 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6817 	}
6818 	kfree(nhgi);
6819 	return err;
6820 }
6821 
6822 static void
6823 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6824 				  struct mlxsw_sp_nexthop_group *nh_grp)
6825 {
6826 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6827 	int i;
6828 
6829 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6830 	for (i = nhgi->count - 1; i >= 0; i--) {
6831 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6832 
6833 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6834 	}
6835 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6836 	WARN_ON_ONCE(nhgi->adj_index_valid);
6837 	kfree(nhgi);
6838 }
6839 
6840 static struct mlxsw_sp_nexthop_group *
6841 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6842 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6843 {
6844 	struct mlxsw_sp_nexthop_group *nh_grp;
6845 	int err;
6846 
6847 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6848 	if (!nh_grp)
6849 		return ERR_PTR(-ENOMEM);
6850 	INIT_LIST_HEAD(&nh_grp->vr_list);
6851 	err = rhashtable_init(&nh_grp->vr_ht,
6852 			      &mlxsw_sp_nexthop_group_vr_ht_params);
6853 	if (err)
6854 		goto err_nexthop_group_vr_ht_init;
6855 	INIT_LIST_HEAD(&nh_grp->fib_list);
6856 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6857 
6858 	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6859 	if (err)
6860 		goto err_nexthop_group_info_init;
6861 
6862 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6863 	if (err)
6864 		goto err_nexthop_group_insert;
6865 
6866 	nh_grp->can_destroy = true;
6867 
6868 	return nh_grp;
6869 
6870 err_nexthop_group_insert:
6871 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6872 err_nexthop_group_info_init:
6873 	rhashtable_destroy(&nh_grp->vr_ht);
6874 err_nexthop_group_vr_ht_init:
6875 	kfree(nh_grp);
6876 	return ERR_PTR(err);
6877 }
6878 
6879 static void
6880 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6881 				struct mlxsw_sp_nexthop_group *nh_grp)
6882 {
6883 	if (!nh_grp->can_destroy)
6884 		return;
6885 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6886 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6887 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6888 	rhashtable_destroy(&nh_grp->vr_ht);
6889 	kfree(nh_grp);
6890 }
6891 
6892 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6893 				       struct mlxsw_sp_fib6_entry *fib6_entry)
6894 {
6895 	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6896 	struct mlxsw_sp_nexthop_group *nh_grp;
6897 
6898 	if (rt->nh) {
6899 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6900 							   rt->nh->id);
6901 		if (WARN_ON_ONCE(!nh_grp))
6902 			return -EINVAL;
6903 		goto out;
6904 	}
6905 
6906 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6907 	if (!nh_grp) {
6908 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6909 		if (IS_ERR(nh_grp))
6910 			return PTR_ERR(nh_grp);
6911 	}
6912 
6913 	/* The route and the nexthop are described by the same struct, so we
6914 	 * need to update the nexthop offload indication for the new route.
6915 	 */
6916 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6917 
6918 out:
6919 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6920 		      &nh_grp->fib_list);
6921 	fib6_entry->common.nh_group = nh_grp;
6922 
6923 	return 0;
6924 }
6925 
6926 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6927 					struct mlxsw_sp_fib_entry *fib_entry)
6928 {
6929 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6930 
6931 	list_del(&fib_entry->nexthop_group_node);
6932 	if (!list_empty(&nh_grp->fib_list))
6933 		return;
6934 
6935 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6936 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6937 		return;
6938 	}
6939 
6940 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6941 }
6942 
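/* The set of routes backing the IPv6 entry changed; look up or create a
 * nexthop group that matches the new set and rebind the entry to it.
 */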
6943 static int
6944 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6945 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6946 {
6947 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6948 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6949 	int err;
6950 
6951 	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6952 	fib6_entry->common.nh_group = NULL;
6953 	list_del(&fib6_entry->common.nexthop_group_node);
6954 
6955 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6956 	if (err)
6957 		goto err_nexthop6_group_get;
6958 
6959 	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6960 					     fib_node->fib);
6961 	if (err)
6962 		goto err_nexthop_group_vr_link;
6963 
6964 	/* If this entry is offloaded, the adjacency index currently
6965 	 * associated with it in the device's table is that of the old
6966 	 * group. Start using the new one instead.
6967 	 */
6968 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
6969 	if (err)
6970 		goto err_fib_entry_update;
6971 
6972 	if (list_empty(&old_nh_grp->fib_list))
6973 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6974 
6975 	return 0;
6976 
6977 err_fib_entry_update:
6978 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6979 					 fib_node->fib);
6980 err_nexthop_group_vr_link:
6981 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6982 err_nexthop6_group_get:
6983 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6984 		      &old_nh_grp->fib_list);
6985 	fib6_entry->common.nh_group = old_nh_grp;
6986 	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6987 	return err;
6988 }
6989 
6990 static int
6991 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6992 				struct mlxsw_sp_fib6_entry *fib6_entry,
6993 				struct fib6_info **rt_arr, unsigned int nrt6)
6994 {
6995 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6996 	int err, i;
6997 
6998 	for (i = 0; i < nrt6; i++) {
6999 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7000 		if (IS_ERR(mlxsw_sp_rt6)) {
7001 			err = PTR_ERR(mlxsw_sp_rt6);
7002 			goto err_rt6_unwind;
7003 		}
7004 
7005 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7006 		fib6_entry->nrt6++;
7007 	}
7008 
7009 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
7010 	if (err)
7011 		goto err_rt6_unwind;
7012 
7013 	return 0;
7014 
7015 err_rt6_unwind:
7016 	for (; i > 0; i--) {
7017 		fib6_entry->nrt6--;
7018 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7019 					       struct mlxsw_sp_rt6, list);
7020 		list_del(&mlxsw_sp_rt6->list);
7021 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7022 	}
7023 	return err;
7024 }
7025 
7026 static void
7027 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
7028 				struct mlxsw_sp_fib6_entry *fib6_entry,
7029 				struct fib6_info **rt_arr, unsigned int nrt6)
7030 {
7031 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7032 	int i;
7033 
7034 	for (i = 0; i < nrt6; i++) {
7035 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
7036 							   rt_arr[i]);
7037 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
7038 			continue;
7039 
7040 		fib6_entry->nrt6--;
7041 		list_del(&mlxsw_sp_rt6->list);
7042 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7043 	}
7044 
7045 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
7046 }
7047 
7048 static int
7049 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
7050 				   struct mlxsw_sp_fib_entry *fib_entry,
7051 				   const struct fib6_info *rt)
7052 {
7053 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
7054 	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
7055 	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
7056 	struct mlxsw_sp_router *router = mlxsw_sp->router;
7057 	int ifindex = nhgi->nexthops[0].ifindex;
7058 	struct mlxsw_sp_ipip_entry *ipip_entry;
7059 
7060 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
7061 	ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
7062 						       MLXSW_SP_L3_PROTO_IPV6,
7063 						       dip);
7064 
7065 	if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
7066 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
7067 		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
7068 						     ipip_entry);
7069 	}
7070 	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
7071 					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
7072 		u32 tunnel_index;
7073 
7074 		tunnel_index = router->nve_decap_config.tunnel_index;
7075 		fib_entry->decap.tunnel_index = tunnel_index;
7076 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
7077 	}
7078 
7079 	return 0;
7080 }
7081 
7082 static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
7083 					struct mlxsw_sp_fib_entry *fib_entry,
7084 					const struct fib6_info *rt)
7085 {
7086 	if (rt->fib6_flags & RTF_LOCAL)
7087 		return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
7088 							  rt);
7089 	if (rt->fib6_flags & RTF_ANYCAST)
7090 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
7091 	else if (rt->fib6_type == RTN_BLACKHOLE)
7092 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
7093 	else if (rt->fib6_flags & RTF_REJECT)
7094 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
7095 	else if (fib_entry->nh_group->nhgi->gateway)
7096 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
7097 	else
7098 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
7099 
7100 	return 0;
7101 }
7102 
7103 static void
7104 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
7105 {
7106 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
7107 
7108 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
7109 				 list) {
7110 		fib6_entry->nrt6--;
7111 		list_del(&mlxsw_sp_rt6->list);
7112 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7113 	}
7114 }
7115 
7116 static struct mlxsw_sp_fib6_entry *
7117 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
7118 			   struct mlxsw_sp_fib_node *fib_node,
7119 			   struct fib6_info **rt_arr, unsigned int nrt6)
7120 {
7121 	struct mlxsw_sp_fib6_entry *fib6_entry;
7122 	struct mlxsw_sp_fib_entry *fib_entry;
7123 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
7124 	int err, i;
7125 
7126 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
7127 	if (!fib6_entry)
7128 		return ERR_PTR(-ENOMEM);
7129 	fib_entry = &fib6_entry->common;
7130 
7131 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
7132 
7133 	for (i = 0; i < nrt6; i++) {
7134 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
7135 		if (IS_ERR(mlxsw_sp_rt6)) {
7136 			err = PTR_ERR(mlxsw_sp_rt6);
7137 			goto err_rt6_unwind;
7138 		}
7139 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
7140 		fib6_entry->nrt6++;
7141 	}
7142 
7143 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
7144 	if (err)
7145 		goto err_rt6_unwind;
7146 
7147 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
7148 					     fib_node->fib);
7149 	if (err)
7150 		goto err_nexthop_group_vr_link;
7151 
7152 	err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
7153 	if (err)
7154 		goto err_fib6_entry_type_set;
7155 
7156 	fib_entry->fib_node = fib_node;
7157 
7158 	return fib6_entry;
7159 
7160 err_fib6_entry_type_set:
7161 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
7162 err_nexthop_group_vr_link:
7163 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
7164 err_rt6_unwind:
7165 	for (; i > 0; i--) {
7166 		fib6_entry->nrt6--;
7167 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
7168 					       struct mlxsw_sp_rt6, list);
7169 		list_del(&mlxsw_sp_rt6->list);
7170 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
7171 	}
7172 	kfree(fib6_entry);
7173 	return ERR_PTR(err);
7174 }
7175 
7176 static void
7177 mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
7178 			       struct mlxsw_sp_fib6_entry *fib6_entry)
7179 {
7180 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
7181 }
7182 
7183 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
7184 					struct mlxsw_sp_fib6_entry *fib6_entry)
7185 {
7186 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7187 
7188 	mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
7189 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
7190 					 fib_node->fib);
7191 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
7192 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
7193 	WARN_ON(fib6_entry->nrt6);
7194 	kfree(fib6_entry);
7195 }
7196 
7197 static struct mlxsw_sp_fib6_entry *
7198 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
7199 			   const struct fib6_info *rt)
7200 {
7201 	struct mlxsw_sp_fib6_entry *fib6_entry;
7202 	struct mlxsw_sp_fib_node *fib_node;
7203 	struct mlxsw_sp_fib *fib;
7204 	struct fib6_info *cmp_rt;
7205 	struct mlxsw_sp_vr *vr;
7206 
7207 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
7208 	if (!vr)
7209 		return NULL;
7210 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
7211 
7212 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
7213 					    sizeof(rt->fib6_dst.addr),
7214 					    rt->fib6_dst.plen);
7215 	if (!fib_node)
7216 		return NULL;
7217 
7218 	fib6_entry = container_of(fib_node->fib_entry,
7219 				  struct mlxsw_sp_fib6_entry, common);
7220 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7221 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7222 	    rt->fib6_metric == cmp_rt->fib6_metric &&
7223 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7224 		return fib6_entry;
7225 
7226 	return NULL;
7227 }
7228 
7229 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7230 {
7231 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7232 	struct mlxsw_sp_fib6_entry *fib6_replaced;
7233 	struct fib6_info *rt, *rt_replaced;
7234 
7235 	if (!fib_node->fib_entry)
7236 		return true;
7237 
7238 	fib6_replaced = container_of(fib_node->fib_entry,
7239 				     struct mlxsw_sp_fib6_entry,
7240 				     common);
7241 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7242 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
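	/* The IPv6 local table is squashed into the same virtual router as
	 * the main table (RT_TABLE_LOCAL is fixed up to RT_TABLE_MAIN), and
	 * the local table takes precedence during lookup. A main-table route
	 * must therefore not replace an identical local-table route that is
	 * already programmed.
	 */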
7243 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7244 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7245 		return false;
7246 
7247 	return true;
7248 }
7249 
7250 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7251 					struct fib6_info **rt_arr,
7252 					unsigned int nrt6)
7253 {
7254 	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7255 	struct mlxsw_sp_fib_entry *replaced;
7256 	struct mlxsw_sp_fib_node *fib_node;
7257 	struct fib6_info *rt = rt_arr[0];
7258 	int err;
7259 
7260 	if (rt->fib6_src.plen)
7261 		return -EINVAL;
7262 
7263 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7264 		return 0;
7265 
7266 	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7267 		return 0;
7268 
7269 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7270 					 &rt->fib6_dst.addr,
7271 					 sizeof(rt->fib6_dst.addr),
7272 					 rt->fib6_dst.plen,
7273 					 MLXSW_SP_L3_PROTO_IPV6);
7274 	if (IS_ERR(fib_node))
7275 		return PTR_ERR(fib_node);
7276 
7277 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7278 						nrt6);
7279 	if (IS_ERR(fib6_entry)) {
7280 		err = PTR_ERR(fib6_entry);
7281 		goto err_fib6_entry_create;
7282 	}
7283 
7284 	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7285 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7286 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7287 		return 0;
7288 	}
7289 
7290 	replaced = fib_node->fib_entry;
7291 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
7292 	if (err)
7293 		goto err_fib_node_entry_link;
7294 
7295 	/* Nothing to replace */
7296 	if (!replaced)
7297 		return 0;
7298 
7299 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7300 	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7301 				     common);
7302 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7303 
7304 	return 0;
7305 
7306 err_fib_node_entry_link:
7307 	fib_node->fib_entry = replaced;
7308 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7309 err_fib6_entry_create:
7310 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7311 	return err;
7312 }
7313 
7314 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7315 				       struct fib6_info **rt_arr,
7316 				       unsigned int nrt6)
7317 {
7318 	struct mlxsw_sp_fib6_entry *fib6_entry;
7319 	struct mlxsw_sp_fib_node *fib_node;
7320 	struct fib6_info *rt = rt_arr[0];
7321 	int err;
7322 
7323 	if (rt->fib6_src.plen)
7324 		return -EINVAL;
7325 
7326 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7327 		return 0;
7328 
7329 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7330 					 &rt->fib6_dst.addr,
7331 					 sizeof(rt->fib6_dst.addr),
7332 					 rt->fib6_dst.plen,
7333 					 MLXSW_SP_L3_PROTO_IPV6);
7334 	if (IS_ERR(fib_node))
7335 		return PTR_ERR(fib_node);
7336 
7337 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7338 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7339 		return -EINVAL;
7340 	}
7341 
7342 	fib6_entry = container_of(fib_node->fib_entry,
7343 				  struct mlxsw_sp_fib6_entry, common);
7344 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7345 					      nrt6);
7346 	if (err)
7347 		goto err_fib6_entry_nexthop_add;
7348 
7349 	return 0;
7350 
7351 err_fib6_entry_nexthop_add:
7352 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7353 	return err;
7354 }
7355 
7356 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7357 				     struct fib6_info **rt_arr,
7358 				     unsigned int nrt6)
7359 {
7360 	struct mlxsw_sp_fib6_entry *fib6_entry;
7361 	struct mlxsw_sp_fib_node *fib_node;
7362 	struct fib6_info *rt = rt_arr[0];
7363 
7364 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7365 		return;
7366 
7367 	/* Multipath routes are first added to the FIB trie and only then
7368 	 * notified. If we vetoed the addition, we will get a delete
7369 	 * notification for a route we do not have. Therefore, do not warn if
7370 	 * the route was not found.
7371 	 */
7372 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7373 	if (!fib6_entry)
7374 		return;
7375 
7376 	/* If not all the nexthops are deleted, then only reduce the nexthop
7377 	 * group.
7378 	 */
7379 	if (nrt6 != fib6_entry->nrt6) {
7380 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7381 						nrt6);
7382 		return;
7383 	}
7384 
7385 	fib_node = fib6_entry->common.fib_node;
7386 
7387 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7388 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7389 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7390 }
7391 
7392 static struct mlxsw_sp_mr_table *
7393 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7394 {
7395 	if (family == RTNL_FAMILY_IPMR)
7396 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7397 	else
7398 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7399 }
7400 
7401 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7402 				     struct mfc_entry_notifier_info *men_info,
7403 				     bool replace)
7404 {
7405 	struct mlxsw_sp_mr_table *mrt;
7406 	struct mlxsw_sp_vr *vr;
7407 
7408 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7409 	if (IS_ERR(vr))
7410 		return PTR_ERR(vr);
7411 
7412 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7413 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7414 }
7415 
7416 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7417 				      struct mfc_entry_notifier_info *men_info)
7418 {
7419 	struct mlxsw_sp_mr_table *mrt;
7420 	struct mlxsw_sp_vr *vr;
7421 
7422 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7423 	if (WARN_ON(!vr))
7424 		return;
7425 
7426 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7427 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7428 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7429 }
7430 
7431 static int
7432 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7433 			      struct vif_entry_notifier_info *ven_info)
7434 {
7435 	struct mlxsw_sp_mr_table *mrt;
7436 	struct mlxsw_sp_rif *rif;
7437 	struct mlxsw_sp_vr *vr;
7438 
7439 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7440 	if (IS_ERR(vr))
7441 		return PTR_ERR(vr);
7442 
7443 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7444 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7445 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7446 				   ven_info->vif_index,
7447 				   ven_info->vif_flags, rif);
7448 }
7449 
7450 static void
7451 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7452 			      struct vif_entry_notifier_info *ven_info)
7453 {
7454 	struct mlxsw_sp_mr_table *mrt;
7455 	struct mlxsw_sp_vr *vr;
7456 
7457 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7458 	if (WARN_ON(!vr))
7459 		return;
7460 
7461 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7462 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7463 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7464 }
7465 
7466 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7467 				     struct mlxsw_sp_fib_node *fib_node)
7468 {
7469 	struct mlxsw_sp_fib4_entry *fib4_entry;
7470 
7471 	fib4_entry = container_of(fib_node->fib_entry,
7472 				  struct mlxsw_sp_fib4_entry, common);
7473 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7474 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7475 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7476 }
7477 
7478 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7479 				     struct mlxsw_sp_fib_node *fib_node)
7480 {
7481 	struct mlxsw_sp_fib6_entry *fib6_entry;
7482 
7483 	fib6_entry = container_of(fib_node->fib_entry,
7484 				  struct mlxsw_sp_fib6_entry, common);
7485 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7486 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7487 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7488 }
7489 
7490 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7491 				    struct mlxsw_sp_fib_node *fib_node)
7492 {
7493 	switch (fib_node->fib->proto) {
7494 	case MLXSW_SP_L3_PROTO_IPV4:
7495 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7496 		break;
7497 	case MLXSW_SP_L3_PROTO_IPV6:
7498 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7499 		break;
7500 	}
7501 }
7502 
7503 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7504 				  struct mlxsw_sp_vr *vr,
7505 				  enum mlxsw_sp_l3proto proto)
7506 {
7507 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7508 	struct mlxsw_sp_fib_node *fib_node, *tmp;
7509 
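	/* Flushing the last node may drop the last reference to the FIB and
	 * free it, list head included. Determine up front whether this
	 * iteration is the final one, so the loop can break without touching
	 * the freed list.
	 */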
7510 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7511 		bool do_break = &tmp->list == &fib->node_list;
7512 
7513 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7514 		if (do_break)
7515 			break;
7516 	}
7517 }
7518 
7519 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7520 {
7521 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7522 	int i, j;
7523 
7524 	for (i = 0; i < max_vrs; i++) {
7525 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7526 
7527 		if (!mlxsw_sp_vr_is_used(vr))
7528 			continue;
7529 
7530 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7531 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7532 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7533 
7534 		/* If the virtual router was only used for IPv4, then it is
7535 		 * no longer in use after the flush above.
7536 		 */
7537 		if (!mlxsw_sp_vr_is_used(vr))
7538 			continue;
7539 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7540 	}
7541 }
7542 
7543 struct mlxsw_sp_fib6_event_work {
7544 	struct fib6_info **rt_arr;
7545 	unsigned int nrt6;
7546 };
7547 
7548 struct mlxsw_sp_fib_event_work {
7549 	struct work_struct work;
7550 	union {
7551 		struct mlxsw_sp_fib6_event_work fib6_work;
7552 		struct fib_entry_notifier_info fen_info;
7553 		struct fib_rule_notifier_info fr_info;
7554 		struct fib_nh_notifier_info fnh_info;
7555 		struct mfc_entry_notifier_info men_info;
7556 		struct vif_entry_notifier_info ven_info;
7557 	};
7558 	struct mlxsw_sp *mlxsw_sp;
7559 	unsigned long event;
7560 };
7561 
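/* A multipath IPv6 route is notified with only the first fib6_info and a
 * sibling count. Snapshot the whole sibling group into an array and take a
 * reference on each route, since the event is processed asynchronously and
 * the routes must not be freed in the meantime.
 */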
7562 static int
7563 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7564 			       struct fib6_entry_notifier_info *fen6_info)
7565 {
7566 	struct fib6_info *rt = fen6_info->rt;
7567 	struct fib6_info **rt_arr;
7568 	struct fib6_info *iter;
7569 	unsigned int nrt6;
7570 	int i = 0;
7571 
7572 	nrt6 = fen6_info->nsiblings + 1;
7573 
7574 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7575 	if (!rt_arr)
7576 		return -ENOMEM;
7577 
7578 	fib6_work->rt_arr = rt_arr;
7579 	fib6_work->nrt6 = nrt6;
7580 
7581 	rt_arr[0] = rt;
7582 	fib6_info_hold(rt);
7583 
7584 	if (!fen6_info->nsiblings)
7585 		return 0;
7586 
7587 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7588 		if (i == fen6_info->nsiblings)
7589 			break;
7590 
7591 		rt_arr[i + 1] = iter;
7592 		fib6_info_hold(iter);
7593 		i++;
7594 	}
7595 	WARN_ON_ONCE(i != fen6_info->nsiblings);
7596 
7597 	return 0;
7598 }
7599 
7600 static void
7601 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7602 {
7603 	int i;
7604 
7605 	for (i = 0; i < fib6_work->nrt6; i++)
7606 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7607 	kfree(fib6_work->rt_arr);
7608 }
7609 
7610 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7611 {
7612 	struct mlxsw_sp_fib_event_work *fib_work =
7613 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7614 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7615 	int err;
7616 
7617 	mutex_lock(&mlxsw_sp->router->lock);
7618 	mlxsw_sp_span_respin(mlxsw_sp);
7619 
7620 	switch (fib_work->event) {
7621 	case FIB_EVENT_ENTRY_REPLACE:
7622 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7623 						   &fib_work->fen_info);
7624 		if (err) {
7625 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7626 			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7627 							      &fib_work->fen_info);
7628 		}
7629 		fib_info_put(fib_work->fen_info.fi);
7630 		break;
7631 	case FIB_EVENT_ENTRY_DEL:
7632 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7633 		fib_info_put(fib_work->fen_info.fi);
7634 		break;
7635 	case FIB_EVENT_NH_ADD:
7636 	case FIB_EVENT_NH_DEL:
7637 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7638 					fib_work->fnh_info.fib_nh);
7639 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7640 		break;
7641 	}
7642 	mutex_unlock(&mlxsw_sp->router->lock);
7643 	kfree(fib_work);
7644 }
7645 
7646 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7647 {
7648 	struct mlxsw_sp_fib_event_work *fib_work =
7649 		    container_of(work, struct mlxsw_sp_fib_event_work, work);
7650 	struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7651 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7652 	int err;
7653 
7654 	mutex_lock(&mlxsw_sp->router->lock);
7655 	mlxsw_sp_span_respin(mlxsw_sp);
7656 
7657 	switch (fib_work->event) {
7658 	case FIB_EVENT_ENTRY_REPLACE:
7659 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7660 						   fib6_work->rt_arr,
7661 						   fib6_work->nrt6);
7662 		if (err) {
7663 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7664 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7665 							      fib6_work->rt_arr,
7666 							      fib6_work->nrt6);
7667 		}
7668 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7669 		break;
7670 	case FIB_EVENT_ENTRY_APPEND:
7671 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7672 						  fib6_work->rt_arr,
7673 						  fib6_work->nrt6);
7674 		if (err) {
7675 			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7676 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7677 							      fib6_work->rt_arr,
7678 							      fib6_work->nrt6);
7679 		}
7680 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7681 		break;
7682 	case FIB_EVENT_ENTRY_DEL:
7683 		mlxsw_sp_router_fib6_del(mlxsw_sp,
7684 					 fib6_work->rt_arr,
7685 					 fib6_work->nrt6);
7686 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7687 		break;
7688 	}
7689 	mutex_unlock(&mlxsw_sp->router->lock);
7690 	kfree(fib_work);
7691 }
7692 
7693 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7694 {
7695 	struct mlxsw_sp_fib_event_work *fib_work =
7696 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7697 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7698 	bool replace;
7699 	int err;
7700 
7701 	rtnl_lock();
7702 	mutex_lock(&mlxsw_sp->router->lock);
7703 	switch (fib_work->event) {
7704 	case FIB_EVENT_ENTRY_REPLACE:
7705 	case FIB_EVENT_ENTRY_ADD:
7706 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7707 
7708 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7709 						replace);
7710 		if (err)
7711 			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7712 		mr_cache_put(fib_work->men_info.mfc);
7713 		break;
7714 	case FIB_EVENT_ENTRY_DEL:
7715 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7716 		mr_cache_put(fib_work->men_info.mfc);
7717 		break;
7718 	case FIB_EVENT_VIF_ADD:
7719 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7720 						    &fib_work->ven_info);
7721 		if (err)
7722 			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7723 		dev_put(fib_work->ven_info.dev);
7724 		break;
7725 	case FIB_EVENT_VIF_DEL:
7726 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7727 					      &fib_work->ven_info);
7728 		dev_put(fib_work->ven_info.dev);
7729 		break;
7730 	}
7731 	mutex_unlock(&mlxsw_sp->router->lock);
7732 	rtnl_unlock();
7733 	kfree(fib_work);
7734 }
7735 
7736 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7737 				       struct fib_notifier_info *info)
7738 {
7739 	struct fib_entry_notifier_info *fen_info;
7740 	struct fib_nh_notifier_info *fnh_info;
7741 
7742 	switch (fib_work->event) {
7743 	case FIB_EVENT_ENTRY_REPLACE:
7744 	case FIB_EVENT_ENTRY_DEL:
7745 		fen_info = container_of(info, struct fib_entry_notifier_info,
7746 					info);
7747 		fib_work->fen_info = *fen_info;
7748 		/* Take reference on fib_info to prevent it from being
7749 		 * freed while work is queued. Release it afterwards.
7750 		 */
7751 		fib_info_hold(fib_work->fen_info.fi);
7752 		break;
7753 	case FIB_EVENT_NH_ADD:
7754 	case FIB_EVENT_NH_DEL:
7755 		fnh_info = container_of(info, struct fib_nh_notifier_info,
7756 					info);
7757 		fib_work->fnh_info = *fnh_info;
7758 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7759 		break;
7760 	}
7761 }
7762 
7763 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7764 				      struct fib_notifier_info *info)
7765 {
7766 	struct fib6_entry_notifier_info *fen6_info;
7767 	int err;
7768 
7769 	switch (fib_work->event) {
7770 	case FIB_EVENT_ENTRY_REPLACE:
7771 	case FIB_EVENT_ENTRY_APPEND:
7772 	case FIB_EVENT_ENTRY_DEL:
7773 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
7774 					 info);
7775 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7776 						     fen6_info);
7777 		if (err)
7778 			return err;
7779 		break;
7780 	}
7781 
7782 	return 0;
7783 }
7784 
7785 static void
7786 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7787 			    struct fib_notifier_info *info)
7788 {
7789 	switch (fib_work->event) {
7790 	case FIB_EVENT_ENTRY_REPLACE:
7791 	case FIB_EVENT_ENTRY_ADD:
7792 	case FIB_EVENT_ENTRY_DEL:
7793 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7794 		mr_cache_hold(fib_work->men_info.mfc);
7795 		break;
7796 	case FIB_EVENT_VIF_ADD:
7797 	case FIB_EVENT_VIF_DEL:
7798 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7799 		dev_hold(fib_work->ven_info.dev);
7800 		break;
7801 	}
7802 }
7803 
7804 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7805 					  struct fib_notifier_info *info,
7806 					  struct mlxsw_sp *mlxsw_sp)
7807 {
7808 	struct netlink_ext_ack *extack = info->extack;
7809 	struct fib_rule_notifier_info *fr_info;
7810 	struct fib_rule *rule;
7811 	int err = 0;
7812 
7813 	/* Nothing to do for rule deletions at the moment */
7814 	if (event == FIB_EVENT_RULE_DEL)
7815 		return 0;
7816 
7817 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
7818 	rule = fr_info->rule;
7819 
7820 	/* Rule only affects locally generated traffic */
7821 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7822 		return 0;
7823 
7824 	switch (info->family) {
7825 	case AF_INET:
7826 		if (!fib4_rule_default(rule) && !rule->l3mdev)
7827 			err = -EOPNOTSUPP;
7828 		break;
7829 	case AF_INET6:
7830 		if (!fib6_rule_default(rule) && !rule->l3mdev)
7831 			err = -EOPNOTSUPP;
7832 		break;
7833 	case RTNL_FAMILY_IPMR:
7834 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
7835 			err = -EOPNOTSUPP;
7836 		break;
7837 	case RTNL_FAMILY_IP6MR:
7838 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7839 			err = -EOPNOTSUPP;
7840 		break;
7841 	}
7842 
7843 	if (err < 0)
7844 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7845 
7846 	return err;
7847 }
7848 
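/* FIB events are delivered in atomic context, so the notifier below only
 * validates the event and copies it, together with the references needed
 * to keep it alive, into a work item. The actual programming is done by
 * the work handlers above, under the router lock.
 */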
7849 /* Called with rcu_read_lock() */
7850 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7851 				     unsigned long event, void *ptr)
7852 {
7853 	struct mlxsw_sp_fib_event_work *fib_work;
7854 	struct fib_notifier_info *info = ptr;
7855 	struct mlxsw_sp_router *router;
7856 	int err;
7857 
7858 	if (info->family != AF_INET && info->family != AF_INET6 &&
7859 	    info->family != RTNL_FAMILY_IPMR &&
7860 	    info->family != RTNL_FAMILY_IP6MR)
7861 		return NOTIFY_DONE;
7862 
7863 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7864 
7865 	switch (event) {
7866 	case FIB_EVENT_RULE_ADD:
7867 	case FIB_EVENT_RULE_DEL:
7868 		err = mlxsw_sp_router_fib_rule_event(event, info,
7869 						     router->mlxsw_sp);
7870 		return notifier_from_errno(err);
7871 	case FIB_EVENT_ENTRY_ADD:
7872 	case FIB_EVENT_ENTRY_REPLACE:
7873 	case FIB_EVENT_ENTRY_APPEND:
7874 		if (info->family == AF_INET) {
7875 			struct fib_entry_notifier_info *fen_info = ptr;
7876 
7877 			if (fen_info->fi->fib_nh_is_v6) {
7878 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7879 				return notifier_from_errno(-EINVAL);
7880 			}
7881 		}
7882 		break;
7883 	}
7884 
7885 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
7886 	if (!fib_work)
7887 		return NOTIFY_BAD;
7888 
7889 	fib_work->mlxsw_sp = router->mlxsw_sp;
7890 	fib_work->event = event;
7891 
7892 	switch (info->family) {
7893 	case AF_INET:
7894 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
7895 		mlxsw_sp_router_fib4_event(fib_work, info);
7896 		break;
7897 	case AF_INET6:
7898 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
7899 		err = mlxsw_sp_router_fib6_event(fib_work, info);
7900 		if (err)
7901 			goto err_fib_event;
7902 		break;
7903 	case RTNL_FAMILY_IP6MR:
7904 	case RTNL_FAMILY_IPMR:
7905 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
7906 		mlxsw_sp_router_fibmr_event(fib_work, info);
7907 		break;
7908 	}
7909 
7910 	mlxsw_core_schedule_work(&fib_work->work);
7911 
7912 	return NOTIFY_DONE;
7913 
7914 err_fib_event:
7915 	kfree(fib_work);
7916 	return NOTIFY_BAD;
7917 }
7918 
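/* RIFs are not hashed by their netdevice; resolving a netdevice to its RIF
 * is a linear walk over the RIF array, bounded by the MAX_RIFS resource.
 */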
7919 static struct mlxsw_sp_rif *
7920 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7921 			 const struct net_device *dev)
7922 {
7923 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7924 	int i;
7925 
7926 	for (i = 0; i < max_rifs; i++)
7927 		if (mlxsw_sp->router->rifs[i] &&
7928 		    mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
7929 			return mlxsw_sp->router->rifs[i];
7930 
7931 	return NULL;
7932 }
7933 
7934 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7935 			 const struct net_device *dev)
7936 {
7937 	struct mlxsw_sp_rif *rif;
7938 
7939 	mutex_lock(&mlxsw_sp->router->lock);
7940 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7941 	mutex_unlock(&mlxsw_sp->router->lock);
7942 
7943 	return rif != NULL;
7944 }
7945 
7946 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7947 {
7948 	struct mlxsw_sp_rif *rif;
7949 	u16 vid = 0;
7950 
7951 	mutex_lock(&mlxsw_sp->router->lock);
7952 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7953 	if (!rif)
7954 		goto out;
7955 
7956 	/* We only return the VID for VLAN RIFs. Otherwise we return an
7957 	 * invalid value (0).
7958 	 */
7959 	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7960 		goto out;
7961 
7962 	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7963 
7964 out:
7965 	mutex_unlock(&mlxsw_sp->router->lock);
7966 	return vid;
7967 }
7968 
7969 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7970 {
7971 	char ritr_pl[MLXSW_REG_RITR_LEN];
7972 	int err;
7973 
7974 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7975 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7976 	if (err)
7977 		return err;
7978 
7979 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
7980 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7981 }
7982 
7983 static int mlxsw_sp_router_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
7984 					 struct mlxsw_sp_rif *rif)
7985 {
7986 	int err;
7987 
7988 	err = mlxsw_sp_neigh_rif_made_sync(mlxsw_sp, rif);
7989 	if (err)
7990 		return err;
7991 
7992 	err = mlxsw_sp_nexthop_rif_made_sync(mlxsw_sp, rif);
7993 	if (err)
7994 		goto err_nexthop;
7995 
7996 	return 0;
7997 
7998 err_nexthop:
7999 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
8000 	return err;
8001 }
8002 
8003 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
8004 					  struct mlxsw_sp_rif *rif)
8005 {
8006 	/* Signal to nexthop cleanup that the RIF is going away. */
8007 	rif->crif->rif = NULL;
8008 
8009 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
8010 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
8011 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
8012 }
8013 
8014 static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8015 {
8016 	struct inet6_dev *inet6_dev;
8017 	struct in_device *idev;
8018 
8019 	idev = __in_dev_get_rcu(dev);
8020 	if (idev && idev->ifa_list)
8021 		return false;
8022 
8023 	inet6_dev = __in6_dev_get(dev);
8024 	if (inet6_dev && !list_empty(&inet6_dev->addr_list))
8025 		return false;
8026 
8027 	return true;
8028 }
8029 
8030 static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev)
8031 {
8032 	bool addr_list_empty;
8033 
8034 	rcu_read_lock();
8035 	addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev);
8036 	rcu_read_unlock();
8037 
8038 	return addr_list_empty;
8039 }
8040 
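/* Decide whether an address event requires RIF configuration: an address
 * being added (NETDEV_UP) calls for a new RIF only if none exists yet,
 * while an address being removed (NETDEV_DOWN) tears the RIF down only
 * once the device has no IPv4 or IPv6 addresses left.
 */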
8041 static bool
8042 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
8043 			   unsigned long event)
8044 {
8045 	bool addr_list_empty;
8046 
8047 	switch (event) {
8048 	case NETDEV_UP:
8049 		return rif == NULL;
8050 	case NETDEV_DOWN:
8051 		addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev);
8052 
8053 		/* macvlans do not have a RIF, but rather piggyback on the
8054 		 * RIF of their lower device.
8055 		 */
8056 		if (netif_is_macvlan(dev) && addr_list_empty)
8057 			return true;
8058 
8059 		if (rif && addr_list_empty &&
8060 		    !netif_is_l3_slave(mlxsw_sp_rif_dev(rif)))
8061 			return true;
8062 		/* It is possible we already removed the RIF ourselves
8063 		 * if it was assigned to a netdev that is now a bridge
8064 		 * or LAG slave.
8065 		 */
8066 		return false;
8067 	}
8068 
8069 	return false;
8070 }
8071 
8072 static enum mlxsw_sp_rif_type
8073 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
8074 		      const struct net_device *dev)
8075 {
8076 	enum mlxsw_sp_fid_type type;
8077 
8078 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
8079 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
8080 
8081 	/* Otherwise RIF type is derived from the type of the underlying FID. */
8082 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
8083 		type = MLXSW_SP_FID_TYPE_8021Q;
8084 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
8085 		type = MLXSW_SP_FID_TYPE_8021Q;
8086 	else if (netif_is_bridge_master(dev))
8087 		type = MLXSW_SP_FID_TYPE_8021D;
8088 	else
8089 		type = MLXSW_SP_FID_TYPE_RFID;
8090 
8091 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
8092 }
8093 
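/* RIF indexes are allocated from a gen_pool. gen_pool_alloc() returns 0 on
 * failure, so the pool is based at a non-zero offset
 * (MLXSW_SP_ROUTER_GENALLOC_OFFSET), which is subtracted again to obtain
 * the real index. rif_entries is 2 for RIF types that consume a double
 * entry, and the pool is expected to hand out suitably aligned indexes.
 */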
8094 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
8095 				    u8 rif_entries)
8096 {
8097 	*p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
8098 				      rif_entries);
8099 	if (*p_rif_index == 0)
8100 		return -ENOBUFS;
8101 	*p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
8102 
8103 	/* RIF indexes must be aligned to the allocation size. */
8104 	WARN_ON_ONCE(*p_rif_index % rif_entries);
8105 
8106 	return 0;
8107 }
8108 
8109 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8110 				    u8 rif_entries)
8111 {
8112 	gen_pool_free(mlxsw_sp->router->rifs_table,
8113 		      MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
8114 }
8115 
8116 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
8117 					       u16 vr_id,
8118 					       struct mlxsw_sp_crif *crif)
8119 {
8120 	struct net_device *l3_dev = crif ? crif->key.dev : NULL;
8121 	struct mlxsw_sp_rif *rif;
8122 
8123 	rif = kzalloc(rif_size, GFP_KERNEL);
8124 	if (!rif)
8125 		return NULL;
8126 
8127 	INIT_LIST_HEAD(&rif->neigh_list);
8128 	if (l3_dev) {
8129 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
8130 		rif->mtu = l3_dev->mtu;
8131 	}
8132 	rif->vr_id = vr_id;
8133 	rif->rif_index = rif_index;
8134 	if (crif) {
8135 		rif->crif = crif;
8136 		crif->rif = rif;
8137 	}
8138 
8139 	return rif;
8140 }
8141 
8142 static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif)
8143 {
8144 	WARN_ON(!list_empty(&rif->neigh_list));
8145 
8146 	if (rif->crif)
8147 		rif->crif->rif = NULL;
8148 	kfree(rif);
8149 }
8150 
8151 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
8152 					   u16 rif_index)
8153 {
8154 	return mlxsw_sp->router->rifs[rif_index];
8155 }
8156 
8157 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
8158 {
8159 	return rif->rif_index;
8160 }
8161 
8162 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8163 {
8164 	return lb_rif->common.rif_index;
8165 }
8166 
8167 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8168 {
8169 	struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common);
8170 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
8171 	struct mlxsw_sp_vr *ul_vr;
8172 
8173 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
8174 	if (WARN_ON(IS_ERR(ul_vr)))
8175 		return 0;
8176 
8177 	return ul_vr->id;
8178 }
8179 
8180 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
8181 {
8182 	return lb_rif->ul_rif_id;
8183 }
8184 
8185 static bool
8186 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
8187 {
8188 	return mlxsw_sp_rif_counter_valid_get(rif,
8189 					      MLXSW_SP_RIF_COUNTER_EGRESS) &&
8190 	       mlxsw_sp_rif_counter_valid_get(rif,
8191 					      MLXSW_SP_RIF_COUNTER_INGRESS);
8192 }
8193 
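/* Enable L3 HW stats: allocate both ingress and egress counters and flush
 * any stale values out of the hardware, so that subsequent reads report
 * deltas accumulated from this point on.
 */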
8194 static int
8195 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
8196 {
8197 	int err;
8198 
8199 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8200 	if (err)
8201 		return err;
8202 
8203 	/* Clear stale data. */
8204 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8205 					       MLXSW_SP_RIF_COUNTER_INGRESS,
8206 					       NULL);
8207 	if (err)
8208 		goto err_clear_ingress;
8209 
8210 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8211 	if (err)
8212 		goto err_alloc_egress;
8213 
8214 	/* Clear stale data. */
8215 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8216 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8217 					       NULL);
8218 	if (err)
8219 		goto err_clear_egress;
8220 
8221 	return 0;
8222 
8223 err_clear_egress:
8224 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8225 err_alloc_egress:
8226 err_clear_ingress:
8227 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8228 	return err;
8229 }
8230 
8231 static void
8232 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
8233 {
8234 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
8235 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
8236 }
8237 
8238 static void
8239 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
8240 					  struct netdev_notifier_offload_xstats_info *info)
8241 {
8242 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8243 		return;
8244 	netdev_offload_xstats_report_used(info->report_used);
8245 }
8246 
8247 static int
8248 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
8249 				    struct rtnl_hw_stats64 *p_stats)
8250 {
8251 	struct mlxsw_sp_rif_counter_set_basic ingress;
8252 	struct mlxsw_sp_rif_counter_set_basic egress;
8253 	int err;
8254 
8255 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8256 					       MLXSW_SP_RIF_COUNTER_INGRESS,
8257 					       &ingress);
8258 	if (err)
8259 		return err;
8260 
8261 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
8262 					       MLXSW_SP_RIF_COUNTER_EGRESS,
8263 					       &egress);
8264 	if (err)
8265 		return err;
8266 
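/* Sum up the unicast, multicast and broadcast "good" counters of the basic
 * counter set; note that the multicast statistic reported below also
 * includes broadcast packets.
 */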
8267 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)		\
8268 		((SET.good_unicast_ ## SFX) +		\
8269 		 (SET.good_multicast_ ## SFX) +		\
8270 		 (SET.good_broadcast_ ## SFX))
8271 
8272 	p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8273 	p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8274 	p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8275 	p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8276 	p_stats->rx_errors = ingress.error_packets;
8277 	p_stats->tx_errors = egress.error_packets;
8278 	p_stats->rx_dropped = ingress.discard_packets;
8279 	p_stats->tx_dropped = egress.discard_packets;
8280 	p_stats->multicast = ingress.good_multicast_packets +
8281 			     ingress.good_broadcast_packets;
8282 
8283 #undef MLXSW_SP_ROUTER_ALL_GOOD
8284 
8285 	return 0;
8286 }
8287 
8288 static int
8289 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8290 					   struct netdev_notifier_offload_xstats_info *info)
8291 {
8292 	struct rtnl_hw_stats64 stats = {};
8293 	int err;
8294 
8295 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8296 		return 0;
8297 
8298 	err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8299 	if (err)
8300 		return err;
8301 
8302 	netdev_offload_xstats_report_delta(info->report_delta, &stats);
8303 	return 0;
8304 }
8305 
8306 struct mlxsw_sp_router_hwstats_notify_work {
8307 	struct work_struct work;
8308 	struct net_device *dev;
8309 };
8310 
8311 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8312 {
8313 	struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8314 		container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8315 			     work);
8316 
8317 	rtnl_lock();
8318 	rtnl_offload_xstats_notify(hws_work->dev);
8319 	rtnl_unlock();
8320 	dev_put(hws_work->dev);
8321 	kfree(hws_work);
8322 }
8323 
8324 static void
8325 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8326 {
8327 	struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8328 
8329 	/* To collect the notification payload, the core ends up sending
8330 	 * another notifier message, which would deadlock when trying to
8331 	 * acquire the router lock again. Postpone the notification to a
8332 	 * work item instead.
8333 	 */
8334 
8335 	hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8336 	if (!hws_work)
8337 		return;
8338 
8339 	INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8340 	dev_hold(dev);
8341 	hws_work->dev = dev;
8342 	mlxsw_core_schedule_work(&hws_work->work);
8343 }
8344 
8345 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8346 {
8347 	return mlxsw_sp_rif_dev(rif)->ifindex;
8348 }
8349 
8350 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8351 {
8352 	return !!mlxsw_sp_rif_dev(rif);
8353 }
8354 
8355 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8356 			 const struct net_device *dev)
8357 {
8358 	return mlxsw_sp_rif_dev(rif) == dev;
8359 }
8360 
8361 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8362 {
8363 	struct rtnl_hw_stats64 stats = {};
8364 
8365 	if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8366 		netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif),
8367 						 NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8368 						 &stats);
8369 }
8370 
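/* Create a RIF for a netdevice: derive the RIF type (and thus ops) from the
 * netdevice, bind the RIF to the virtual router of the device's FIB table,
 * allocate a RIF index, back the RIF by the device's CRIF and, depending on
 * the type, by a FID, configure the hardware, and finally announce the new
 * RIF to the neighbour, nexthop and multicast router code.
 */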
8371 static struct mlxsw_sp_rif *
8372 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8373 		    const struct mlxsw_sp_rif_params *params,
8374 		    struct netlink_ext_ack *extack)
8375 {
8376 	u8 rif_entries = params->double_entry ? 2 : 1;
8377 	u32 tb_id = l3mdev_fib_table(params->dev);
8378 	const struct mlxsw_sp_rif_ops *ops;
8379 	struct mlxsw_sp_fid *fid = NULL;
8380 	enum mlxsw_sp_rif_type type;
8381 	struct mlxsw_sp_crif *crif;
8382 	struct mlxsw_sp_rif *rif;
8383 	struct mlxsw_sp_vr *vr;
8384 	u16 rif_index;
8385 	int i, err;
8386 
8387 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8388 	ops = mlxsw_sp->router->rif_ops_arr[type];
8389 
8390 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8391 	if (IS_ERR(vr))
8392 		return ERR_CAST(vr);
8393 	vr->rif_count++;
8394 
8395 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8396 	if (err) {
8397 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8398 		goto err_rif_index_alloc;
8399 	}
8400 
8401 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev);
8402 	if (WARN_ON(!crif)) {
8403 		err = -ENOENT;
8404 		goto err_crif_lookup;
8405 	}
8406 
8407 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif);
8408 	if (!rif) {
8409 		err = -ENOMEM;
8410 		goto err_rif_alloc;
8411 	}
8412 	dev_hold(params->dev);
8413 	mlxsw_sp->router->rifs[rif_index] = rif;
8414 	rif->mlxsw_sp = mlxsw_sp;
8415 	rif->ops = ops;
8416 	rif->rif_entries = rif_entries;
8417 
8418 	if (ops->fid_get) {
8419 		fid = ops->fid_get(rif, params, extack);
8420 		if (IS_ERR(fid)) {
8421 			err = PTR_ERR(fid);
8422 			goto err_fid_get;
8423 		}
8424 		rif->fid = fid;
8425 	}
8426 
8427 	if (ops->setup)
8428 		ops->setup(rif, params);
8429 
8430 	err = ops->configure(rif, extack);
8431 	if (err)
8432 		goto err_configure;
8433 
8434 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8435 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8436 		if (err)
8437 			goto err_mr_rif_add;
8438 	}
8439 
8440 	err = mlxsw_sp_router_rif_made_sync(mlxsw_sp, rif);
8441 	if (err)
8442 		goto err_rif_made_sync;
8443 
8444 	if (netdev_offload_xstats_enabled(params->dev,
8445 					  NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8446 		err = mlxsw_sp_router_port_l3_stats_enable(rif);
8447 		if (err)
8448 			goto err_stats_enable;
8449 		mlxsw_sp_router_hwstats_notify_schedule(params->dev);
8450 	} else {
8451 		mlxsw_sp_rif_counters_alloc(rif);
8452 	}
8453 
8454 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8455 	return rif;
8456 
8457 err_stats_enable:
8458 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8459 err_rif_made_sync:
8460 err_mr_rif_add:
8461 	for (i--; i >= 0; i--)
8462 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8463 	ops->deconfigure(rif);
8464 err_configure:
8465 	if (fid)
8466 		mlxsw_sp_fid_put(fid);
8467 err_fid_get:
8468 	mlxsw_sp->router->rifs[rif_index] = NULL;
8469 	dev_put(params->dev);
8470 	mlxsw_sp_rif_free(rif);
8471 err_rif_alloc:
8472 err_crif_lookup:
8473 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8474 err_rif_index_alloc:
8475 	vr->rif_count--;
8476 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8477 	return ERR_PTR(err);
8478 }
8479 
8480 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8481 {
8482 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
8483 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
8484 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8485 	struct mlxsw_sp_crif *crif = rif->crif;
8486 	struct mlxsw_sp_fid *fid = rif->fid;
8487 	u8 rif_entries = rif->rif_entries;
8488 	u16 rif_index = rif->rif_index;
8489 	struct mlxsw_sp_vr *vr;
8490 	int i;
8491 
8492 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8493 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8494 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
8495 
8496 	if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8497 		mlxsw_sp_rif_push_l3_stats(rif);
8498 		mlxsw_sp_router_port_l3_stats_disable(rif);
8499 		mlxsw_sp_router_hwstats_notify_schedule(dev);
8500 	} else {
8501 		mlxsw_sp_rif_counters_free(rif);
8502 	}
8503 
8504 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8505 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8506 	ops->deconfigure(rif);
8507 	if (fid)
8508 		/* Loopback RIFs are not associated with a FID. */
8509 		mlxsw_sp_fid_put(fid);
8510 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8511 	dev_put(dev);
8512 	mlxsw_sp_rif_free(rif);
8513 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8514 	vr->rif_count--;
8515 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8516 
8517 	if (crif->can_destroy)
8518 		mlxsw_sp_crif_free(crif);
8519 }
8520 
8521 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8522 				 struct net_device *dev)
8523 {
8524 	struct mlxsw_sp_rif *rif;
8525 
8526 	mutex_lock(&mlxsw_sp->router->lock);
8527 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8528 	if (!rif)
8529 		goto out;
8530 	mlxsw_sp_rif_destroy(rif);
8531 out:
8532 	mutex_unlock(&mlxsw_sp->router->lock);
8533 }
8534 
8535 static void mlxsw_sp_rif_destroy_vlan_upper(struct mlxsw_sp *mlxsw_sp,
8536 					    struct net_device *br_dev,
8537 					    u16 vid)
8538 {
8539 	struct net_device *upper_dev;
8540 	struct mlxsw_sp_crif *crif;
8541 
8542 	rcu_read_lock();
8543 	upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q), vid);
8544 	rcu_read_unlock();
8545 
8546 	if (!upper_dev)
8547 		return;
8548 
8549 	crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, upper_dev);
8550 	if (!crif || !crif->rif)
8551 		return;
8552 
8553 	mlxsw_sp_rif_destroy(crif->rif);
8554 }
8555 
8556 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8557 					  struct net_device *l3_dev,
8558 					  int lower_pvid,
8559 					  unsigned long event,
8560 					  struct netlink_ext_ack *extack);
8561 
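/* Handle a bridge PVID change. The PVID determines which VID the bridge's
 * 802.1Q RIF is keyed by, so the change may require migrating the RIF: any
 * RIF of a VLAN upper that clashes with the new PVID is destroyed, a RIF
 * keyed by the new PVID is created and the old one is migrated into it, and
 * the VLAN upper holding the old PVID may then set up a RIF of its own.
 */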
8562 int mlxsw_sp_router_bridge_vlan_add(struct mlxsw_sp *mlxsw_sp,
8563 				    struct net_device *br_dev,
8564 				    u16 new_vid, bool is_pvid,
8565 				    struct netlink_ext_ack *extack)
8566 {
8567 	struct mlxsw_sp_rif *old_rif;
8568 	struct mlxsw_sp_rif *new_rif;
8569 	struct net_device *upper_dev;
8570 	u16 old_pvid = 0;
8571 	u16 new_pvid;
8572 	int err = 0;
8573 
8574 	mutex_lock(&mlxsw_sp->router->lock);
8575 	old_rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
8576 	if (old_rif) {
8577 		/* If the RIF on the bridge is not a VLAN RIF, we shouldn't have
8578 		 * gotten a PVID notification.
8579 		 */
8580 		if (WARN_ON(old_rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN))
8581 			old_rif = NULL;
8582 		else
8583 			old_pvid = mlxsw_sp_fid_8021q_vid(old_rif->fid);
8584 	}
8585 
8586 	if (is_pvid)
8587 		new_pvid = new_vid;
8588 	else if (old_pvid == new_vid)
8589 		new_pvid = 0;
8590 	else
8591 		goto out;
8592 
8593 	if (old_pvid == new_pvid)
8594 		goto out;
8595 
8596 	if (new_pvid) {
8597 		struct mlxsw_sp_rif_params params = {
8598 			.dev = br_dev,
8599 			.vid = new_pvid,
8600 		};
8601 
8602 		/* If a VLAN upper exists with the same VID as the new PVID,
8603 		 * destroy its RIF, if it has one.
8604 		 */
8605 		mlxsw_sp_rif_destroy_vlan_upper(mlxsw_sp, br_dev, new_pvid);
8606 
8607 		if (mlxsw_sp_dev_addr_list_empty(br_dev))
8608 			goto out;
8609 		new_rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8610 		if (IS_ERR(new_rif)) {
8611 			err = PTR_ERR(new_rif);
8612 			goto out;
8613 		}
8614 
8615 		if (old_pvid)
8616 			mlxsw_sp_rif_migrate_destroy(mlxsw_sp, old_rif, new_rif,
8617 						     true);
8618 	} else {
8619 		mlxsw_sp_rif_destroy(old_rif);
8620 	}
8621 
8622 	if (old_pvid) {
8623 		rcu_read_lock();
8624 		upper_dev = __vlan_find_dev_deep_rcu(br_dev, htons(ETH_P_8021Q),
8625 						     old_pvid);
8626 		rcu_read_unlock();
8627 		if (upper_dev)
8628 			err = mlxsw_sp_inetaddr_bridge_event(mlxsw_sp,
8629 							     upper_dev,
8630 							     new_pvid,
8631 							     NETDEV_UP, extack);
8632 	}
8633 
8634 out:
8635 	mutex_unlock(&mlxsw_sp->router->lock);
8636 	return err;
8637 }
8638 
8639 static void
8640 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8641 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8642 {
8643 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8644 
8645 	params->vid = mlxsw_sp_port_vlan->vid;
8646 	params->lag = mlxsw_sp_port->lagged;
8647 	if (params->lag)
8648 		params->lag_id = mlxsw_sp_port->lag_id;
8649 	else
8650 		params->system_port = mlxsw_sp_port->local_port;
8651 }
8652 
8653 static struct mlxsw_sp_rif_subport *
8654 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8655 {
8656 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
8657 }
8658 
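/* Sub-port RIFs are reference-counted: every {port, VID} behind the same L3
 * netdevice (e.g. the member ports of a LAG) shares one RIF, which is only
 * destroyed when the last user puts its reference.
 */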
8659 static struct mlxsw_sp_rif *
8660 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8661 			 const struct mlxsw_sp_rif_params *params,
8662 			 struct netlink_ext_ack *extack)
8663 {
8664 	struct mlxsw_sp_rif_subport *rif_subport;
8665 	struct mlxsw_sp_rif *rif;
8666 
8667 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8668 	if (!rif)
8669 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8670 
8671 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8672 	refcount_inc(&rif_subport->ref_count);
8673 	return rif;
8674 }
8675 
8676 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8677 {
8678 	struct mlxsw_sp_rif_subport *rif_subport;
8679 
8680 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8681 	if (!refcount_dec_and_test(&rif_subport->ref_count))
8682 		return;
8683 
8684 	mlxsw_sp_rif_destroy(rif);
8685 }
8686 
8687 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8688 						struct mlxsw_sp_rif_mac_profile *profile,
8689 						struct netlink_ext_ack *extack)
8690 {
8691 	u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8692 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8693 	int id;
8694 
8695 	id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8696 		       max_rif_mac_profiles, GFP_KERNEL);
8697 
8698 	if (id >= 0) {
8699 		profile->id = id;
8700 		return 0;
8701 	}
8702 
8703 	if (id == -ENOSPC)
8704 		NL_SET_ERR_MSG_MOD(extack,
8705 				   "Exceeded number of supported router interface MAC profiles");
8706 
8707 	return id;
8708 }
8709 
8710 static struct mlxsw_sp_rif_mac_profile *
8711 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8712 {
8713 	struct mlxsw_sp_rif_mac_profile *profile;
8714 
8715 	profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8716 			     mac_profile);
8717 	WARN_ON(!profile);
8718 	return profile;
8719 }
8720 
8721 static struct mlxsw_sp_rif_mac_profile *
8722 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8723 {
8724 	struct mlxsw_sp_rif_mac_profile *profile;
8725 
8726 	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8727 	if (!profile)
8728 		return NULL;
8729 
8730 	ether_addr_copy(profile->mac_prefix, mac);
8731 	refcount_set(&profile->ref_count, 1);
8732 	return profile;
8733 }
8734 
8735 static struct mlxsw_sp_rif_mac_profile *
8736 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8737 {
8738 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8739 	struct mlxsw_sp_rif_mac_profile *profile;
8740 	int id;
8741 
8742 	idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8743 		if (ether_addr_equal_masked(profile->mac_prefix, mac,
8744 					    mlxsw_sp->mac_mask))
8745 			return profile;
8746 	}
8747 
8748 	return NULL;
8749 }
8750 
8751 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
8752 {
8753 	const struct mlxsw_sp *mlxsw_sp = priv;
8754 
8755 	return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
8756 }
8757 
8758 static u64 mlxsw_sp_rifs_occ_get(void *priv)
8759 {
8760 	const struct mlxsw_sp *mlxsw_sp = priv;
8761 
8762 	return atomic_read(&mlxsw_sp->router->rifs_count);
8763 }
8764 
8765 static struct mlxsw_sp_rif_mac_profile *
8766 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8767 				struct netlink_ext_ack *extack)
8768 {
8769 	struct mlxsw_sp_rif_mac_profile *profile;
8770 	int err;
8771 
8772 	profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8773 	if (!profile)
8774 		return ERR_PTR(-ENOMEM);
8775 
8776 	err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8777 	if (err)
8778 		goto profile_index_alloc_err;
8779 
8780 	atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8781 	return profile;
8782 
8783 profile_index_alloc_err:
8784 	kfree(profile);
8785 	return ERR_PTR(err);
8786 }
8787 
8788 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8789 					     u8 mac_profile)
8790 {
8791 	struct mlxsw_sp_rif_mac_profile *profile;
8792 
8793 	atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8794 	profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8795 	kfree(profile);
8796 }
8797 
8798 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8799 					const char *mac, u8 *p_mac_profile,
8800 					struct netlink_ext_ack *extack)
8801 {
8802 	struct mlxsw_sp_rif_mac_profile *profile;
8803 
8804 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8805 	if (profile) {
8806 		refcount_inc(&profile->ref_count);
8807 		goto out;
8808 	}
8809 
8810 	profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8811 	if (IS_ERR(profile))
8812 		return PTR_ERR(profile);
8813 
8814 out:
8815 	*p_mac_profile = profile->id;
8816 	return 0;
8817 }
8818 
8819 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8820 					 u8 mac_profile)
8821 {
8822 	struct mlxsw_sp_rif_mac_profile *profile;
8823 
8824 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8825 			   mac_profile);
8826 	if (WARN_ON(!profile))
8827 		return;
8828 
8829 	if (!refcount_dec_and_test(&profile->ref_count))
8830 		return;
8831 
8832 	mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8833 }
8834 
8835 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8836 {
8837 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8838 	struct mlxsw_sp_rif_mac_profile *profile;
8839 
8840 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8841 			   rif->mac_profile_id);
8842 	if (WARN_ON(!profile))
8843 		return false;
8844 
8845 	return refcount_read(&profile->ref_count) > 1;
8846 }
8847 
8848 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8849 					 const char *new_mac)
8850 {
8851 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8852 	struct mlxsw_sp_rif_mac_profile *profile;
8853 
8854 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8855 			   rif->mac_profile_id);
8856 	if (WARN_ON(!profile))
8857 		return -EINVAL;
8858 
8859 	ether_addr_copy(profile->mac_prefix, new_mac);
8860 	return 0;
8861 }
8862 
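/* Replace the MAC profile of a RIF whose address is being changed. If this
 * RIF is the profile's only user and no other profile already matches the
 * new MAC, the profile is edited in place; otherwise a matching profile is
 * taken (or created) and the old one is released.
 */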
8863 static int
8864 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8865 				 struct mlxsw_sp_rif *rif,
8866 				 const char *new_mac,
8867 				 struct netlink_ext_ack *extack)
8868 {
8869 	u8 mac_profile;
8870 	int err;
8871 
8872 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8873 	    !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8874 		return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8875 
8876 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8877 					   &mac_profile, extack);
8878 	if (err)
8879 		return err;
8880 
8881 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8882 	rif->mac_profile_id = mac_profile;
8883 	return 0;
8884 }
8885 
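/* Join a {port, VID} to a router interface: take a reference on the
 * sub-port RIF of the L3 netdevice, map the resulting rFID to the
 * {port, VID}, disable learning for the VID and force its STP state to
 * forwarding, since the VLAN is now routed rather than switched.
 */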
8886 static int
8887 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8888 				 struct net_device *l3_dev,
8889 				 struct netlink_ext_ack *extack)
8890 {
8891 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8892 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8893 	struct mlxsw_sp_rif_params params;
8894 	u16 vid = mlxsw_sp_port_vlan->vid;
8895 	struct mlxsw_sp_rif *rif;
8896 	struct mlxsw_sp_fid *fid;
8897 	int err;
8898 
8899 	params = (struct mlxsw_sp_rif_params) {
8900 		.dev = l3_dev,
8901 		.vid = vid,
8902 	};
8903 
8904 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8905 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8906 	if (IS_ERR(rif))
8907 		return PTR_ERR(rif);
8908 
8909 	/* FID was already created, just take a reference */
8910 	fid = rif->ops->fid_get(rif, &params, extack);
8911 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8912 	if (err)
8913 		goto err_fid_port_vid_map;
8914 
8915 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8916 	if (err)
8917 		goto err_port_vid_learning_set;
8918 
8919 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8920 					BR_STATE_FORWARDING);
8921 	if (err)
8922 		goto err_port_vid_stp_set;
8923 
8924 	mlxsw_sp_port_vlan->fid = fid;
8925 
8926 	return 0;
8927 
8928 err_port_vid_stp_set:
8929 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8930 err_port_vid_learning_set:
8931 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8932 err_fid_port_vid_map:
8933 	mlxsw_sp_fid_put(fid);
8934 	mlxsw_sp_rif_subport_put(rif);
8935 	return err;
8936 }
8937 
8938 static void
8939 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8940 {
8941 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8942 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8943 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8944 	u16 vid = mlxsw_sp_port_vlan->vid;
8945 
8946 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8947 		return;
8948 
8949 	mlxsw_sp_port_vlan->fid = NULL;
8950 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8951 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8952 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8953 	mlxsw_sp_fid_put(fid);
8954 	mlxsw_sp_rif_subport_put(rif);
8955 }
8956 
8957 static int
8958 mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8959 					struct net_device *l3_dev,
8960 					struct netlink_ext_ack *extack)
8961 {
8962 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8963 
8964 	lockdep_assert_held(&mlxsw_sp->router->lock);
8965 
8966 	if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev))
8967 		return 0;
8968 
8969 	return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8970 						extack);
8971 }
8972 
8973 void
8974 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8975 {
8976 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8977 
8978 	mutex_lock(&mlxsw_sp->router->lock);
8979 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8980 	mutex_unlock(&mlxsw_sp->router->lock);
8981 }
8982 
8983 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8984 					     struct net_device *port_dev,
8985 					     unsigned long event, u16 vid,
8986 					     struct netlink_ext_ack *extack)
8987 {
8988 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8989 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8990 
8991 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8992 	if (WARN_ON(!mlxsw_sp_port_vlan))
8993 		return -EINVAL;
8994 
8995 	switch (event) {
8996 	case NETDEV_UP:
8997 		return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8998 							l3_dev, extack);
8999 	case NETDEV_DOWN:
9000 		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9001 		break;
9002 	}
9003 
9004 	return 0;
9005 }
9006 
9007 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
9008 					unsigned long event, bool nomaster,
9009 					struct netlink_ext_ack *extack)
9010 {
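	/* When the port is enslaved to a bridge or a LAG, its addresses
	 * are handled through the master device. Only proceed if the port
	 * has no master, or if the event was generated as part of losing
	 * one.
	 */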
9011 	if (!nomaster && (netif_is_any_bridge_port(port_dev) ||
9012 			  netif_is_lag_port(port_dev)))
9013 		return 0;
9014 
9015 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
9016 						 MLXSW_SP_DEFAULT_VID, extack);
9017 }
9018 
9019 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
9020 					 struct net_device *lag_dev,
9021 					 unsigned long event, u16 vid,
9022 					 struct netlink_ext_ack *extack)
9023 {
9024 	struct net_device *port_dev;
9025 	struct list_head *iter;
9026 	int err;
9027 
9028 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
9029 		if (mlxsw_sp_port_dev_check(port_dev)) {
9030 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
9031 								port_dev,
9032 								event, vid,
9033 								extack);
9034 			if (err)
9035 				return err;
9036 		}
9037 	}
9038 
9039 	return 0;
9040 }
9041 
9042 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
9043 				       unsigned long event, bool nomaster,
9044 				       struct netlink_ext_ack *extack)
9045 {
9046 	if (!nomaster && netif_is_bridge_port(lag_dev))
9047 		return 0;
9048 
9049 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
9050 					     MLXSW_SP_DEFAULT_VID, extack);
9051 }
9052 
9053 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
9054 					  struct net_device *l3_dev,
9055 					  int lower_pvid,
9056 					  unsigned long event,
9057 					  struct netlink_ext_ack *extack)
9058 {
9059 	struct mlxsw_sp_rif_params params = {
9060 		.dev = l3_dev,
9061 	};
9062 	struct mlxsw_sp_rif *rif;
9063 	int err;
9064 
9065 	switch (event) {
9066 	case NETDEV_UP:
9067 		if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
9068 			u16 proto;
9069 
9070 			br_vlan_get_proto(l3_dev, &proto);
9071 			if (proto == ETH_P_8021AD) {
9072 				NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
9073 				return -EOPNOTSUPP;
9074 			}
9075 			err = br_vlan_get_pvid(l3_dev, &params.vid);
9076 			if (err)
9077 				return err;
9078 			if (!params.vid)
9079 				return 0;
9080 		} else if (is_vlan_dev(l3_dev)) {
9081 			params.vid = vlan_dev_vlan_id(l3_dev);
9082 
9083 			/* If the VID matches the PVID of the bridge below, the
9084 			 * bridge owns the RIF for this VLAN. Don't do anything.
9085 			 */
9086 			if ((int)params.vid == lower_pvid)
9087 				return 0;
9088 		}
9089 
9090 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
9091 		if (IS_ERR(rif))
9092 			return PTR_ERR(rif);
9093 		break;
9094 	case NETDEV_DOWN:
9095 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9096 		mlxsw_sp_rif_destroy(rif);
9097 		break;
9098 	}
9099 
9100 	return 0;
9101 }
9102 
9103 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
9104 					struct net_device *vlan_dev,
9105 					unsigned long event, bool nomaster,
9106 					struct netlink_ext_ack *extack)
9107 {
9108 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
9109 	u16 vid = vlan_dev_vlan_id(vlan_dev);
9110 	u16 lower_pvid;
9111 	int err;
9112 
9113 	if (!nomaster && netif_is_bridge_port(vlan_dev))
9114 		return 0;
9115 
9116 	if (mlxsw_sp_port_dev_check(real_dev)) {
9117 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
9118 							 event, vid, extack);
9119 	} else if (netif_is_lag_master(real_dev)) {
9120 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
9121 						     vid, extack);
9122 	} else if (netif_is_bridge_master(real_dev) &&
9123 		   br_vlan_enabled(real_dev)) {
9124 		err = br_vlan_get_pvid(real_dev, &lower_pvid);
9125 		if (err)
9126 			return err;
9127 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev,
9128 						      lower_pvid, event,
9129 						      extack);
9130 	}
9131 
9132 	return 0;
9133 }
9134 
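/* Per RFC 5798, VRRP virtual router MAC addresses have the form
 * 00:00:5e:00:01:{VRID} for IPv4 and 00:00:5e:00:02:{VRID} for IPv6.
 * Match on the first five octets and let the VRID octet vary.
 */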
9135 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
9136 {
9137 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
9138 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9139 
9140 	return ether_addr_equal_masked(mac, vrrp4, mask);
9141 }
9142 
9143 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
9144 {
9145 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
9146 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
9147 
9148 	return ether_addr_equal_masked(mac, vrrp6, mask);
9149 }
9150 
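/* Program the VRRP ID into the RIF via a read-modify-write of the RITR
 * register: query the current configuration, update the IPv4 or IPv6
 * VRRP ID field, and write the register back.
 */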
9151 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9152 				const u8 *mac, bool adding)
9153 {
9154 	char ritr_pl[MLXSW_REG_RITR_LEN];
9155 	u8 vrrp_id = adding ? mac[5] : 0;
9156 	int err;
9157 
9158 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
9159 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
9160 		return 0;
9161 
9162 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9163 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9164 	if (err)
9165 		return err;
9166 
9167 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
9168 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
9169 	else
9170 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
9171 
9172 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9173 }
9174 
9175 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
9176 				    const struct net_device *macvlan_dev,
9177 				    struct netlink_ext_ack *extack)
9178 {
9179 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9180 	struct mlxsw_sp_rif *rif;
9181 	int err;
9182 
9183 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9184 	if (!rif)
9185 		return 0;
9186 
9187 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9188 				  mlxsw_sp_fid_index(rif->fid), true);
9189 	if (err)
9190 		return err;
9191 
9192 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
9193 				   macvlan_dev->dev_addr, true);
9194 	if (err)
9195 		goto err_rif_vrrp_add;
9196 
9197 	/* Make sure the bridge driver does not have this MAC pointing at
9198 	 * some other port.
9199 	 */
9200 	if (rif->ops->fdb_del)
9201 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
9202 
9203 	return 0;
9204 
9205 err_rif_vrrp_add:
9206 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9207 			    mlxsw_sp_fid_index(rif->fid), false);
9208 	return err;
9209 }
9210 
9211 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9212 				       const struct net_device *macvlan_dev)
9213 {
9214 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
9215 	struct mlxsw_sp_rif *rif;
9216 
9217 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
9218 	/* If we do not have a RIF, then we already took care of
9219 	 * removing the macvlan's MAC during RIF deletion.
9220 	 */
9221 	if (!rif)
9222 		return;
9223 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
9224 			     false);
9225 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
9226 			    mlxsw_sp_fid_index(rif->fid), false);
9227 }
9228 
9229 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
9230 			      const struct net_device *macvlan_dev)
9231 {
9232 	mutex_lock(&mlxsw_sp->router->lock);
9233 	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9234 	mutex_unlock(&mlxsw_sp->router->lock);
9235 }
9236 
9237 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
9238 					   struct net_device *macvlan_dev,
9239 					   unsigned long event,
9240 					   struct netlink_ext_ack *extack)
9241 {
9242 	switch (event) {
9243 	case NETDEV_UP:
9244 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
9245 	case NETDEV_DOWN:
9246 		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
9247 		break;
9248 	}
9249 
9250 	return 0;
9251 }
9252 
9253 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
9254 				     struct net_device *dev,
9255 				     unsigned long event, bool nomaster,
9256 				     struct netlink_ext_ack *extack)
9257 {
9258 	if (mlxsw_sp_port_dev_check(dev))
9259 		return mlxsw_sp_inetaddr_port_event(dev, event, nomaster,
9260 						    extack);
9261 	else if (netif_is_lag_master(dev))
9262 		return mlxsw_sp_inetaddr_lag_event(dev, event, nomaster,
9263 						   extack);
9264 	else if (netif_is_bridge_master(dev))
9265 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, -1, event,
9266 						      extack);
9267 	else if (is_vlan_dev(dev))
9268 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
9269 						    nomaster, extack);
9270 	else if (netif_is_macvlan(dev))
9271 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
9272 						       extack);
9273 	else
9274 		return 0;
9275 }
9276 
9277 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
9278 				   unsigned long event, void *ptr)
9279 {
9280 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
9281 	struct net_device *dev = ifa->ifa_dev->dev;
9282 	struct mlxsw_sp_router *router;
9283 	struct mlxsw_sp_rif *rif;
9284 	int err = 0;
9285 
9286 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
9287 	if (event == NETDEV_UP)
9288 		return NOTIFY_DONE;
9289 
9290 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
9291 	mutex_lock(&router->lock);
9292 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
9293 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9294 		goto out;
9295 
9296 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, false,
9297 					NULL);
9298 out:
9299 	mutex_unlock(&router->lock);
9300 	return notifier_from_errno(err);
9301 }
9302 
9303 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
9304 					 unsigned long event, void *ptr)
9305 {
9306 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
9307 	struct net_device *dev = ivi->ivi_dev->dev;
9308 	struct mlxsw_sp *mlxsw_sp;
9309 	struct mlxsw_sp_rif *rif;
9310 	int err = 0;
9311 
9312 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9313 	if (!mlxsw_sp)
9314 		return NOTIFY_DONE;
9315 
9316 	mutex_lock(&mlxsw_sp->router->lock);
9317 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9318 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9319 		goto out;
9320 
9321 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9322 					ivi->extack);
9323 out:
9324 	mutex_unlock(&mlxsw_sp->router->lock);
9325 	return notifier_from_errno(err);
9326 }
9327 
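/* Unlike its IPv4 counterpart, the inet6addr notifier is called in an
 * atomic context (under rcu_read_lock()), while handling the event
 * requires sleeping (taking RTNL and the router mutex). Defer the
 * handling to a work item executed in process context.
 */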
9328 struct mlxsw_sp_inet6addr_event_work {
9329 	struct work_struct work;
9330 	struct mlxsw_sp *mlxsw_sp;
9331 	struct net_device *dev;
9332 	unsigned long event;
9333 };
9334 
9335 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
9336 {
9337 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
9338 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
9339 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
9340 	struct net_device *dev = inet6addr_work->dev;
9341 	unsigned long event = inet6addr_work->event;
9342 	struct mlxsw_sp_rif *rif;
9343 
9344 	rtnl_lock();
9345 	mutex_lock(&mlxsw_sp->router->lock);
9346 
9347 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9348 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9349 		goto out;
9350 
9351 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false, NULL);
9352 out:
9353 	mutex_unlock(&mlxsw_sp->router->lock);
9354 	rtnl_unlock();
9355 	dev_put(dev);
9356 	kfree(inet6addr_work);
9357 }
9358 
9359 /* Called with rcu_read_lock() */
9360 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
9361 				    unsigned long event, void *ptr)
9362 {
9363 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
9364 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
9365 	struct net_device *dev = if6->idev->dev;
9366 	struct mlxsw_sp_router *router;
9367 
9368 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
9369 	if (event == NETDEV_UP)
9370 		return NOTIFY_DONE;
9371 
9372 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
9373 	if (!inet6addr_work)
9374 		return NOTIFY_BAD;
9375 
9376 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
9377 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
9378 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
9379 	inet6addr_work->dev = dev;
9380 	inet6addr_work->event = event;
9381 	dev_hold(dev);
9382 	mlxsw_core_schedule_work(&inet6addr_work->work);
9383 
9384 	return NOTIFY_DONE;
9385 }
9386 
9387 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
9388 					  unsigned long event, void *ptr)
9389 {
9390 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
9391 	struct net_device *dev = i6vi->i6vi_dev->dev;
9392 	struct mlxsw_sp *mlxsw_sp;
9393 	struct mlxsw_sp_rif *rif;
9394 	int err = 0;
9395 
9396 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9397 	if (!mlxsw_sp)
9398 		return NOTIFY_DONE;
9399 
9400 	mutex_lock(&mlxsw_sp->router->lock);
9401 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9402 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
9403 		goto out;
9404 
9405 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, false,
9406 					i6vi->extack);
9407 out:
9408 	mutex_unlock(&mlxsw_sp->router->lock);
9409 	return notifier_from_errno(err);
9410 }
9411 
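/* Reconfigure an existing RIF: read the current RITR configuration back
 * from the device, update the MTU, MAC and MAC profile fields, and
 * re-issue the register with the create opcode.
 */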
9412 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
9413 			     const char *mac, int mtu, u8 mac_profile)
9414 {
9415 	char ritr_pl[MLXSW_REG_RITR_LEN];
9416 	int err;
9417 
9418 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9419 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9420 	if (err)
9421 		return err;
9422 
9423 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9424 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9425 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9426 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9427 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9428 }
9429 
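/* Handle a MAC or MTU change on a RIF's netdevice: remove the FDB entry
 * directing the old MAC to the router, swap the MAC profile, edit the
 * RIF, install an FDB entry for the new MAC, and if the MTU changed,
 * propagate it to the multicast routing tables as well.
 */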
9430 static int
9431 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9432 				  struct mlxsw_sp_rif *rif,
9433 				  struct netlink_ext_ack *extack)
9434 {
9435 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
9436 	u8 old_mac_profile;
9437 	u16 fid_index;
9438 	int err;
9439 
9440 	fid_index = mlxsw_sp_fid_index(rif->fid);
9441 
9442 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9443 	if (err)
9444 		return err;
9445 
9446 	old_mac_profile = rif->mac_profile_id;
9447 	err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9448 					       extack);
9449 	if (err)
9450 		goto err_rif_mac_profile_replace;
9451 
9452 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9453 				dev->mtu, rif->mac_profile_id);
9454 	if (err)
9455 		goto err_rif_edit;
9456 
9457 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9458 	if (err)
9459 		goto err_rif_fdb_op;
9460 
9461 	if (rif->mtu != dev->mtu) {
9462 		struct mlxsw_sp_vr *vr;
9463 		int i;
9464 
9465 		/* Unlike in unicast routing, a RIF cannot be shared between
9466 		 * several multicast routing tables, so the RIF is relevant
9467 		 * only to its own mr_table instance.
9468 		 */
9469 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
9470 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9471 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9472 						   rif, dev->mtu);
9473 	}
9474 
9475 	ether_addr_copy(rif->addr, dev->dev_addr);
9476 	rif->mtu = dev->mtu;
9477 
9478 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9479 
9480 	return 0;
9481 
9482 err_rif_fdb_op:
9483 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9484 			  old_mac_profile);
9485 err_rif_edit:
9486 	mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9487 err_rif_mac_profile_replace:
9488 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9489 	return err;
9490 }
9491 
9492 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9493 			    struct netdev_notifier_pre_changeaddr_info *info)
9494 {
9495 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9496 	struct mlxsw_sp_rif_mac_profile *profile;
9497 	struct netlink_ext_ack *extack;
9498 	u8 max_rif_mac_profiles;
9499 	u64 occ;
9500 
9501 	extack = netdev_notifier_info_to_extack(&info->info);
9502 
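	/* The new MAC is acceptable if a profile matching it already
	 * exists, if there is room to allocate another profile, or if this
	 * RIF is the sole user of its current profile and can thus edit it
	 * in place.
	 */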
9503 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9504 	if (profile)
9505 		return 0;
9506 
9507 	max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9508 	occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9509 	if (occ < max_rif_mac_profiles)
9510 		return 0;
9511 
9512 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9513 		return 0;
9514 
9515 	NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9516 	return -ENOBUFS;
9517 }
9518 
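/* A CRIF is tracked for any netdevice that could eventually host a RIF:
 * front-panel ports, LAGs, bridges, IP-in-IP overlay devices, VRFs, and
 * VLAN uppers of ports, LAGs and bridges.
 */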
9519 static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp,
9520 						  struct net_device *dev)
9521 {
9522 	struct vlan_dev_priv *vlan;
9523 
9524 	if (netif_is_lag_master(dev) ||
9525 	    netif_is_bridge_master(dev) ||
9526 	    mlxsw_sp_port_dev_check(dev) ||
9527 	    mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) ||
9528 	    netif_is_l3_master(dev))
9529 		return true;
9530 
9531 	if (!is_vlan_dev(dev))
9532 		return false;
9533 
9534 	vlan = vlan_dev_priv(dev);
9535 	return netif_is_lag_master(vlan->real_dev) ||
9536 	       netif_is_bridge_master(vlan->real_dev) ||
9537 	       mlxsw_sp_port_dev_check(vlan->real_dev);
9538 }
9539 
9540 static struct mlxsw_sp_crif *
9541 mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev)
9542 {
9543 	struct mlxsw_sp_crif *crif;
9544 	int err;
9545 
9546 	if (WARN_ON(mlxsw_sp_crif_lookup(router, dev)))
9547 		return NULL;
9548 
9549 	crif = mlxsw_sp_crif_alloc(dev);
9550 	if (!crif)
9551 		return ERR_PTR(-ENOMEM);
9552 
9553 	err = mlxsw_sp_crif_insert(router, crif);
9554 	if (err)
9555 		goto err_netdev_insert;
9556 		goto err_crif_insert;
9557 	return crif;
9558 
9559 err_crif_insert:
9560 	mlxsw_sp_crif_free(crif);
9561 	return ERR_PTR(err);
9562 }
9563 
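/* The netdevice is going away: detach all nexthops that use this CRIF.
 * If a RIF is still bound to the CRIF, the CRIF must outlive it, so only
 * mark it as destroyable instead of freeing it now.
 */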
9564 static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router,
9565 				     struct mlxsw_sp_crif *crif)
9566 {
9567 	struct mlxsw_sp_nexthop *nh, *tmp;
9568 
9569 	mlxsw_sp_crif_remove(router, crif);
9570 
9571 	list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node)
9572 		mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh);
9573 
9574 	if (crif->rif)
9575 		crif->can_destroy = true;
9576 	else
9577 		mlxsw_sp_crif_free(crif);
9578 }
9579 
9580 static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router,
9581 				       struct net_device *dev)
9582 {
9583 	struct mlxsw_sp_crif *crif;
9584 
9585 	if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9586 		return 0;
9587 
9588 	crif = mlxsw_sp_crif_register(router, dev);
9589 	return PTR_ERR_OR_ZERO(crif);
9590 }
9591 
9592 static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router,
9593 					  struct net_device *dev)
9594 {
9595 	struct mlxsw_sp_crif *crif;
9596 
9597 	if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev))
9598 		return;
9599 
9600 	/* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts
9601 	 * the NETDEV_UNREGISTER message, so we can get here twice. If that's
9602 	 * what happened, the netdevice state is NETREG_UNREGISTERED. In that
9603 	 * case, we expect to have collected the CRIF already, and warn if it
9604 	 * still exists. Otherwise we expect the CRIF to exist.
9605 	 */
9606 	crif = mlxsw_sp_crif_lookup(router, dev);
9607 	if (dev->reg_state == NETREG_UNREGISTERED) {
9608 		if (!WARN_ON(crif))
9609 			return;
9610 	}
9611 	if (WARN_ON(!crif))
9612 		return;
9613 
9614 	mlxsw_sp_crif_unregister(router, crif);
9615 }
9616 
9617 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9618 {
9619 	switch (event) {
9620 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9621 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9622 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9623 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9624 		return true;
9625 	}
9626 
9627 	return false;
9628 }
9629 
9630 static int
9631 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9632 					unsigned long event,
9633 					struct netdev_notifier_offload_xstats_info *info)
9634 {
9635 	switch (info->type) {
9636 	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9637 		break;
9638 	default:
9639 		return 0;
9640 	}
9641 
9642 	switch (event) {
9643 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9644 		return mlxsw_sp_router_port_l3_stats_enable(rif);
9645 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9646 		mlxsw_sp_router_port_l3_stats_disable(rif);
9647 		return 0;
9648 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9649 		mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9650 		return 0;
9651 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9652 		return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9653 	}
9654 
9655 	WARN_ON_ONCE(1);
9656 	return 0;
9657 }
9658 
9659 static int
9660 mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
9661 				      struct net_device *dev,
9662 				      unsigned long event,
9663 				      struct netdev_notifier_offload_xstats_info *info)
9664 {
9665 	struct mlxsw_sp_rif *rif;
9666 
9667 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9668 	if (!rif)
9669 		return 0;
9670 
9671 	return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info);
9672 }
9673 
9674 static bool mlxsw_sp_is_router_event(unsigned long event)
9675 {
9676 	switch (event) {
9677 	case NETDEV_PRE_CHANGEADDR:
9678 	case NETDEV_CHANGEADDR:
9679 	case NETDEV_CHANGEMTU:
9680 		return true;
9681 	default:
9682 		return false;
9683 	}
9684 }
9685 
9686 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9687 						unsigned long event, void *ptr)
9688 {
9689 	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9690 	struct mlxsw_sp *mlxsw_sp;
9691 	struct mlxsw_sp_rif *rif;
9692 
9693 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9694 	if (!mlxsw_sp)
9695 		return 0;
9696 
9697 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9698 	if (!rif)
9699 		return 0;
9700 
9701 	switch (event) {
9702 	case NETDEV_CHANGEMTU:
9703 	case NETDEV_CHANGEADDR:
9704 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9705 	case NETDEV_PRE_CHANGEADDR:
9706 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9707 	default:
9708 		WARN_ON_ONCE(1);
9709 		break;
9710 	}
9711 
9712 	return 0;
9713 }
9714 
9715 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9716 				  struct net_device *l3_dev,
9717 				  struct netlink_ext_ack *extack)
9718 {
9719 	struct mlxsw_sp_rif *rif;
9720 
9721 	/* If the netdev is already associated with a RIF, that RIF must be
9722 	 * destroyed and a new one created with the new virtual router ID.
9723 	 */
9724 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9725 	if (rif)
9726 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false,
9727 					  extack);
9728 
9729 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, false,
9730 					 extack);
9731 }
9732 
9733 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9734 				    struct net_device *l3_dev)
9735 {
9736 	struct mlxsw_sp_rif *rif;
9737 
9738 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9739 	if (!rif)
9740 		return;
9741 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, false, NULL);
9742 }
9743 
9744 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9745 {
9746 	struct netdev_notifier_changeupper_info *info = ptr;
9747 
9748 	if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9749 		return false;
9750 	return netif_is_l3_master(info->upper_dev);
9751 }
9752 
9753 static int
9754 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9755 			     struct netdev_notifier_changeupper_info *info)
9756 {
9757 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9758 	int err = 0;
9759 
9760 	/* We do not create a RIF for a macvlan, but only use it to
9761 	 * direct more MAC addresses to the router.
9762 	 */
9763 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9764 		return 0;
9765 
9766 	switch (event) {
9767 	case NETDEV_PRECHANGEUPPER:
9768 		break;
9769 	case NETDEV_CHANGEUPPER:
9770 		if (info->linking) {
9771 			struct netlink_ext_ack *extack;
9772 
9773 			extack = netdev_notifier_info_to_extack(&info->info);
9774 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9775 		} else {
9776 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9777 		}
9778 		break;
9779 	}
9780 
9781 	return err;
9782 }
9783 
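/* Context for replaying NETDEV_UP on a device and its uppers. @done
 * counts the devices for which the replay succeeded, so that a failure
 * can be unwound; @deslavement indicates the replay was triggered by the
 * device losing its master.
 */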
9784 struct mlxsw_sp_router_replay_inetaddr_up {
9785 	struct mlxsw_sp *mlxsw_sp;
9786 	struct netlink_ext_ack *extack;
9787 	unsigned int done;
9788 	bool deslavement;
9789 };
9790 
9791 static int mlxsw_sp_router_replay_inetaddr_up(struct net_device *dev,
9792 					      struct netdev_nested_priv *priv)
9793 {
9794 	struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
9795 	bool nomaster = ctx->deslavement;
9796 	struct mlxsw_sp_crif *crif;
9797 	int err;
9798 
9799 	if (mlxsw_sp_dev_addr_list_empty(dev))
9800 		return 0;
9801 
9802 	crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
9803 	if (!crif || crif->rif)
9804 		return 0;
9805 
9806 	if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
9807 		return 0;
9808 
9809 	err = __mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_UP,
9810 					nomaster, ctx->extack);
9811 	if (err)
9812 		return err;
9813 
9814 	ctx->done++;
9815 	return 0;
9816 }
9817 
9818 static int mlxsw_sp_router_unreplay_inetaddr_up(struct net_device *dev,
9819 						struct netdev_nested_priv *priv)
9820 {
9821 	struct mlxsw_sp_router_replay_inetaddr_up *ctx = priv->data;
9822 	bool nomaster = ctx->deslavement;
9823 	struct mlxsw_sp_crif *crif;
9824 
9825 	if (!ctx->done)
9826 		return 0;
9827 
9828 	if (mlxsw_sp_dev_addr_list_empty(dev))
9829 		return 0;
9830 
9831 	crif = mlxsw_sp_crif_lookup(ctx->mlxsw_sp->router, dev);
9832 	if (!crif || !crif->rif)
9833 		return 0;
9834 
9835 	/* We are rolling back NETDEV_UP, so query should_config for that
9836 	 * event.
9836 	 */
9836 	if (!mlxsw_sp_rif_should_config(crif->rif, dev, NETDEV_UP))
9837 		return 0;
9838 
9839 	__mlxsw_sp_inetaddr_event(ctx->mlxsw_sp, dev, NETDEV_DOWN, nomaster,
9840 				  NULL);
9841 
9842 	ctx->done--;
9843 	return 0;
9844 }
9845 
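/* A device was enslaved to @upper_dev. IP addresses on the master and on
 * its uppers may now be eligible for offload; replay NETDEV_UP for each
 * of them, unwinding the successful replays if any of them fails.
 */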
9846 int mlxsw_sp_netdevice_enslavement_replay(struct mlxsw_sp *mlxsw_sp,
9847 					  struct net_device *upper_dev,
9848 					  struct netlink_ext_ack *extack)
9849 {
9850 	struct mlxsw_sp_router_replay_inetaddr_up ctx = {
9851 		.mlxsw_sp = mlxsw_sp,
9852 		.extack = extack,
9853 		.deslavement = false,
9854 	};
9855 	struct netdev_nested_priv priv = {
9856 		.data = &ctx,
9857 	};
9858 	int err;
9859 
9860 	err = mlxsw_sp_router_replay_inetaddr_up(upper_dev, &priv);
9861 	if (err)
9862 		return err;
9863 
9864 	err = netdev_walk_all_upper_dev_rcu(upper_dev,
9865 					    mlxsw_sp_router_replay_inetaddr_up,
9866 					    &priv);
9867 	if (err)
9868 		goto err_replay_up;
9869 
9870 	return 0;
9871 
9872 err_replay_up:
9873 	netdev_walk_all_upper_dev_rcu(upper_dev,
9874 				      mlxsw_sp_router_unreplay_inetaddr_up,
9875 				      &priv);
9876 	mlxsw_sp_router_unreplay_inetaddr_up(upper_dev, &priv);
9877 	return err;
9878 }
9879 
9880 void mlxsw_sp_netdevice_deslavement_replay(struct mlxsw_sp *mlxsw_sp,
9881 					   struct net_device *dev)
9882 {
9883 	struct mlxsw_sp_router_replay_inetaddr_up ctx = {
9884 		.mlxsw_sp = mlxsw_sp,
9885 		.deslavement = true,
9886 	};
9887 	struct netdev_nested_priv priv = {
9888 		.data = &ctx,
9889 	};
9890 
9891 	mlxsw_sp_router_replay_inetaddr_up(dev, &priv);
9892 }
9893 
9894 static int
9895 mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port,
9896 				       u16 vid, struct net_device *dev,
9897 				       struct netlink_ext_ack *extack)
9898 {
9899 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9900 
9901 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9902 							    vid);
9903 	if (WARN_ON(!mlxsw_sp_port_vlan))
9904 		return -EINVAL;
9905 
9906 	return mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan,
9907 						       dev, extack);
9908 }
9909 
9910 static void
9911 mlxsw_sp_port_vid_router_leave(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
9912 			       struct net_device *dev)
9913 {
9914 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
9915 
9916 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port,
9917 							    vid);
9918 	if (WARN_ON(!mlxsw_sp_port_vlan))
9919 		return;
9920 
9921 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
9922 }
9923 
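/* A port joined a LAG that may already have router interfaces, both on
 * the LAG device itself (default VID) and on its VLAN uppers. Replay the
 * router join for each of them, rolling back the @done successful joins
 * on error.
 */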
9924 static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9925 					   struct net_device *lag_dev,
9926 					   struct netlink_ext_ack *extack)
9927 {
9928 	u16 default_vid = MLXSW_SP_DEFAULT_VID;
9929 	struct net_device *upper_dev;
9930 	struct list_head *iter;
9931 	int done = 0;
9932 	u16 vid;
9933 	int err;
9934 
9935 	err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, default_vid,
9936 						     lag_dev, extack);
9937 	if (err)
9938 		return err;
9939 
9940 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9941 		if (!is_vlan_dev(upper_dev))
9942 			continue;
9943 
9944 		vid = vlan_dev_vlan_id(upper_dev);
9945 		err = mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, vid,
9946 							     upper_dev, extack);
9947 		if (err)
9948 			goto err_router_join_dev;
9949 
9950 		++done;
9951 	}
9952 
9953 	return 0;
9954 
9955 err_router_join_dev:
9956 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9957 		if (!is_vlan_dev(upper_dev))
9958 			continue;
9959 		if (!done--)
9960 			break;
9961 
9962 		vid = vlan_dev_vlan_id(upper_dev);
9963 		mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
9964 	}
9965 
9966 	mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
9967 	return err;
9968 }
9969 
9970 static void
9971 __mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9972 				 struct net_device *lag_dev)
9973 {
9974 	u16 default_vid = MLXSW_SP_DEFAULT_VID;
9975 	struct net_device *upper_dev;
9976 	struct list_head *iter;
9977 	u16 vid;
9978 
9979 	netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
9980 		if (!is_vlan_dev(upper_dev))
9981 			continue;
9982 
9983 		vid = vlan_dev_vlan_id(upper_dev);
9984 		mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, vid, upper_dev);
9985 	}
9986 
9987 	mlxsw_sp_port_vid_router_leave(mlxsw_sp_port, default_vid, lag_dev);
9988 }
9989 
9990 int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port,
9991 				  struct net_device *lag_dev,
9992 				  struct netlink_ext_ack *extack)
9993 {
9994 	int err;
9995 
9996 	mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9997 	err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack);
9998 	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
9999 
10000 	return err;
10001 }
10002 
10003 void mlxsw_sp_router_port_leave_lag(struct mlxsw_sp_port *mlxsw_sp_port,
10004 				    struct net_device *lag_dev)
10005 {
10006 	mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10007 	__mlxsw_sp_router_port_leave_lag(mlxsw_sp_port, lag_dev);
10008 	mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock);
10009 }
10010 
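/* Dispatch netdevice events under the router lock. NETDEV_REGISTER is
 * handled first so that a CRIF exists by the time the other handlers
 * run, and NETDEV_UNREGISTER last so that the handlers can still look
 * the CRIF up while processing the event.
 */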
10011 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
10012 					   unsigned long event, void *ptr)
10013 {
10014 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
10015 	struct mlxsw_sp_router *router;
10016 	struct mlxsw_sp *mlxsw_sp;
10017 	int err = 0;
10018 
10019 	router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
10020 	mlxsw_sp = router->mlxsw_sp;
10021 
10022 	mutex_lock(&mlxsw_sp->router->lock);
10023 
10024 	if (event == NETDEV_REGISTER) {
10025 		err = mlxsw_sp_netdevice_register(router, dev);
10026 		if (err)
10027 			/* No need to roll this back, UNREGISTER will collect it
10028 			 * anyhow.
10029 			 */
10030 			goto out;
10031 	}
10032 
10033 	if (mlxsw_sp_is_offload_xstats_event(event))
10034 		err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
10035 							    event, ptr);
10036 	else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
10037 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
10038 						       event, ptr);
10039 	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
10040 		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
10041 						       event, ptr);
10042 	else if (mlxsw_sp_is_router_event(event))
10043 		err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
10044 	else if (mlxsw_sp_is_vrf_event(event, ptr))
10045 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
10046 
10047 	if (event == NETDEV_UNREGISTER)
10048 		mlxsw_sp_netdevice_unregister(router, dev);
10049 
10050 out:
10051 	mutex_unlock(&mlxsw_sp->router->lock);
10052 
10053 	return notifier_from_errno(err);
10054 }
10055 
10056 struct mlxsw_sp_macvlan_replay {
10057 	struct mlxsw_sp *mlxsw_sp;
10058 	struct netlink_ext_ack *extack;
10059 };
10060 
10061 static int mlxsw_sp_macvlan_replay_upper(struct net_device *dev,
10062 					 struct netdev_nested_priv *priv)
10063 {
10064 	const struct mlxsw_sp_macvlan_replay *rms = priv->data;
10065 	struct netlink_ext_ack *extack = rms->extack;
10066 	struct mlxsw_sp *mlxsw_sp = rms->mlxsw_sp;
10067 
10068 	if (!netif_is_macvlan(dev))
10069 		return 0;
10070 
10071 	return mlxsw_sp_rif_macvlan_add(mlxsw_sp, dev, extack);
10072 }
10073 
10074 static int mlxsw_sp_macvlan_replay(struct mlxsw_sp_rif *rif,
10075 				   struct netlink_ext_ack *extack)
10076 {
10077 	struct mlxsw_sp_macvlan_replay rms = {
10078 		.mlxsw_sp = rif->mlxsw_sp,
10079 		.extack = extack,
10080 	};
10081 	struct netdev_nested_priv priv = {
10082 		.data = &rms,
10083 	};
10084 
10085 	return netdev_walk_all_upper_dev_rcu(mlxsw_sp_rif_dev(rif),
10086 					     mlxsw_sp_macvlan_replay_upper,
10087 					     &priv);
10088 }
10089 
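/* Macvlan uppers of the RIF's netdevice have FDB entries that direct
 * their MACs to the router. Flush them when the RIF is deconfigured.
 */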
10090 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
10091 					struct netdev_nested_priv *priv)
10092 {
10093 	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
10094 
10095 	if (!netif_is_macvlan(dev))
10096 		return 0;
10097 
10098 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10099 				   mlxsw_sp_fid_index(rif->fid), false);
10100 }
10101 
10102 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
10103 {
10104 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10105 	struct netdev_nested_priv priv = {
10106 		.data = (void *)rif,
10107 	};
10108 
10109 	if (!netif_is_macvlan_port(dev))
10110 		return 0;
10111 
10112 	return netdev_walk_all_upper_dev_rcu(dev,
10113 					     __mlxsw_sp_rif_macvlan_flush, &priv);
10114 }
10115 
10116 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
10117 				       const struct mlxsw_sp_rif_params *params)
10118 {
10119 	struct mlxsw_sp_rif_subport *rif_subport;
10120 
10121 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
10122 	refcount_set(&rif_subport->ref_count, 1);
10123 	rif_subport->vid = params->vid;
10124 	rif_subport->lag = params->lag;
10125 	if (params->lag)
10126 		rif_subport->lag_id = params->lag_id;
10127 	else
10128 		rif_subport->system_port = params->system_port;
10129 }
10130 
10131 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
10132 {
10133 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10134 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10135 	struct mlxsw_sp_rif_subport *rif_subport;
10136 	char ritr_pl[MLXSW_REG_RITR_LEN];
10137 	u16 efid;
10138 
10139 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
10140 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
10141 			    rif->rif_index, rif->vr_id, dev->mtu);
10142 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10143 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10144 	efid = mlxsw_sp_fid_index(rif->fid);
10145 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
10146 				  rif_subport->lag ? rif_subport->lag_id :
10147 						     rif_subport->system_port,
10148 				  efid, 0);
10149 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10150 }
10151 
10152 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
10153 					  struct netlink_ext_ack *extack)
10154 {
10155 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10156 	u8 mac_profile;
10157 	int err;
10158 
10159 	err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
10160 					   &mac_profile, extack);
10161 	if (err)
10162 		return err;
10163 	rif->mac_profile_id = mac_profile;
10164 
10165 	err = mlxsw_sp_rif_subport_op(rif, true);
10166 	if (err)
10167 		goto err_rif_subport_op;
10168 
10169 	err = mlxsw_sp_macvlan_replay(rif, extack);
10170 	if (err)
10171 		goto err_macvlan_replay;
10172 
10173 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10174 				  mlxsw_sp_fid_index(rif->fid), true);
10175 	if (err)
10176 		goto err_rif_fdb_op;
10177 
10178 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10179 	if (err)
10180 		goto err_fid_rif_set;
10181 
10182 	return 0;
10183 
10184 err_fid_rif_set:
10185 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10186 			    mlxsw_sp_fid_index(rif->fid), false);
10187 err_rif_fdb_op:
10188 	mlxsw_sp_rif_macvlan_flush(rif);
10189 err_macvlan_replay:
10190 	mlxsw_sp_rif_subport_op(rif, false);
10191 err_rif_subport_op:
10192 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
10193 	return err;
10194 }
10195 
10196 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
10197 {
10198 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10199 	struct mlxsw_sp_fid *fid = rif->fid;
10200 
10201 	mlxsw_sp_fid_rif_unset(fid);
10202 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10203 			    mlxsw_sp_fid_index(fid), false);
10204 	mlxsw_sp_rif_macvlan_flush(rif);
10205 	mlxsw_sp_rif_subport_op(rif, false);
10206 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10207 }
10208 
10209 static struct mlxsw_sp_fid *
10210 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
10211 			     const struct mlxsw_sp_rif_params *params,
10212 			     struct netlink_ext_ack *extack)
10213 {
10214 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
10215 }
10216 
10217 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
10218 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
10219 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
10220 	.setup			= mlxsw_sp_rif_subport_setup,
10221 	.configure		= mlxsw_sp_rif_subport_configure,
10222 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
10223 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
10224 };
10225 
10226 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
10227 {
10228 	enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
10229 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10230 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10231 	char ritr_pl[MLXSW_REG_RITR_LEN];
10232 
10233 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
10234 			    dev->mtu);
10235 	mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr);
10236 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
10237 	mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
10238 
10239 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10240 }
10241 
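/* The "router port" is a virtual port one beyond the maximal physical
 * port number. It is used as the destination when FIDs need to flood
 * broadcast or multicast traffic towards the router.
 */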
10242 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
10243 {
10244 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
10245 }
10246 
10247 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
10248 				      struct netlink_ext_ack *extack)
10249 {
10250 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10251 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10252 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10253 	u8 mac_profile;
10254 	int err;
10255 
10256 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10257 					   &mac_profile, extack);
10258 	if (err)
10259 		return err;
10260 	rif->mac_profile_id = mac_profile;
10261 
10262 	err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
10263 	if (err)
10264 		goto err_rif_fid_op;
10265 
10266 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10267 				     mlxsw_sp_router_port(mlxsw_sp), true);
10268 	if (err)
10269 		goto err_fid_mc_flood_set;
10270 
10271 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10272 				     mlxsw_sp_router_port(mlxsw_sp), true);
10273 	if (err)
10274 		goto err_fid_bc_flood_set;
10275 
10276 	err = mlxsw_sp_macvlan_replay(rif, extack);
10277 	if (err)
10278 		goto err_macvlan_replay;
10279 
10280 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10281 				  mlxsw_sp_fid_index(rif->fid), true);
10282 	if (err)
10283 		goto err_rif_fdb_op;
10284 
10285 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10286 	if (err)
10287 		goto err_fid_rif_set;
10288 
10289 	return 0;
10290 
10291 err_fid_rif_set:
10292 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10293 			    mlxsw_sp_fid_index(rif->fid), false);
10294 err_rif_fdb_op:
10295 	mlxsw_sp_rif_macvlan_flush(rif);
10296 err_macvlan_replay:
10297 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10298 			       mlxsw_sp_router_port(mlxsw_sp), false);
10299 err_fid_bc_flood_set:
10300 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10301 			       mlxsw_sp_router_port(mlxsw_sp), false);
10302 err_fid_mc_flood_set:
10303 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
10304 err_rif_fid_op:
10305 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10306 	return err;
10307 }
10308 
10309 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
10310 {
10311 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10312 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
10313 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10314 	struct mlxsw_sp_fid *fid = rif->fid;
10315 
10316 	mlxsw_sp_fid_rif_unset(fid);
10317 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10318 			    mlxsw_sp_fid_index(fid), false);
10319 	mlxsw_sp_rif_macvlan_flush(rif);
10320 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10321 			       mlxsw_sp_router_port(mlxsw_sp), false);
10322 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10323 			       mlxsw_sp_router_port(mlxsw_sp), false);
10324 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
10325 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10326 }
10327 
10328 static struct mlxsw_sp_fid *
10329 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
10330 			 const struct mlxsw_sp_rif_params *params,
10331 			 struct netlink_ext_ack *extack)
10332 {
10333 	int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif);
10334 
10335 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex);
10336 }
10337 
10338 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10339 {
10340 	struct switchdev_notifier_fdb_info info = {};
10341 	struct net_device *dev;
10342 
10343 	dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0);
10344 	if (!dev)
10345 		return;
10346 
10347 	info.addr = mac;
10348 	info.vid = 0;
10349 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10350 				 NULL);
10351 }
10352 
10353 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
10354 	.type			= MLXSW_SP_RIF_TYPE_FID,
10355 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10356 	.configure		= mlxsw_sp_rif_fid_configure,
10357 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
10358 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
10359 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
10360 };
10361 
10362 static struct mlxsw_sp_fid *
10363 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
10364 			  const struct mlxsw_sp_rif_params *params,
10365 			  struct netlink_ext_ack *extack)
10366 {
10367 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10368 	struct net_device *br_dev;
10369 
10370 	if (WARN_ON(!params->vid))
10371 		return ERR_PTR(-EINVAL);
10372 
10373 	if (is_vlan_dev(dev)) {
10374 		br_dev = vlan_dev_real_dev(dev);
10375 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
10376 			return ERR_PTR(-EINVAL);
10377 	}
10378 
10379 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, params->vid);
10380 }
10381 
10382 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
10383 {
10384 	struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
10385 	struct switchdev_notifier_fdb_info info = {};
10386 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10387 	struct net_device *br_dev;
10388 	struct net_device *dev;
10389 
10390 	br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev;
10391 	dev = br_fdb_find_port(br_dev, mac, vid);
10392 	if (!dev)
10393 		return;
10394 
10395 	info.addr = mac;
10396 	info.vid = vid;
10397 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
10398 				 NULL);
10399 }
10400 
10401 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
10402 				bool enable)
10403 {
10404 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10405 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10406 	char ritr_pl[MLXSW_REG_RITR_LEN];
10407 
10408 	mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
10409 				    dev->mtu, dev->dev_addr,
10410 				    rif->mac_profile_id, vid, efid);
10411 
10412 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10413 }
10414 
10415 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
10416 				       struct netlink_ext_ack *extack)
10417 {
10418 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10419 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10420 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10421 	u8 mac_profile;
10422 	int err;
10423 
10424 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
10425 					   &mac_profile, extack);
10426 	if (err)
10427 		return err;
10428 	rif->mac_profile_id = mac_profile;
10429 
10430 	err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
10431 	if (err)
10432 		goto err_rif_vlan_fid_op;
10433 
10434 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10435 				     mlxsw_sp_router_port(mlxsw_sp), true);
10436 	if (err)
10437 		goto err_fid_mc_flood_set;
10438 
10439 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10440 				     mlxsw_sp_router_port(mlxsw_sp), true);
10441 	if (err)
10442 		goto err_fid_bc_flood_set;
10443 
10444 	err = mlxsw_sp_macvlan_replay(rif, extack);
10445 	if (err)
10446 		goto err_macvlan_replay;
10447 
10448 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10449 				  mlxsw_sp_fid_index(rif->fid), true);
10450 	if (err)
10451 		goto err_rif_fdb_op;
10452 
10453 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
10454 	if (err)
10455 		goto err_fid_rif_set;
10456 
10457 	return 0;
10458 
10459 err_fid_rif_set:
10460 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10461 			    mlxsw_sp_fid_index(rif->fid), false);
10462 err_rif_fdb_op:
10463 	mlxsw_sp_rif_macvlan_flush(rif);
10464 err_macvlan_replay:
10465 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10466 			       mlxsw_sp_router_port(mlxsw_sp), false);
10467 err_fid_bc_flood_set:
10468 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10469 			       mlxsw_sp_router_port(mlxsw_sp), false);
10470 err_fid_mc_flood_set:
10471 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10472 err_rif_vlan_fid_op:
10473 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
10474 	return err;
10475 }
10476 
10477 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
10478 {
10479 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10480 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
10481 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10482 
10483 	mlxsw_sp_fid_rif_unset(rif->fid);
10484 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
10485 			    mlxsw_sp_fid_index(rif->fid), false);
10486 	mlxsw_sp_rif_macvlan_flush(rif);
10487 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
10488 			       mlxsw_sp_router_port(mlxsw_sp), false);
10489 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
10490 			       mlxsw_sp_router_port(mlxsw_sp), false);
10491 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
10492 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
10493 }
10494 
10495 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10496 					struct netlink_ext_ack *extack)
10497 {
10498 	return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
10499 }
10500 
10501 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
10502 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
10503 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10504 	.configure		= mlxsw_sp1_rif_vlan_configure,
10505 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
10506 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
10507 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
10508 };
10509 
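/* On Spectrum-2 and later, the VLAN RIF is configured with an egress FID
 * equal to the FID index, whereas Spectrum-1 leaves the eFID at zero.
 */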
10510 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
10511 					struct netlink_ext_ack *extack)
10512 {
10513 	u16 efid = mlxsw_sp_fid_index(rif->fid);
10514 
10515 	return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
10516 }
10517 
10518 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
10519 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
10520 	.rif_size		= sizeof(struct mlxsw_sp_rif),
10521 	.configure		= mlxsw_sp2_rif_vlan_configure,
10522 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
10523 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
10524 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
10525 };
10526 
10527 static struct mlxsw_sp_rif_ipip_lb *
10528 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
10529 {
10530 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
10531 }
10532 
10533 static void
10534 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
10535 			   const struct mlxsw_sp_rif_params *params)
10536 {
10537 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
10538 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
10539 
10540 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
10541 				 common);
10542 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
10543 	rif_lb->lb_config = params_lb->lb_config;
10544 }
10545 
10546 static int
10547 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10548 				struct netlink_ext_ack *extack)
10549 {
10550 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10551 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10552 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10553 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10554 	struct mlxsw_sp_vr *ul_vr;
10555 	int err;
10556 
10557 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
10558 	if (IS_ERR(ul_vr))
10559 		return PTR_ERR(ul_vr);
10560 
10561 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
10562 	if (err)
10563 		goto err_loopback_op;
10564 
10565 	lb_rif->ul_vr_id = ul_vr->id;
10566 	lb_rif->ul_rif_id = 0;
10567 	++ul_vr->rif_count;
10568 	return 0;
10569 
10570 err_loopback_op:
10571 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10572 	return err;
10573 }
10574 
10575 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10576 {
10577 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10578 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10579 	struct mlxsw_sp_vr *ul_vr;
10580 
10581 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
10582 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
10583 
10584 	--ul_vr->rif_count;
10585 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
10586 }
10587 
10588 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
10589 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
10590 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
10591 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
10592 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
10593 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
10594 };
10595 
10596 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
10597 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
10598 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp1_rif_vlan_ops,
10599 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
10600 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
10601 };
10602 
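/* On Spectrum-2 and later, IP-in-IP loopback RIFs are bound to an
 * underlay RIF rather than directly to a virtual router. A single
 * underlay RIF is created per virtual router and shared among its
 * tunnels by reference count.
 */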
10603 static int
10604 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
10605 {
10606 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10607 	char ritr_pl[MLXSW_REG_RITR_LEN];
10608 
10609 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
10610 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
10611 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
10612 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
10613 
10614 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
10615 }
10616 
10617 static struct mlxsw_sp_rif *
10618 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
10619 		       struct mlxsw_sp_crif *ul_crif,
10620 		       struct netlink_ext_ack *extack)
10621 {
10622 	struct mlxsw_sp_rif *ul_rif;
10623 	u8 rif_entries = 1;
10624 	u16 rif_index;
10625 	int err;
10626 
10627 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
10628 	if (err) {
10629 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
10630 		return ERR_PTR(err);
10631 	}
10632 
10633 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id,
10634 				    ul_crif);
10635 	if (!ul_rif) {
10636 		err = -ENOMEM;
10637 		goto err_rif_alloc;
10638 	}
10639 
10640 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
10641 	ul_rif->mlxsw_sp = mlxsw_sp;
10642 	ul_rif->rif_entries = rif_entries;
10643 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
10644 	if (err)
10645 		goto ul_rif_op_err;
10646 
10647 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
10648 	return ul_rif;
10649 
10650 ul_rif_op_err:
10651 	mlxsw_sp->router->rifs[rif_index] = NULL;
10652 	mlxsw_sp_rif_free(ul_rif);
10653 err_rif_alloc:
10654 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10655 	return ERR_PTR(err);
10656 }
10657 
10658 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
10659 {
10660 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10661 	u8 rif_entries = ul_rif->rif_entries;
10662 	u16 rif_index = ul_rif->rif_index;
10663 
10664 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
10665 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
10666 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
10667 	mlxsw_sp_rif_free(ul_rif);
10668 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
10669 }
10670 
10671 static struct mlxsw_sp_rif *
10672 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
10673 		    struct mlxsw_sp_crif *ul_crif,
10674 		    struct netlink_ext_ack *extack)
10675 {
10676 	struct mlxsw_sp_vr *vr;
10677 	int err;
10678 
10679 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
10680 	if (IS_ERR(vr))
10681 		return ERR_CAST(vr);
10682 
10683 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
10684 		return vr->ul_rif;
10685 
10686 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack);
10687 	if (IS_ERR(vr->ul_rif)) {
10688 		err = PTR_ERR(vr->ul_rif);
10689 		goto err_ul_rif_create;
10690 	}
10691 
10692 	vr->rif_count++;
10693 	refcount_set(&vr->ul_rif_refcnt, 1);
10694 
10695 	return vr->ul_rif;
10696 
10697 err_ul_rif_create:
10698 	mlxsw_sp_vr_put(mlxsw_sp, vr);
10699 	return ERR_PTR(err);
10700 }
10701 
10702 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
10703 {
10704 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
10705 	struct mlxsw_sp_vr *vr;
10706 
10707 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
10708 
10709 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
10710 		return;
10711 
10712 	vr->rif_count--;
10713 	mlxsw_sp_ul_rif_destroy(ul_rif);
10714 	mlxsw_sp_vr_put(mlxsw_sp, vr);
10715 }
10716 
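/* mlxsw_sp_router_ul_rif_get() and mlxsw_sp_router_ul_rif_put() are the
 * locked entry points for the rest of the driver. A sketch of the
 * expected calling pattern (hypothetical caller):
 *
 *	u16 ul_rif_index;
 *	int err;
 *
 *	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, ul_tb_id, &ul_rif_index);
 *	if (err)
 *		return err;
 *	... use ul_rif_index, e.g. when packing tunnel registers ...
 *	mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index);
 */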
10717 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
10718 			       u16 *ul_rif_index)
10719 {
10720 	struct mlxsw_sp_rif *ul_rif;
10721 	int err = 0;
10722 
10723 	mutex_lock(&mlxsw_sp->router->lock);
10724 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL);
10725 	if (IS_ERR(ul_rif)) {
10726 		err = PTR_ERR(ul_rif);
10727 		goto out;
10728 	}
10729 	*ul_rif_index = ul_rif->rif_index;
10730 out:
10731 	mutex_unlock(&mlxsw_sp->router->lock);
10732 	return err;
10733 }
10734 
10735 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
10736 {
10737 	struct mlxsw_sp_rif *ul_rif;
10738 
10739 	mutex_lock(&mlxsw_sp->router->lock);
10740 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
10741 	if (WARN_ON(!ul_rif))
10742 		goto out;
10743 
10744 	mlxsw_sp_ul_rif_put(ul_rif);
10745 out:
10746 	mutex_unlock(&mlxsw_sp->router->lock);
10747 }
10748 
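/* On Spectrum-2 and later ASICs, an IPIP loopback RIF references the
 * shared UL RIF of its underlay table by RIF index; the underlay VR id
 * is not used and is simply left at zero.
 */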
10749 static int
10750 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
10751 				struct netlink_ext_ack *extack)
10752 {
10753 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10754 	struct net_device *dev = mlxsw_sp_rif_dev(rif);
10755 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev);
10756 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10757 	struct mlxsw_sp_rif *ul_rif;
10758 	int err;
10759 
10760 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack);
10761 	if (IS_ERR(ul_rif))
10762 		return PTR_ERR(ul_rif);
10763 
10764 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
10765 	if (err)
10766 		goto err_loopback_op;
10767 
10768 	lb_rif->ul_vr_id = 0;
10769 	lb_rif->ul_rif_id = ul_rif->rif_index;
10770 
10771 	return 0;
10772 
10773 err_loopback_op:
10774 	mlxsw_sp_ul_rif_put(ul_rif);
10775 	return err;
10776 }
10777 
10778 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
10779 {
10780 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
10781 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
10782 	struct mlxsw_sp_rif *ul_rif;
10783 
10784 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
10785 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
10786 	mlxsw_sp_ul_rif_put(ul_rif);
10787 }
10788 
10789 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
10790 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
10791 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
10792 	.setup			= mlxsw_sp_rif_ipip_lb_setup,
10793 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
10794 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
10795 };
10796 
10797 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
10798 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
10799 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp2_rif_vlan_ops,
10800 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
10801 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
10802 };
10803 
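/* RIF indexes are handed out by a genalloc pool with a granularity of one
 * index (minimum allocation order 0). The first-fit-order-align algorithm
 * aligns each allocation to the order of its size, so a double-entry RIF
 * presumably lands on an even index as the hardware expects. The pool is
 * based at MLXSW_SP_ROUTER_GENALLOC_OFFSET because gen_pool_alloc()
 * returns 0 on failure, so address 0 must not be a valid allocation.
 */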
10804 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
10805 {
10806 	struct gen_pool *rifs_table;
10807 	int err;
10808 
10809 	rifs_table = gen_pool_create(0, -1);
10810 	if (!rifs_table)
10811 		return -ENOMEM;
10812 
10813 	gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
10814 			  NULL);
10815 
10816 	err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
10817 			   MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
10818 	if (err)
10819 		goto err_gen_pool_add;
10820 
10821 	mlxsw_sp->router->rifs_table = rifs_table;
10822 
10823 	return 0;
10824 
10825 err_gen_pool_add:
10826 	gen_pool_destroy(rifs_table);
10827 	return err;
10828 }
10829 
10830 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
10831 {
10832 	gen_pool_destroy(mlxsw_sp->router->rifs_table);
10833 }
10834 
10835 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
10836 {
10837 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10838 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10839 	struct mlxsw_core *core = mlxsw_sp->core;
10840 	int err;
10841 
10842 	if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
10843 		return -EIO;
10844 	mlxsw_sp->router->max_rif_mac_profile =
10845 		MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
10846 
10847 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
10848 					 sizeof(struct mlxsw_sp_rif *),
10849 					 GFP_KERNEL);
10850 	if (!mlxsw_sp->router->rifs)
10851 		return -ENOMEM;
10852 
10853 	err = mlxsw_sp_rifs_table_init(mlxsw_sp);
10854 	if (err)
10855 		goto err_rifs_table_init;
10856 
10857 	idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
10858 	atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
10859 	atomic_set(&mlxsw_sp->router->rifs_count, 0);
10860 	devl_resource_occ_get_register(devlink,
10861 				       MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
10862 				       mlxsw_sp_rif_mac_profiles_occ_get,
10863 				       mlxsw_sp);
10864 	devl_resource_occ_get_register(devlink,
10865 				       MLXSW_SP_RESOURCE_RIFS,
10866 				       mlxsw_sp_rifs_occ_get,
10867 				       mlxsw_sp);
10868 
10869 	return 0;
10870 
10871 err_rifs_table_init:
10872 	kfree(mlxsw_sp->router->rifs);
10873 	return err;
10874 }
10875 
10876 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
10877 {
10878 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10879 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10880 	int i;
10881 
10882 	WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
10883 	for (i = 0; i < max_rifs; i++)
10884 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
10885 
10886 	devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
10887 	devl_resource_occ_get_unregister(devlink,
10888 					 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
10889 	WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
10890 	idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
10891 	mlxsw_sp_rifs_table_fini(mlxsw_sp);
10892 	kfree(mlxsw_sp->router->rifs);
10893 }
10894 
10895 static int
10896 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
10897 {
10898 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
10899 
10900 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
10901 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
10902 }
10903 
10904 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10905 {
10906 	int err;
10907 
10908 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10909 
10910 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10911 	if (err)
10912 		return err;
10913 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10914 	if (err)
10915 		return err;
10916 
10917 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10918 }
10919 
10920 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
10921 {
10922 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
10923 	return mlxsw_sp_ipips_init(mlxsw_sp);
10924 }
10925 
10926 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
10927 {
10928 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
10929 	return mlxsw_sp_ipips_init(mlxsw_sp);
10930 }
10931 
10932 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
10933 {
10934 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
10935 }
10936 
10937 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10938 {
10939 	struct mlxsw_sp_router *router;
10940 
10941 	/* Flush pending FIB notifications and then flush the device's
10942 	 * table before requesting another dump. The FIB notification
10943 	 * block is unregistered, so no need to take RTNL.
10944 	 */
10945 	mlxsw_core_flush_owq();
10946 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10947 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10948 }
10949 
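/* Multipath (ECMP) hashing: the RECR2 register controls which packet
 * headers and fields feed the routing hash. The config below collects the
 * requested header/field enables as bitmaps, for both the outer packet
 * and, in the case of tunnels, the inner one, and is applied to the
 * register in mlxsw_sp_mp_hash_init().
 */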
10950 #ifdef CONFIG_IP_ROUTE_MULTIPATH
10951 struct mlxsw_sp_mp_hash_config {
10952 	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
10953 	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
10954 	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
10955 	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
10956 	bool inc_parsing_depth;
10957 };
10958 
10959 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
10960 	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
10961 
10962 #define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
10963 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
10964 
10965 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
10966 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
10967 
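/* For example, MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL) expands
 * to bitmap_set(fields, MLXSW_REG_RECR2_IPV4_PROTOCOL, 1). The _RANGE_
 * variant sets _nr consecutive bits starting at the named field, e.g.
 * MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4) enables
 * IPV4_SIP0 through IPV4_SIP3 in one call.
 */
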
10968 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
10969 {
10970 	unsigned long *inner_headers = config->inner_headers;
10971 	unsigned long *inner_fields = config->inner_fields;
10972 
10973 	/* IPv4 inner */
10974 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10975 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10976 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10977 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10978 	/* IPv6 inner */
10979 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10980 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10981 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10982 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10983 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10984 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10985 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10986 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10987 }
10988 
10989 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10990 {
10991 	unsigned long *headers = config->headers;
10992 	unsigned long *fields = config->fields;
10993 
10994 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10995 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10996 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10997 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10998 }
10999 
11000 static void
11001 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
11002 			      u32 hash_fields)
11003 {
11004 	unsigned long *inner_headers = config->inner_headers;
11005 	unsigned long *inner_fields = config->inner_fields;
11006 
11007 	/* IPv4 Inner */
11008 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
11009 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
11010 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
11011 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
11012 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
11013 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
11014 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11015 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
11016 	/* IPv6 inner */
11017 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
11018 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
11019 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
11020 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
11021 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
11022 	}
11023 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
11024 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
11025 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
11026 	}
11027 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
11028 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
11029 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
11030 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
11031 	/* L4 inner */
11032 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
11033 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
11034 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
11035 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
11036 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
11037 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
11038 }
11039 
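/* The cases below mirror the values of the net.ipv4.fib_multipath_hash_policy
 * sysctl: 0 - L3 (source and destination addresses), 1 - L4 (5-tuple),
 * 2 - L3, also hashing on the inner L3 of tunneled packets, 3 - custom
 * set of fields taken from net.ipv4.fib_multipath_hash_fields.
 */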
11040 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
11041 				   struct mlxsw_sp_mp_hash_config *config)
11042 {
11043 	struct net *net = mlxsw_sp_net(mlxsw_sp);
11044 	unsigned long *headers = config->headers;
11045 	unsigned long *fields = config->fields;
11046 	u32 hash_fields;
11047 
11048 	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
11049 	case 0:
11050 		mlxsw_sp_mp4_hash_outer_addr(config);
11051 		break;
11052 	case 1:
11053 		mlxsw_sp_mp4_hash_outer_addr(config);
11054 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11055 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11056 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11057 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11058 		break;
11059 	case 2:
11060 		/* Outer */
11061 		mlxsw_sp_mp4_hash_outer_addr(config);
11062 		/* Inner */
11063 		mlxsw_sp_mp_hash_inner_l3(config);
11064 		break;
11065 	case 3:
11066 		hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
11067 		/* Outer */
11068 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
11069 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
11070 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
11071 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
11072 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
11073 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
11074 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
11075 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11076 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
11077 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11078 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11079 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11080 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11081 		/* Inner */
11082 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11083 		break;
11084 	}
11085 }
11086 
11087 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
11088 {
11089 	unsigned long *headers = config->headers;
11090 	unsigned long *fields = config->fields;
11091 
11092 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11093 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11094 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11095 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11096 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11097 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11098 }
11099 
11100 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
11101 				   struct mlxsw_sp_mp_hash_config *config)
11102 {
11103 	u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
11104 	unsigned long *headers = config->headers;
11105 	unsigned long *fields = config->fields;
11106 
11107 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
11108 	case 0:
11109 		mlxsw_sp_mp6_hash_outer_addr(config);
11110 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11111 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11112 		break;
11113 	case 1:
11114 		mlxsw_sp_mp6_hash_outer_addr(config);
11115 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11116 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11117 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11118 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11119 		break;
11120 	case 2:
11121 		/* Outer */
11122 		mlxsw_sp_mp6_hash_outer_addr(config);
11123 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11124 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11125 		/* Inner */
11126 		mlxsw_sp_mp_hash_inner_l3(config);
11127 		config->inc_parsing_depth = true;
11128 		break;
11129 	case 3:
11130 		/* Outer */
11131 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
11132 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
11133 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
11134 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
11135 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
11136 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
11137 		}
11138 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
11139 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
11140 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
11141 		}
11142 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
11143 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
11144 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
11145 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
11146 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
11147 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
11148 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
11149 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
11150 		/* Inner */
11151 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
11152 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
11153 			config->inc_parsing_depth = true;
11154 		break;
11155 	}
11156 }
11157 
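/* Hashing on inner headers requires the parser to look deeper into the
 * packet than it does by default, hence inc_parsing_depth. The helper
 * below turns the old/new flag pair into at most one increment or
 * decrement of the shared parsing depth, and keeps
 * router->inc_parsing_depth in sync so repeated reconfigurations stay
 * balanced.
 */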
11158 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
11159 						 bool old_inc_parsing_depth,
11160 						 bool new_inc_parsing_depth)
11161 {
11162 	int err;
11163 
11164 	if (!old_inc_parsing_depth && new_inc_parsing_depth) {
11165 		err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
11166 		if (err)
11167 			return err;
11168 		mlxsw_sp->router->inc_parsing_depth = true;
11169 	} else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
11170 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
11171 		mlxsw_sp->router->inc_parsing_depth = false;
11172 	}
11173 
11174 	return 0;
11175 }
11176 
11177 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
11178 {
11179 	bool old_inc_parsing_depth, new_inc_parsing_depth;
11180 	struct mlxsw_sp_mp_hash_config config = {};
11181 	char recr2_pl[MLXSW_REG_RECR2_LEN];
11182 	unsigned long bit;
11183 	u32 seed;
11184 	int err;
11185 
11186 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
11187 	mlxsw_reg_recr2_pack(recr2_pl, seed);
11188 	mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
11189 	mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
11190 
11191 	old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11192 	new_inc_parsing_depth = config.inc_parsing_depth;
11193 	err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
11194 						    old_inc_parsing_depth,
11195 						    new_inc_parsing_depth);
11196 	if (err)
11197 		return err;
11198 
11199 	for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
11200 		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
11201 	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
11202 		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
11203 	for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
11204 		mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
11205 	for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
11206 		mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
11207 
11208 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
11209 	if (err)
11210 		goto err_reg_write;
11211 
11212 	return 0;
11213 
11214 err_reg_write:
11215 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
11216 					      old_inc_parsing_depth);
11217 	return err;
11218 }
11219 
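/* Only the parsing depth is unwound on exit; the RECR2 configuration
 * itself is not explicitly reset here.
 */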
11220 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
11221 {
11222 	bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
11223 
11224 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
11225 					      false);
11226 }
11227 #else
11228 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
11229 {
11230 	return 0;
11231 }
11232 
11233 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
11234 {
11235 }
11236 #endif
11237 
11238 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
11239 {
11240 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
11241 	unsigned int i;
11242 
11243 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
11244 
11245 	/* HW determines switch priority from the DSCP bits, but the kernel
11246 	 * still derives it from the whole ToS byte. Bridge the mismatch by
11247 	 * programming each DSCP with the priority the kernel computes for
11248 	 * the matching ToS, i.e. the DSCP shifted past the two ECN bits.
11249 	 */
11250 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
11251 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
11252 
11253 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
11254 }
11255 
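/* RGCR globally enables the router for IPv4 and IPv6 and caps the number
 * of RIFs. The usp bit mirrors the net.ipv4.ip_forward_update_priority
 * sysctl, which controls whether the priority of forwarded packets is
 * updated (here, presumably, by the switch itself).
 */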
11256 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
11257 {
11258 	struct net *net = mlxsw_sp_net(mlxsw_sp);
11259 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
11260 	u64 max_rifs;
11261 	bool usp;
11262 
11263 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
11264 		return -EIO;
11265 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
11266 	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
11267 
11268 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
11269 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
11270 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
11271 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
11272 }
11273 
11274 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
11275 {
11276 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
11277 
11278 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
11279 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
11280 }
11281 
11282 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp,
11283 				struct netlink_ext_ack *extack)
11284 {
11285 	struct mlxsw_sp_router *router = mlxsw_sp->router;
11286 	struct mlxsw_sp_rif *lb_rif;
11287 	int err;
11288 
11289 	router->lb_crif = mlxsw_sp_crif_alloc(NULL);
11290 	if (!router->lb_crif)
11291 		return -ENOMEM;
11292 
11293 	/* Create a generic loopback RIF associated with the main table
11294 	 * (default VRF). Any table can be used, but the main table exists
11295 	 * anyway, so we do not waste resources. Loopback RIFs are usually
11296 	 * created with a NULL CRIF, but this RIF is used as a fallback RIF
11297 	 * for blackhole nexthops, and nexthops expect to have a valid CRIF.
11298 	 */
11299 	lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif,
11300 				     extack);
11301 	if (IS_ERR(lb_rif)) {
11302 		err = PTR_ERR(lb_rif);
11303 		goto err_ul_rif_get;
11304 	}
11305 
11306 	return 0;
11307 
11308 err_ul_rif_get:
11309 	mlxsw_sp_crif_free(router->lb_crif);
11310 	return err;
11311 }
11312 
11313 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
11314 {
11315 	mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif);
11316 	mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif);
11317 }
11318 
11319 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
11320 {
11321 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
11322 
11323 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
11324 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
11325 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
11326 
11327 	return 0;
11328 }
11329 
11330 const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
11331 	.init = mlxsw_sp1_router_init,
11332 	.ipips_init = mlxsw_sp1_ipips_init,
11333 };
11334 
11335 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
11336 {
11337 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
11338 
11339 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
11340 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
11341 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
11342 
11343 	return 0;
11344 }
11345 
11346 const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
11347 	.init = mlxsw_sp2_router_init,
11348 	.ipips_init = mlxsw_sp2_ipips_init,
11349 };
11350 
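/* Initialization proceeds from the per-ASIC ops through hardware-global
 * state (RGCR), the various data structures (CRIF and nexthop hash
 * tables, LPM trees, virtual routers) and finally the notifier blocks.
 * The error labels unwind in exact reverse order, and
 * mlxsw_sp_router_fini() below tears everything down along the same path.
 */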
11351 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
11352 			 struct netlink_ext_ack *extack)
11353 {
11354 	struct mlxsw_sp_router *router;
11355 	struct notifier_block *nb;
11356 	int err;
11357 
11358 	router = kzalloc(sizeof(*router), GFP_KERNEL);
11359 	if (!router)
11360 		return -ENOMEM;
11361 	mutex_init(&router->lock);
11362 	mlxsw_sp->router = router;
11363 	router->mlxsw_sp = mlxsw_sp;
11364 
11365 	err = mlxsw_sp->router_ops->init(mlxsw_sp);
11366 	if (err)
11367 		goto err_router_ops_init;
11368 
11369 	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
11370 	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
11371 			  mlxsw_sp_nh_grp_activity_work);
11372 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
11373 	err = __mlxsw_sp_router_init(mlxsw_sp);
11374 	if (err)
11375 		goto err_router_init;
11376 
11377 	err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
11378 	if (err)
11379 		goto err_ipips_init;
11380 
11381 	err = rhashtable_init(&mlxsw_sp->router->crif_ht,
11382 			      &mlxsw_sp_crif_ht_params);
11383 	if (err)
11384 		goto err_crif_ht_init;
11385 
11386 	err = mlxsw_sp_rifs_init(mlxsw_sp);
11387 	if (err)
11388 		goto err_rifs_init;
11389 
11390 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
11391 			      &mlxsw_sp_nexthop_ht_params);
11392 	if (err)
11393 		goto err_nexthop_ht_init;
11394 
11395 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
11396 			      &mlxsw_sp_nexthop_group_ht_params);
11397 	if (err)
11398 		goto err_nexthop_group_ht_init;
11399 
11400 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
11401 	err = mlxsw_sp_lpm_init(mlxsw_sp);
11402 	if (err)
11403 		goto err_lpm_init;
11404 
11405 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
11406 	if (err)
11407 		goto err_mr_init;
11408 
11409 	err = mlxsw_sp_vrs_init(mlxsw_sp);
11410 	if (err)
11411 		goto err_vrs_init;
11412 
11413 	err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack);
11414 	if (err)
11415 		goto err_lb_rif_init;
11416 
11417 	err = mlxsw_sp_neigh_init(mlxsw_sp);
11418 	if (err)
11419 		goto err_neigh_init;
11420 
11421 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
11422 	if (err)
11423 		goto err_mp_hash_init;
11424 
11425 	err = mlxsw_sp_dscp_init(mlxsw_sp);
11426 	if (err)
11427 		goto err_dscp_init;
11428 
11429 	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
11430 	err = register_inetaddr_notifier(&router->inetaddr_nb);
11431 	if (err)
11432 		goto err_register_inetaddr_notifier;
11433 
11434 	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
11435 	err = register_inet6addr_notifier(&router->inet6addr_nb);
11436 	if (err)
11437 		goto err_register_inet6addr_notifier;
11438 
11439 	router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
11440 	err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11441 	if (err)
11442 		goto err_register_inetaddr_valid_notifier;
11443 
11444 	nb = &router->inet6addr_valid_nb;
11445 	nb->notifier_call = mlxsw_sp_inet6addr_valid_event;
11446 	err = register_inet6addr_validator_notifier(nb);
11447 	if (err)
11448 		goto err_register_inet6addr_valid_notifier;
11449 
11450 	mlxsw_sp->router->netevent_nb.notifier_call =
11451 		mlxsw_sp_router_netevent_event;
11452 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
11453 	if (err)
11454 		goto err_register_netevent_notifier;
11455 
11456 	mlxsw_sp->router->nexthop_nb.notifier_call =
11457 		mlxsw_sp_nexthop_obj_event;
11458 	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11459 					&mlxsw_sp->router->nexthop_nb,
11460 					extack);
11461 	if (err)
11462 		goto err_register_nexthop_notifier;
11463 
11464 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
11465 	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
11466 				    &mlxsw_sp->router->fib_nb,
11467 				    mlxsw_sp_router_fib_dump_flush, extack);
11468 	if (err)
11469 		goto err_register_fib_notifier;
11470 
11471 	mlxsw_sp->router->netdevice_nb.notifier_call =
11472 		mlxsw_sp_router_netdevice_event;
11473 	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11474 					      &mlxsw_sp->router->netdevice_nb);
11475 	if (err)
11476 		goto err_register_netdev_notifier;
11477 
11478 	return 0;
11479 
11480 err_register_netdev_notifier:
11481 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
11482 				&mlxsw_sp->router->fib_nb);
11483 err_register_fib_notifier:
11484 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11485 				    &mlxsw_sp->router->nexthop_nb);
11486 err_register_nexthop_notifier:
11487 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
11488 err_register_netevent_notifier:
11489 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
11490 err_register_inet6addr_valid_notifier:
11491 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11492 err_register_inetaddr_valid_notifier:
11493 	unregister_inet6addr_notifier(&router->inet6addr_nb);
11494 err_register_inet6addr_notifier:
11495 	unregister_inetaddr_notifier(&router->inetaddr_nb);
11496 err_register_inetaddr_notifier:
11497 	mlxsw_core_flush_owq();
11498 err_dscp_init:
11499 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
11500 err_mp_hash_init:
11501 	mlxsw_sp_neigh_fini(mlxsw_sp);
11502 err_neigh_init:
11503 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
11504 err_lb_rif_init:
11505 	mlxsw_sp_vrs_fini(mlxsw_sp);
11506 err_vrs_init:
11507 	mlxsw_sp_mr_fini(mlxsw_sp);
11508 err_mr_init:
11509 	mlxsw_sp_lpm_fini(mlxsw_sp);
11510 err_lpm_init:
11511 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
11512 err_nexthop_group_ht_init:
11513 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
11514 err_nexthop_ht_init:
11515 	mlxsw_sp_rifs_fini(mlxsw_sp);
11516 err_rifs_init:
11517 	rhashtable_destroy(&mlxsw_sp->router->crif_ht);
11518 err_crif_ht_init:
11519 	mlxsw_sp_ipips_fini(mlxsw_sp);
11520 err_ipips_init:
11521 	__mlxsw_sp_router_fini(mlxsw_sp);
11522 err_router_init:
11523 	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
11524 err_router_ops_init:
11525 	mutex_destroy(&mlxsw_sp->router->lock);
11526 	kfree(mlxsw_sp->router);
11527 	return err;
11528 }
11529 
11530 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
11531 {
11532 	struct mlxsw_sp_router *router = mlxsw_sp->router;
11533 
11534 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
11535 					  &router->netdevice_nb);
11536 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
11537 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
11538 				    &router->nexthop_nb);
11539 	unregister_netevent_notifier(&router->netevent_nb);
11540 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
11541 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
11542 	unregister_inet6addr_notifier(&router->inet6addr_nb);
11543 	unregister_inetaddr_notifier(&router->inetaddr_nb);
11544 	mlxsw_core_flush_owq();
11545 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
11546 	mlxsw_sp_neigh_fini(mlxsw_sp);
11547 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
11548 	mlxsw_sp_vrs_fini(mlxsw_sp);
11549 	mlxsw_sp_mr_fini(mlxsw_sp);
11550 	mlxsw_sp_lpm_fini(mlxsw_sp);
11551 	rhashtable_destroy(&router->nexthop_group_ht);
11552 	rhashtable_destroy(&router->nexthop_ht);
11553 	mlxsw_sp_rifs_fini(mlxsw_sp);
11554 	rhashtable_destroy(&router->crif_ht);
11555 	mlxsw_sp_ipips_fini(mlxsw_sp);
11556 	__mlxsw_sp_router_fini(mlxsw_sp);
11557 	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
11558 	mutex_destroy(&router->lock);
11559 	kfree(router);
11560 }
11561