1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <linux/net_namespace.h>
20 #include <linux/mutex.h>
21 #include <linux/genalloc.h>
22 #include <net/netevent.h>
23 #include <net/neighbour.h>
24 #include <net/arp.h>
25 #include <net/inet_dscp.h>
26 #include <net/ip_fib.h>
27 #include <net/ip6_fib.h>
28 #include <net/nexthop.h>
29 #include <net/fib_rules.h>
30 #include <net/ip_tunnels.h>
31 #include <net/l3mdev.h>
32 #include <net/addrconf.h>
33 #include <net/ndisc.h>
34 #include <net/ipv6.h>
35 #include <net/fib_notifier.h>
36 #include <net/switchdev.h>
37 
38 #include "spectrum.h"
39 #include "core.h"
40 #include "reg.h"
41 #include "spectrum_cnt.h"
42 #include "spectrum_dpipe.h"
43 #include "spectrum_ipip.h"
44 #include "spectrum_mr.h"
45 #include "spectrum_mr_tcam.h"
46 #include "spectrum_router.h"
47 #include "spectrum_span.h"
48 
49 struct mlxsw_sp_fib;
50 struct mlxsw_sp_vr;
51 struct mlxsw_sp_lpm_tree;
52 struct mlxsw_sp_rif_ops;
53 
54 struct mlxsw_sp_rif {
55 	struct list_head nexthop_list;
56 	struct list_head neigh_list;
57 	struct net_device *dev; /* NULL for underlay RIF */
58 	struct mlxsw_sp_fid *fid;
59 	unsigned char addr[ETH_ALEN];
60 	int mtu;
61 	u16 rif_index;
62 	u8 mac_profile_id;
63 	u8 rif_entries;
64 	u16 vr_id;
65 	const struct mlxsw_sp_rif_ops *ops;
66 	struct mlxsw_sp *mlxsw_sp;
67 
68 	unsigned int counter_ingress;
69 	bool counter_ingress_valid;
70 	unsigned int counter_egress;
71 	bool counter_egress_valid;
72 };
73 
74 struct mlxsw_sp_rif_params {
75 	struct net_device *dev;
76 	union {
77 		u16 system_port;
78 		u16 lag_id;
79 	};
80 	u16 vid;
81 	bool lag;
82 	bool double_entry;
83 };
84 
85 struct mlxsw_sp_rif_subport {
86 	struct mlxsw_sp_rif common;
87 	refcount_t ref_count;
88 	union {
89 		u16 system_port;
90 		u16 lag_id;
91 	};
92 	u16 vid;
93 	bool lag;
94 };
95 
96 struct mlxsw_sp_rif_ipip_lb {
97 	struct mlxsw_sp_rif common;
98 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
99 	u16 ul_vr_id;	/* Spectrum-1. */
100 	u16 ul_rif_id;	/* Spectrum-2+. */
101 };
102 
103 struct mlxsw_sp_rif_params_ipip_lb {
104 	struct mlxsw_sp_rif_params common;
105 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
106 };
107 
108 struct mlxsw_sp_rif_ops {
109 	enum mlxsw_sp_rif_type type;
110 	size_t rif_size;
111 
112 	void (*setup)(struct mlxsw_sp_rif *rif,
113 		      const struct mlxsw_sp_rif_params *params);
114 	int (*configure)(struct mlxsw_sp_rif *rif,
115 			 struct netlink_ext_ack *extack);
116 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
117 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
118 					 struct netlink_ext_ack *extack);
119 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
120 };
121 
122 struct mlxsw_sp_rif_mac_profile {
123 	unsigned char mac_prefix[ETH_ALEN];
124 	refcount_t ref_count;
125 	u8 id;
126 };
127 
128 struct mlxsw_sp_router_ops {
129 	int (*init)(struct mlxsw_sp *mlxsw_sp);
130 	int (*ipips_init)(struct mlxsw_sp *mlxsw_sp);
131 };
132 
133 static struct mlxsw_sp_rif *
134 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
135 			 const struct net_device *dev);
136 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
137 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
138 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
139 				  struct mlxsw_sp_lpm_tree *lpm_tree);
140 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
141 				     const struct mlxsw_sp_fib *fib,
142 				     u8 tree_id);
143 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
144 				       const struct mlxsw_sp_fib *fib);
145 
146 static unsigned int *
147 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
148 			   enum mlxsw_sp_rif_counter_dir dir)
149 {
150 	switch (dir) {
151 	case MLXSW_SP_RIF_COUNTER_EGRESS:
152 		return &rif->counter_egress;
153 	case MLXSW_SP_RIF_COUNTER_INGRESS:
154 		return &rif->counter_ingress;
155 	}
156 	return NULL;
157 }
158 
159 static bool
160 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
161 			       enum mlxsw_sp_rif_counter_dir dir)
162 {
163 	switch (dir) {
164 	case MLXSW_SP_RIF_COUNTER_EGRESS:
165 		return rif->counter_egress_valid;
166 	case MLXSW_SP_RIF_COUNTER_INGRESS:
167 		return rif->counter_ingress_valid;
168 	}
169 	return false;
170 }
171 
172 static void
173 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
174 			       enum mlxsw_sp_rif_counter_dir dir,
175 			       bool valid)
176 {
177 	switch (dir) {
178 	case MLXSW_SP_RIF_COUNTER_EGRESS:
179 		rif->counter_egress_valid = valid;
180 		break;
181 	case MLXSW_SP_RIF_COUNTER_INGRESS:
182 		rif->counter_ingress_valid = valid;
183 		break;
184 	}
185 }
186 
187 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
188 				     unsigned int counter_index, bool enable,
189 				     enum mlxsw_sp_rif_counter_dir dir)
190 {
191 	char ritr_pl[MLXSW_REG_RITR_LEN];
192 	bool is_egress = false;
193 	int err;
194 
195 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
196 		is_egress = true;
197 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
198 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
199 	if (err)
200 		return err;
201 
202 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
203 				    is_egress);
204 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
205 }
206 
207 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
208 				   struct mlxsw_sp_rif *rif,
209 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
210 {
211 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
212 	unsigned int *p_counter_index;
213 	bool valid;
214 	int err;
215 
216 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
217 	if (!valid)
218 		return -EINVAL;
219 
220 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
221 	if (!p_counter_index)
222 		return -EINVAL;
223 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
224 			     MLXSW_REG_RICNT_OPCODE_NOP);
225 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
226 	if (err)
227 		return err;
228 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
229 	return 0;
230 }
231 
232 struct mlxsw_sp_rif_counter_set_basic {
233 	u64 good_unicast_packets;
234 	u64 good_multicast_packets;
235 	u64 good_broadcast_packets;
236 	u64 good_unicast_bytes;
237 	u64 good_multicast_bytes;
238 	u64 good_broadcast_bytes;
239 	u64 error_packets;
240 	u64 discard_packets;
241 	u64 error_bytes;
242 	u64 discard_bytes;
243 };
244 
245 static int
246 mlxsw_sp_rif_counter_fetch_clear(struct mlxsw_sp_rif *rif,
247 				 enum mlxsw_sp_rif_counter_dir dir,
248 				 struct mlxsw_sp_rif_counter_set_basic *set)
249 {
250 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
251 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
252 	unsigned int *p_counter_index;
253 	int err;
254 
255 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
256 		return -EINVAL;
257 
258 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
259 	if (!p_counter_index)
260 		return -EINVAL;
261 
262 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
263 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
264 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
265 	if (err)
266 		return err;
267 
268 	if (!set)
269 		return 0;
270 
271 #define MLXSW_SP_RIF_COUNTER_EXTRACT(NAME)				\
272 		(set->NAME = mlxsw_reg_ricnt_ ## NAME ## _get(ricnt_pl))
273 
274 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_packets);
275 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_packets);
276 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_packets);
277 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_unicast_bytes);
278 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_multicast_bytes);
279 	MLXSW_SP_RIF_COUNTER_EXTRACT(good_broadcast_bytes);
280 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_packets);
281 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_packets);
282 	MLXSW_SP_RIF_COUNTER_EXTRACT(error_bytes);
283 	MLXSW_SP_RIF_COUNTER_EXTRACT(discard_bytes);
284 
285 #undef MLXSW_SP_RIF_COUNTER_EXTRACT
286 
287 	return 0;
288 }
289 
290 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
291 				      unsigned int counter_index)
292 {
293 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
294 
295 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
296 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
297 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
298 }
299 
300 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp_rif *rif,
301 			       enum mlxsw_sp_rif_counter_dir dir)
302 {
303 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
304 	unsigned int *p_counter_index;
305 	int err;
306 
307 	if (mlxsw_sp_rif_counter_valid_get(rif, dir))
308 		return 0;
309 
310 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
311 	if (!p_counter_index)
312 		return -EINVAL;
313 
314 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
315 				     p_counter_index);
316 	if (err)
317 		return err;
318 
319 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
320 	if (err)
321 		goto err_counter_clear;
322 
323 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
324 					*p_counter_index, true, dir);
325 	if (err)
326 		goto err_counter_edit;
327 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
328 	return 0;
329 
330 err_counter_edit:
331 err_counter_clear:
332 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
333 			      *p_counter_index);
334 	return err;
335 }
336 
337 void mlxsw_sp_rif_counter_free(struct mlxsw_sp_rif *rif,
338 			       enum mlxsw_sp_rif_counter_dir dir)
339 {
340 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
341 	unsigned int *p_counter_index;
342 
343 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
344 		return;
345 
346 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
347 	if (WARN_ON(!p_counter_index))
348 		return;
349 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
350 				  *p_counter_index, false, dir);
351 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
352 			      *p_counter_index);
353 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
354 }
355 
356 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
357 {
358 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
359 	struct devlink *devlink;
360 
361 	devlink = priv_to_devlink(mlxsw_sp->core);
362 	if (!devlink_dpipe_table_counter_enabled(devlink,
363 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
364 		return;
365 	mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
366 }
367 
368 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
369 {
370 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
371 }
372 
373 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
374 
375 struct mlxsw_sp_prefix_usage {
376 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
377 };
378 
379 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
380 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
381 
382 static bool
383 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
384 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
385 {
386 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
387 }
388 
389 static void
390 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
391 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
392 {
393 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
394 }
395 
396 static void
397 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
398 			  unsigned char prefix_len)
399 {
400 	set_bit(prefix_len, prefix_usage->b);
401 }
402 
403 static void
404 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
405 			    unsigned char prefix_len)
406 {
407 	clear_bit(prefix_len, prefix_usage->b);
408 }
409 
410 struct mlxsw_sp_fib_key {
411 	unsigned char addr[sizeof(struct in6_addr)];
412 	unsigned char prefix_len;
413 };
414 
415 enum mlxsw_sp_fib_entry_type {
416 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
417 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
418 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
419 	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
420 	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
421 
422 	/* This is a special case of local delivery, where a packet should be
423 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
424 	 * because that's a type of next hop, not of FIB entry. (There can be
425 	 * several next hops in a REMOTE entry, and some of them may be
426 	 * encapsulating entries.)
427 	 */
428 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
429 	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
430 };
431 
432 struct mlxsw_sp_nexthop_group_info;
433 struct mlxsw_sp_nexthop_group;
434 struct mlxsw_sp_fib_entry;
435 
436 struct mlxsw_sp_fib_node {
437 	struct mlxsw_sp_fib_entry *fib_entry;
438 	struct list_head list;
439 	struct rhash_head ht_node;
440 	struct mlxsw_sp_fib *fib;
441 	struct mlxsw_sp_fib_key key;
442 };
443 
444 struct mlxsw_sp_fib_entry_decap {
445 	struct mlxsw_sp_ipip_entry *ipip_entry;
446 	u32 tunnel_index;
447 };
448 
449 struct mlxsw_sp_fib_entry {
450 	struct mlxsw_sp_fib_node *fib_node;
451 	enum mlxsw_sp_fib_entry_type type;
452 	struct list_head nexthop_group_node;
453 	struct mlxsw_sp_nexthop_group *nh_group;
454 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
455 };
456 
457 struct mlxsw_sp_fib4_entry {
458 	struct mlxsw_sp_fib_entry common;
459 	struct fib_info *fi;
460 	u32 tb_id;
461 	dscp_t dscp;
462 	u8 type;
463 };
464 
465 struct mlxsw_sp_fib6_entry {
466 	struct mlxsw_sp_fib_entry common;
467 	struct list_head rt6_list;
468 	unsigned int nrt6;
469 };
470 
471 struct mlxsw_sp_rt6 {
472 	struct list_head list;
473 	struct fib6_info *rt;
474 };
475 
476 struct mlxsw_sp_lpm_tree {
477 	u8 id; /* tree ID */
478 	unsigned int ref_count;
479 	enum mlxsw_sp_l3proto proto;
480 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
481 	struct mlxsw_sp_prefix_usage prefix_usage;
482 };
483 
484 struct mlxsw_sp_fib {
485 	struct rhashtable ht;
486 	struct list_head node_list;
487 	struct mlxsw_sp_vr *vr;
488 	struct mlxsw_sp_lpm_tree *lpm_tree;
489 	enum mlxsw_sp_l3proto proto;
490 };
491 
492 struct mlxsw_sp_vr {
493 	u16 id; /* virtual router ID */
494 	u32 tb_id; /* kernel fib table id */
495 	unsigned int rif_count;
496 	struct mlxsw_sp_fib *fib4;
497 	struct mlxsw_sp_fib *fib6;
498 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
499 	struct mlxsw_sp_rif *ul_rif;
500 	refcount_t ul_rif_refcnt;
501 };
502 
503 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
504 
505 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
506 						struct mlxsw_sp_vr *vr,
507 						enum mlxsw_sp_l3proto proto)
508 {
509 	struct mlxsw_sp_lpm_tree *lpm_tree;
510 	struct mlxsw_sp_fib *fib;
511 	int err;
512 
513 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
514 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
515 	if (!fib)
516 		return ERR_PTR(-ENOMEM);
517 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
518 	if (err)
519 		goto err_rhashtable_init;
520 	INIT_LIST_HEAD(&fib->node_list);
521 	fib->proto = proto;
522 	fib->vr = vr;
523 	fib->lpm_tree = lpm_tree;
524 	mlxsw_sp_lpm_tree_hold(lpm_tree);
525 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
526 	if (err)
527 		goto err_lpm_tree_bind;
528 	return fib;
529 
530 err_lpm_tree_bind:
531 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
532 err_rhashtable_init:
533 	kfree(fib);
534 	return ERR_PTR(err);
535 }
536 
537 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
538 				 struct mlxsw_sp_fib *fib)
539 {
540 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
541 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
542 	WARN_ON(!list_empty(&fib->node_list));
543 	rhashtable_destroy(&fib->ht);
544 	kfree(fib);
545 }
546 
547 static struct mlxsw_sp_lpm_tree *
548 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
549 {
550 	static struct mlxsw_sp_lpm_tree *lpm_tree;
551 	int i;
552 
553 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
554 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
555 		if (lpm_tree->ref_count == 0)
556 			return lpm_tree;
557 	}
558 	return NULL;
559 }
560 
561 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
562 				   struct mlxsw_sp_lpm_tree *lpm_tree)
563 {
564 	char ralta_pl[MLXSW_REG_RALTA_LEN];
565 
566 	mlxsw_reg_ralta_pack(ralta_pl, true,
567 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
568 			     lpm_tree->id);
569 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
570 }
571 
572 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
573 				   struct mlxsw_sp_lpm_tree *lpm_tree)
574 {
575 	char ralta_pl[MLXSW_REG_RALTA_LEN];
576 
577 	mlxsw_reg_ralta_pack(ralta_pl, false,
578 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
579 			     lpm_tree->id);
580 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
581 }
582 
583 static int
584 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
585 				  struct mlxsw_sp_prefix_usage *prefix_usage,
586 				  struct mlxsw_sp_lpm_tree *lpm_tree)
587 {
588 	char ralst_pl[MLXSW_REG_RALST_LEN];
589 	u8 root_bin = 0;
590 	u8 prefix;
591 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
592 
593 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
594 		root_bin = prefix;
595 
596 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
597 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
598 		if (prefix == 0)
599 			continue;
600 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
601 					 MLXSW_REG_RALST_BIN_NO_CHILD);
602 		last_prefix = prefix;
603 	}
604 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
605 }
606 
607 static struct mlxsw_sp_lpm_tree *
608 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
609 			 struct mlxsw_sp_prefix_usage *prefix_usage,
610 			 enum mlxsw_sp_l3proto proto)
611 {
612 	struct mlxsw_sp_lpm_tree *lpm_tree;
613 	int err;
614 
615 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
616 	if (!lpm_tree)
617 		return ERR_PTR(-EBUSY);
618 	lpm_tree->proto = proto;
619 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
620 	if (err)
621 		return ERR_PTR(err);
622 
623 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
624 						lpm_tree);
625 	if (err)
626 		goto err_left_struct_set;
627 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
628 	       sizeof(lpm_tree->prefix_usage));
629 	memset(&lpm_tree->prefix_ref_count, 0,
630 	       sizeof(lpm_tree->prefix_ref_count));
631 	lpm_tree->ref_count = 1;
632 	return lpm_tree;
633 
634 err_left_struct_set:
635 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
636 	return ERR_PTR(err);
637 }
638 
639 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
640 				      struct mlxsw_sp_lpm_tree *lpm_tree)
641 {
642 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
643 }
644 
645 static struct mlxsw_sp_lpm_tree *
646 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
647 		      struct mlxsw_sp_prefix_usage *prefix_usage,
648 		      enum mlxsw_sp_l3proto proto)
649 {
650 	struct mlxsw_sp_lpm_tree *lpm_tree;
651 	int i;
652 
653 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
654 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
655 		if (lpm_tree->ref_count != 0 &&
656 		    lpm_tree->proto == proto &&
657 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
658 					     prefix_usage)) {
659 			mlxsw_sp_lpm_tree_hold(lpm_tree);
660 			return lpm_tree;
661 		}
662 	}
663 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
664 }
665 
666 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
667 {
668 	lpm_tree->ref_count++;
669 }
670 
671 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
672 				  struct mlxsw_sp_lpm_tree *lpm_tree)
673 {
674 	if (--lpm_tree->ref_count == 0)
675 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
676 }
677 
678 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
679 
680 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
681 {
682 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
683 	struct mlxsw_sp_lpm_tree *lpm_tree;
684 	u64 max_trees;
685 	int err, i;
686 
687 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
688 		return -EIO;
689 
690 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
691 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
692 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
693 					     sizeof(struct mlxsw_sp_lpm_tree),
694 					     GFP_KERNEL);
695 	if (!mlxsw_sp->router->lpm.trees)
696 		return -ENOMEM;
697 
698 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
699 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
700 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
701 	}
702 
703 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
704 					 MLXSW_SP_L3_PROTO_IPV4);
705 	if (IS_ERR(lpm_tree)) {
706 		err = PTR_ERR(lpm_tree);
707 		goto err_ipv4_tree_get;
708 	}
709 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
710 
711 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
712 					 MLXSW_SP_L3_PROTO_IPV6);
713 	if (IS_ERR(lpm_tree)) {
714 		err = PTR_ERR(lpm_tree);
715 		goto err_ipv6_tree_get;
716 	}
717 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
718 
719 	return 0;
720 
721 err_ipv6_tree_get:
722 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
723 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
724 err_ipv4_tree_get:
725 	kfree(mlxsw_sp->router->lpm.trees);
726 	return err;
727 }
728 
729 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
730 {
731 	struct mlxsw_sp_lpm_tree *lpm_tree;
732 
733 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
734 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
735 
736 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
737 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
738 
739 	kfree(mlxsw_sp->router->lpm.trees);
740 }
741 
742 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
743 {
744 	return !!vr->fib4 || !!vr->fib6 ||
745 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
746 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
747 }
748 
749 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
750 {
751 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
752 	struct mlxsw_sp_vr *vr;
753 	int i;
754 
755 	for (i = 0; i < max_vrs; i++) {
756 		vr = &mlxsw_sp->router->vrs[i];
757 		if (!mlxsw_sp_vr_is_used(vr))
758 			return vr;
759 	}
760 	return NULL;
761 }
762 
763 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
764 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
765 {
766 	char raltb_pl[MLXSW_REG_RALTB_LEN];
767 
768 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
769 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
770 			     tree_id);
771 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
772 }
773 
774 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
775 				       const struct mlxsw_sp_fib *fib)
776 {
777 	char raltb_pl[MLXSW_REG_RALTB_LEN];
778 
779 	/* Bind to tree 0 which is default */
780 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
781 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
782 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
783 }
784 
785 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
786 {
787 	/* For our purpose, squash main, default and local tables into one */
788 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
789 		tb_id = RT_TABLE_MAIN;
790 	return tb_id;
791 }
792 
793 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
794 					    u32 tb_id)
795 {
796 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
797 	struct mlxsw_sp_vr *vr;
798 	int i;
799 
800 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
801 
802 	for (i = 0; i < max_vrs; i++) {
803 		vr = &mlxsw_sp->router->vrs[i];
804 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
805 			return vr;
806 	}
807 	return NULL;
808 }
809 
810 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
811 				u16 *vr_id)
812 {
813 	struct mlxsw_sp_vr *vr;
814 	int err = 0;
815 
816 	mutex_lock(&mlxsw_sp->router->lock);
817 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
818 	if (!vr) {
819 		err = -ESRCH;
820 		goto out;
821 	}
822 	*vr_id = vr->id;
823 out:
824 	mutex_unlock(&mlxsw_sp->router->lock);
825 	return err;
826 }
827 
828 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
829 					    enum mlxsw_sp_l3proto proto)
830 {
831 	switch (proto) {
832 	case MLXSW_SP_L3_PROTO_IPV4:
833 		return vr->fib4;
834 	case MLXSW_SP_L3_PROTO_IPV6:
835 		return vr->fib6;
836 	}
837 	return NULL;
838 }
839 
840 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
841 					      u32 tb_id,
842 					      struct netlink_ext_ack *extack)
843 {
844 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
845 	struct mlxsw_sp_fib *fib4;
846 	struct mlxsw_sp_fib *fib6;
847 	struct mlxsw_sp_vr *vr;
848 	int err;
849 
850 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
851 	if (!vr) {
852 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
853 		return ERR_PTR(-EBUSY);
854 	}
855 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
856 	if (IS_ERR(fib4))
857 		return ERR_CAST(fib4);
858 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
859 	if (IS_ERR(fib6)) {
860 		err = PTR_ERR(fib6);
861 		goto err_fib6_create;
862 	}
863 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
864 					     MLXSW_SP_L3_PROTO_IPV4);
865 	if (IS_ERR(mr4_table)) {
866 		err = PTR_ERR(mr4_table);
867 		goto err_mr4_table_create;
868 	}
869 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
870 					     MLXSW_SP_L3_PROTO_IPV6);
871 	if (IS_ERR(mr6_table)) {
872 		err = PTR_ERR(mr6_table);
873 		goto err_mr6_table_create;
874 	}
875 
876 	vr->fib4 = fib4;
877 	vr->fib6 = fib6;
878 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
879 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
880 	vr->tb_id = tb_id;
881 	return vr;
882 
883 err_mr6_table_create:
884 	mlxsw_sp_mr_table_destroy(mr4_table);
885 err_mr4_table_create:
886 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
887 err_fib6_create:
888 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
889 	return ERR_PTR(err);
890 }
891 
892 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
893 				struct mlxsw_sp_vr *vr)
894 {
895 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
896 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
897 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
898 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
899 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
900 	vr->fib6 = NULL;
901 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
902 	vr->fib4 = NULL;
903 }
904 
905 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
906 					   struct netlink_ext_ack *extack)
907 {
908 	struct mlxsw_sp_vr *vr;
909 
910 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
911 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
912 	if (!vr)
913 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
914 	return vr;
915 }
916 
917 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
918 {
919 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
920 	    list_empty(&vr->fib6->node_list) &&
921 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
922 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
923 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
924 }
925 
926 static bool
927 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
928 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
929 {
930 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
931 
932 	if (!mlxsw_sp_vr_is_used(vr))
933 		return false;
934 	if (fib->lpm_tree->id == tree_id)
935 		return true;
936 	return false;
937 }
938 
939 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
940 					struct mlxsw_sp_fib *fib,
941 					struct mlxsw_sp_lpm_tree *new_tree)
942 {
943 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
944 	int err;
945 
946 	fib->lpm_tree = new_tree;
947 	mlxsw_sp_lpm_tree_hold(new_tree);
948 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
949 	if (err)
950 		goto err_tree_bind;
951 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
952 	return 0;
953 
954 err_tree_bind:
955 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
956 	fib->lpm_tree = old_tree;
957 	return err;
958 }
959 
960 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
961 					 struct mlxsw_sp_fib *fib,
962 					 struct mlxsw_sp_lpm_tree *new_tree)
963 {
964 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
965 	enum mlxsw_sp_l3proto proto = fib->proto;
966 	struct mlxsw_sp_lpm_tree *old_tree;
967 	u8 old_id, new_id = new_tree->id;
968 	struct mlxsw_sp_vr *vr;
969 	int i, err;
970 
971 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
972 	old_id = old_tree->id;
973 
974 	for (i = 0; i < max_vrs; i++) {
975 		vr = &mlxsw_sp->router->vrs[i];
976 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
977 			continue;
978 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
979 						   mlxsw_sp_vr_fib(vr, proto),
980 						   new_tree);
981 		if (err)
982 			goto err_tree_replace;
983 	}
984 
985 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
986 	       sizeof(new_tree->prefix_ref_count));
987 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
988 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
989 
990 	return 0;
991 
992 err_tree_replace:
993 	for (i--; i >= 0; i--) {
994 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
995 			continue;
996 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
997 					     mlxsw_sp_vr_fib(vr, proto),
998 					     old_tree);
999 	}
1000 	return err;
1001 }
1002 
1003 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
1004 {
1005 	struct mlxsw_sp_vr *vr;
1006 	u64 max_vrs;
1007 	int i;
1008 
1009 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
1010 		return -EIO;
1011 
1012 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
1013 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
1014 					GFP_KERNEL);
1015 	if (!mlxsw_sp->router->vrs)
1016 		return -ENOMEM;
1017 
1018 	for (i = 0; i < max_vrs; i++) {
1019 		vr = &mlxsw_sp->router->vrs[i];
1020 		vr->id = i;
1021 	}
1022 
1023 	return 0;
1024 }
1025 
1026 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
1027 
1028 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
1029 {
1030 	/* At this stage we're guaranteed not to have new incoming
1031 	 * FIB notifications and the work queue is free from FIBs
1032 	 * sitting on top of mlxsw netdevs. However, we can still
1033 	 * have other FIBs queued. Flush the queue before flushing
1034 	 * the device's tables. No need for locks, as we're the only
1035 	 * writer.
1036 	 */
1037 	mlxsw_core_flush_owq();
1038 	mlxsw_sp_router_fib_flush(mlxsw_sp);
1039 	kfree(mlxsw_sp->router->vrs);
1040 }
1041 
1042 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1043 {
1044 	struct net_device *d;
1045 	u32 tb_id;
1046 
1047 	rcu_read_lock();
1048 	d = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1049 	if (d)
1050 		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1051 	else
1052 		tb_id = RT_TABLE_MAIN;
1053 	rcu_read_unlock();
1054 
1055 	return tb_id;
1056 }
1057 
1058 static struct mlxsw_sp_rif *
1059 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1060 		    const struct mlxsw_sp_rif_params *params,
1061 		    struct netlink_ext_ack *extack);
1062 
1063 static struct mlxsw_sp_rif_ipip_lb *
1064 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1065 				enum mlxsw_sp_ipip_type ipipt,
1066 				struct net_device *ol_dev,
1067 				struct netlink_ext_ack *extack)
1068 {
1069 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1070 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1071 	struct mlxsw_sp_rif *rif;
1072 
1073 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1074 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1075 		.common.dev = ol_dev,
1076 		.common.lag = false,
1077 		.common.double_entry = ipip_ops->double_rif_entry,
1078 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1079 	};
1080 
1081 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1082 	if (IS_ERR(rif))
1083 		return ERR_CAST(rif);
1084 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1085 }
1086 
1087 static struct mlxsw_sp_ipip_entry *
1088 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1089 			  enum mlxsw_sp_ipip_type ipipt,
1090 			  struct net_device *ol_dev)
1091 {
1092 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1093 	struct mlxsw_sp_ipip_entry *ipip_entry;
1094 	struct mlxsw_sp_ipip_entry *ret = NULL;
1095 	int err;
1096 
1097 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1098 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1099 	if (!ipip_entry)
1100 		return ERR_PTR(-ENOMEM);
1101 
1102 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1103 							    ol_dev, NULL);
1104 	if (IS_ERR(ipip_entry->ol_lb)) {
1105 		ret = ERR_CAST(ipip_entry->ol_lb);
1106 		goto err_ol_ipip_lb_create;
1107 	}
1108 
1109 	ipip_entry->ipipt = ipipt;
1110 	ipip_entry->ol_dev = ol_dev;
1111 	ipip_entry->parms = ipip_ops->parms_init(ol_dev);
1112 
1113 	err = ipip_ops->rem_ip_addr_set(mlxsw_sp, ipip_entry);
1114 	if (err) {
1115 		ret = ERR_PTR(err);
1116 		goto err_rem_ip_addr_set;
1117 	}
1118 
1119 	return ipip_entry;
1120 
1121 err_rem_ip_addr_set:
1122 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1123 err_ol_ipip_lb_create:
1124 	kfree(ipip_entry);
1125 	return ret;
1126 }
1127 
1128 static void mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp *mlxsw_sp,
1129 					struct mlxsw_sp_ipip_entry *ipip_entry)
1130 {
1131 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1132 		mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1133 
1134 	ipip_ops->rem_ip_addr_unset(mlxsw_sp, ipip_entry);
1135 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1136 	kfree(ipip_entry);
1137 }
1138 
1139 static bool
1140 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1141 				  const enum mlxsw_sp_l3proto ul_proto,
1142 				  union mlxsw_sp_l3addr saddr,
1143 				  u32 ul_tb_id,
1144 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1145 {
1146 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1147 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1148 	union mlxsw_sp_l3addr tun_saddr;
1149 
1150 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1151 		return false;
1152 
1153 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1154 	return tun_ul_tb_id == ul_tb_id &&
1155 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1156 }
1157 
1158 static int mlxsw_sp_ipip_decap_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp,
1159 						 enum mlxsw_sp_ipip_type ipipt)
1160 {
1161 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1162 
1163 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1164 
1165 	/* Not all tunnels require to increase the default pasing depth
1166 	 * (96 bytes).
1167 	 */
1168 	if (ipip_ops->inc_parsing_depth)
1169 		return mlxsw_sp_parsing_depth_inc(mlxsw_sp);
1170 
1171 	return 0;
1172 }
1173 
1174 static void mlxsw_sp_ipip_decap_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp,
1175 						  enum mlxsw_sp_ipip_type ipipt)
1176 {
1177 	const struct mlxsw_sp_ipip_ops *ipip_ops =
1178 		mlxsw_sp->router->ipip_ops_arr[ipipt];
1179 
1180 	if (ipip_ops->inc_parsing_depth)
1181 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
1182 }
1183 
1184 static int
1185 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1186 			      struct mlxsw_sp_fib_entry *fib_entry,
1187 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1188 {
1189 	u32 tunnel_index;
1190 	int err;
1191 
1192 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1193 				  1, &tunnel_index);
1194 	if (err)
1195 		return err;
1196 
1197 	err = mlxsw_sp_ipip_decap_parsing_depth_inc(mlxsw_sp,
1198 						    ipip_entry->ipipt);
1199 	if (err)
1200 		goto err_parsing_depth_inc;
1201 
1202 	ipip_entry->decap_fib_entry = fib_entry;
1203 	fib_entry->decap.ipip_entry = ipip_entry;
1204 	fib_entry->decap.tunnel_index = tunnel_index;
1205 
1206 	return 0;
1207 
1208 err_parsing_depth_inc:
1209 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
1210 			   fib_entry->decap.tunnel_index);
1211 	return err;
1212 }
1213 
1214 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1215 					  struct mlxsw_sp_fib_entry *fib_entry)
1216 {
1217 	enum mlxsw_sp_ipip_type ipipt = fib_entry->decap.ipip_entry->ipipt;
1218 
1219 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1220 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1221 	fib_entry->decap.ipip_entry = NULL;
1222 	mlxsw_sp_ipip_decap_parsing_depth_dec(mlxsw_sp, ipipt);
1223 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1224 			   1, fib_entry->decap.tunnel_index);
1225 }
1226 
1227 static struct mlxsw_sp_fib_node *
1228 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1229 			 size_t addr_len, unsigned char prefix_len);
1230 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1231 				     struct mlxsw_sp_fib_entry *fib_entry);
1232 
1233 static void
1234 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1235 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1236 {
1237 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1238 
1239 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1240 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1241 
1242 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1243 }
1244 
1245 static void
1246 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1247 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1248 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1249 {
1250 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1251 					  ipip_entry))
1252 		return;
1253 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1254 
1255 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1256 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1257 }
1258 
1259 static struct mlxsw_sp_fib_entry *
1260 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1261 				     enum mlxsw_sp_l3proto proto,
1262 				     const union mlxsw_sp_l3addr *addr,
1263 				     enum mlxsw_sp_fib_entry_type type)
1264 {
1265 	struct mlxsw_sp_fib_node *fib_node;
1266 	unsigned char addr_prefix_len;
1267 	struct mlxsw_sp_fib *fib;
1268 	struct mlxsw_sp_vr *vr;
1269 	const void *addrp;
1270 	size_t addr_len;
1271 	u32 addr4;
1272 
1273 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1274 	if (!vr)
1275 		return NULL;
1276 	fib = mlxsw_sp_vr_fib(vr, proto);
1277 
1278 	switch (proto) {
1279 	case MLXSW_SP_L3_PROTO_IPV4:
1280 		addr4 = be32_to_cpu(addr->addr4);
1281 		addrp = &addr4;
1282 		addr_len = 4;
1283 		addr_prefix_len = 32;
1284 		break;
1285 	case MLXSW_SP_L3_PROTO_IPV6:
1286 		addrp = &addr->addr6;
1287 		addr_len = 16;
1288 		addr_prefix_len = 128;
1289 		break;
1290 	default:
1291 		WARN_ON(1);
1292 		return NULL;
1293 	}
1294 
1295 	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1296 					    addr_prefix_len);
1297 	if (!fib_node || fib_node->fib_entry->type != type)
1298 		return NULL;
1299 
1300 	return fib_node->fib_entry;
1301 }
1302 
1303 /* Given an IPIP entry, find the corresponding decap route. */
1304 static struct mlxsw_sp_fib_entry *
1305 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1306 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1307 {
1308 	static struct mlxsw_sp_fib_node *fib_node;
1309 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1310 	unsigned char saddr_prefix_len;
1311 	union mlxsw_sp_l3addr saddr;
1312 	struct mlxsw_sp_fib *ul_fib;
1313 	struct mlxsw_sp_vr *ul_vr;
1314 	const void *saddrp;
1315 	size_t saddr_len;
1316 	u32 ul_tb_id;
1317 	u32 saddr4;
1318 
1319 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1320 
1321 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1322 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1323 	if (!ul_vr)
1324 		return NULL;
1325 
1326 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1327 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1328 					   ipip_entry->ol_dev);
1329 
1330 	switch (ipip_ops->ul_proto) {
1331 	case MLXSW_SP_L3_PROTO_IPV4:
1332 		saddr4 = be32_to_cpu(saddr.addr4);
1333 		saddrp = &saddr4;
1334 		saddr_len = 4;
1335 		saddr_prefix_len = 32;
1336 		break;
1337 	case MLXSW_SP_L3_PROTO_IPV6:
1338 		saddrp = &saddr.addr6;
1339 		saddr_len = 16;
1340 		saddr_prefix_len = 128;
1341 		break;
1342 	default:
1343 		WARN_ON(1);
1344 		return NULL;
1345 	}
1346 
1347 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1348 					    saddr_prefix_len);
1349 	if (!fib_node ||
1350 	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1351 		return NULL;
1352 
1353 	return fib_node->fib_entry;
1354 }
1355 
1356 static struct mlxsw_sp_ipip_entry *
1357 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1358 			   enum mlxsw_sp_ipip_type ipipt,
1359 			   struct net_device *ol_dev)
1360 {
1361 	struct mlxsw_sp_ipip_entry *ipip_entry;
1362 
1363 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1364 	if (IS_ERR(ipip_entry))
1365 		return ipip_entry;
1366 
1367 	list_add_tail(&ipip_entry->ipip_list_node,
1368 		      &mlxsw_sp->router->ipip_list);
1369 
1370 	return ipip_entry;
1371 }
1372 
1373 static void
1374 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1375 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1376 {
1377 	list_del(&ipip_entry->ipip_list_node);
1378 	mlxsw_sp_ipip_entry_dealloc(mlxsw_sp, ipip_entry);
1379 }
1380 
1381 static bool
1382 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1383 				  const struct net_device *ul_dev,
1384 				  enum mlxsw_sp_l3proto ul_proto,
1385 				  union mlxsw_sp_l3addr ul_dip,
1386 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1387 {
1388 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1389 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1390 
1391 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1392 		return false;
1393 
1394 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1395 						 ul_tb_id, ipip_entry);
1396 }
1397 
1398 /* Given decap parameters, find the corresponding IPIP entry. */
1399 static struct mlxsw_sp_ipip_entry *
1400 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex,
1401 				  enum mlxsw_sp_l3proto ul_proto,
1402 				  union mlxsw_sp_l3addr ul_dip)
1403 {
1404 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1405 	struct net_device *ul_dev;
1406 
1407 	rcu_read_lock();
1408 
1409 	ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex);
1410 	if (!ul_dev)
1411 		goto out_unlock;
1412 
1413 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1414 			    ipip_list_node)
1415 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1416 						      ul_proto, ul_dip,
1417 						      ipip_entry))
1418 			goto out_unlock;
1419 
1420 	rcu_read_unlock();
1421 
1422 	return NULL;
1423 
1424 out_unlock:
1425 	rcu_read_unlock();
1426 	return ipip_entry;
1427 }
1428 
1429 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1430 				      const struct net_device *dev,
1431 				      enum mlxsw_sp_ipip_type *p_type)
1432 {
1433 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1434 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1435 	enum mlxsw_sp_ipip_type ipipt;
1436 
1437 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1438 		ipip_ops = router->ipip_ops_arr[ipipt];
1439 		if (dev->type == ipip_ops->dev_type) {
1440 			if (p_type)
1441 				*p_type = ipipt;
1442 			return true;
1443 		}
1444 	}
1445 	return false;
1446 }
1447 
1448 static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1449 				       const struct net_device *dev)
1450 {
1451 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1452 }
1453 
1454 static struct mlxsw_sp_ipip_entry *
1455 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1456 				   const struct net_device *ol_dev)
1457 {
1458 	struct mlxsw_sp_ipip_entry *ipip_entry;
1459 
1460 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1461 			    ipip_list_node)
1462 		if (ipip_entry->ol_dev == ol_dev)
1463 			return ipip_entry;
1464 
1465 	return NULL;
1466 }
1467 
1468 static struct mlxsw_sp_ipip_entry *
1469 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1470 				   const struct net_device *ul_dev,
1471 				   struct mlxsw_sp_ipip_entry *start)
1472 {
1473 	struct mlxsw_sp_ipip_entry *ipip_entry;
1474 
1475 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1476 					ipip_list_node);
1477 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1478 				     ipip_list_node) {
1479 		struct net_device *ol_dev = ipip_entry->ol_dev;
1480 		struct net_device *ipip_ul_dev;
1481 
1482 		rcu_read_lock();
1483 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1484 		rcu_read_unlock();
1485 
1486 		if (ipip_ul_dev == ul_dev)
1487 			return ipip_entry;
1488 	}
1489 
1490 	return NULL;
1491 }
1492 
1493 static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1494 				       const struct net_device *dev)
1495 {
1496 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1497 }
1498 
1499 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1500 						const struct net_device *ol_dev,
1501 						enum mlxsw_sp_ipip_type ipipt)
1502 {
1503 	const struct mlxsw_sp_ipip_ops *ops
1504 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1505 
1506 	return ops->can_offload(mlxsw_sp, ol_dev);
1507 }
1508 
1509 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1510 						struct net_device *ol_dev)
1511 {
1512 	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1513 	struct mlxsw_sp_ipip_entry *ipip_entry;
1514 	enum mlxsw_sp_l3proto ul_proto;
1515 	union mlxsw_sp_l3addr saddr;
1516 	u32 ul_tb_id;
1517 
1518 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1519 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1520 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1521 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1522 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1523 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1524 							  saddr, ul_tb_id,
1525 							  NULL)) {
1526 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1527 								ol_dev);
1528 			if (IS_ERR(ipip_entry))
1529 				return PTR_ERR(ipip_entry);
1530 		}
1531 	}
1532 
1533 	return 0;
1534 }
1535 
1536 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1537 						   struct net_device *ol_dev)
1538 {
1539 	struct mlxsw_sp_ipip_entry *ipip_entry;
1540 
1541 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1542 	if (ipip_entry)
1543 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1544 }
1545 
1546 static void
1547 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1548 				struct mlxsw_sp_ipip_entry *ipip_entry)
1549 {
1550 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1551 
1552 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1553 	if (decap_fib_entry)
1554 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1555 						  decap_fib_entry);
1556 }
1557 
1558 static int
1559 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1560 			u16 ul_rif_id, bool enable)
1561 {
1562 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1563 	enum mlxsw_reg_ritr_loopback_ipip_options ipip_options;
1564 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1565 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1566 	char ritr_pl[MLXSW_REG_RITR_LEN];
1567 	struct in6_addr *saddr6;
1568 	u32 saddr4;
1569 
1570 	ipip_options = MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET;
1571 	switch (lb_cf.ul_protocol) {
1572 	case MLXSW_SP_L3_PROTO_IPV4:
1573 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1574 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1575 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1576 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1577 						   ipip_options, ul_vr_id,
1578 						   ul_rif_id, saddr4,
1579 						   lb_cf.okey);
1580 		break;
1581 
1582 	case MLXSW_SP_L3_PROTO_IPV6:
1583 		saddr6 = &lb_cf.saddr.addr6;
1584 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1585 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1586 		mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt,
1587 						   ipip_options, ul_vr_id,
1588 						   ul_rif_id, saddr6,
1589 						   lb_cf.okey);
1590 		break;
1591 	}
1592 
1593 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1594 }
1595 
1596 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1597 						 struct net_device *ol_dev)
1598 {
1599 	struct mlxsw_sp_ipip_entry *ipip_entry;
1600 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1601 	int err = 0;
1602 
1603 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1604 	if (ipip_entry) {
1605 		lb_rif = ipip_entry->ol_lb;
1606 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1607 					      lb_rif->ul_rif_id, true);
1608 		if (err)
1609 			goto out;
1610 		lb_rif->common.mtu = ol_dev->mtu;
1611 	}
1612 
1613 out:
1614 	return err;
1615 }
1616 
1617 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1618 						struct net_device *ol_dev)
1619 {
1620 	struct mlxsw_sp_ipip_entry *ipip_entry;
1621 
1622 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1623 	if (ipip_entry)
1624 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1625 }
1626 
1627 static void
1628 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1629 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1630 {
1631 	if (ipip_entry->decap_fib_entry)
1632 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1633 }
1634 
1635 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1636 						  struct net_device *ol_dev)
1637 {
1638 	struct mlxsw_sp_ipip_entry *ipip_entry;
1639 
1640 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1641 	if (ipip_entry)
1642 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1643 }
1644 
1645 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1646 					 struct mlxsw_sp_rif *old_rif,
1647 					 struct mlxsw_sp_rif *new_rif);
1648 static int
1649 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1650 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1651 				 bool keep_encap,
1652 				 struct netlink_ext_ack *extack)
1653 {
1654 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1655 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1656 
1657 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1658 						     ipip_entry->ipipt,
1659 						     ipip_entry->ol_dev,
1660 						     extack);
1661 	if (IS_ERR(new_lb_rif))
1662 		return PTR_ERR(new_lb_rif);
1663 	ipip_entry->ol_lb = new_lb_rif;
1664 
1665 	if (keep_encap)
1666 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1667 					     &new_lb_rif->common);
1668 
1669 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1670 
1671 	return 0;
1672 }
1673 
1674 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1675 					struct mlxsw_sp_rif *rif);
1676 
1677 /**
1678  * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1679  * @mlxsw_sp: mlxsw_sp.
1680  * @ipip_entry: IPIP entry.
1681  * @recreate_loopback: Recreates the associated loopback RIF.
1682  * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1683  *              relevant when recreate_loopback is true.
1684  * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1685  *                   is only relevant when recreate_loopback is false.
1686  * @extack: extack.
1687  *
1688  * Return: Non-zero value on failure.
1689  */
1690 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1691 					struct mlxsw_sp_ipip_entry *ipip_entry,
1692 					bool recreate_loopback,
1693 					bool keep_encap,
1694 					bool update_nexthops,
1695 					struct netlink_ext_ack *extack)
1696 {
1697 	int err;
1698 
1699 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1700 	 * recreate it. That creates a window of opportunity where RALUE and
1701 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1702 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1703 	 * of RALUE, demote the decap route back.
1704 	 */
1705 	if (ipip_entry->decap_fib_entry)
1706 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1707 
1708 	if (recreate_loopback) {
1709 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1710 						       keep_encap, extack);
1711 		if (err)
1712 			return err;
1713 	} else if (update_nexthops) {
1714 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1715 					    &ipip_entry->ol_lb->common);
1716 	}
1717 
1718 	if (ipip_entry->ol_dev->flags & IFF_UP)
1719 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1720 
1721 	return 0;
1722 }
1723 
1724 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1725 						struct net_device *ol_dev,
1726 						struct netlink_ext_ack *extack)
1727 {
1728 	struct mlxsw_sp_ipip_entry *ipip_entry =
1729 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1730 
1731 	if (!ipip_entry)
1732 		return 0;
1733 
1734 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1735 						   true, false, false, extack);
1736 }
1737 
1738 static int
1739 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1740 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1741 				     struct net_device *ul_dev,
1742 				     bool *demote_this,
1743 				     struct netlink_ext_ack *extack)
1744 {
1745 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1746 	enum mlxsw_sp_l3proto ul_proto;
1747 	union mlxsw_sp_l3addr saddr;
1748 
1749 	/* Moving underlay to a different VRF might cause local address
1750 	 * conflict, and the conflicting tunnels need to be demoted.
1751 	 */
1752 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1753 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1754 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1755 						 saddr, ul_tb_id,
1756 						 ipip_entry)) {
1757 		*demote_this = true;
1758 		return 0;
1759 	}
1760 
1761 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1762 						   true, true, false, extack);
1763 }
1764 
1765 static int
1766 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1767 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1768 				    struct net_device *ul_dev)
1769 {
1770 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1771 						   false, false, true, NULL);
1772 }
1773 
1774 static int
1775 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1776 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1777 				      struct net_device *ul_dev)
1778 {
1779 	/* A down underlay device causes encapsulated packets to not be
1780 	 * forwarded, but decap still works. So refresh next hops without
1781 	 * touching anything else.
1782 	 */
1783 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1784 						   false, false, true, NULL);
1785 }
1786 
1787 static int
1788 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1789 					struct net_device *ol_dev,
1790 					struct netlink_ext_ack *extack)
1791 {
1792 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1793 	struct mlxsw_sp_ipip_entry *ipip_entry;
1794 	int err;
1795 
1796 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1797 	if (!ipip_entry)
1798 		/* A change might make a tunnel eligible for offloading, but
1799 		 * that is currently not implemented. What falls to slow path
1800 		 * stays there.
1801 		 */
1802 		return 0;
1803 
1804 	/* A change might make a tunnel not eligible for offloading. */
1805 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1806 						 ipip_entry->ipipt)) {
1807 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1808 		return 0;
1809 	}
1810 
1811 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1812 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1813 	return err;
1814 }
1815 
1816 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1817 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1818 {
1819 	struct net_device *ol_dev = ipip_entry->ol_dev;
1820 
1821 	if (ol_dev->flags & IFF_UP)
1822 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1823 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1824 }
1825 
1826 /* The configuration where several tunnels have the same local address in the
1827  * same underlay table needs special treatment in the HW. That is currently not
1828  * implemented in the driver. This function finds and demotes the first tunnel
1829  * with a given source address, except the one passed in the argument
1830  * `except'.
1831  */
1832 bool
1833 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1834 				     enum mlxsw_sp_l3proto ul_proto,
1835 				     union mlxsw_sp_l3addr saddr,
1836 				     u32 ul_tb_id,
1837 				     const struct mlxsw_sp_ipip_entry *except)
1838 {
1839 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1840 
1841 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1842 				 ipip_list_node) {
1843 		if (ipip_entry != except &&
1844 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1845 						      ul_tb_id, ipip_entry)) {
1846 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1847 			return true;
1848 		}
1849 	}
1850 
1851 	return false;
1852 }
1853 
1854 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1855 						     struct net_device *ul_dev)
1856 {
1857 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1858 
1859 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1860 				 ipip_list_node) {
1861 		struct net_device *ol_dev = ipip_entry->ol_dev;
1862 		struct net_device *ipip_ul_dev;
1863 
1864 		rcu_read_lock();
1865 		ipip_ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1866 		rcu_read_unlock();
1867 		if (ipip_ul_dev == ul_dev)
1868 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1869 	}
1870 }
1871 
1872 static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1873 					    struct net_device *ol_dev,
1874 					    unsigned long event,
1875 					    struct netdev_notifier_info *info)
1876 {
1877 	struct netdev_notifier_changeupper_info *chup;
1878 	struct netlink_ext_ack *extack;
1879 	int err = 0;
1880 
1881 	switch (event) {
1882 	case NETDEV_REGISTER:
1883 		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1884 		break;
1885 	case NETDEV_UNREGISTER:
1886 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1887 		break;
1888 	case NETDEV_UP:
1889 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1890 		break;
1891 	case NETDEV_DOWN:
1892 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1893 		break;
1894 	case NETDEV_CHANGEUPPER:
1895 		chup = container_of(info, typeof(*chup), info);
1896 		extack = info->extack;
1897 		if (netif_is_l3_master(chup->upper_dev))
1898 			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1899 								   ol_dev,
1900 								   extack);
1901 		break;
1902 	case NETDEV_CHANGE:
1903 		extack = info->extack;
1904 		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1905 							      ol_dev, extack);
1906 		break;
1907 	case NETDEV_CHANGEMTU:
1908 		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1909 		break;
1910 	}
1911 	return err;
1912 }
1913 
1914 static int
1915 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1916 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1917 				   struct net_device *ul_dev,
1918 				   bool *demote_this,
1919 				   unsigned long event,
1920 				   struct netdev_notifier_info *info)
1921 {
1922 	struct netdev_notifier_changeupper_info *chup;
1923 	struct netlink_ext_ack *extack;
1924 
1925 	switch (event) {
1926 	case NETDEV_CHANGEUPPER:
1927 		chup = container_of(info, typeof(*chup), info);
1928 		extack = info->extack;
1929 		if (netif_is_l3_master(chup->upper_dev))
1930 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1931 								    ipip_entry,
1932 								    ul_dev,
1933 								    demote_this,
1934 								    extack);
1935 		break;
1936 
1937 	case NETDEV_UP:
1938 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1939 							   ul_dev);
1940 	case NETDEV_DOWN:
1941 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1942 							     ipip_entry,
1943 							     ul_dev);
1944 	}
1945 	return 0;
1946 }
1947 
1948 static int
1949 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1950 				 struct net_device *ul_dev,
1951 				 unsigned long event,
1952 				 struct netdev_notifier_info *info)
1953 {
1954 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1955 	int err;
1956 
1957 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1958 								ul_dev,
1959 								ipip_entry))) {
1960 		struct mlxsw_sp_ipip_entry *prev;
1961 		bool demote_this = false;
1962 
1963 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1964 							 ul_dev, &demote_this,
1965 							 event, info);
1966 		if (err) {
1967 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1968 								 ul_dev);
1969 			return err;
1970 		}
1971 
1972 		if (demote_this) {
1973 			if (list_is_first(&ipip_entry->ipip_list_node,
1974 					  &mlxsw_sp->router->ipip_list))
1975 				prev = NULL;
1976 			else
1977 				/* This can't be cached from previous iteration,
1978 				 * because that entry could be gone now.
1979 				 */
1980 				prev = list_prev_entry(ipip_entry,
1981 						       ipip_list_node);
1982 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1983 			ipip_entry = prev;
1984 		}
1985 	}
1986 
1987 	return 0;
1988 }
1989 
1990 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1991 				      enum mlxsw_sp_l3proto ul_proto,
1992 				      const union mlxsw_sp_l3addr *ul_sip,
1993 				      u32 tunnel_index)
1994 {
1995 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1996 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1997 	struct mlxsw_sp_fib_entry *fib_entry;
1998 	int err = 0;
1999 
2000 	mutex_lock(&mlxsw_sp->router->lock);
2001 
2002 	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
2003 		err = -EINVAL;
2004 		goto out;
2005 	}
2006 
2007 	router->nve_decap_config.ul_tb_id = ul_tb_id;
2008 	router->nve_decap_config.tunnel_index = tunnel_index;
2009 	router->nve_decap_config.ul_proto = ul_proto;
2010 	router->nve_decap_config.ul_sip = *ul_sip;
2011 	router->nve_decap_config.valid = true;
2012 
2013 	/* It is valid to create a tunnel with a local IP and only later
2014 	 * assign this IP address to a local interface
2015 	 */
2016 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2017 							 ul_proto, ul_sip,
2018 							 type);
2019 	if (!fib_entry)
2020 		goto out;
2021 
2022 	fib_entry->decap.tunnel_index = tunnel_index;
2023 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2024 
2025 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2026 	if (err)
2027 		goto err_fib_entry_update;
2028 
2029 	goto out;
2030 
2031 err_fib_entry_update:
2032 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2033 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2034 out:
2035 	mutex_unlock(&mlxsw_sp->router->lock);
2036 	return err;
2037 }
2038 
2039 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
2040 				      enum mlxsw_sp_l3proto ul_proto,
2041 				      const union mlxsw_sp_l3addr *ul_sip)
2042 {
2043 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
2044 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2045 	struct mlxsw_sp_fib_entry *fib_entry;
2046 
2047 	mutex_lock(&mlxsw_sp->router->lock);
2048 
2049 	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
2050 		goto out;
2051 
2052 	router->nve_decap_config.valid = false;
2053 
2054 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
2055 							 ul_proto, ul_sip,
2056 							 type);
2057 	if (!fib_entry)
2058 		goto out;
2059 
2060 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
2061 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2062 out:
2063 	mutex_unlock(&mlxsw_sp->router->lock);
2064 }
2065 
2066 static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
2067 					 u32 ul_tb_id,
2068 					 enum mlxsw_sp_l3proto ul_proto,
2069 					 const union mlxsw_sp_l3addr *ul_sip)
2070 {
2071 	struct mlxsw_sp_router *router = mlxsw_sp->router;
2072 
2073 	return router->nve_decap_config.valid &&
2074 	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
2075 	       router->nve_decap_config.ul_proto == ul_proto &&
2076 	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
2077 		       sizeof(*ul_sip));
2078 }
2079 
2080 struct mlxsw_sp_neigh_key {
2081 	struct neighbour *n;
2082 };
2083 
2084 struct mlxsw_sp_neigh_entry {
2085 	struct list_head rif_list_node;
2086 	struct rhash_head ht_node;
2087 	struct mlxsw_sp_neigh_key key;
2088 	u16 rif;
2089 	bool connected;
2090 	unsigned char ha[ETH_ALEN];
2091 	struct list_head nexthop_list; /* list of nexthops using
2092 					* this neigh entry
2093 					*/
2094 	struct list_head nexthop_neighs_list_node;
2095 	unsigned int counter_index;
2096 	bool counter_valid;
2097 };
2098 
2099 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
2100 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
2101 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
2102 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
2103 };
2104 
2105 struct mlxsw_sp_neigh_entry *
2106 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
2107 			struct mlxsw_sp_neigh_entry *neigh_entry)
2108 {
2109 	if (!neigh_entry) {
2110 		if (list_empty(&rif->neigh_list))
2111 			return NULL;
2112 		else
2113 			return list_first_entry(&rif->neigh_list,
2114 						typeof(*neigh_entry),
2115 						rif_list_node);
2116 	}
2117 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
2118 		return NULL;
2119 	return list_next_entry(neigh_entry, rif_list_node);
2120 }
2121 
2122 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
2123 {
2124 	return neigh_entry->key.n->tbl->family;
2125 }
2126 
2127 unsigned char *
2128 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
2129 {
2130 	return neigh_entry->ha;
2131 }
2132 
2133 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2134 {
2135 	struct neighbour *n;
2136 
2137 	n = neigh_entry->key.n;
2138 	return ntohl(*((__be32 *) n->primary_key));
2139 }
2140 
2141 struct in6_addr *
2142 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2143 {
2144 	struct neighbour *n;
2145 
2146 	n = neigh_entry->key.n;
2147 	return (struct in6_addr *) &n->primary_key;
2148 }
2149 
2150 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2151 			       struct mlxsw_sp_neigh_entry *neigh_entry,
2152 			       u64 *p_counter)
2153 {
2154 	if (!neigh_entry->counter_valid)
2155 		return -EINVAL;
2156 
2157 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2158 					 p_counter, NULL);
2159 }
2160 
2161 static struct mlxsw_sp_neigh_entry *
2162 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2163 			   u16 rif)
2164 {
2165 	struct mlxsw_sp_neigh_entry *neigh_entry;
2166 
2167 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2168 	if (!neigh_entry)
2169 		return NULL;
2170 
2171 	neigh_entry->key.n = n;
2172 	neigh_entry->rif = rif;
2173 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2174 
2175 	return neigh_entry;
2176 }
2177 
2178 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2179 {
2180 	kfree(neigh_entry);
2181 }
2182 
2183 static int
2184 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2185 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2186 {
2187 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2188 				      &neigh_entry->ht_node,
2189 				      mlxsw_sp_neigh_ht_params);
2190 }
2191 
2192 static void
2193 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2194 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2195 {
2196 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2197 			       &neigh_entry->ht_node,
2198 			       mlxsw_sp_neigh_ht_params);
2199 }
2200 
2201 static bool
2202 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2203 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2204 {
2205 	struct devlink *devlink;
2206 	const char *table_name;
2207 
2208 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2209 	case AF_INET:
2210 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2211 		break;
2212 	case AF_INET6:
2213 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2214 		break;
2215 	default:
2216 		WARN_ON(1);
2217 		return false;
2218 	}
2219 
2220 	devlink = priv_to_devlink(mlxsw_sp->core);
2221 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2222 }
2223 
2224 static void
2225 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2226 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2227 {
2228 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2229 		return;
2230 
2231 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2232 		return;
2233 
2234 	neigh_entry->counter_valid = true;
2235 }
2236 
2237 static void
2238 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2239 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2240 {
2241 	if (!neigh_entry->counter_valid)
2242 		return;
2243 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2244 				   neigh_entry->counter_index);
2245 	neigh_entry->counter_valid = false;
2246 }
2247 
2248 static struct mlxsw_sp_neigh_entry *
2249 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2250 {
2251 	struct mlxsw_sp_neigh_entry *neigh_entry;
2252 	struct mlxsw_sp_rif *rif;
2253 	int err;
2254 
2255 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2256 	if (!rif)
2257 		return ERR_PTR(-EINVAL);
2258 
2259 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2260 	if (!neigh_entry)
2261 		return ERR_PTR(-ENOMEM);
2262 
2263 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2264 	if (err)
2265 		goto err_neigh_entry_insert;
2266 
2267 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2268 	atomic_inc(&mlxsw_sp->router->neighs_update.neigh_count);
2269 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2270 
2271 	return neigh_entry;
2272 
2273 err_neigh_entry_insert:
2274 	mlxsw_sp_neigh_entry_free(neigh_entry);
2275 	return ERR_PTR(err);
2276 }
2277 
2278 static void
2279 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2280 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2281 {
2282 	list_del(&neigh_entry->rif_list_node);
2283 	atomic_dec(&mlxsw_sp->router->neighs_update.neigh_count);
2284 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2285 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2286 	mlxsw_sp_neigh_entry_free(neigh_entry);
2287 }
2288 
2289 static struct mlxsw_sp_neigh_entry *
2290 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2291 {
2292 	struct mlxsw_sp_neigh_key key;
2293 
2294 	key.n = n;
2295 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2296 				      &key, mlxsw_sp_neigh_ht_params);
2297 }
2298 
2299 static void
2300 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2301 {
2302 	unsigned long interval;
2303 
2304 #if IS_ENABLED(CONFIG_IPV6)
2305 	interval = min_t(unsigned long,
2306 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2307 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2308 #else
2309 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2310 #endif
2311 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2312 }
2313 
2314 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2315 						   char *rauhtd_pl,
2316 						   int ent_index)
2317 {
2318 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2319 	struct net_device *dev;
2320 	struct neighbour *n;
2321 	__be32 dipn;
2322 	u32 dip;
2323 	u16 rif;
2324 
2325 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2326 
2327 	if (WARN_ON_ONCE(rif >= max_rifs))
2328 		return;
2329 	if (!mlxsw_sp->router->rifs[rif]) {
2330 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2331 		return;
2332 	}
2333 
2334 	dipn = htonl(dip);
2335 	dev = mlxsw_sp->router->rifs[rif]->dev;
2336 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2337 	if (!n)
2338 		return;
2339 
2340 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2341 	neigh_event_send(n, NULL);
2342 	neigh_release(n);
2343 }
2344 
2345 #if IS_ENABLED(CONFIG_IPV6)
2346 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2347 						   char *rauhtd_pl,
2348 						   int rec_index)
2349 {
2350 	struct net_device *dev;
2351 	struct neighbour *n;
2352 	struct in6_addr dip;
2353 	u16 rif;
2354 
2355 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2356 					 (char *) &dip);
2357 
2358 	if (!mlxsw_sp->router->rifs[rif]) {
2359 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2360 		return;
2361 	}
2362 
2363 	dev = mlxsw_sp->router->rifs[rif]->dev;
2364 	n = neigh_lookup(&nd_tbl, &dip, dev);
2365 	if (!n)
2366 		return;
2367 
2368 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2369 	neigh_event_send(n, NULL);
2370 	neigh_release(n);
2371 }
2372 #else
2373 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2374 						   char *rauhtd_pl,
2375 						   int rec_index)
2376 {
2377 }
2378 #endif
2379 
2380 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2381 						   char *rauhtd_pl,
2382 						   int rec_index)
2383 {
2384 	u8 num_entries;
2385 	int i;
2386 
2387 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2388 								rec_index);
2389 	/* Hardware starts counting at 0, so add 1. */
2390 	num_entries++;
2391 
2392 	/* Each record consists of several neighbour entries. */
2393 	for (i = 0; i < num_entries; i++) {
2394 		int ent_index;
2395 
2396 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2397 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2398 						       ent_index);
2399 	}
2400 
2401 }
2402 
2403 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2404 						   char *rauhtd_pl,
2405 						   int rec_index)
2406 {
2407 	/* One record contains one entry. */
2408 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2409 					       rec_index);
2410 }
2411 
2412 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2413 					      char *rauhtd_pl, int rec_index)
2414 {
2415 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2416 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2417 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2418 						       rec_index);
2419 		break;
2420 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2421 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2422 						       rec_index);
2423 		break;
2424 	}
2425 }
2426 
2427 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2428 {
2429 	u8 num_rec, last_rec_index, num_entries;
2430 
2431 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2432 	last_rec_index = num_rec - 1;
2433 
2434 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2435 		return false;
2436 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2437 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2438 		return true;
2439 
2440 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2441 								last_rec_index);
2442 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2443 		return true;
2444 	return false;
2445 }
2446 
2447 static int
2448 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2449 				       char *rauhtd_pl,
2450 				       enum mlxsw_reg_rauhtd_type type)
2451 {
2452 	int i, num_rec;
2453 	int err;
2454 
2455 	/* Ensure the RIF we read from the device does not change mid-dump. */
2456 	mutex_lock(&mlxsw_sp->router->lock);
2457 	do {
2458 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2459 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2460 				      rauhtd_pl);
2461 		if (err) {
2462 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2463 			break;
2464 		}
2465 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2466 		for (i = 0; i < num_rec; i++)
2467 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2468 							  i);
2469 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2470 	mutex_unlock(&mlxsw_sp->router->lock);
2471 
2472 	return err;
2473 }
2474 
2475 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2476 {
2477 	enum mlxsw_reg_rauhtd_type type;
2478 	char *rauhtd_pl;
2479 	int err;
2480 
2481 	if (!atomic_read(&mlxsw_sp->router->neighs_update.neigh_count))
2482 		return 0;
2483 
2484 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2485 	if (!rauhtd_pl)
2486 		return -ENOMEM;
2487 
2488 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2489 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2490 	if (err)
2491 		goto out;
2492 
2493 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2494 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2495 out:
2496 	kfree(rauhtd_pl);
2497 	return err;
2498 }
2499 
2500 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2501 {
2502 	struct mlxsw_sp_neigh_entry *neigh_entry;
2503 
2504 	mutex_lock(&mlxsw_sp->router->lock);
2505 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2506 			    nexthop_neighs_list_node)
2507 		/* If this neigh have nexthops, make the kernel think this neigh
2508 		 * is active regardless of the traffic.
2509 		 */
2510 		neigh_event_send(neigh_entry->key.n, NULL);
2511 	mutex_unlock(&mlxsw_sp->router->lock);
2512 }
2513 
2514 static void
2515 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2516 {
2517 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2518 
2519 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2520 			       msecs_to_jiffies(interval));
2521 }
2522 
2523 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2524 {
2525 	struct mlxsw_sp_router *router;
2526 	int err;
2527 
2528 	router = container_of(work, struct mlxsw_sp_router,
2529 			      neighs_update.dw.work);
2530 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2531 	if (err)
2532 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2533 
2534 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2535 
2536 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2537 }
2538 
2539 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2540 {
2541 	struct mlxsw_sp_neigh_entry *neigh_entry;
2542 	struct mlxsw_sp_router *router;
2543 
2544 	router = container_of(work, struct mlxsw_sp_router,
2545 			      nexthop_probe_dw.work);
2546 	/* Iterate over nexthop neighbours, find those who are unresolved and
2547 	 * send arp on them. This solves the chicken-egg problem when
2548 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2549 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2550 	 * using different nexthop.
2551 	 */
2552 	mutex_lock(&router->lock);
2553 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2554 			    nexthop_neighs_list_node)
2555 		if (!neigh_entry->connected)
2556 			neigh_event_send(neigh_entry->key.n, NULL);
2557 	mutex_unlock(&router->lock);
2558 
2559 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2560 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2561 }
2562 
2563 static void
2564 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2565 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2566 			      bool removing, bool dead);
2567 
2568 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2569 {
2570 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2571 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2572 }
2573 
2574 static int
2575 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2576 				struct mlxsw_sp_neigh_entry *neigh_entry,
2577 				enum mlxsw_reg_rauht_op op)
2578 {
2579 	struct neighbour *n = neigh_entry->key.n;
2580 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2581 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2582 
2583 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2584 			      dip);
2585 	if (neigh_entry->counter_valid)
2586 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2587 					     neigh_entry->counter_index);
2588 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2589 }
2590 
2591 static int
2592 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2593 				struct mlxsw_sp_neigh_entry *neigh_entry,
2594 				enum mlxsw_reg_rauht_op op)
2595 {
2596 	struct neighbour *n = neigh_entry->key.n;
2597 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2598 	const char *dip = n->primary_key;
2599 
2600 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2601 			      dip);
2602 	if (neigh_entry->counter_valid)
2603 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2604 					     neigh_entry->counter_index);
2605 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2606 }
2607 
2608 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2609 {
2610 	struct neighbour *n = neigh_entry->key.n;
2611 
2612 	/* Packets with a link-local destination address are trapped
2613 	 * after LPM lookup and never reach the neighbour table, so
2614 	 * there is no need to program such neighbours to the device.
2615 	 */
2616 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2617 	    IPV6_ADDR_LINKLOCAL)
2618 		return true;
2619 	return false;
2620 }
2621 
2622 static void
2623 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2624 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2625 			    bool adding)
2626 {
2627 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2628 	int err;
2629 
2630 	if (!adding && !neigh_entry->connected)
2631 		return;
2632 	neigh_entry->connected = adding;
2633 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2634 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2635 						      op);
2636 		if (err)
2637 			return;
2638 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2639 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2640 			return;
2641 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2642 						      op);
2643 		if (err)
2644 			return;
2645 	} else {
2646 		WARN_ON_ONCE(1);
2647 		return;
2648 	}
2649 
2650 	if (adding)
2651 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2652 	else
2653 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2654 }
2655 
2656 void
2657 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2658 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2659 				    bool adding)
2660 {
2661 	if (adding)
2662 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2663 	else
2664 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2665 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2666 }
2667 
2668 struct mlxsw_sp_netevent_work {
2669 	struct work_struct work;
2670 	struct mlxsw_sp *mlxsw_sp;
2671 	struct neighbour *n;
2672 };
2673 
2674 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2675 {
2676 	struct mlxsw_sp_netevent_work *net_work =
2677 		container_of(work, struct mlxsw_sp_netevent_work, work);
2678 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2679 	struct mlxsw_sp_neigh_entry *neigh_entry;
2680 	struct neighbour *n = net_work->n;
2681 	unsigned char ha[ETH_ALEN];
2682 	bool entry_connected;
2683 	u8 nud_state, dead;
2684 
2685 	/* If these parameters are changed after we release the lock,
2686 	 * then we are guaranteed to receive another event letting us
2687 	 * know about it.
2688 	 */
2689 	read_lock_bh(&n->lock);
2690 	memcpy(ha, n->ha, ETH_ALEN);
2691 	nud_state = n->nud_state;
2692 	dead = n->dead;
2693 	read_unlock_bh(&n->lock);
2694 
2695 	mutex_lock(&mlxsw_sp->router->lock);
2696 	mlxsw_sp_span_respin(mlxsw_sp);
2697 
2698 	entry_connected = nud_state & NUD_VALID && !dead;
2699 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2700 	if (!entry_connected && !neigh_entry)
2701 		goto out;
2702 	if (!neigh_entry) {
2703 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2704 		if (IS_ERR(neigh_entry))
2705 			goto out;
2706 	}
2707 
2708 	if (neigh_entry->connected && entry_connected &&
2709 	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
2710 		goto out;
2711 
2712 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2713 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2714 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2715 				      dead);
2716 
2717 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2718 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2719 
2720 out:
2721 	mutex_unlock(&mlxsw_sp->router->lock);
2722 	neigh_release(n);
2723 	kfree(net_work);
2724 }
2725 
2726 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2727 
2728 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2729 {
2730 	struct mlxsw_sp_netevent_work *net_work =
2731 		container_of(work, struct mlxsw_sp_netevent_work, work);
2732 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2733 
2734 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2735 	kfree(net_work);
2736 }
2737 
2738 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2739 
2740 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2741 {
2742 	struct mlxsw_sp_netevent_work *net_work =
2743 		container_of(work, struct mlxsw_sp_netevent_work, work);
2744 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2745 
2746 	__mlxsw_sp_router_init(mlxsw_sp);
2747 	kfree(net_work);
2748 }
2749 
2750 static int mlxsw_sp_router_schedule_work(struct net *net,
2751 					 struct mlxsw_sp_router *router,
2752 					 struct neighbour *n,
2753 					 void (*cb)(struct work_struct *))
2754 {
2755 	struct mlxsw_sp_netevent_work *net_work;
2756 
2757 	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2758 		return NOTIFY_DONE;
2759 
2760 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2761 	if (!net_work)
2762 		return NOTIFY_BAD;
2763 
2764 	INIT_WORK(&net_work->work, cb);
2765 	net_work->mlxsw_sp = router->mlxsw_sp;
2766 	net_work->n = n;
2767 	mlxsw_core_schedule_work(&net_work->work);
2768 	return NOTIFY_DONE;
2769 }
2770 
2771 static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev)
2772 {
2773 	struct mlxsw_sp_port *mlxsw_sp_port;
2774 
2775 	rcu_read_lock();
2776 	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
2777 	rcu_read_unlock();
2778 	return !!mlxsw_sp_port;
2779 }
2780 
2781 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2782 					  unsigned long event, void *ptr)
2783 {
2784 	struct mlxsw_sp_router *router;
2785 	unsigned long interval;
2786 	struct neigh_parms *p;
2787 	struct neighbour *n;
2788 	struct net *net;
2789 
2790 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2791 
2792 	switch (event) {
2793 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2794 		p = ptr;
2795 
2796 		/* We don't care about changes in the default table. */
2797 		if (!p->dev || (p->tbl->family != AF_INET &&
2798 				p->tbl->family != AF_INET6))
2799 			return NOTIFY_DONE;
2800 
2801 		/* We are in atomic context and can't take RTNL mutex,
2802 		 * so use RCU variant to walk the device chain.
2803 		 */
2804 		if (!mlxsw_sp_dev_lower_is_port(p->dev))
2805 			return NOTIFY_DONE;
2806 
2807 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2808 		router->neighs_update.interval = interval;
2809 		break;
2810 	case NETEVENT_NEIGH_UPDATE:
2811 		n = ptr;
2812 		net = neigh_parms_net(n->parms);
2813 
2814 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2815 			return NOTIFY_DONE;
2816 
2817 		if (!mlxsw_sp_dev_lower_is_port(n->dev))
2818 			return NOTIFY_DONE;
2819 
2820 		/* Take a reference to ensure the neighbour won't be
2821 		 * destructed until we drop the reference in delayed
2822 		 * work.
2823 		 */
2824 		neigh_clone(n);
2825 		return mlxsw_sp_router_schedule_work(net, router, n,
2826 				mlxsw_sp_router_neigh_event_work);
2827 
2828 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2829 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2830 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2831 				mlxsw_sp_router_mp_hash_event_work);
2832 
2833 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2834 		return mlxsw_sp_router_schedule_work(ptr, router, NULL,
2835 				mlxsw_sp_router_update_priority_work);
2836 	}
2837 
2838 	return NOTIFY_DONE;
2839 }
2840 
2841 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2842 {
2843 	int err;
2844 
2845 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2846 			      &mlxsw_sp_neigh_ht_params);
2847 	if (err)
2848 		return err;
2849 
2850 	/* Initialize the polling interval according to the default
2851 	 * table.
2852 	 */
2853 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2854 
2855 	/* Create the delayed works for the activity_update */
2856 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2857 			  mlxsw_sp_router_neighs_update_work);
2858 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2859 			  mlxsw_sp_router_probe_unresolved_nexthops);
2860 	atomic_set(&mlxsw_sp->router->neighs_update.neigh_count, 0);
2861 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2862 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2863 	return 0;
2864 }
2865 
2866 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2867 {
2868 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2869 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2870 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2871 }
2872 
2873 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2874 					 struct mlxsw_sp_rif *rif)
2875 {
2876 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2877 
2878 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2879 				 rif_list_node) {
2880 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2881 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2882 	}
2883 }
2884 
2885 enum mlxsw_sp_nexthop_type {
2886 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2887 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2888 };
2889 
2890 enum mlxsw_sp_nexthop_action {
2891 	/* Nexthop forwards packets to an egress RIF */
2892 	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
2893 	/* Nexthop discards packets */
2894 	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
2895 	/* Nexthop traps packets */
2896 	MLXSW_SP_NEXTHOP_ACTION_TRAP,
2897 };
2898 
2899 struct mlxsw_sp_nexthop_key {
2900 	struct fib_nh *fib_nh;
2901 };
2902 
2903 struct mlxsw_sp_nexthop {
2904 	struct list_head neigh_list_node; /* member of neigh entry list */
2905 	struct list_head rif_list_node;
2906 	struct list_head router_list_node;
2907 	struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group
2908 						   * this nexthop belongs to
2909 						   */
2910 	struct rhash_head ht_node;
2911 	struct neigh_table *neigh_tbl;
2912 	struct mlxsw_sp_nexthop_key key;
2913 	unsigned char gw_addr[sizeof(struct in6_addr)];
2914 	int ifindex;
2915 	int nh_weight;
2916 	int norm_nh_weight;
2917 	int num_adj_entries;
2918 	struct mlxsw_sp_rif *rif;
2919 	u8 should_offload:1, /* set indicates this nexthop should be written
2920 			      * to the adjacency table.
2921 			      */
2922 	   offloaded:1, /* set indicates this nexthop was written to the
2923 			 * adjacency table.
2924 			 */
2925 	   update:1; /* set indicates this nexthop should be updated in the
2926 		      * adjacency table (f.e., its MAC changed).
2927 		      */
2928 	enum mlxsw_sp_nexthop_action action;
2929 	enum mlxsw_sp_nexthop_type type;
2930 	union {
2931 		struct mlxsw_sp_neigh_entry *neigh_entry;
2932 		struct mlxsw_sp_ipip_entry *ipip_entry;
2933 	};
2934 	unsigned int counter_index;
2935 	bool counter_valid;
2936 };
2937 
2938 enum mlxsw_sp_nexthop_group_type {
2939 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4,
2940 	MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6,
2941 	MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ,
2942 };
2943 
2944 struct mlxsw_sp_nexthop_group_info {
2945 	struct mlxsw_sp_nexthop_group *nh_grp;
2946 	u32 adj_index;
2947 	u16 ecmp_size;
2948 	u16 count;
2949 	int sum_norm_weight;
2950 	u8 adj_index_valid:1,
2951 	   gateway:1, /* routes using the group use a gateway */
2952 	   is_resilient:1;
2953 	struct list_head list; /* member in nh_res_grp_list */
2954 	struct mlxsw_sp_nexthop nexthops[];
2955 #define nh_rif	nexthops[0].rif
2956 };
2957 
2958 struct mlxsw_sp_nexthop_group_vr_key {
2959 	u16 vr_id;
2960 	enum mlxsw_sp_l3proto proto;
2961 };
2962 
2963 struct mlxsw_sp_nexthop_group_vr_entry {
2964 	struct list_head list; /* member in vr_list */
2965 	struct rhash_head ht_node; /* member in vr_ht */
2966 	refcount_t ref_count;
2967 	struct mlxsw_sp_nexthop_group_vr_key key;
2968 };
2969 
2970 struct mlxsw_sp_nexthop_group {
2971 	struct rhash_head ht_node;
2972 	struct list_head fib_list; /* list of fib entries that use this group */
2973 	union {
2974 		struct {
2975 			struct fib_info *fi;
2976 		} ipv4;
2977 		struct {
2978 			u32 id;
2979 		} obj;
2980 	};
2981 	struct mlxsw_sp_nexthop_group_info *nhgi;
2982 	struct list_head vr_list;
2983 	struct rhashtable vr_ht;
2984 	enum mlxsw_sp_nexthop_group_type type;
2985 	bool can_destroy;
2986 };
2987 
2988 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2989 				    struct mlxsw_sp_nexthop *nh)
2990 {
2991 	struct devlink *devlink;
2992 
2993 	devlink = priv_to_devlink(mlxsw_sp->core);
2994 	if (!devlink_dpipe_table_counter_enabled(devlink,
2995 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2996 		return;
2997 
2998 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2999 		return;
3000 
3001 	nh->counter_valid = true;
3002 }
3003 
3004 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
3005 				   struct mlxsw_sp_nexthop *nh)
3006 {
3007 	if (!nh->counter_valid)
3008 		return;
3009 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
3010 	nh->counter_valid = false;
3011 }
3012 
3013 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
3014 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
3015 {
3016 	if (!nh->counter_valid)
3017 		return -EINVAL;
3018 
3019 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
3020 					 p_counter, NULL);
3021 }
3022 
3023 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
3024 					       struct mlxsw_sp_nexthop *nh)
3025 {
3026 	if (!nh) {
3027 		if (list_empty(&router->nexthop_list))
3028 			return NULL;
3029 		else
3030 			return list_first_entry(&router->nexthop_list,
3031 						typeof(*nh), router_list_node);
3032 	}
3033 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
3034 		return NULL;
3035 	return list_next_entry(nh, router_list_node);
3036 }
3037 
3038 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
3039 {
3040 	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3041 }
3042 
3043 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
3044 {
3045 	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
3046 	    !mlxsw_sp_nexthop_is_forward(nh))
3047 		return NULL;
3048 	return nh->neigh_entry->ha;
3049 }
3050 
3051 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
3052 			     u32 *p_adj_size, u32 *p_adj_hash_index)
3053 {
3054 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3055 	u32 adj_hash_index = 0;
3056 	int i;
3057 
3058 	if (!nh->offloaded || !nhgi->adj_index_valid)
3059 		return -EINVAL;
3060 
3061 	*p_adj_index = nhgi->adj_index;
3062 	*p_adj_size = nhgi->ecmp_size;
3063 
3064 	for (i = 0; i < nhgi->count; i++) {
3065 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3066 
3067 		if (nh_iter == nh)
3068 			break;
3069 		if (nh_iter->offloaded)
3070 			adj_hash_index += nh_iter->num_adj_entries;
3071 	}
3072 
3073 	*p_adj_hash_index = adj_hash_index;
3074 	return 0;
3075 }
3076 
3077 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
3078 {
3079 	return nh->rif;
3080 }
3081 
3082 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
3083 {
3084 	struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi;
3085 	int i;
3086 
3087 	for (i = 0; i < nhgi->count; i++) {
3088 		struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i];
3089 
3090 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
3091 			return true;
3092 	}
3093 	return false;
3094 }
3095 
3096 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
3097 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
3098 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
3099 	.key_len = sizeof(struct mlxsw_sp_nexthop_group_vr_key),
3100 	.automatic_shrinking = true,
3101 };
3102 
3103 static struct mlxsw_sp_nexthop_group_vr_entry *
3104 mlxsw_sp_nexthop_group_vr_entry_lookup(struct mlxsw_sp_nexthop_group *nh_grp,
3105 				       const struct mlxsw_sp_fib *fib)
3106 {
3107 	struct mlxsw_sp_nexthop_group_vr_key key;
3108 
3109 	memset(&key, 0, sizeof(key));
3110 	key.vr_id = fib->vr->id;
3111 	key.proto = fib->proto;
3112 	return rhashtable_lookup_fast(&nh_grp->vr_ht, &key,
3113 				      mlxsw_sp_nexthop_group_vr_ht_params);
3114 }
3115 
3116 static int
3117 mlxsw_sp_nexthop_group_vr_entry_create(struct mlxsw_sp_nexthop_group *nh_grp,
3118 				       const struct mlxsw_sp_fib *fib)
3119 {
3120 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3121 	int err;
3122 
3123 	vr_entry = kzalloc(sizeof(*vr_entry), GFP_KERNEL);
3124 	if (!vr_entry)
3125 		return -ENOMEM;
3126 
3127 	vr_entry->key.vr_id = fib->vr->id;
3128 	vr_entry->key.proto = fib->proto;
3129 	refcount_set(&vr_entry->ref_count, 1);
3130 
3131 	err = rhashtable_insert_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3132 				     mlxsw_sp_nexthop_group_vr_ht_params);
3133 	if (err)
3134 		goto err_hashtable_insert;
3135 
3136 	list_add(&vr_entry->list, &nh_grp->vr_list);
3137 
3138 	return 0;
3139 
3140 err_hashtable_insert:
3141 	kfree(vr_entry);
3142 	return err;
3143 }
3144 
3145 static void
3146 mlxsw_sp_nexthop_group_vr_entry_destroy(struct mlxsw_sp_nexthop_group *nh_grp,
3147 					struct mlxsw_sp_nexthop_group_vr_entry *vr_entry)
3148 {
3149 	list_del(&vr_entry->list);
3150 	rhashtable_remove_fast(&nh_grp->vr_ht, &vr_entry->ht_node,
3151 			       mlxsw_sp_nexthop_group_vr_ht_params);
3152 	kfree(vr_entry);
3153 }
3154 
3155 static int
3156 mlxsw_sp_nexthop_group_vr_link(struct mlxsw_sp_nexthop_group *nh_grp,
3157 			       const struct mlxsw_sp_fib *fib)
3158 {
3159 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3160 
3161 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3162 	if (vr_entry) {
3163 		refcount_inc(&vr_entry->ref_count);
3164 		return 0;
3165 	}
3166 
3167 	return mlxsw_sp_nexthop_group_vr_entry_create(nh_grp, fib);
3168 }
3169 
3170 static void
3171 mlxsw_sp_nexthop_group_vr_unlink(struct mlxsw_sp_nexthop_group *nh_grp,
3172 				 const struct mlxsw_sp_fib *fib)
3173 {
3174 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3175 
3176 	vr_entry = mlxsw_sp_nexthop_group_vr_entry_lookup(nh_grp, fib);
3177 	if (WARN_ON_ONCE(!vr_entry))
3178 		return;
3179 
3180 	if (!refcount_dec_and_test(&vr_entry->ref_count))
3181 		return;
3182 
3183 	mlxsw_sp_nexthop_group_vr_entry_destroy(nh_grp, vr_entry);
3184 }
3185 
3186 struct mlxsw_sp_nexthop_group_cmp_arg {
3187 	enum mlxsw_sp_nexthop_group_type type;
3188 	union {
3189 		struct fib_info *fi;
3190 		struct mlxsw_sp_fib6_entry *fib6_entry;
3191 		u32 id;
3192 	};
3193 };
3194 
3195 static bool
3196 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
3197 				    const struct in6_addr *gw, int ifindex,
3198 				    int weight)
3199 {
3200 	int i;
3201 
3202 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3203 		const struct mlxsw_sp_nexthop *nh;
3204 
3205 		nh = &nh_grp->nhgi->nexthops[i];
3206 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
3207 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
3208 			return true;
3209 	}
3210 
3211 	return false;
3212 }
3213 
3214 static bool
3215 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
3216 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
3217 {
3218 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3219 
3220 	if (nh_grp->nhgi->count != fib6_entry->nrt6)
3221 		return false;
3222 
3223 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3224 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3225 		struct in6_addr *gw;
3226 		int ifindex, weight;
3227 
3228 		ifindex = fib6_nh->fib_nh_dev->ifindex;
3229 		weight = fib6_nh->fib_nh_weight;
3230 		gw = &fib6_nh->fib_nh_gw6;
3231 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
3232 							 weight))
3233 			return false;
3234 	}
3235 
3236 	return true;
3237 }
3238 
3239 static int
3240 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
3241 {
3242 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
3243 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
3244 
3245 	if (nh_grp->type != cmp_arg->type)
3246 		return 1;
3247 
3248 	switch (cmp_arg->type) {
3249 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3250 		return cmp_arg->fi != nh_grp->ipv4.fi;
3251 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3252 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
3253 						    cmp_arg->fib6_entry);
3254 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3255 		return cmp_arg->id != nh_grp->obj.id;
3256 	default:
3257 		WARN_ON(1);
3258 		return 1;
3259 	}
3260 }
3261 
3262 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
3263 {
3264 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
3265 	const struct mlxsw_sp_nexthop *nh;
3266 	struct fib_info *fi;
3267 	unsigned int val;
3268 	int i;
3269 
3270 	switch (nh_grp->type) {
3271 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3272 		fi = nh_grp->ipv4.fi;
3273 		return jhash(&fi, sizeof(fi), seed);
3274 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3275 		val = nh_grp->nhgi->count;
3276 		for (i = 0; i < nh_grp->nhgi->count; i++) {
3277 			nh = &nh_grp->nhgi->nexthops[i];
3278 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3279 			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3280 		}
3281 		return jhash(&val, sizeof(val), seed);
3282 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3283 		return jhash(&nh_grp->obj.id, sizeof(nh_grp->obj.id), seed);
3284 	default:
3285 		WARN_ON(1);
3286 		return 0;
3287 	}
3288 }
3289 
3290 static u32
3291 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3292 {
3293 	unsigned int val = fib6_entry->nrt6;
3294 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3295 
3296 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3297 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3298 		struct net_device *dev = fib6_nh->fib_nh_dev;
3299 		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3300 
3301 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3302 		val ^= jhash(gw, sizeof(*gw), seed);
3303 	}
3304 
3305 	return jhash(&val, sizeof(val), seed);
3306 }
3307 
3308 static u32
3309 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3310 {
3311 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3312 
3313 	switch (cmp_arg->type) {
3314 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3315 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3316 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3317 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3318 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3319 		return jhash(&cmp_arg->id, sizeof(cmp_arg->id), seed);
3320 	default:
3321 		WARN_ON(1);
3322 		return 0;
3323 	}
3324 }
3325 
3326 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3327 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3328 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3329 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3330 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3331 };
3332 
3333 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3334 					 struct mlxsw_sp_nexthop_group *nh_grp)
3335 {
3336 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3337 	    !nh_grp->nhgi->gateway)
3338 		return 0;
3339 
3340 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3341 				      &nh_grp->ht_node,
3342 				      mlxsw_sp_nexthop_group_ht_params);
3343 }
3344 
3345 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3346 					  struct mlxsw_sp_nexthop_group *nh_grp)
3347 {
3348 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 &&
3349 	    !nh_grp->nhgi->gateway)
3350 		return;
3351 
3352 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3353 			       &nh_grp->ht_node,
3354 			       mlxsw_sp_nexthop_group_ht_params);
3355 }
3356 
3357 static struct mlxsw_sp_nexthop_group *
3358 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3359 			       struct fib_info *fi)
3360 {
3361 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3362 
3363 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
3364 	cmp_arg.fi = fi;
3365 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3366 				      &cmp_arg,
3367 				      mlxsw_sp_nexthop_group_ht_params);
3368 }
3369 
3370 static struct mlxsw_sp_nexthop_group *
3371 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3372 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3373 {
3374 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3375 
3376 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
3377 	cmp_arg.fib6_entry = fib6_entry;
3378 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3379 				      &cmp_arg,
3380 				      mlxsw_sp_nexthop_group_ht_params);
3381 }
3382 
3383 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3384 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3385 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3386 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3387 };
3388 
3389 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3390 				   struct mlxsw_sp_nexthop *nh)
3391 {
3392 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3393 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3394 }
3395 
3396 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3397 				    struct mlxsw_sp_nexthop *nh)
3398 {
3399 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3400 			       mlxsw_sp_nexthop_ht_params);
3401 }
3402 
3403 static struct mlxsw_sp_nexthop *
3404 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3405 			struct mlxsw_sp_nexthop_key key)
3406 {
3407 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3408 				      mlxsw_sp_nexthop_ht_params);
3409 }
3410 
3411 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3412 					     enum mlxsw_sp_l3proto proto,
3413 					     u16 vr_id,
3414 					     u32 adj_index, u16 ecmp_size,
3415 					     u32 new_adj_index,
3416 					     u16 new_ecmp_size)
3417 {
3418 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3419 
3420 	mlxsw_reg_raleu_pack(raleu_pl,
3421 			     (enum mlxsw_reg_ralxx_protocol) proto, vr_id,
3422 			     adj_index, ecmp_size, new_adj_index,
3423 			     new_ecmp_size);
3424 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3425 }
3426 
3427 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3428 					  struct mlxsw_sp_nexthop_group *nh_grp,
3429 					  u32 old_adj_index, u16 old_ecmp_size)
3430 {
3431 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3432 	struct mlxsw_sp_nexthop_group_vr_entry *vr_entry;
3433 	int err;
3434 
3435 	list_for_each_entry(vr_entry, &nh_grp->vr_list, list) {
3436 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp,
3437 							vr_entry->key.proto,
3438 							vr_entry->key.vr_id,
3439 							old_adj_index,
3440 							old_ecmp_size,
3441 							nhgi->adj_index,
3442 							nhgi->ecmp_size);
3443 		if (err)
3444 			goto err_mass_update_vr;
3445 	}
3446 	return 0;
3447 
3448 err_mass_update_vr:
3449 	list_for_each_entry_continue_reverse(vr_entry, &nh_grp->vr_list, list)
3450 		mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr_entry->key.proto,
3451 						  vr_entry->key.vr_id,
3452 						  nhgi->adj_index,
3453 						  nhgi->ecmp_size,
3454 						  old_adj_index, old_ecmp_size);
3455 	return err;
3456 }
3457 
3458 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
3459 					 u32 adj_index,
3460 					 struct mlxsw_sp_nexthop *nh,
3461 					 bool force, char *ratr_pl)
3462 {
3463 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3464 	enum mlxsw_reg_ratr_op op;
3465 	u16 rif_index;
3466 
3467 	rif_index = nh->rif ? nh->rif->rif_index :
3468 			      mlxsw_sp->router->lb_rif_index;
3469 	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
3470 		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
3471 	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
3472 			    adj_index, rif_index);
3473 	switch (nh->action) {
3474 	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
3475 		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3476 		break;
3477 	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
3478 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3479 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
3480 		break;
3481 	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
3482 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
3483 					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
3484 		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
3485 		break;
3486 	default:
3487 		WARN_ON_ONCE(1);
3488 		return -EINVAL;
3489 	}
3490 	if (nh->counter_valid)
3491 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3492 	else
3493 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3494 
3495 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3496 }
3497 
3498 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3499 				struct mlxsw_sp_nexthop *nh, bool force,
3500 				char *ratr_pl)
3501 {
3502 	int i;
3503 
3504 	for (i = 0; i < nh->num_adj_entries; i++) {
3505 		int err;
3506 
3507 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
3508 						    nh, force, ratr_pl);
3509 		if (err)
3510 			return err;
3511 	}
3512 
3513 	return 0;
3514 }
3515 
3516 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3517 					  u32 adj_index,
3518 					  struct mlxsw_sp_nexthop *nh,
3519 					  bool force, char *ratr_pl)
3520 {
3521 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3522 
3523 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3524 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
3525 					force, ratr_pl);
3526 }
3527 
3528 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3529 					u32 adj_index,
3530 					struct mlxsw_sp_nexthop *nh, bool force,
3531 					char *ratr_pl)
3532 {
3533 	int i;
3534 
3535 	for (i = 0; i < nh->num_adj_entries; i++) {
3536 		int err;
3537 
3538 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3539 						     nh, force, ratr_pl);
3540 		if (err)
3541 			return err;
3542 	}
3543 
3544 	return 0;
3545 }
3546 
3547 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3548 				   struct mlxsw_sp_nexthop *nh, bool force,
3549 				   char *ratr_pl)
3550 {
3551 	/* When action is discard or trap, the nexthop must be
3552 	 * programmed as an Ethernet nexthop.
3553 	 */
3554 	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
3555 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
3556 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3557 		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
3558 						   force, ratr_pl);
3559 	else
3560 		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
3561 						    force, ratr_pl);
3562 }
3563 
3564 static int
3565 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3566 			      struct mlxsw_sp_nexthop_group_info *nhgi,
3567 			      bool reallocate)
3568 {
3569 	char ratr_pl[MLXSW_REG_RATR_LEN];
3570 	u32 adj_index = nhgi->adj_index; /* base */
3571 	struct mlxsw_sp_nexthop *nh;
3572 	int i;
3573 
3574 	for (i = 0; i < nhgi->count; i++) {
3575 		nh = &nhgi->nexthops[i];
3576 
3577 		if (!nh->should_offload) {
3578 			nh->offloaded = 0;
3579 			continue;
3580 		}
3581 
3582 		if (nh->update || reallocate) {
3583 			int err = 0;
3584 
3585 			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
3586 						      true, ratr_pl);
3587 			if (err)
3588 				return err;
3589 			nh->update = 0;
3590 			nh->offloaded = 1;
3591 		}
3592 		adj_index += nh->num_adj_entries;
3593 	}
3594 	return 0;
3595 }
3596 
3597 static int
3598 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3599 				    struct mlxsw_sp_nexthop_group *nh_grp)
3600 {
3601 	struct mlxsw_sp_fib_entry *fib_entry;
3602 	int err;
3603 
3604 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3605 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3606 		if (err)
3607 			return err;
3608 	}
3609 	return 0;
3610 }
3611 
3612 struct mlxsw_sp_adj_grp_size_range {
3613 	u16 start; /* Inclusive */
3614 	u16 end; /* Inclusive */
3615 };
3616 
3617 /* Ordered by range start value */
3618 static const struct mlxsw_sp_adj_grp_size_range
3619 mlxsw_sp1_adj_grp_size_ranges[] = {
3620 	{ .start = 1, .end = 64 },
3621 	{ .start = 512, .end = 512 },
3622 	{ .start = 1024, .end = 1024 },
3623 	{ .start = 2048, .end = 2048 },
3624 	{ .start = 4096, .end = 4096 },
3625 };
3626 
3627 /* Ordered by range start value */
3628 static const struct mlxsw_sp_adj_grp_size_range
3629 mlxsw_sp2_adj_grp_size_ranges[] = {
3630 	{ .start = 1, .end = 128 },
3631 	{ .start = 256, .end = 256 },
3632 	{ .start = 512, .end = 512 },
3633 	{ .start = 1024, .end = 1024 },
3634 	{ .start = 2048, .end = 2048 },
3635 	{ .start = 4096, .end = 4096 },
3636 };
3637 
3638 static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
3639 					   u16 *p_adj_grp_size)
3640 {
3641 	int i;
3642 
3643 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
3644 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3645 
3646 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3647 
3648 		if (*p_adj_grp_size >= size_range->start &&
3649 		    *p_adj_grp_size <= size_range->end)
3650 			return;
3651 
3652 		if (*p_adj_grp_size <= size_range->end) {
3653 			*p_adj_grp_size = size_range->end;
3654 			return;
3655 		}
3656 	}
3657 }
3658 
3659 static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
3660 					     u16 *p_adj_grp_size,
3661 					     unsigned int alloc_size)
3662 {
3663 	int i;
3664 
3665 	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
3666 		const struct mlxsw_sp_adj_grp_size_range *size_range;
3667 
3668 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
3669 
3670 		if (alloc_size >= size_range->end) {
3671 			*p_adj_grp_size = size_range->end;
3672 			return;
3673 		}
3674 	}
3675 }
3676 
3677 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3678 				     u16 *p_adj_grp_size)
3679 {
3680 	unsigned int alloc_size;
3681 	int err;
3682 
3683 	/* Round up the requested group size to the next size supported
3684 	 * by the device and make sure the request can be satisfied.
3685 	 */
3686 	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
3687 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3688 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3689 					      *p_adj_grp_size, &alloc_size);
3690 	if (err)
3691 		return err;
3692 	/* It is possible the allocation results in more allocated
3693 	 * entries than requested. Try to use as much of them as
3694 	 * possible.
3695 	 */
3696 	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
3697 
3698 	return 0;
3699 }
3700 
3701 static void
3702 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi)
3703 {
3704 	int i, g = 0, sum_norm_weight = 0;
3705 	struct mlxsw_sp_nexthop *nh;
3706 
3707 	for (i = 0; i < nhgi->count; i++) {
3708 		nh = &nhgi->nexthops[i];
3709 
3710 		if (!nh->should_offload)
3711 			continue;
3712 		if (g > 0)
3713 			g = gcd(nh->nh_weight, g);
3714 		else
3715 			g = nh->nh_weight;
3716 	}
3717 
3718 	for (i = 0; i < nhgi->count; i++) {
3719 		nh = &nhgi->nexthops[i];
3720 
3721 		if (!nh->should_offload)
3722 			continue;
3723 		nh->norm_nh_weight = nh->nh_weight / g;
3724 		sum_norm_weight += nh->norm_nh_weight;
3725 	}
3726 
3727 	nhgi->sum_norm_weight = sum_norm_weight;
3728 }
3729 
3730 static void
3731 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi)
3732 {
3733 	int i, weight = 0, lower_bound = 0;
3734 	int total = nhgi->sum_norm_weight;
3735 	u16 ecmp_size = nhgi->ecmp_size;
3736 
3737 	for (i = 0; i < nhgi->count; i++) {
3738 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
3739 		int upper_bound;
3740 
3741 		if (!nh->should_offload)
3742 			continue;
3743 		weight += nh->norm_nh_weight;
3744 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3745 		nh->num_adj_entries = upper_bound - lower_bound;
3746 		lower_bound = upper_bound;
3747 	}
3748 }
3749 
3750 static struct mlxsw_sp_nexthop *
3751 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3752 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3753 
3754 static void
3755 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3756 					struct mlxsw_sp_nexthop_group *nh_grp)
3757 {
3758 	int i;
3759 
3760 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3761 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3762 
3763 		if (nh->offloaded)
3764 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3765 		else
3766 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3767 	}
3768 }
3769 
3770 static void
3771 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3772 					  struct mlxsw_sp_fib6_entry *fib6_entry)
3773 {
3774 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3775 
3776 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3777 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3778 		struct mlxsw_sp_nexthop *nh;
3779 
3780 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3781 		if (nh && nh->offloaded)
3782 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3783 		else
3784 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3785 	}
3786 }
3787 
3788 static void
3789 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3790 					struct mlxsw_sp_nexthop_group *nh_grp)
3791 {
3792 	struct mlxsw_sp_fib6_entry *fib6_entry;
3793 
3794 	/* Unfortunately, in IPv6 the route and the nexthop are described by
3795 	 * the same struct, so we need to iterate over all the routes using the
3796 	 * nexthop group and set / clear the offload indication for them.
3797 	 */
3798 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3799 			    common.nexthop_group_node)
3800 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3801 }
3802 
3803 static void
3804 mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3805 					const struct mlxsw_sp_nexthop *nh,
3806 					u16 bucket_index)
3807 {
3808 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
3809 	bool offload = false, trap = false;
3810 
3811 	if (nh->offloaded) {
3812 		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
3813 			trap = true;
3814 		else
3815 			offload = true;
3816 	}
3817 	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3818 				    bucket_index, offload, trap);
3819 }
3820 
3821 static void
3822 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3823 					   struct mlxsw_sp_nexthop_group *nh_grp)
3824 {
3825 	int i;
3826 
3827 	/* Do not update the flags if the nexthop group is being destroyed
3828 	 * since:
3829 	 * 1. The nexthop objects is being deleted, in which case the flags are
3830 	 * irrelevant.
3831 	 * 2. The nexthop group was replaced by a newer group, in which case
3832 	 * the flags of the nexthop object were already updated based on the
3833 	 * new group.
3834 	 */
3835 	if (nh_grp->can_destroy)
3836 		return;
3837 
3838 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
3839 			     nh_grp->nhgi->adj_index_valid, false);
3840 
3841 	/* Update flags of individual nexthop buckets in case of a resilient
3842 	 * nexthop group.
3843 	 */
3844 	if (!nh_grp->nhgi->is_resilient)
3845 		return;
3846 
3847 	for (i = 0; i < nh_grp->nhgi->count; i++) {
3848 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
3849 
3850 		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
3851 	}
3852 }
3853 
3854 static void
3855 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3856 				       struct mlxsw_sp_nexthop_group *nh_grp)
3857 {
3858 	switch (nh_grp->type) {
3859 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4:
3860 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3861 		break;
3862 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6:
3863 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3864 		break;
3865 	case MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ:
3866 		mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, nh_grp);
3867 		break;
3868 	}
3869 }
3870 
3871 static int
3872 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3873 			       struct mlxsw_sp_nexthop_group *nh_grp)
3874 {
3875 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
3876 	u16 ecmp_size, old_ecmp_size;
3877 	struct mlxsw_sp_nexthop *nh;
3878 	bool offload_change = false;
3879 	u32 adj_index;
3880 	bool old_adj_index_valid;
3881 	u32 old_adj_index;
3882 	int i, err2, err;
3883 
3884 	if (!nhgi->gateway)
3885 		return mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3886 
3887 	for (i = 0; i < nhgi->count; i++) {
3888 		nh = &nhgi->nexthops[i];
3889 
3890 		if (nh->should_offload != nh->offloaded) {
3891 			offload_change = true;
3892 			if (nh->should_offload)
3893 				nh->update = 1;
3894 		}
3895 	}
3896 	if (!offload_change) {
3897 		/* Nothing was added or removed, so no need to reallocate. Just
3898 		 * update MAC on existing adjacency indexes.
3899 		 */
3900 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false);
3901 		if (err) {
3902 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3903 			goto set_trap;
3904 		}
3905 		/* Flags of individual nexthop buckets might need to be
3906 		 * updated.
3907 		 */
3908 		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3909 		return 0;
3910 	}
3911 	mlxsw_sp_nexthop_group_normalize(nhgi);
3912 	if (!nhgi->sum_norm_weight) {
3913 		/* No neigh of this group is connected so we just set
3914 		 * the trap and let everthing flow through kernel.
3915 		 */
3916 		err = 0;
3917 		goto set_trap;
3918 	}
3919 
3920 	ecmp_size = nhgi->sum_norm_weight;
3921 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3922 	if (err)
3923 		/* No valid allocation size available. */
3924 		goto set_trap;
3925 
3926 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3927 				  ecmp_size, &adj_index);
3928 	if (err) {
3929 		/* We ran out of KVD linear space, just set the
3930 		 * trap and let everything flow through kernel.
3931 		 */
3932 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3933 		goto set_trap;
3934 	}
3935 	old_adj_index_valid = nhgi->adj_index_valid;
3936 	old_adj_index = nhgi->adj_index;
3937 	old_ecmp_size = nhgi->ecmp_size;
3938 	nhgi->adj_index_valid = 1;
3939 	nhgi->adj_index = adj_index;
3940 	nhgi->ecmp_size = ecmp_size;
3941 	mlxsw_sp_nexthop_group_rebalance(nhgi);
3942 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true);
3943 	if (err) {
3944 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3945 		goto set_trap;
3946 	}
3947 
3948 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3949 
3950 	if (!old_adj_index_valid) {
3951 		/* The trap was set for fib entries, so we have to call
3952 		 * fib entry update to unset it and use adjacency index.
3953 		 */
3954 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3955 		if (err) {
3956 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3957 			goto set_trap;
3958 		}
3959 		return 0;
3960 	}
3961 
3962 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3963 					     old_adj_index, old_ecmp_size);
3964 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3965 			   old_ecmp_size, old_adj_index);
3966 	if (err) {
3967 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3968 		goto set_trap;
3969 	}
3970 
3971 	return 0;
3972 
3973 set_trap:
3974 	old_adj_index_valid = nhgi->adj_index_valid;
3975 	nhgi->adj_index_valid = 0;
3976 	for (i = 0; i < nhgi->count; i++) {
3977 		nh = &nhgi->nexthops[i];
3978 		nh->offloaded = 0;
3979 	}
3980 	err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3981 	if (err2)
3982 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3983 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3984 	if (old_adj_index_valid)
3985 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3986 				   nhgi->ecmp_size, nhgi->adj_index);
3987 	return err;
3988 }
3989 
3990 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3991 					    bool removing)
3992 {
3993 	if (!removing) {
3994 		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
3995 		nh->should_offload = 1;
3996 	} else if (nh->nhgi->is_resilient) {
3997 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
3998 		nh->should_offload = 1;
3999 	} else {
4000 		nh->should_offload = 0;
4001 	}
4002 	nh->update = 1;
4003 }
4004 
4005 static int
4006 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
4007 				    struct mlxsw_sp_neigh_entry *neigh_entry)
4008 {
4009 	struct neighbour *n, *old_n = neigh_entry->key.n;
4010 	struct mlxsw_sp_nexthop *nh;
4011 	bool entry_connected;
4012 	u8 nud_state, dead;
4013 	int err;
4014 
4015 	nh = list_first_entry(&neigh_entry->nexthop_list,
4016 			      struct mlxsw_sp_nexthop, neigh_list_node);
4017 
4018 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4019 	if (!n) {
4020 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4021 		if (IS_ERR(n))
4022 			return PTR_ERR(n);
4023 		neigh_event_send(n, NULL);
4024 	}
4025 
4026 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
4027 	neigh_entry->key.n = n;
4028 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4029 	if (err)
4030 		goto err_neigh_entry_insert;
4031 
4032 	read_lock_bh(&n->lock);
4033 	nud_state = n->nud_state;
4034 	dead = n->dead;
4035 	read_unlock_bh(&n->lock);
4036 	entry_connected = nud_state & NUD_VALID && !dead;
4037 
4038 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4039 			    neigh_list_node) {
4040 		neigh_release(old_n);
4041 		neigh_clone(n);
4042 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
4043 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4044 	}
4045 
4046 	neigh_release(n);
4047 
4048 	return 0;
4049 
4050 err_neigh_entry_insert:
4051 	neigh_entry->key.n = old_n;
4052 	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
4053 	neigh_release(n);
4054 	return err;
4055 }
4056 
4057 static void
4058 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
4059 			      struct mlxsw_sp_neigh_entry *neigh_entry,
4060 			      bool removing, bool dead)
4061 {
4062 	struct mlxsw_sp_nexthop *nh;
4063 
4064 	if (list_empty(&neigh_entry->nexthop_list))
4065 		return;
4066 
4067 	if (dead) {
4068 		int err;
4069 
4070 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
4071 							  neigh_entry);
4072 		if (err)
4073 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
4074 		return;
4075 	}
4076 
4077 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
4078 			    neigh_list_node) {
4079 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4080 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4081 	}
4082 }
4083 
4084 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
4085 				      struct mlxsw_sp_rif *rif)
4086 {
4087 	if (nh->rif)
4088 		return;
4089 
4090 	nh->rif = rif;
4091 	list_add(&nh->rif_list_node, &rif->nexthop_list);
4092 }
4093 
4094 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
4095 {
4096 	if (!nh->rif)
4097 		return;
4098 
4099 	list_del(&nh->rif_list_node);
4100 	nh->rif = NULL;
4101 }
4102 
4103 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
4104 				       struct mlxsw_sp_nexthop *nh)
4105 {
4106 	struct mlxsw_sp_neigh_entry *neigh_entry;
4107 	struct neighbour *n;
4108 	u8 nud_state, dead;
4109 	int err;
4110 
4111 	if (!nh->nhgi->gateway || nh->neigh_entry)
4112 		return 0;
4113 
4114 	/* Take a reference of neigh here ensuring that neigh would
4115 	 * not be destructed before the nexthop entry is finished.
4116 	 * The reference is taken either in neigh_lookup() or
4117 	 * in neigh_create() in case n is not found.
4118 	 */
4119 	n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4120 	if (!n) {
4121 		n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev);
4122 		if (IS_ERR(n))
4123 			return PTR_ERR(n);
4124 		neigh_event_send(n, NULL);
4125 	}
4126 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
4127 	if (!neigh_entry) {
4128 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
4129 		if (IS_ERR(neigh_entry)) {
4130 			err = -EINVAL;
4131 			goto err_neigh_entry_create;
4132 		}
4133 	}
4134 
4135 	/* If that is the first nexthop connected to that neigh, add to
4136 	 * nexthop_neighs_list
4137 	 */
4138 	if (list_empty(&neigh_entry->nexthop_list))
4139 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
4140 			      &mlxsw_sp->router->nexthop_neighs_list);
4141 
4142 	nh->neigh_entry = neigh_entry;
4143 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
4144 	read_lock_bh(&n->lock);
4145 	nud_state = n->nud_state;
4146 	dead = n->dead;
4147 	read_unlock_bh(&n->lock);
4148 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
4149 
4150 	return 0;
4151 
4152 err_neigh_entry_create:
4153 	neigh_release(n);
4154 	return err;
4155 }
4156 
4157 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
4158 					struct mlxsw_sp_nexthop *nh)
4159 {
4160 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
4161 	struct neighbour *n;
4162 
4163 	if (!neigh_entry)
4164 		return;
4165 	n = neigh_entry->key.n;
4166 
4167 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4168 	list_del(&nh->neigh_list_node);
4169 	nh->neigh_entry = NULL;
4170 
4171 	/* If that is the last nexthop connected to that neigh, remove from
4172 	 * nexthop_neighs_list
4173 	 */
4174 	if (list_empty(&neigh_entry->nexthop_list))
4175 		list_del(&neigh_entry->nexthop_neighs_list_node);
4176 
4177 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
4178 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
4179 
4180 	neigh_release(n);
4181 }
4182 
4183 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
4184 {
4185 	struct net_device *ul_dev;
4186 	bool is_up;
4187 
4188 	rcu_read_lock();
4189 	ul_dev = mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
4190 	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
4191 	rcu_read_unlock();
4192 
4193 	return is_up;
4194 }
4195 
4196 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
4197 				       struct mlxsw_sp_nexthop *nh,
4198 				       struct mlxsw_sp_ipip_entry *ipip_entry)
4199 {
4200 	bool removing;
4201 
4202 	if (!nh->nhgi->gateway || nh->ipip_entry)
4203 		return;
4204 
4205 	nh->ipip_entry = ipip_entry;
4206 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
4207 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
4208 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
4209 }
4210 
4211 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
4212 				       struct mlxsw_sp_nexthop *nh)
4213 {
4214 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
4215 
4216 	if (!ipip_entry)
4217 		return;
4218 
4219 	__mlxsw_sp_nexthop_neigh_update(nh, true);
4220 	nh->ipip_entry = NULL;
4221 }
4222 
4223 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4224 					const struct fib_nh *fib_nh,
4225 					enum mlxsw_sp_ipip_type *p_ipipt)
4226 {
4227 	struct net_device *dev = fib_nh->fib_nh_dev;
4228 
4229 	return dev &&
4230 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
4231 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
4232 }
4233 
4234 static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp,
4235 				      struct mlxsw_sp_nexthop *nh,
4236 				      const struct net_device *dev)
4237 {
4238 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4239 	struct mlxsw_sp_ipip_entry *ipip_entry;
4240 	struct mlxsw_sp_rif *rif;
4241 	int err;
4242 
4243 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4244 	if (ipip_entry) {
4245 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4246 		if (ipip_ops->can_offload(mlxsw_sp, dev)) {
4247 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4248 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4249 			return 0;
4250 		}
4251 	}
4252 
4253 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4254 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4255 	if (!rif)
4256 		return 0;
4257 
4258 	mlxsw_sp_nexthop_rif_init(nh, rif);
4259 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4260 	if (err)
4261 		goto err_neigh_init;
4262 
4263 	return 0;
4264 
4265 err_neigh_init:
4266 	mlxsw_sp_nexthop_rif_fini(nh);
4267 	return err;
4268 }
4269 
4270 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
4271 				       struct mlxsw_sp_nexthop *nh)
4272 {
4273 	switch (nh->type) {
4274 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
4275 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
4276 		mlxsw_sp_nexthop_rif_fini(nh);
4277 		break;
4278 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4279 		mlxsw_sp_nexthop_rif_fini(nh);
4280 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
4281 		break;
4282 	}
4283 }
4284 
4285 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
4286 				  struct mlxsw_sp_nexthop_group *nh_grp,
4287 				  struct mlxsw_sp_nexthop *nh,
4288 				  struct fib_nh *fib_nh)
4289 {
4290 	struct net_device *dev = fib_nh->fib_nh_dev;
4291 	struct in_device *in_dev;
4292 	int err;
4293 
4294 	nh->nhgi = nh_grp->nhgi;
4295 	nh->key.fib_nh = fib_nh;
4296 #ifdef CONFIG_IP_ROUTE_MULTIPATH
4297 	nh->nh_weight = fib_nh->fib_nh_weight;
4298 #else
4299 	nh->nh_weight = 1;
4300 #endif
4301 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
4302 	nh->neigh_tbl = &arp_tbl;
4303 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
4304 	if (err)
4305 		return err;
4306 
4307 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4308 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4309 
4310 	if (!dev)
4311 		return 0;
4312 	nh->ifindex = dev->ifindex;
4313 
4314 	rcu_read_lock();
4315 	in_dev = __in_dev_get_rcu(dev);
4316 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
4317 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4318 		rcu_read_unlock();
4319 		return 0;
4320 	}
4321 	rcu_read_unlock();
4322 
4323 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4324 	if (err)
4325 		goto err_nexthop_neigh_init;
4326 
4327 	return 0;
4328 
4329 err_nexthop_neigh_init:
4330 	list_del(&nh->router_list_node);
4331 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4332 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4333 	return err;
4334 }
4335 
4336 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
4337 				   struct mlxsw_sp_nexthop *nh)
4338 {
4339 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4340 	list_del(&nh->router_list_node);
4341 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4342 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
4343 }
4344 
4345 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
4346 				    unsigned long event, struct fib_nh *fib_nh)
4347 {
4348 	struct mlxsw_sp_nexthop_key key;
4349 	struct mlxsw_sp_nexthop *nh;
4350 
4351 	key.fib_nh = fib_nh;
4352 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
4353 	if (!nh)
4354 		return;
4355 
4356 	switch (event) {
4357 	case FIB_EVENT_NH_ADD:
4358 		mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev);
4359 		break;
4360 	case FIB_EVENT_NH_DEL:
4361 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4362 		break;
4363 	}
4364 
4365 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4366 }
4367 
4368 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
4369 					struct mlxsw_sp_rif *rif)
4370 {
4371 	struct mlxsw_sp_nexthop *nh;
4372 	bool removing;
4373 
4374 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
4375 		switch (nh->type) {
4376 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
4377 			removing = false;
4378 			break;
4379 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
4380 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
4381 			break;
4382 		default:
4383 			WARN_ON(1);
4384 			continue;
4385 		}
4386 
4387 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
4388 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4389 	}
4390 }
4391 
4392 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
4393 					 struct mlxsw_sp_rif *old_rif,
4394 					 struct mlxsw_sp_rif *new_rif)
4395 {
4396 	struct mlxsw_sp_nexthop *nh;
4397 
4398 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
4399 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
4400 		nh->rif = new_rif;
4401 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
4402 }
4403 
4404 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
4405 					   struct mlxsw_sp_rif *rif)
4406 {
4407 	struct mlxsw_sp_nexthop *nh, *tmp;
4408 
4409 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
4410 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4411 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp);
4412 	}
4413 }
4414 
4415 static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp)
4416 {
4417 	enum mlxsw_reg_ratr_trap_action trap_action;
4418 	char ratr_pl[MLXSW_REG_RATR_LEN];
4419 	int err;
4420 
4421 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4422 				  &mlxsw_sp->router->adj_trap_index);
4423 	if (err)
4424 		return err;
4425 
4426 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_TRAP;
4427 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4428 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4429 			    mlxsw_sp->router->adj_trap_index,
4430 			    mlxsw_sp->router->lb_rif_index);
4431 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4432 	mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
4433 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4434 	if (err)
4435 		goto err_ratr_write;
4436 
4437 	return 0;
4438 
4439 err_ratr_write:
4440 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4441 			   mlxsw_sp->router->adj_trap_index);
4442 	return err;
4443 }
4444 
4445 static void mlxsw_sp_adj_trap_entry_fini(struct mlxsw_sp *mlxsw_sp)
4446 {
4447 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4448 			   mlxsw_sp->router->adj_trap_index);
4449 }
4450 
4451 static int mlxsw_sp_nexthop_group_inc(struct mlxsw_sp *mlxsw_sp)
4452 {
4453 	int err;
4454 
4455 	if (refcount_inc_not_zero(&mlxsw_sp->router->num_groups))
4456 		return 0;
4457 
4458 	err = mlxsw_sp_adj_trap_entry_init(mlxsw_sp);
4459 	if (err)
4460 		return err;
4461 
4462 	refcount_set(&mlxsw_sp->router->num_groups, 1);
4463 
4464 	return 0;
4465 }
4466 
4467 static void mlxsw_sp_nexthop_group_dec(struct mlxsw_sp *mlxsw_sp)
4468 {
4469 	if (!refcount_dec_and_test(&mlxsw_sp->router->num_groups))
4470 		return;
4471 
4472 	mlxsw_sp_adj_trap_entry_fini(mlxsw_sp);
4473 }
4474 
4475 static void
4476 mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
4477 			     const struct mlxsw_sp_nexthop_group *nh_grp,
4478 			     unsigned long *activity)
4479 {
4480 	char *ratrad_pl;
4481 	int i, err;
4482 
4483 	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
4484 	if (!ratrad_pl)
4485 		return;
4486 
4487 	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
4488 			      nh_grp->nhgi->count);
4489 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
4490 	if (err)
4491 		goto out;
4492 
4493 	for (i = 0; i < nh_grp->nhgi->count; i++) {
4494 		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
4495 			continue;
4496 		bitmap_set(activity, i, 1);
4497 	}
4498 
4499 out:
4500 	kfree(ratrad_pl);
4501 }
4502 
4503 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
4504 
4505 static void
4506 mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
4507 				const struct mlxsw_sp_nexthop_group *nh_grp)
4508 {
4509 	unsigned long *activity;
4510 
4511 	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
4512 	if (!activity)
4513 		return;
4514 
4515 	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
4516 	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
4517 					nh_grp->nhgi->count, activity);
4518 
4519 	bitmap_free(activity);
4520 }
4521 
4522 static void
4523 mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
4524 {
4525 	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
4526 
4527 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
4528 			       msecs_to_jiffies(interval));
4529 }
4530 
4531 static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
4532 {
4533 	struct mlxsw_sp_nexthop_group_info *nhgi;
4534 	struct mlxsw_sp_router *router;
4535 	bool reschedule = false;
4536 
4537 	router = container_of(work, struct mlxsw_sp_router,
4538 			      nh_grp_activity_dw.work);
4539 
4540 	mutex_lock(&router->lock);
4541 
4542 	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
4543 		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
4544 		reschedule = true;
4545 	}
4546 
4547 	mutex_unlock(&router->lock);
4548 
4549 	if (!reschedule)
4550 		return;
4551 	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
4552 }
4553 
4554 static int
4555 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
4556 				     const struct nh_notifier_single_info *nh,
4557 				     struct netlink_ext_ack *extack)
4558 {
4559 	int err = -EINVAL;
4560 
4561 	if (nh->is_fdb)
4562 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthops are not supported");
4563 	else if (nh->has_encap)
4564 		NL_SET_ERR_MSG_MOD(extack, "Encapsulating nexthops are not supported");
4565 	else
4566 		err = 0;
4567 
4568 	return err;
4569 }
4570 
4571 static int
4572 mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
4573 					  const struct nh_notifier_single_info *nh,
4574 					  struct netlink_ext_ack *extack)
4575 {
4576 	int err;
4577 
4578 	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
4579 	if (err)
4580 		return err;
4581 
4582 	/* Device only nexthops with an IPIP device are programmed as
4583 	 * encapsulating adjacency entries.
4584 	 */
4585 	if (!nh->gw_family && !nh->is_reject &&
4586 	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
4587 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
4588 		return -EINVAL;
4589 	}
4590 
4591 	return 0;
4592 }
4593 
4594 static int
4595 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
4596 				    const struct nh_notifier_grp_info *nh_grp,
4597 				    struct netlink_ext_ack *extack)
4598 {
4599 	int i;
4600 
4601 	if (nh_grp->is_fdb) {
4602 		NL_SET_ERR_MSG_MOD(extack, "FDB nexthop groups are not supported");
4603 		return -EINVAL;
4604 	}
4605 
4606 	for (i = 0; i < nh_grp->num_nh; i++) {
4607 		const struct nh_notifier_single_info *nh;
4608 		int err;
4609 
4610 		nh = &nh_grp->nh_entries[i].nh;
4611 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4612 								extack);
4613 		if (err)
4614 			return err;
4615 	}
4616 
4617 	return 0;
4618 }
4619 
4620 static int
4621 mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
4622 					     const struct nh_notifier_res_table_info *nh_res_table,
4623 					     struct netlink_ext_ack *extack)
4624 {
4625 	unsigned int alloc_size;
4626 	bool valid_size = false;
4627 	int err, i;
4628 
4629 	if (nh_res_table->num_nh_buckets < 32) {
4630 		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
4631 		return -EINVAL;
4632 	}
4633 
4634 	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
4635 		const struct mlxsw_sp_adj_grp_size_range *size_range;
4636 
4637 		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
4638 
4639 		if (nh_res_table->num_nh_buckets >= size_range->start &&
4640 		    nh_res_table->num_nh_buckets <= size_range->end) {
4641 			valid_size = true;
4642 			break;
4643 		}
4644 	}
4645 
4646 	if (!valid_size) {
4647 		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
4648 		return -EINVAL;
4649 	}
4650 
4651 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
4652 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
4653 					      nh_res_table->num_nh_buckets,
4654 					      &alloc_size);
4655 	if (err || nh_res_table->num_nh_buckets != alloc_size) {
4656 		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
4657 		return -EINVAL;
4658 	}
4659 
4660 	return 0;
4661 }
4662 
4663 static int
4664 mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
4665 					const struct nh_notifier_res_table_info *nh_res_table,
4666 					struct netlink_ext_ack *extack)
4667 {
4668 	int err;
4669 	u16 i;
4670 
4671 	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
4672 							   nh_res_table,
4673 							   extack);
4674 	if (err)
4675 		return err;
4676 
4677 	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
4678 		const struct nh_notifier_single_info *nh;
4679 		int err;
4680 
4681 		nh = &nh_res_table->nhs[i];
4682 		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4683 								extack);
4684 		if (err)
4685 			return err;
4686 	}
4687 
4688 	return 0;
4689 }
4690 
4691 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
4692 					 unsigned long event,
4693 					 struct nh_notifier_info *info)
4694 {
4695 	struct nh_notifier_single_info *nh;
4696 
4697 	if (event != NEXTHOP_EVENT_REPLACE &&
4698 	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
4699 	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
4700 		return 0;
4701 
4702 	switch (info->type) {
4703 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4704 		return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh,
4705 							    info->extack);
4706 	case NH_NOTIFIER_INFO_TYPE_GRP:
4707 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
4708 							   info->nh_grp,
4709 							   info->extack);
4710 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4711 		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
4712 							       info->nh_res_table,
4713 							       info->extack);
4714 	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
4715 		nh = &info->nh_res_bucket->new_nh;
4716 		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
4717 								 info->extack);
4718 	default:
4719 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
4720 		return -EOPNOTSUPP;
4721 	}
4722 }
4723 
4724 static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
4725 					    const struct nh_notifier_info *info)
4726 {
4727 	const struct net_device *dev;
4728 
4729 	switch (info->type) {
4730 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4731 		dev = info->nh->dev;
4732 		return info->nh->gw_family || info->nh->is_reject ||
4733 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
4734 	case NH_NOTIFIER_INFO_TYPE_GRP:
4735 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4736 		/* Already validated earlier. */
4737 		return true;
4738 	default:
4739 		return false;
4740 	}
4741 }
4742 
4743 static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
4744 						struct mlxsw_sp_nexthop *nh)
4745 {
4746 	u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
4747 
4748 	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
4749 	nh->should_offload = 1;
4750 	/* While nexthops that discard packets do not forward packets
4751 	 * via an egress RIF, they still need to be programmed using a
4752 	 * valid RIF, so use the loopback RIF created during init.
4753 	 */
4754 	nh->rif = mlxsw_sp->router->rifs[lb_rif_index];
4755 }
4756 
4757 static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp,
4758 						struct mlxsw_sp_nexthop *nh)
4759 {
4760 	nh->rif = NULL;
4761 	nh->should_offload = 0;
4762 }
4763 
4764 static int
4765 mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
4766 			  struct mlxsw_sp_nexthop_group *nh_grp,
4767 			  struct mlxsw_sp_nexthop *nh,
4768 			  struct nh_notifier_single_info *nh_obj, int weight)
4769 {
4770 	struct net_device *dev = nh_obj->dev;
4771 	int err;
4772 
4773 	nh->nhgi = nh_grp->nhgi;
4774 	nh->nh_weight = weight;
4775 
4776 	switch (nh_obj->gw_family) {
4777 	case AF_INET:
4778 		memcpy(&nh->gw_addr, &nh_obj->ipv4, sizeof(nh_obj->ipv4));
4779 		nh->neigh_tbl = &arp_tbl;
4780 		break;
4781 	case AF_INET6:
4782 		memcpy(&nh->gw_addr, &nh_obj->ipv6, sizeof(nh_obj->ipv6));
4783 #if IS_ENABLED(CONFIG_IPV6)
4784 		nh->neigh_tbl = &nd_tbl;
4785 #endif
4786 		break;
4787 	}
4788 
4789 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4790 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4791 	nh->ifindex = dev->ifindex;
4792 
4793 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
4794 	if (err)
4795 		goto err_type_init;
4796 
4797 	if (nh_obj->is_reject)
4798 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
4799 
4800 	/* In a resilient nexthop group, all the nexthops must be written to
4801 	 * the adjacency table. Even if they do not have a valid neighbour or
4802 	 * RIF.
4803 	 */
4804 	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
4805 		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
4806 		nh->should_offload = 1;
4807 	}
4808 
4809 	return 0;
4810 
4811 err_type_init:
4812 	list_del(&nh->router_list_node);
4813 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4814 	return err;
4815 }
4816 
4817 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
4818 				      struct mlxsw_sp_nexthop *nh)
4819 {
4820 	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
4821 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
4822 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4823 	list_del(&nh->router_list_node);
4824 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4825 	nh->should_offload = 0;
4826 }
4827 
4828 static int
4829 mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
4830 				     struct mlxsw_sp_nexthop_group *nh_grp,
4831 				     struct nh_notifier_info *info)
4832 {
4833 	struct mlxsw_sp_nexthop_group_info *nhgi;
4834 	struct mlxsw_sp_nexthop *nh;
4835 	bool is_resilient = false;
4836 	unsigned int nhs;
4837 	int err, i;
4838 
4839 	switch (info->type) {
4840 	case NH_NOTIFIER_INFO_TYPE_SINGLE:
4841 		nhs = 1;
4842 		break;
4843 	case NH_NOTIFIER_INFO_TYPE_GRP:
4844 		nhs = info->nh_grp->num_nh;
4845 		break;
4846 	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4847 		nhs = info->nh_res_table->num_nh_buckets;
4848 		is_resilient = true;
4849 		break;
4850 	default:
4851 		return -EINVAL;
4852 	}
4853 
4854 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
4855 	if (!nhgi)
4856 		return -ENOMEM;
4857 	nh_grp->nhgi = nhgi;
4858 	nhgi->nh_grp = nh_grp;
4859 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
4860 	nhgi->is_resilient = is_resilient;
4861 	nhgi->count = nhs;
4862 	for (i = 0; i < nhgi->count; i++) {
4863 		struct nh_notifier_single_info *nh_obj;
4864 		int weight;
4865 
4866 		nh = &nhgi->nexthops[i];
4867 		switch (info->type) {
4868 		case NH_NOTIFIER_INFO_TYPE_SINGLE:
4869 			nh_obj = info->nh;
4870 			weight = 1;
4871 			break;
4872 		case NH_NOTIFIER_INFO_TYPE_GRP:
4873 			nh_obj = &info->nh_grp->nh_entries[i].nh;
4874 			weight = info->nh_grp->nh_entries[i].weight;
4875 			break;
4876 		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
4877 			nh_obj = &info->nh_res_table->nhs[i];
4878 			weight = 1;
4879 			break;
4880 		default:
4881 			err = -EINVAL;
4882 			goto err_nexthop_obj_init;
4883 		}
4884 		err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj,
4885 						weight);
4886 		if (err)
4887 			goto err_nexthop_obj_init;
4888 	}
4889 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
4890 	if (err)
4891 		goto err_group_inc;
4892 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4893 	if (err) {
4894 		NL_SET_ERR_MSG_MOD(info->extack, "Failed to write adjacency entries to the device");
4895 		goto err_group_refresh;
4896 	}
4897 
4898 	/* Add resilient nexthop groups to a list so that the activity of their
4899 	 * nexthop buckets will be periodically queried and cleared.
4900 	 */
4901 	if (nhgi->is_resilient) {
4902 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4903 			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
4904 		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
4905 	}
4906 
4907 	return 0;
4908 
4909 err_group_refresh:
4910 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
4911 err_group_inc:
4912 	i = nhgi->count;
4913 err_nexthop_obj_init:
4914 	for (i--; i >= 0; i--) {
4915 		nh = &nhgi->nexthops[i];
4916 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4917 	}
4918 	kfree(nhgi);
4919 	return err;
4920 }
4921 
4922 static void
4923 mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
4924 				     struct mlxsw_sp_nexthop_group *nh_grp)
4925 {
4926 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
4927 	struct mlxsw_sp_router *router = mlxsw_sp->router;
4928 	int i;
4929 
4930 	if (nhgi->is_resilient) {
4931 		list_del(&nhgi->list);
4932 		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
4933 			cancel_delayed_work(&router->nh_grp_activity_dw);
4934 	}
4935 
4936 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
4937 	for (i = nhgi->count - 1; i >= 0; i--) {
4938 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
4939 
4940 		mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
4941 	}
4942 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4943 	WARN_ON_ONCE(nhgi->adj_index_valid);
4944 	kfree(nhgi);
4945 }
4946 
4947 static struct mlxsw_sp_nexthop_group *
4948 mlxsw_sp_nexthop_obj_group_create(struct mlxsw_sp *mlxsw_sp,
4949 				  struct nh_notifier_info *info)
4950 {
4951 	struct mlxsw_sp_nexthop_group *nh_grp;
4952 	int err;
4953 
4954 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
4955 	if (!nh_grp)
4956 		return ERR_PTR(-ENOMEM);
4957 	INIT_LIST_HEAD(&nh_grp->vr_list);
4958 	err = rhashtable_init(&nh_grp->vr_ht,
4959 			      &mlxsw_sp_nexthop_group_vr_ht_params);
4960 	if (err)
4961 		goto err_nexthop_group_vr_ht_init;
4962 	INIT_LIST_HEAD(&nh_grp->fib_list);
4963 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
4964 	nh_grp->obj.id = info->id;
4965 
4966 	err = mlxsw_sp_nexthop_obj_group_info_init(mlxsw_sp, nh_grp, info);
4967 	if (err)
4968 		goto err_nexthop_group_info_init;
4969 
4970 	nh_grp->can_destroy = false;
4971 
4972 	return nh_grp;
4973 
4974 err_nexthop_group_info_init:
4975 	rhashtable_destroy(&nh_grp->vr_ht);
4976 err_nexthop_group_vr_ht_init:
4977 	kfree(nh_grp);
4978 	return ERR_PTR(err);
4979 }
4980 
4981 static void
4982 mlxsw_sp_nexthop_obj_group_destroy(struct mlxsw_sp *mlxsw_sp,
4983 				   struct mlxsw_sp_nexthop_group *nh_grp)
4984 {
4985 	if (!nh_grp->can_destroy)
4986 		return;
4987 	mlxsw_sp_nexthop_obj_group_info_fini(mlxsw_sp, nh_grp);
4988 	WARN_ON_ONCE(!list_empty(&nh_grp->fib_list));
4989 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
4990 	rhashtable_destroy(&nh_grp->vr_ht);
4991 	kfree(nh_grp);
4992 }
4993 
4994 static struct mlxsw_sp_nexthop_group *
4995 mlxsw_sp_nexthop_obj_group_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
4996 {
4997 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
4998 
4999 	cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ;
5000 	cmp_arg.id = id;
5001 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
5002 				      &cmp_arg,
5003 				      mlxsw_sp_nexthop_group_ht_params);
5004 }
5005 
5006 static int mlxsw_sp_nexthop_obj_group_add(struct mlxsw_sp *mlxsw_sp,
5007 					  struct mlxsw_sp_nexthop_group *nh_grp)
5008 {
5009 	return mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5010 }
5011 
5012 static int
5013 mlxsw_sp_nexthop_obj_group_replace(struct mlxsw_sp *mlxsw_sp,
5014 				   struct mlxsw_sp_nexthop_group *nh_grp,
5015 				   struct mlxsw_sp_nexthop_group *old_nh_grp,
5016 				   struct netlink_ext_ack *extack)
5017 {
5018 	struct mlxsw_sp_nexthop_group_info *old_nhgi = old_nh_grp->nhgi;
5019 	struct mlxsw_sp_nexthop_group_info *new_nhgi = nh_grp->nhgi;
5020 	int err;
5021 
5022 	old_nh_grp->nhgi = new_nhgi;
5023 	new_nhgi->nh_grp = old_nh_grp;
5024 	nh_grp->nhgi = old_nhgi;
5025 	old_nhgi->nh_grp = nh_grp;
5026 
5027 	if (old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5028 		/* Both the old adjacency index and the new one are valid.
5029 		 * Routes are currently using the old one. Tell the device to
5030 		 * replace the old adjacency index with the new one.
5031 		 */
5032 		err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, old_nh_grp,
5033 						     old_nhgi->adj_index,
5034 						     old_nhgi->ecmp_size);
5035 		if (err) {
5036 			NL_SET_ERR_MSG_MOD(extack, "Failed to replace old adjacency index with new one");
5037 			goto err_out;
5038 		}
5039 	} else if (old_nhgi->adj_index_valid && !new_nhgi->adj_index_valid) {
5040 		/* The old adjacency index is valid, while the new one is not.
5041 		 * Iterate over all the routes using the group and change them
5042 		 * to trap packets to the CPU.
5043 		 */
5044 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5045 		if (err) {
5046 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to trap packets");
5047 			goto err_out;
5048 		}
5049 	} else if (!old_nhgi->adj_index_valid && new_nhgi->adj_index_valid) {
5050 		/* The old adjacency index is invalid, while the new one is.
5051 		 * Iterate over all the routes using the group and change them
5052 		 * to forward packets using the new valid index.
5053 		 */
5054 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, old_nh_grp);
5055 		if (err) {
5056 			NL_SET_ERR_MSG_MOD(extack, "Failed to update routes to forward packets");
5057 			goto err_out;
5058 		}
5059 	}
5060 
5061 	/* Make sure the flags are set / cleared based on the new nexthop group
5062 	 * information.
5063 	 */
5064 	mlxsw_sp_nexthop_obj_group_offload_refresh(mlxsw_sp, old_nh_grp);
5065 
5066 	/* At this point 'nh_grp' is just a shell that is not used by anyone
5067 	 * and its nexthop group info is the old info that was just replaced
5068 	 * with the new one. Remove it.
5069 	 */
5070 	nh_grp->can_destroy = true;
5071 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5072 
5073 	return 0;
5074 
5075 err_out:
5076 	old_nhgi->nh_grp = old_nh_grp;
5077 	nh_grp->nhgi = new_nhgi;
5078 	new_nhgi->nh_grp = nh_grp;
5079 	old_nh_grp->nhgi = old_nhgi;
5080 	return err;
5081 }
5082 
5083 static int mlxsw_sp_nexthop_obj_new(struct mlxsw_sp *mlxsw_sp,
5084 				    struct nh_notifier_info *info)
5085 {
5086 	struct mlxsw_sp_nexthop_group *nh_grp, *old_nh_grp;
5087 	struct netlink_ext_ack *extack = info->extack;
5088 	int err;
5089 
5090 	nh_grp = mlxsw_sp_nexthop_obj_group_create(mlxsw_sp, info);
5091 	if (IS_ERR(nh_grp))
5092 		return PTR_ERR(nh_grp);
5093 
5094 	old_nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5095 	if (!old_nh_grp)
5096 		err = mlxsw_sp_nexthop_obj_group_add(mlxsw_sp, nh_grp);
5097 	else
5098 		err = mlxsw_sp_nexthop_obj_group_replace(mlxsw_sp, nh_grp,
5099 							 old_nh_grp, extack);
5100 
5101 	if (err) {
5102 		nh_grp->can_destroy = true;
5103 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5104 	}
5105 
5106 	return err;
5107 }
5108 
5109 static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
5110 				     struct nh_notifier_info *info)
5111 {
5112 	struct mlxsw_sp_nexthop_group *nh_grp;
5113 
5114 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5115 	if (!nh_grp)
5116 		return;
5117 
5118 	nh_grp->can_destroy = true;
5119 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5120 
5121 	/* If the group still has routes using it, then defer the delete
5122 	 * operation until the last route using it is deleted.
5123 	 */
5124 	if (!list_empty(&nh_grp->fib_list))
5125 		return;
5126 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5127 }
5128 
5129 static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
5130 					     u32 adj_index, char *ratr_pl)
5131 {
5132 	MLXSW_REG_ZERO(ratr, ratr_pl);
5133 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5134 	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
5135 	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
5136 
5137 	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
5138 }
5139 
5140 static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
5141 {
5142 	/* Clear the opcode and activity on both the old and new payload as
5143 	 * they are irrelevant for the comparison.
5144 	 */
5145 	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
5146 	mlxsw_reg_ratr_a_set(ratr_pl, 0);
5147 	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
5148 	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
5149 
5150 	/* If the contents of the adjacency entry are consistent with the
5151 	 * replacement request, then replacement was successful.
5152 	 */
5153 	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
5154 		return 0;
5155 
5156 	return -EINVAL;
5157 }
5158 
5159 static int
5160 mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
5161 				       struct mlxsw_sp_nexthop *nh,
5162 				       struct nh_notifier_info *info)
5163 {
5164 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5165 	struct netlink_ext_ack *extack = info->extack;
5166 	bool force = info->nh_res_bucket->force;
5167 	char ratr_pl_new[MLXSW_REG_RATR_LEN];
5168 	char ratr_pl[MLXSW_REG_RATR_LEN];
5169 	u32 adj_index;
5170 	int err;
5171 
5172 	/* No point in trying an atomic replacement if the idle timer interval
5173 	 * is smaller than the interval in which we query and clear activity.
5174 	 */
5175 	if (!force && info->nh_res_bucket->idle_timer_ms <
5176 	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
5177 		force = true;
5178 
5179 	adj_index = nh->nhgi->adj_index + bucket_index;
5180 	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
5181 	if (err) {
5182 		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
5183 		return err;
5184 	}
5185 
5186 	if (!force) {
5187 		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
5188 							ratr_pl_new);
5189 		if (err) {
5190 			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
5191 			return err;
5192 		}
5193 
5194 		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
5195 		if (err) {
5196 			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
5197 			return err;
5198 		}
5199 	}
5200 
5201 	nh->update = 0;
5202 	nh->offloaded = 1;
5203 	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
5204 
5205 	return 0;
5206 }
5207 
5208 static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
5209 					       struct nh_notifier_info *info)
5210 {
5211 	u16 bucket_index = info->nh_res_bucket->bucket_index;
5212 	struct netlink_ext_ack *extack = info->extack;
5213 	struct mlxsw_sp_nexthop_group_info *nhgi;
5214 	struct nh_notifier_single_info *nh_obj;
5215 	struct mlxsw_sp_nexthop_group *nh_grp;
5216 	struct mlxsw_sp_nexthop *nh;
5217 	int err;
5218 
5219 	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
5220 	if (!nh_grp) {
5221 		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
5222 		return -EINVAL;
5223 	}
5224 
5225 	nhgi = nh_grp->nhgi;
5226 
5227 	if (bucket_index >= nhgi->count) {
5228 		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
5229 		return -EINVAL;
5230 	}
5231 
5232 	nh = &nhgi->nexthops[bucket_index];
5233 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5234 
5235 	nh_obj = &info->nh_res_bucket->new_nh;
5236 	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5237 	if (err) {
5238 		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
5239 		goto err_nexthop_obj_init;
5240 	}
5241 
5242 	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
5243 	if (err)
5244 		goto err_nexthop_obj_bucket_adj_update;
5245 
5246 	return 0;
5247 
5248 err_nexthop_obj_bucket_adj_update:
5249 	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
5250 err_nexthop_obj_init:
5251 	nh_obj = &info->nh_res_bucket->old_nh;
5252 	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
5253 	/* The old adjacency entry was not overwritten */
5254 	nh->update = 0;
5255 	nh->offloaded = 1;
5256 	return err;
5257 }
5258 
5259 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
5260 				      unsigned long event, void *ptr)
5261 {
5262 	struct nh_notifier_info *info = ptr;
5263 	struct mlxsw_sp_router *router;
5264 	int err = 0;
5265 
5266 	router = container_of(nb, struct mlxsw_sp_router, nexthop_nb);
5267 	err = mlxsw_sp_nexthop_obj_validate(router->mlxsw_sp, event, info);
5268 	if (err)
5269 		goto out;
5270 
5271 	mutex_lock(&router->lock);
5272 
5273 	switch (event) {
5274 	case NEXTHOP_EVENT_REPLACE:
5275 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
5276 		break;
5277 	case NEXTHOP_EVENT_DEL:
5278 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
5279 		break;
5280 	case NEXTHOP_EVENT_BUCKET_REPLACE:
5281 		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
5282 							  info);
5283 		break;
5284 	default:
5285 		break;
5286 	}
5287 
5288 	mutex_unlock(&router->lock);
5289 
5290 out:
5291 	return notifier_from_errno(err);
5292 }
5293 
5294 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5295 				   struct fib_info *fi)
5296 {
5297 	const struct fib_nh *nh = fib_info_nh(fi, 0);
5298 
5299 	return nh->fib_nh_gw_family ||
5300 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
5301 }
5302 
5303 static int
5304 mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp,
5305 				  struct mlxsw_sp_nexthop_group *nh_grp)
5306 {
5307 	unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi);
5308 	struct mlxsw_sp_nexthop_group_info *nhgi;
5309 	struct mlxsw_sp_nexthop *nh;
5310 	int err, i;
5311 
5312 	nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL);
5313 	if (!nhgi)
5314 		return -ENOMEM;
5315 	nh_grp->nhgi = nhgi;
5316 	nhgi->nh_grp = nh_grp;
5317 	nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi);
5318 	nhgi->count = nhs;
5319 	for (i = 0; i < nhgi->count; i++) {
5320 		struct fib_nh *fib_nh;
5321 
5322 		nh = &nhgi->nexthops[i];
5323 		fib_nh = fib_info_nh(nh_grp->ipv4.fi, i);
5324 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
5325 		if (err)
5326 			goto err_nexthop4_init;
5327 	}
5328 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
5329 	if (err)
5330 		goto err_group_inc;
5331 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5332 	if (err)
5333 		goto err_group_refresh;
5334 
5335 	return 0;
5336 
5337 err_group_refresh:
5338 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5339 err_group_inc:
5340 	i = nhgi->count;
5341 err_nexthop4_init:
5342 	for (i--; i >= 0; i--) {
5343 		nh = &nhgi->nexthops[i];
5344 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5345 	}
5346 	kfree(nhgi);
5347 	return err;
5348 }
5349 
5350 static void
5351 mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp,
5352 				  struct mlxsw_sp_nexthop_group *nh_grp)
5353 {
5354 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
5355 	int i;
5356 
5357 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
5358 	for (i = nhgi->count - 1; i >= 0; i--) {
5359 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
5360 
5361 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
5362 	}
5363 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5364 	WARN_ON_ONCE(nhgi->adj_index_valid);
5365 	kfree(nhgi);
5366 }
5367 
5368 static struct mlxsw_sp_nexthop_group *
5369 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
5370 {
5371 	struct mlxsw_sp_nexthop_group *nh_grp;
5372 	int err;
5373 
5374 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
5375 	if (!nh_grp)
5376 		return ERR_PTR(-ENOMEM);
5377 	INIT_LIST_HEAD(&nh_grp->vr_list);
5378 	err = rhashtable_init(&nh_grp->vr_ht,
5379 			      &mlxsw_sp_nexthop_group_vr_ht_params);
5380 	if (err)
5381 		goto err_nexthop_group_vr_ht_init;
5382 	INIT_LIST_HEAD(&nh_grp->fib_list);
5383 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4;
5384 	nh_grp->ipv4.fi = fi;
5385 	fib_info_hold(fi);
5386 
5387 	err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp);
5388 	if (err)
5389 		goto err_nexthop_group_info_init;
5390 
5391 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5392 	if (err)
5393 		goto err_nexthop_group_insert;
5394 
5395 	nh_grp->can_destroy = true;
5396 
5397 	return nh_grp;
5398 
5399 err_nexthop_group_insert:
5400 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5401 err_nexthop_group_info_init:
5402 	fib_info_put(fi);
5403 	rhashtable_destroy(&nh_grp->vr_ht);
5404 err_nexthop_group_vr_ht_init:
5405 	kfree(nh_grp);
5406 	return ERR_PTR(err);
5407 }
5408 
5409 static void
5410 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
5411 				struct mlxsw_sp_nexthop_group *nh_grp)
5412 {
5413 	if (!nh_grp->can_destroy)
5414 		return;
5415 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5416 	mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp);
5417 	fib_info_put(nh_grp->ipv4.fi);
5418 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
5419 	rhashtable_destroy(&nh_grp->vr_ht);
5420 	kfree(nh_grp);
5421 }
5422 
5423 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
5424 				       struct mlxsw_sp_fib_entry *fib_entry,
5425 				       struct fib_info *fi)
5426 {
5427 	struct mlxsw_sp_nexthop_group *nh_grp;
5428 
5429 	if (fi->nh) {
5430 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
5431 							   fi->nh->id);
5432 		if (WARN_ON_ONCE(!nh_grp))
5433 			return -EINVAL;
5434 		goto out;
5435 	}
5436 
5437 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
5438 	if (!nh_grp) {
5439 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
5440 		if (IS_ERR(nh_grp))
5441 			return PTR_ERR(nh_grp);
5442 	}
5443 out:
5444 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
5445 	fib_entry->nh_group = nh_grp;
5446 	return 0;
5447 }
5448 
5449 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
5450 					struct mlxsw_sp_fib_entry *fib_entry)
5451 {
5452 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5453 
5454 	list_del(&fib_entry->nexthop_group_node);
5455 	if (!list_empty(&nh_grp->fib_list))
5456 		return;
5457 
5458 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
5459 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
5460 		return;
5461 	}
5462 
5463 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
5464 }
5465 
5466 static bool
5467 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5468 {
5469 	struct mlxsw_sp_fib4_entry *fib4_entry;
5470 
5471 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5472 				  common);
5473 	return !fib4_entry->dscp;
5474 }
5475 
5476 static bool
5477 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
5478 {
5479 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5480 
5481 	switch (fib_entry->fib_node->fib->proto) {
5482 	case MLXSW_SP_L3_PROTO_IPV4:
5483 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
5484 			return false;
5485 		break;
5486 	case MLXSW_SP_L3_PROTO_IPV6:
5487 		break;
5488 	}
5489 
5490 	switch (fib_entry->type) {
5491 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5492 		return !!nh_group->nhgi->adj_index_valid;
5493 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5494 		return !!nh_group->nhgi->nh_rif;
5495 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5496 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5497 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5498 		return true;
5499 	default:
5500 		return false;
5501 	}
5502 }
5503 
5504 static struct mlxsw_sp_nexthop *
5505 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
5506 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5507 {
5508 	int i;
5509 
5510 	for (i = 0; i < nh_grp->nhgi->count; i++) {
5511 		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
5512 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5513 
5514 		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
5515 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
5516 				    &rt->fib6_nh->fib_nh_gw6))
5517 			return nh;
5518 	}
5519 
5520 	return NULL;
5521 }
5522 
5523 static void
5524 mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5525 				      struct fib_entry_notifier_info *fen_info)
5526 {
5527 	u32 *p_dst = (u32 *) &fen_info->dst;
5528 	struct fib_rt_info fri;
5529 
5530 	fri.fi = fen_info->fi;
5531 	fri.tb_id = fen_info->tb_id;
5532 	fri.dst = cpu_to_be32(*p_dst);
5533 	fri.dst_len = fen_info->dst_len;
5534 	fri.dscp = fen_info->dscp;
5535 	fri.type = fen_info->type;
5536 	fri.offload = false;
5537 	fri.trap = false;
5538 	fri.offload_failed = true;
5539 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5540 }
5541 
5542 static void
5543 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5544 				 struct mlxsw_sp_fib_entry *fib_entry)
5545 {
5546 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5547 	int dst_len = fib_entry->fib_node->key.prefix_len;
5548 	struct mlxsw_sp_fib4_entry *fib4_entry;
5549 	struct fib_rt_info fri;
5550 	bool should_offload;
5551 
5552 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5553 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5554 				  common);
5555 	fri.fi = fib4_entry->fi;
5556 	fri.tb_id = fib4_entry->tb_id;
5557 	fri.dst = cpu_to_be32(*p_dst);
5558 	fri.dst_len = dst_len;
5559 	fri.dscp = fib4_entry->dscp;
5560 	fri.type = fib4_entry->type;
5561 	fri.offload = should_offload;
5562 	fri.trap = !should_offload;
5563 	fri.offload_failed = false;
5564 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5565 }
5566 
5567 static void
5568 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5569 				   struct mlxsw_sp_fib_entry *fib_entry)
5570 {
5571 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
5572 	int dst_len = fib_entry->fib_node->key.prefix_len;
5573 	struct mlxsw_sp_fib4_entry *fib4_entry;
5574 	struct fib_rt_info fri;
5575 
5576 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
5577 				  common);
5578 	fri.fi = fib4_entry->fi;
5579 	fri.tb_id = fib4_entry->tb_id;
5580 	fri.dst = cpu_to_be32(*p_dst);
5581 	fri.dst_len = dst_len;
5582 	fri.dscp = fib4_entry->dscp;
5583 	fri.type = fib4_entry->type;
5584 	fri.offload = false;
5585 	fri.trap = false;
5586 	fri.offload_failed = false;
5587 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
5588 }
5589 
5590 #if IS_ENABLED(CONFIG_IPV6)
5591 static void
5592 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5593 				      struct fib6_info **rt_arr,
5594 				      unsigned int nrt6)
5595 {
5596 	int i;
5597 
5598 	/* In IPv6 a multipath route is represented using multiple routes, so
5599 	 * we need to set the flags on all of them.
5600 	 */
5601 	for (i = 0; i < nrt6; i++)
5602 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i],
5603 				       false, false, true);
5604 }
5605 #else
5606 static void
5607 mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
5608 				      struct fib6_info **rt_arr,
5609 				      unsigned int nrt6)
5610 {
5611 }
5612 #endif
5613 
5614 #if IS_ENABLED(CONFIG_IPV6)
5615 static void
5616 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5617 				 struct mlxsw_sp_fib_entry *fib_entry)
5618 {
5619 	struct mlxsw_sp_fib6_entry *fib6_entry;
5620 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5621 	bool should_offload;
5622 
5623 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
5624 
5625 	/* In IPv6 a multipath route is represented using multiple routes, so
5626 	 * we need to set the flags on all of them.
5627 	 */
5628 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5629 				  common);
5630 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5631 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5632 				       should_offload, !should_offload, false);
5633 }
5634 #else
5635 static void
5636 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5637 				 struct mlxsw_sp_fib_entry *fib_entry)
5638 {
5639 }
5640 #endif
5641 
5642 #if IS_ENABLED(CONFIG_IPV6)
5643 static void
5644 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5645 				   struct mlxsw_sp_fib_entry *fib_entry)
5646 {
5647 	struct mlxsw_sp_fib6_entry *fib6_entry;
5648 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5649 
5650 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
5651 				  common);
5652 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
5653 		fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
5654 				       false, false, false);
5655 }
5656 #else
5657 static void
5658 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5659 				   struct mlxsw_sp_fib_entry *fib_entry)
5660 {
5661 }
5662 #endif
5663 
5664 static void
5665 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
5666 				struct mlxsw_sp_fib_entry *fib_entry)
5667 {
5668 	switch (fib_entry->fib_node->fib->proto) {
5669 	case MLXSW_SP_L3_PROTO_IPV4:
5670 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
5671 		break;
5672 	case MLXSW_SP_L3_PROTO_IPV6:
5673 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
5674 		break;
5675 	}
5676 }
5677 
5678 static void
5679 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
5680 				  struct mlxsw_sp_fib_entry *fib_entry)
5681 {
5682 	switch (fib_entry->fib_node->fib->proto) {
5683 	case MLXSW_SP_L3_PROTO_IPV4:
5684 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5685 		break;
5686 	case MLXSW_SP_L3_PROTO_IPV6:
5687 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5688 		break;
5689 	}
5690 }
5691 
5692 static void
5693 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
5694 				    struct mlxsw_sp_fib_entry *fib_entry,
5695 				    enum mlxsw_reg_ralue_op op)
5696 {
5697 	switch (op) {
5698 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
5699 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
5700 		break;
5701 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
5702 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
5703 		break;
5704 	default:
5705 		break;
5706 	}
5707 }
5708 
5709 static void
5710 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
5711 			      const struct mlxsw_sp_fib_entry *fib_entry,
5712 			      enum mlxsw_reg_ralue_op op)
5713 {
5714 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
5715 	enum mlxsw_reg_ralxx_protocol proto;
5716 	u32 *p_dip;
5717 
5718 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
5719 
5720 	switch (fib->proto) {
5721 	case MLXSW_SP_L3_PROTO_IPV4:
5722 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
5723 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
5724 				      fib_entry->fib_node->key.prefix_len,
5725 				      *p_dip);
5726 		break;
5727 	case MLXSW_SP_L3_PROTO_IPV6:
5728 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
5729 				      fib_entry->fib_node->key.prefix_len,
5730 				      fib_entry->fib_node->key.addr);
5731 		break;
5732 	}
5733 }
5734 
5735 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
5736 					struct mlxsw_sp_fib_entry *fib_entry,
5737 					enum mlxsw_reg_ralue_op op)
5738 {
5739 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
5740 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi;
5741 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5742 	enum mlxsw_reg_ralue_trap_action trap_action;
5743 	u16 trap_id = 0;
5744 	u32 adjacency_index = 0;
5745 	u16 ecmp_size = 0;
5746 
5747 	/* In case the nexthop group adjacency index is valid, use it
5748 	 * with provided ECMP size. Otherwise, setup trap and pass
5749 	 * traffic to kernel.
5750 	 */
5751 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5752 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5753 		adjacency_index = nhgi->adj_index;
5754 		ecmp_size = nhgi->ecmp_size;
5755 	} else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) {
5756 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5757 		adjacency_index = mlxsw_sp->router->adj_trap_index;
5758 		ecmp_size = 1;
5759 	} else {
5760 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5761 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5762 	}
5763 
5764 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5765 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
5766 					adjacency_index, ecmp_size);
5767 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5768 }
5769 
5770 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
5771 				       struct mlxsw_sp_fib_entry *fib_entry,
5772 				       enum mlxsw_reg_ralue_op op)
5773 {
5774 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif;
5775 	enum mlxsw_reg_ralue_trap_action trap_action;
5776 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5777 	u16 trap_id = 0;
5778 	u16 rif_index = 0;
5779 
5780 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
5781 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
5782 		rif_index = rif->rif_index;
5783 	} else {
5784 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5785 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
5786 	}
5787 
5788 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5789 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
5790 				       rif_index);
5791 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5792 }
5793 
5794 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
5795 				      struct mlxsw_sp_fib_entry *fib_entry,
5796 				      enum mlxsw_reg_ralue_op op)
5797 {
5798 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5799 
5800 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5801 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5802 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5803 }
5804 
5805 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
5806 					   struct mlxsw_sp_fib_entry *fib_entry,
5807 					   enum mlxsw_reg_ralue_op op)
5808 {
5809 	enum mlxsw_reg_ralue_trap_action trap_action;
5810 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5811 
5812 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
5813 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5814 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
5815 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5816 }
5817 
5818 static int
5819 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
5820 				  struct mlxsw_sp_fib_entry *fib_entry,
5821 				  enum mlxsw_reg_ralue_op op)
5822 {
5823 	enum mlxsw_reg_ralue_trap_action trap_action;
5824 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5825 	u16 trap_id;
5826 
5827 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
5828 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
5829 
5830 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5831 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
5832 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5833 }
5834 
5835 static int
5836 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
5837 				 struct mlxsw_sp_fib_entry *fib_entry,
5838 				 enum mlxsw_reg_ralue_op op)
5839 {
5840 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
5841 	const struct mlxsw_sp_ipip_ops *ipip_ops;
5842 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5843 	int err;
5844 
5845 	if (WARN_ON(!ipip_entry))
5846 		return -EINVAL;
5847 
5848 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5849 	err = ipip_ops->decap_config(mlxsw_sp, ipip_entry,
5850 				     fib_entry->decap.tunnel_index);
5851 	if (err)
5852 		return err;
5853 
5854 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5855 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5856 					   fib_entry->decap.tunnel_index);
5857 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5858 }
5859 
5860 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
5861 					   struct mlxsw_sp_fib_entry *fib_entry,
5862 					   enum mlxsw_reg_ralue_op op)
5863 {
5864 	char ralue_pl[MLXSW_REG_RALUE_LEN];
5865 
5866 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
5867 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
5868 					   fib_entry->decap.tunnel_index);
5869 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
5870 }
5871 
5872 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5873 				   struct mlxsw_sp_fib_entry *fib_entry,
5874 				   enum mlxsw_reg_ralue_op op)
5875 {
5876 	switch (fib_entry->type) {
5877 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
5878 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
5879 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
5880 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
5881 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
5882 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
5883 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
5884 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
5885 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
5886 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
5887 							 op);
5888 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5889 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
5890 							fib_entry, op);
5891 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
5892 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
5893 	}
5894 	return -EINVAL;
5895 }
5896 
5897 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
5898 				 struct mlxsw_sp_fib_entry *fib_entry,
5899 				 enum mlxsw_reg_ralue_op op)
5900 {
5901 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
5902 
5903 	if (err)
5904 		return err;
5905 
5906 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
5907 
5908 	return err;
5909 }
5910 
5911 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
5912 				     struct mlxsw_sp_fib_entry *fib_entry)
5913 {
5914 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
5915 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
5916 }
5917 
5918 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
5919 				  struct mlxsw_sp_fib_entry *fib_entry)
5920 {
5921 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
5922 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
5923 }
5924 
5925 static int
5926 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5927 			     const struct fib_entry_notifier_info *fen_info,
5928 			     struct mlxsw_sp_fib_entry *fib_entry)
5929 {
5930 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
5931 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
5932 	struct mlxsw_sp_router *router = mlxsw_sp->router;
5933 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
5934 	int ifindex = nhgi->nexthops[0].ifindex;
5935 	struct mlxsw_sp_ipip_entry *ipip_entry;
5936 
5937 	switch (fen_info->type) {
5938 	case RTN_LOCAL:
5939 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
5940 							       MLXSW_SP_L3_PROTO_IPV4, dip);
5941 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
5942 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
5943 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
5944 							     fib_entry,
5945 							     ipip_entry);
5946 		}
5947 		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
5948 						 MLXSW_SP_L3_PROTO_IPV4,
5949 						 &dip)) {
5950 			u32 tunnel_index;
5951 
5952 			tunnel_index = router->nve_decap_config.tunnel_index;
5953 			fib_entry->decap.tunnel_index = tunnel_index;
5954 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
5955 			return 0;
5956 		}
5957 		fallthrough;
5958 	case RTN_BROADCAST:
5959 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5960 		return 0;
5961 	case RTN_BLACKHOLE:
5962 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5963 		return 0;
5964 	case RTN_UNREACHABLE:
5965 	case RTN_PROHIBIT:
5966 		/* Packets hitting these routes need to be trapped, but
5967 		 * can do so with a lower priority than packets directed
5968 		 * at the host, so use action type local instead of trap.
5969 		 */
5970 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
5971 		return 0;
5972 	case RTN_UNICAST:
5973 		if (nhgi->gateway)
5974 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5975 		else
5976 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5977 		return 0;
5978 	default:
5979 		return -EINVAL;
5980 	}
5981 }
5982 
5983 static void
5984 mlxsw_sp_fib_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
5985 			      struct mlxsw_sp_fib_entry *fib_entry)
5986 {
5987 	switch (fib_entry->type) {
5988 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
5989 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
5990 		break;
5991 	default:
5992 		break;
5993 	}
5994 }
5995 
5996 static void
5997 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
5998 			       struct mlxsw_sp_fib4_entry *fib4_entry)
5999 {
6000 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib4_entry->common);
6001 }
6002 
6003 static struct mlxsw_sp_fib4_entry *
6004 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
6005 			   struct mlxsw_sp_fib_node *fib_node,
6006 			   const struct fib_entry_notifier_info *fen_info)
6007 {
6008 	struct mlxsw_sp_fib4_entry *fib4_entry;
6009 	struct mlxsw_sp_fib_entry *fib_entry;
6010 	int err;
6011 
6012 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
6013 	if (!fib4_entry)
6014 		return ERR_PTR(-ENOMEM);
6015 	fib_entry = &fib4_entry->common;
6016 
6017 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
6018 	if (err)
6019 		goto err_nexthop4_group_get;
6020 
6021 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6022 					     fib_node->fib);
6023 	if (err)
6024 		goto err_nexthop_group_vr_link;
6025 
6026 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
6027 	if (err)
6028 		goto err_fib4_entry_type_set;
6029 
6030 	fib4_entry->fi = fen_info->fi;
6031 	fib_info_hold(fib4_entry->fi);
6032 	fib4_entry->tb_id = fen_info->tb_id;
6033 	fib4_entry->type = fen_info->type;
6034 	fib4_entry->dscp = fen_info->dscp;
6035 
6036 	fib_entry->fib_node = fib_node;
6037 
6038 	return fib4_entry;
6039 
6040 err_fib4_entry_type_set:
6041 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6042 err_nexthop_group_vr_link:
6043 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6044 err_nexthop4_group_get:
6045 	kfree(fib4_entry);
6046 	return ERR_PTR(err);
6047 }
6048 
6049 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6050 					struct mlxsw_sp_fib4_entry *fib4_entry)
6051 {
6052 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6053 
6054 	fib_info_put(fib4_entry->fi);
6055 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib4_entry);
6056 	mlxsw_sp_nexthop_group_vr_unlink(fib4_entry->common.nh_group,
6057 					 fib_node->fib);
6058 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
6059 	kfree(fib4_entry);
6060 }
6061 
6062 static struct mlxsw_sp_fib4_entry *
6063 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6064 			   const struct fib_entry_notifier_info *fen_info)
6065 {
6066 	struct mlxsw_sp_fib4_entry *fib4_entry;
6067 	struct mlxsw_sp_fib_node *fib_node;
6068 	struct mlxsw_sp_fib *fib;
6069 	struct mlxsw_sp_vr *vr;
6070 
6071 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
6072 	if (!vr)
6073 		return NULL;
6074 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
6075 
6076 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
6077 					    sizeof(fen_info->dst),
6078 					    fen_info->dst_len);
6079 	if (!fib_node)
6080 		return NULL;
6081 
6082 	fib4_entry = container_of(fib_node->fib_entry,
6083 				  struct mlxsw_sp_fib4_entry, common);
6084 	if (fib4_entry->tb_id == fen_info->tb_id &&
6085 	    fib4_entry->dscp == fen_info->dscp &&
6086 	    fib4_entry->type == fen_info->type &&
6087 	    fib4_entry->fi == fen_info->fi)
6088 		return fib4_entry;
6089 
6090 	return NULL;
6091 }
6092 
6093 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
6094 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
6095 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
6096 	.key_len = sizeof(struct mlxsw_sp_fib_key),
6097 	.automatic_shrinking = true,
6098 };
6099 
6100 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
6101 				    struct mlxsw_sp_fib_node *fib_node)
6102 {
6103 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
6104 				      mlxsw_sp_fib_ht_params);
6105 }
6106 
6107 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
6108 				     struct mlxsw_sp_fib_node *fib_node)
6109 {
6110 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
6111 			       mlxsw_sp_fib_ht_params);
6112 }
6113 
6114 static struct mlxsw_sp_fib_node *
6115 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
6116 			 size_t addr_len, unsigned char prefix_len)
6117 {
6118 	struct mlxsw_sp_fib_key key;
6119 
6120 	memset(&key, 0, sizeof(key));
6121 	memcpy(key.addr, addr, addr_len);
6122 	key.prefix_len = prefix_len;
6123 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
6124 }
6125 
6126 static struct mlxsw_sp_fib_node *
6127 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
6128 			 size_t addr_len, unsigned char prefix_len)
6129 {
6130 	struct mlxsw_sp_fib_node *fib_node;
6131 
6132 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
6133 	if (!fib_node)
6134 		return NULL;
6135 
6136 	list_add(&fib_node->list, &fib->node_list);
6137 	memcpy(fib_node->key.addr, addr, addr_len);
6138 	fib_node->key.prefix_len = prefix_len;
6139 
6140 	return fib_node;
6141 }
6142 
6143 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
6144 {
6145 	list_del(&fib_node->list);
6146 	kfree(fib_node);
6147 }
6148 
6149 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
6150 				      struct mlxsw_sp_fib_node *fib_node)
6151 {
6152 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6153 	struct mlxsw_sp_fib *fib = fib_node->fib;
6154 	struct mlxsw_sp_lpm_tree *lpm_tree;
6155 	int err;
6156 
6157 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
6158 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6159 		goto out;
6160 
6161 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6162 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
6163 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6164 					 fib->proto);
6165 	if (IS_ERR(lpm_tree))
6166 		return PTR_ERR(lpm_tree);
6167 
6168 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6169 	if (err)
6170 		goto err_lpm_tree_replace;
6171 
6172 out:
6173 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
6174 	return 0;
6175 
6176 err_lpm_tree_replace:
6177 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6178 	return err;
6179 }
6180 
6181 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
6182 					 struct mlxsw_sp_fib_node *fib_node)
6183 {
6184 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
6185 	struct mlxsw_sp_prefix_usage req_prefix_usage;
6186 	struct mlxsw_sp_fib *fib = fib_node->fib;
6187 	int err;
6188 
6189 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
6190 		return;
6191 	/* Try to construct a new LPM tree from the current prefix usage
6192 	 * minus the unused one. If we fail, continue using the old one.
6193 	 */
6194 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
6195 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
6196 				    fib_node->key.prefix_len);
6197 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
6198 					 fib->proto);
6199 	if (IS_ERR(lpm_tree))
6200 		return;
6201 
6202 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
6203 	if (err)
6204 		goto err_lpm_tree_replace;
6205 
6206 	return;
6207 
6208 err_lpm_tree_replace:
6209 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
6210 }
6211 
6212 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
6213 				  struct mlxsw_sp_fib_node *fib_node,
6214 				  struct mlxsw_sp_fib *fib)
6215 {
6216 	int err;
6217 
6218 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
6219 	if (err)
6220 		return err;
6221 	fib_node->fib = fib;
6222 
6223 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
6224 	if (err)
6225 		goto err_fib_lpm_tree_link;
6226 
6227 	return 0;
6228 
6229 err_fib_lpm_tree_link:
6230 	fib_node->fib = NULL;
6231 	mlxsw_sp_fib_node_remove(fib, fib_node);
6232 	return err;
6233 }
6234 
6235 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
6236 				   struct mlxsw_sp_fib_node *fib_node)
6237 {
6238 	struct mlxsw_sp_fib *fib = fib_node->fib;
6239 
6240 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
6241 	fib_node->fib = NULL;
6242 	mlxsw_sp_fib_node_remove(fib, fib_node);
6243 }
6244 
6245 static struct mlxsw_sp_fib_node *
6246 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
6247 		      size_t addr_len, unsigned char prefix_len,
6248 		      enum mlxsw_sp_l3proto proto)
6249 {
6250 	struct mlxsw_sp_fib_node *fib_node;
6251 	struct mlxsw_sp_fib *fib;
6252 	struct mlxsw_sp_vr *vr;
6253 	int err;
6254 
6255 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
6256 	if (IS_ERR(vr))
6257 		return ERR_CAST(vr);
6258 	fib = mlxsw_sp_vr_fib(vr, proto);
6259 
6260 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
6261 	if (fib_node)
6262 		return fib_node;
6263 
6264 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
6265 	if (!fib_node) {
6266 		err = -ENOMEM;
6267 		goto err_fib_node_create;
6268 	}
6269 
6270 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
6271 	if (err)
6272 		goto err_fib_node_init;
6273 
6274 	return fib_node;
6275 
6276 err_fib_node_init:
6277 	mlxsw_sp_fib_node_destroy(fib_node);
6278 err_fib_node_create:
6279 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6280 	return ERR_PTR(err);
6281 }
6282 
6283 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
6284 				  struct mlxsw_sp_fib_node *fib_node)
6285 {
6286 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
6287 
6288 	if (fib_node->fib_entry)
6289 		return;
6290 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
6291 	mlxsw_sp_fib_node_destroy(fib_node);
6292 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6293 }
6294 
6295 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
6296 					struct mlxsw_sp_fib_entry *fib_entry)
6297 {
6298 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6299 	int err;
6300 
6301 	fib_node->fib_entry = fib_entry;
6302 
6303 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
6304 	if (err)
6305 		goto err_fib_entry_update;
6306 
6307 	return 0;
6308 
6309 err_fib_entry_update:
6310 	fib_node->fib_entry = NULL;
6311 	return err;
6312 }
6313 
6314 static void
6315 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
6316 			       struct mlxsw_sp_fib_entry *fib_entry)
6317 {
6318 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
6319 
6320 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
6321 	fib_node->fib_entry = NULL;
6322 }
6323 
6324 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
6325 {
6326 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
6327 	struct mlxsw_sp_fib4_entry *fib4_replaced;
6328 
6329 	if (!fib_node->fib_entry)
6330 		return true;
6331 
6332 	fib4_replaced = container_of(fib_node->fib_entry,
6333 				     struct mlxsw_sp_fib4_entry, common);
6334 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
6335 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
6336 		return false;
6337 
6338 	return true;
6339 }
6340 
6341 static int
6342 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
6343 			     const struct fib_entry_notifier_info *fen_info)
6344 {
6345 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
6346 	struct mlxsw_sp_fib_entry *replaced;
6347 	struct mlxsw_sp_fib_node *fib_node;
6348 	int err;
6349 
6350 	if (fen_info->fi->nh &&
6351 	    !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
6352 		return 0;
6353 
6354 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
6355 					 &fen_info->dst, sizeof(fen_info->dst),
6356 					 fen_info->dst_len,
6357 					 MLXSW_SP_L3_PROTO_IPV4);
6358 	if (IS_ERR(fib_node)) {
6359 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
6360 		return PTR_ERR(fib_node);
6361 	}
6362 
6363 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
6364 	if (IS_ERR(fib4_entry)) {
6365 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
6366 		err = PTR_ERR(fib4_entry);
6367 		goto err_fib4_entry_create;
6368 	}
6369 
6370 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
6371 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6372 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6373 		return 0;
6374 	}
6375 
6376 	replaced = fib_node->fib_entry;
6377 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
6378 	if (err) {
6379 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
6380 		goto err_fib_node_entry_link;
6381 	}
6382 
6383 	/* Nothing to replace */
6384 	if (!replaced)
6385 		return 0;
6386 
6387 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
6388 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
6389 				     common);
6390 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
6391 
6392 	return 0;
6393 
6394 err_fib_node_entry_link:
6395 	fib_node->fib_entry = replaced;
6396 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6397 err_fib4_entry_create:
6398 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6399 	return err;
6400 }
6401 
6402 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
6403 				     struct fib_entry_notifier_info *fen_info)
6404 {
6405 	struct mlxsw_sp_fib4_entry *fib4_entry;
6406 	struct mlxsw_sp_fib_node *fib_node;
6407 
6408 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
6409 	if (!fib4_entry)
6410 		return;
6411 	fib_node = fib4_entry->common.fib_node;
6412 
6413 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
6414 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
6415 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
6416 }
6417 
6418 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
6419 {
6420 	/* Multicast routes aren't supported, so ignore them. Neighbour
6421 	 * Discovery packets are specifically trapped.
6422 	 */
6423 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
6424 		return true;
6425 
6426 	/* Cloned routes are irrelevant in the forwarding path. */
6427 	if (rt->fib6_flags & RTF_CACHE)
6428 		return true;
6429 
6430 	return false;
6431 }
6432 
6433 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
6434 {
6435 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6436 
6437 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
6438 	if (!mlxsw_sp_rt6)
6439 		return ERR_PTR(-ENOMEM);
6440 
6441 	/* In case of route replace, replaced route is deleted with
6442 	 * no notification. Take reference to prevent accessing freed
6443 	 * memory.
6444 	 */
6445 	mlxsw_sp_rt6->rt = rt;
6446 	fib6_info_hold(rt);
6447 
6448 	return mlxsw_sp_rt6;
6449 }
6450 
6451 #if IS_ENABLED(CONFIG_IPV6)
6452 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6453 {
6454 	fib6_info_release(rt);
6455 }
6456 #else
6457 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
6458 {
6459 }
6460 #endif
6461 
6462 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
6463 {
6464 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
6465 
6466 	if (!mlxsw_sp_rt6->rt->nh)
6467 		fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
6468 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
6469 	kfree(mlxsw_sp_rt6);
6470 }
6471 
6472 static struct fib6_info *
6473 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
6474 {
6475 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
6476 				list)->rt;
6477 }
6478 
6479 static struct mlxsw_sp_rt6 *
6480 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
6481 			    const struct fib6_info *rt)
6482 {
6483 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6484 
6485 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
6486 		if (mlxsw_sp_rt6->rt == rt)
6487 			return mlxsw_sp_rt6;
6488 	}
6489 
6490 	return NULL;
6491 }
6492 
6493 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
6494 					const struct fib6_info *rt,
6495 					enum mlxsw_sp_ipip_type *ret)
6496 {
6497 	return rt->fib6_nh->fib_nh_dev &&
6498 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
6499 }
6500 
6501 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
6502 				  struct mlxsw_sp_nexthop_group *nh_grp,
6503 				  struct mlxsw_sp_nexthop *nh,
6504 				  const struct fib6_info *rt)
6505 {
6506 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
6507 	int err;
6508 
6509 	nh->nhgi = nh_grp->nhgi;
6510 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
6511 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
6512 #if IS_ENABLED(CONFIG_IPV6)
6513 	nh->neigh_tbl = &nd_tbl;
6514 #endif
6515 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
6516 
6517 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
6518 
6519 	if (!dev)
6520 		return 0;
6521 	nh->ifindex = dev->ifindex;
6522 
6523 	err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
6524 	if (err)
6525 		goto err_nexthop_type_init;
6526 
6527 	return 0;
6528 
6529 err_nexthop_type_init:
6530 	list_del(&nh->router_list_node);
6531 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6532 	return err;
6533 }
6534 
6535 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
6536 				   struct mlxsw_sp_nexthop *nh)
6537 {
6538 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
6539 	list_del(&nh->router_list_node);
6540 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
6541 }
6542 
6543 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
6544 				    const struct fib6_info *rt)
6545 {
6546 	return rt->fib6_nh->fib_nh_gw_family ||
6547 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
6548 }
6549 
6550 static int
6551 mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp,
6552 				  struct mlxsw_sp_nexthop_group *nh_grp,
6553 				  struct mlxsw_sp_fib6_entry *fib6_entry)
6554 {
6555 	struct mlxsw_sp_nexthop_group_info *nhgi;
6556 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6557 	struct mlxsw_sp_nexthop *nh;
6558 	int err, i;
6559 
6560 	nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6),
6561 		       GFP_KERNEL);
6562 	if (!nhgi)
6563 		return -ENOMEM;
6564 	nh_grp->nhgi = nhgi;
6565 	nhgi->nh_grp = nh_grp;
6566 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
6567 					struct mlxsw_sp_rt6, list);
6568 	nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
6569 	nhgi->count = fib6_entry->nrt6;
6570 	for (i = 0; i < nhgi->count; i++) {
6571 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
6572 
6573 		nh = &nhgi->nexthops[i];
6574 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
6575 		if (err)
6576 			goto err_nexthop6_init;
6577 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
6578 	}
6579 	nh_grp->nhgi = nhgi;
6580 	err = mlxsw_sp_nexthop_group_inc(mlxsw_sp);
6581 	if (err)
6582 		goto err_group_inc;
6583 	err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6584 	if (err)
6585 		goto err_group_refresh;
6586 
6587 	return 0;
6588 
6589 err_group_refresh:
6590 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6591 err_group_inc:
6592 	i = nhgi->count;
6593 err_nexthop6_init:
6594 	for (i--; i >= 0; i--) {
6595 		nh = &nhgi->nexthops[i];
6596 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6597 	}
6598 	kfree(nhgi);
6599 	return err;
6600 }
6601 
6602 static void
6603 mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp,
6604 				  struct mlxsw_sp_nexthop_group *nh_grp)
6605 {
6606 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
6607 	int i;
6608 
6609 	mlxsw_sp_nexthop_group_dec(mlxsw_sp);
6610 	for (i = nhgi->count - 1; i >= 0; i--) {
6611 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
6612 
6613 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
6614 	}
6615 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
6616 	WARN_ON_ONCE(nhgi->adj_index_valid);
6617 	kfree(nhgi);
6618 }
6619 
6620 static struct mlxsw_sp_nexthop_group *
6621 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
6622 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6623 {
6624 	struct mlxsw_sp_nexthop_group *nh_grp;
6625 	int err;
6626 
6627 	nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL);
6628 	if (!nh_grp)
6629 		return ERR_PTR(-ENOMEM);
6630 	INIT_LIST_HEAD(&nh_grp->vr_list);
6631 	err = rhashtable_init(&nh_grp->vr_ht,
6632 			      &mlxsw_sp_nexthop_group_vr_ht_params);
6633 	if (err)
6634 		goto err_nexthop_group_vr_ht_init;
6635 	INIT_LIST_HEAD(&nh_grp->fib_list);
6636 	nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6;
6637 
6638 	err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry);
6639 	if (err)
6640 		goto err_nexthop_group_info_init;
6641 
6642 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
6643 	if (err)
6644 		goto err_nexthop_group_insert;
6645 
6646 	nh_grp->can_destroy = true;
6647 
6648 	return nh_grp;
6649 
6650 err_nexthop_group_insert:
6651 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6652 err_nexthop_group_info_init:
6653 	rhashtable_destroy(&nh_grp->vr_ht);
6654 err_nexthop_group_vr_ht_init:
6655 	kfree(nh_grp);
6656 	return ERR_PTR(err);
6657 }
6658 
6659 static void
6660 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
6661 				struct mlxsw_sp_nexthop_group *nh_grp)
6662 {
6663 	if (!nh_grp->can_destroy)
6664 		return;
6665 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
6666 	mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp);
6667 	WARN_ON_ONCE(!list_empty(&nh_grp->vr_list));
6668 	rhashtable_destroy(&nh_grp->vr_ht);
6669 	kfree(nh_grp);
6670 }
6671 
6672 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
6673 				       struct mlxsw_sp_fib6_entry *fib6_entry)
6674 {
6675 	struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
6676 	struct mlxsw_sp_nexthop_group *nh_grp;
6677 
6678 	if (rt->nh) {
6679 		nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp,
6680 							   rt->nh->id);
6681 		if (WARN_ON_ONCE(!nh_grp))
6682 			return -EINVAL;
6683 		goto out;
6684 	}
6685 
6686 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
6687 	if (!nh_grp) {
6688 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
6689 		if (IS_ERR(nh_grp))
6690 			return PTR_ERR(nh_grp);
6691 	}
6692 
6693 	/* The route and the nexthop are described by the same struct, so we
6694 	 * need to the update the nexthop offload indication for the new route.
6695 	 */
6696 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
6697 
6698 out:
6699 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6700 		      &nh_grp->fib_list);
6701 	fib6_entry->common.nh_group = nh_grp;
6702 
6703 	return 0;
6704 }
6705 
6706 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
6707 					struct mlxsw_sp_fib_entry *fib_entry)
6708 {
6709 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
6710 
6711 	list_del(&fib_entry->nexthop_group_node);
6712 	if (!list_empty(&nh_grp->fib_list))
6713 		return;
6714 
6715 	if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_OBJ) {
6716 		mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
6717 		return;
6718 	}
6719 
6720 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
6721 }
6722 
6723 static int
6724 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
6725 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6726 {
6727 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
6728 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6729 	int err;
6730 
6731 	mlxsw_sp_nexthop_group_vr_unlink(old_nh_grp, fib_node->fib);
6732 	fib6_entry->common.nh_group = NULL;
6733 	list_del(&fib6_entry->common.nexthop_group_node);
6734 
6735 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6736 	if (err)
6737 		goto err_nexthop6_group_get;
6738 
6739 	err = mlxsw_sp_nexthop_group_vr_link(fib6_entry->common.nh_group,
6740 					     fib_node->fib);
6741 	if (err)
6742 		goto err_nexthop_group_vr_link;
6743 
6744 	/* In case this entry is offloaded, then the adjacency index
6745 	 * currently associated with it in the device's table is that
6746 	 * of the old group. Start using the new one instead.
6747 	 */
6748 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
6749 	if (err)
6750 		goto err_fib_entry_update;
6751 
6752 	if (list_empty(&old_nh_grp->fib_list))
6753 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
6754 
6755 	return 0;
6756 
6757 err_fib_entry_update:
6758 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6759 					 fib_node->fib);
6760 err_nexthop_group_vr_link:
6761 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6762 err_nexthop6_group_get:
6763 	list_add_tail(&fib6_entry->common.nexthop_group_node,
6764 		      &old_nh_grp->fib_list);
6765 	fib6_entry->common.nh_group = old_nh_grp;
6766 	mlxsw_sp_nexthop_group_vr_link(old_nh_grp, fib_node->fib);
6767 	return err;
6768 }
6769 
6770 static int
6771 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
6772 				struct mlxsw_sp_fib6_entry *fib6_entry,
6773 				struct fib6_info **rt_arr, unsigned int nrt6)
6774 {
6775 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6776 	int err, i;
6777 
6778 	for (i = 0; i < nrt6; i++) {
6779 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6780 		if (IS_ERR(mlxsw_sp_rt6)) {
6781 			err = PTR_ERR(mlxsw_sp_rt6);
6782 			goto err_rt6_unwind;
6783 		}
6784 
6785 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6786 		fib6_entry->nrt6++;
6787 	}
6788 
6789 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6790 	if (err)
6791 		goto err_rt6_unwind;
6792 
6793 	return 0;
6794 
6795 err_rt6_unwind:
6796 	for (; i > 0; i--) {
6797 		fib6_entry->nrt6--;
6798 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6799 					       struct mlxsw_sp_rt6, list);
6800 		list_del(&mlxsw_sp_rt6->list);
6801 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6802 	}
6803 	return err;
6804 }
6805 
6806 static void
6807 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
6808 				struct mlxsw_sp_fib6_entry *fib6_entry,
6809 				struct fib6_info **rt_arr, unsigned int nrt6)
6810 {
6811 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6812 	int i;
6813 
6814 	for (i = 0; i < nrt6; i++) {
6815 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
6816 							   rt_arr[i]);
6817 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
6818 			continue;
6819 
6820 		fib6_entry->nrt6--;
6821 		list_del(&mlxsw_sp_rt6->list);
6822 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6823 	}
6824 
6825 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
6826 }
6827 
6828 static int
6829 mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
6830 				   struct mlxsw_sp_fib_entry *fib_entry,
6831 				   const struct fib6_info *rt)
6832 {
6833 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
6834 	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
6835 	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
6836 	struct mlxsw_sp_router *router = mlxsw_sp->router;
6837 	int ifindex = nhgi->nexthops[0].ifindex;
6838 	struct mlxsw_sp_ipip_entry *ipip_entry;
6839 
6840 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6841 	ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex,
6842 						       MLXSW_SP_L3_PROTO_IPV6,
6843 						       dip);
6844 
6845 	if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
6846 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
6847 		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
6848 						     ipip_entry);
6849 	}
6850 	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
6851 					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
6852 		u32 tunnel_index;
6853 
6854 		tunnel_index = router->nve_decap_config.tunnel_index;
6855 		fib_entry->decap.tunnel_index = tunnel_index;
6856 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
6857 	}
6858 
6859 	return 0;
6860 }
6861 
6862 static int mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
6863 					struct mlxsw_sp_fib_entry *fib_entry,
6864 					const struct fib6_info *rt)
6865 {
6866 	if (rt->fib6_flags & RTF_LOCAL)
6867 		return mlxsw_sp_fib6_entry_type_set_local(mlxsw_sp, fib_entry,
6868 							  rt);
6869 	if (rt->fib6_flags & RTF_ANYCAST)
6870 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
6871 	else if (rt->fib6_type == RTN_BLACKHOLE)
6872 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
6873 	else if (rt->fib6_flags & RTF_REJECT)
6874 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
6875 	else if (fib_entry->nh_group->nhgi->gateway)
6876 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
6877 	else
6878 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
6879 
6880 	return 0;
6881 }
6882 
6883 static void
6884 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
6885 {
6886 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
6887 
6888 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
6889 				 list) {
6890 		fib6_entry->nrt6--;
6891 		list_del(&mlxsw_sp_rt6->list);
6892 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6893 	}
6894 }
6895 
6896 static struct mlxsw_sp_fib6_entry *
6897 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
6898 			   struct mlxsw_sp_fib_node *fib_node,
6899 			   struct fib6_info **rt_arr, unsigned int nrt6)
6900 {
6901 	struct mlxsw_sp_fib6_entry *fib6_entry;
6902 	struct mlxsw_sp_fib_entry *fib_entry;
6903 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
6904 	int err, i;
6905 
6906 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
6907 	if (!fib6_entry)
6908 		return ERR_PTR(-ENOMEM);
6909 	fib_entry = &fib6_entry->common;
6910 
6911 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
6912 
6913 	for (i = 0; i < nrt6; i++) {
6914 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
6915 		if (IS_ERR(mlxsw_sp_rt6)) {
6916 			err = PTR_ERR(mlxsw_sp_rt6);
6917 			goto err_rt6_unwind;
6918 		}
6919 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
6920 		fib6_entry->nrt6++;
6921 	}
6922 
6923 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
6924 	if (err)
6925 		goto err_rt6_unwind;
6926 
6927 	err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
6928 					     fib_node->fib);
6929 	if (err)
6930 		goto err_nexthop_group_vr_link;
6931 
6932 	err = mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
6933 	if (err)
6934 		goto err_fib6_entry_type_set;
6935 
6936 	fib_entry->fib_node = fib_node;
6937 
6938 	return fib6_entry;
6939 
6940 err_fib6_entry_type_set:
6941 	mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
6942 err_nexthop_group_vr_link:
6943 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
6944 err_rt6_unwind:
6945 	for (; i > 0; i--) {
6946 		fib6_entry->nrt6--;
6947 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
6948 					       struct mlxsw_sp_rt6, list);
6949 		list_del(&mlxsw_sp_rt6->list);
6950 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
6951 	}
6952 	kfree(fib6_entry);
6953 	return ERR_PTR(err);
6954 }
6955 
6956 static void
6957 mlxsw_sp_fib6_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
6958 			       struct mlxsw_sp_fib6_entry *fib6_entry)
6959 {
6960 	mlxsw_sp_fib_entry_type_unset(mlxsw_sp, &fib6_entry->common);
6961 }
6962 
6963 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
6964 					struct mlxsw_sp_fib6_entry *fib6_entry)
6965 {
6966 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
6967 
6968 	mlxsw_sp_fib6_entry_type_unset(mlxsw_sp, fib6_entry);
6969 	mlxsw_sp_nexthop_group_vr_unlink(fib6_entry->common.nh_group,
6970 					 fib_node->fib);
6971 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
6972 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
6973 	WARN_ON(fib6_entry->nrt6);
6974 	kfree(fib6_entry);
6975 }
6976 
6977 static struct mlxsw_sp_fib6_entry *
6978 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
6979 			   const struct fib6_info *rt)
6980 {
6981 	struct mlxsw_sp_fib6_entry *fib6_entry;
6982 	struct mlxsw_sp_fib_node *fib_node;
6983 	struct mlxsw_sp_fib *fib;
6984 	struct fib6_info *cmp_rt;
6985 	struct mlxsw_sp_vr *vr;
6986 
6987 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
6988 	if (!vr)
6989 		return NULL;
6990 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
6991 
6992 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
6993 					    sizeof(rt->fib6_dst.addr),
6994 					    rt->fib6_dst.plen);
6995 	if (!fib_node)
6996 		return NULL;
6997 
6998 	fib6_entry = container_of(fib_node->fib_entry,
6999 				  struct mlxsw_sp_fib6_entry, common);
7000 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7001 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
7002 	    rt->fib6_metric == cmp_rt->fib6_metric &&
7003 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
7004 		return fib6_entry;
7005 
7006 	return NULL;
7007 }
7008 
7009 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
7010 {
7011 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
7012 	struct mlxsw_sp_fib6_entry *fib6_replaced;
7013 	struct fib6_info *rt, *rt_replaced;
7014 
7015 	if (!fib_node->fib_entry)
7016 		return true;
7017 
7018 	fib6_replaced = container_of(fib_node->fib_entry,
7019 				     struct mlxsw_sp_fib6_entry,
7020 				     common);
7021 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
7022 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
7023 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
7024 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
7025 		return false;
7026 
7027 	return true;
7028 }
7029 
7030 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
7031 					struct fib6_info **rt_arr,
7032 					unsigned int nrt6)
7033 {
7034 	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
7035 	struct mlxsw_sp_fib_entry *replaced;
7036 	struct mlxsw_sp_fib_node *fib_node;
7037 	struct fib6_info *rt = rt_arr[0];
7038 	int err;
7039 
7040 	if (rt->fib6_src.plen)
7041 		return -EINVAL;
7042 
7043 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7044 		return 0;
7045 
7046 	if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
7047 		return 0;
7048 
7049 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7050 					 &rt->fib6_dst.addr,
7051 					 sizeof(rt->fib6_dst.addr),
7052 					 rt->fib6_dst.plen,
7053 					 MLXSW_SP_L3_PROTO_IPV6);
7054 	if (IS_ERR(fib_node))
7055 		return PTR_ERR(fib_node);
7056 
7057 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
7058 						nrt6);
7059 	if (IS_ERR(fib6_entry)) {
7060 		err = PTR_ERR(fib6_entry);
7061 		goto err_fib6_entry_create;
7062 	}
7063 
7064 	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
7065 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7066 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7067 		return 0;
7068 	}
7069 
7070 	replaced = fib_node->fib_entry;
7071 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
7072 	if (err)
7073 		goto err_fib_node_entry_link;
7074 
7075 	/* Nothing to replace */
7076 	if (!replaced)
7077 		return 0;
7078 
7079 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
7080 	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
7081 				     common);
7082 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
7083 
7084 	return 0;
7085 
7086 err_fib_node_entry_link:
7087 	fib_node->fib_entry = replaced;
7088 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7089 err_fib6_entry_create:
7090 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7091 	return err;
7092 }
7093 
7094 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
7095 				       struct fib6_info **rt_arr,
7096 				       unsigned int nrt6)
7097 {
7098 	struct mlxsw_sp_fib6_entry *fib6_entry;
7099 	struct mlxsw_sp_fib_node *fib_node;
7100 	struct fib6_info *rt = rt_arr[0];
7101 	int err;
7102 
7103 	if (rt->fib6_src.plen)
7104 		return -EINVAL;
7105 
7106 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7107 		return 0;
7108 
7109 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
7110 					 &rt->fib6_dst.addr,
7111 					 sizeof(rt->fib6_dst.addr),
7112 					 rt->fib6_dst.plen,
7113 					 MLXSW_SP_L3_PROTO_IPV6);
7114 	if (IS_ERR(fib_node))
7115 		return PTR_ERR(fib_node);
7116 
7117 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
7118 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7119 		return -EINVAL;
7120 	}
7121 
7122 	fib6_entry = container_of(fib_node->fib_entry,
7123 				  struct mlxsw_sp_fib6_entry, common);
7124 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
7125 					      nrt6);
7126 	if (err)
7127 		goto err_fib6_entry_nexthop_add;
7128 
7129 	return 0;
7130 
7131 err_fib6_entry_nexthop_add:
7132 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7133 	return err;
7134 }
7135 
7136 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
7137 				     struct fib6_info **rt_arr,
7138 				     unsigned int nrt6)
7139 {
7140 	struct mlxsw_sp_fib6_entry *fib6_entry;
7141 	struct mlxsw_sp_fib_node *fib_node;
7142 	struct fib6_info *rt = rt_arr[0];
7143 
7144 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
7145 		return;
7146 
7147 	/* Multipath routes are first added to the FIB trie and only then
7148 	 * notified. If we vetoed the addition, we will get a delete
7149 	 * notification for a route we do not have. Therefore, do not warn if
7150 	 * route was not found.
7151 	 */
7152 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
7153 	if (!fib6_entry)
7154 		return;
7155 
7156 	/* If not all the nexthops are deleted, then only reduce the nexthop
7157 	 * group.
7158 	 */
7159 	if (nrt6 != fib6_entry->nrt6) {
7160 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
7161 						nrt6);
7162 		return;
7163 	}
7164 
7165 	fib_node = fib6_entry->common.fib_node;
7166 
7167 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
7168 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7169 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7170 }
7171 
7172 static struct mlxsw_sp_mr_table *
7173 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
7174 {
7175 	if (family == RTNL_FAMILY_IPMR)
7176 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
7177 	else
7178 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
7179 }
7180 
7181 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
7182 				     struct mfc_entry_notifier_info *men_info,
7183 				     bool replace)
7184 {
7185 	struct mlxsw_sp_mr_table *mrt;
7186 	struct mlxsw_sp_vr *vr;
7187 
7188 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
7189 	if (IS_ERR(vr))
7190 		return PTR_ERR(vr);
7191 
7192 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7193 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
7194 }
7195 
7196 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
7197 				      struct mfc_entry_notifier_info *men_info)
7198 {
7199 	struct mlxsw_sp_mr_table *mrt;
7200 	struct mlxsw_sp_vr *vr;
7201 
7202 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
7203 	if (WARN_ON(!vr))
7204 		return;
7205 
7206 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
7207 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
7208 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7209 }
7210 
7211 static int
7212 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
7213 			      struct vif_entry_notifier_info *ven_info)
7214 {
7215 	struct mlxsw_sp_mr_table *mrt;
7216 	struct mlxsw_sp_rif *rif;
7217 	struct mlxsw_sp_vr *vr;
7218 
7219 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
7220 	if (IS_ERR(vr))
7221 		return PTR_ERR(vr);
7222 
7223 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7224 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
7225 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
7226 				   ven_info->vif_index,
7227 				   ven_info->vif_flags, rif);
7228 }
7229 
7230 static void
7231 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
7232 			      struct vif_entry_notifier_info *ven_info)
7233 {
7234 	struct mlxsw_sp_mr_table *mrt;
7235 	struct mlxsw_sp_vr *vr;
7236 
7237 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
7238 	if (WARN_ON(!vr))
7239 		return;
7240 
7241 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
7242 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
7243 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7244 }
7245 
7246 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
7247 				     struct mlxsw_sp_fib_node *fib_node)
7248 {
7249 	struct mlxsw_sp_fib4_entry *fib4_entry;
7250 
7251 	fib4_entry = container_of(fib_node->fib_entry,
7252 				  struct mlxsw_sp_fib4_entry, common);
7253 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7254 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
7255 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7256 }
7257 
7258 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
7259 				     struct mlxsw_sp_fib_node *fib_node)
7260 {
7261 	struct mlxsw_sp_fib6_entry *fib6_entry;
7262 
7263 	fib6_entry = container_of(fib_node->fib_entry,
7264 				  struct mlxsw_sp_fib6_entry, common);
7265 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
7266 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
7267 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
7268 }
7269 
7270 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
7271 				    struct mlxsw_sp_fib_node *fib_node)
7272 {
7273 	switch (fib_node->fib->proto) {
7274 	case MLXSW_SP_L3_PROTO_IPV4:
7275 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
7276 		break;
7277 	case MLXSW_SP_L3_PROTO_IPV6:
7278 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
7279 		break;
7280 	}
7281 }
7282 
7283 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
7284 				  struct mlxsw_sp_vr *vr,
7285 				  enum mlxsw_sp_l3proto proto)
7286 {
7287 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
7288 	struct mlxsw_sp_fib_node *fib_node, *tmp;
7289 
7290 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
7291 		bool do_break = &tmp->list == &fib->node_list;
7292 
7293 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
7294 		if (do_break)
7295 			break;
7296 	}
7297 }
7298 
7299 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
7300 {
7301 	int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
7302 	int i, j;
7303 
7304 	for (i = 0; i < max_vrs; i++) {
7305 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
7306 
7307 		if (!mlxsw_sp_vr_is_used(vr))
7308 			continue;
7309 
7310 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
7311 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
7312 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
7313 
7314 		/* If virtual router was only used for IPv4, then it's no
7315 		 * longer used.
7316 		 */
7317 		if (!mlxsw_sp_vr_is_used(vr))
7318 			continue;
7319 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
7320 	}
7321 }
7322 
7323 struct mlxsw_sp_fib6_event_work {
7324 	struct fib6_info **rt_arr;
7325 	unsigned int nrt6;
7326 };
7327 
7328 struct mlxsw_sp_fib_event_work {
7329 	struct work_struct work;
7330 	union {
7331 		struct mlxsw_sp_fib6_event_work fib6_work;
7332 		struct fib_entry_notifier_info fen_info;
7333 		struct fib_rule_notifier_info fr_info;
7334 		struct fib_nh_notifier_info fnh_info;
7335 		struct mfc_entry_notifier_info men_info;
7336 		struct vif_entry_notifier_info ven_info;
7337 	};
7338 	struct mlxsw_sp *mlxsw_sp;
7339 	unsigned long event;
7340 };
7341 
7342 static int
7343 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
7344 			       struct fib6_entry_notifier_info *fen6_info)
7345 {
7346 	struct fib6_info *rt = fen6_info->rt;
7347 	struct fib6_info **rt_arr;
7348 	struct fib6_info *iter;
7349 	unsigned int nrt6;
7350 	int i = 0;
7351 
7352 	nrt6 = fen6_info->nsiblings + 1;
7353 
7354 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
7355 	if (!rt_arr)
7356 		return -ENOMEM;
7357 
7358 	fib6_work->rt_arr = rt_arr;
7359 	fib6_work->nrt6 = nrt6;
7360 
7361 	rt_arr[0] = rt;
7362 	fib6_info_hold(rt);
7363 
7364 	if (!fen6_info->nsiblings)
7365 		return 0;
7366 
7367 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
7368 		if (i == fen6_info->nsiblings)
7369 			break;
7370 
7371 		rt_arr[i + 1] = iter;
7372 		fib6_info_hold(iter);
7373 		i++;
7374 	}
7375 	WARN_ON_ONCE(i != fen6_info->nsiblings);
7376 
7377 	return 0;
7378 }
7379 
7380 static void
7381 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
7382 {
7383 	int i;
7384 
7385 	for (i = 0; i < fib6_work->nrt6; i++)
7386 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
7387 	kfree(fib6_work->rt_arr);
7388 }
7389 
7390 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
7391 {
7392 	struct mlxsw_sp_fib_event_work *fib_work =
7393 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7394 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7395 	int err;
7396 
7397 	mutex_lock(&mlxsw_sp->router->lock);
7398 	mlxsw_sp_span_respin(mlxsw_sp);
7399 
7400 	switch (fib_work->event) {
7401 	case FIB_EVENT_ENTRY_REPLACE:
7402 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
7403 						   &fib_work->fen_info);
7404 		if (err) {
7405 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7406 			mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp,
7407 							      &fib_work->fen_info);
7408 		}
7409 		fib_info_put(fib_work->fen_info.fi);
7410 		break;
7411 	case FIB_EVENT_ENTRY_DEL:
7412 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
7413 		fib_info_put(fib_work->fen_info.fi);
7414 		break;
7415 	case FIB_EVENT_NH_ADD:
7416 	case FIB_EVENT_NH_DEL:
7417 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
7418 					fib_work->fnh_info.fib_nh);
7419 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
7420 		break;
7421 	}
7422 	mutex_unlock(&mlxsw_sp->router->lock);
7423 	kfree(fib_work);
7424 }
7425 
7426 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
7427 {
7428 	struct mlxsw_sp_fib_event_work *fib_work =
7429 		    container_of(work, struct mlxsw_sp_fib_event_work, work);
7430 	struct mlxsw_sp_fib6_event_work *fib6_work = &fib_work->fib6_work;
7431 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7432 	int err;
7433 
7434 	mutex_lock(&mlxsw_sp->router->lock);
7435 	mlxsw_sp_span_respin(mlxsw_sp);
7436 
7437 	switch (fib_work->event) {
7438 	case FIB_EVENT_ENTRY_REPLACE:
7439 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
7440 						   fib6_work->rt_arr,
7441 						   fib6_work->nrt6);
7442 		if (err) {
7443 			dev_warn(mlxsw_sp->bus_info->dev, "FIB replace failed.\n");
7444 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7445 							      fib6_work->rt_arr,
7446 							      fib6_work->nrt6);
7447 		}
7448 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7449 		break;
7450 	case FIB_EVENT_ENTRY_APPEND:
7451 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
7452 						  fib6_work->rt_arr,
7453 						  fib6_work->nrt6);
7454 		if (err) {
7455 			dev_warn(mlxsw_sp->bus_info->dev, "FIB append failed.\n");
7456 			mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp,
7457 							      fib6_work->rt_arr,
7458 							      fib6_work->nrt6);
7459 		}
7460 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7461 		break;
7462 	case FIB_EVENT_ENTRY_DEL:
7463 		mlxsw_sp_router_fib6_del(mlxsw_sp,
7464 					 fib6_work->rt_arr,
7465 					 fib6_work->nrt6);
7466 		mlxsw_sp_router_fib6_work_fini(fib6_work);
7467 		break;
7468 	}
7469 	mutex_unlock(&mlxsw_sp->router->lock);
7470 	kfree(fib_work);
7471 }
7472 
7473 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
7474 {
7475 	struct mlxsw_sp_fib_event_work *fib_work =
7476 		container_of(work, struct mlxsw_sp_fib_event_work, work);
7477 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
7478 	bool replace;
7479 	int err;
7480 
7481 	rtnl_lock();
7482 	mutex_lock(&mlxsw_sp->router->lock);
7483 	switch (fib_work->event) {
7484 	case FIB_EVENT_ENTRY_REPLACE:
7485 	case FIB_EVENT_ENTRY_ADD:
7486 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
7487 
7488 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
7489 						replace);
7490 		if (err)
7491 			dev_warn(mlxsw_sp->bus_info->dev, "MR entry add failed.\n");
7492 		mr_cache_put(fib_work->men_info.mfc);
7493 		break;
7494 	case FIB_EVENT_ENTRY_DEL:
7495 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
7496 		mr_cache_put(fib_work->men_info.mfc);
7497 		break;
7498 	case FIB_EVENT_VIF_ADD:
7499 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
7500 						    &fib_work->ven_info);
7501 		if (err)
7502 			dev_warn(mlxsw_sp->bus_info->dev, "MR VIF add failed.\n");
7503 		dev_put(fib_work->ven_info.dev);
7504 		break;
7505 	case FIB_EVENT_VIF_DEL:
7506 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
7507 					      &fib_work->ven_info);
7508 		dev_put(fib_work->ven_info.dev);
7509 		break;
7510 	}
7511 	mutex_unlock(&mlxsw_sp->router->lock);
7512 	rtnl_unlock();
7513 	kfree(fib_work);
7514 }
7515 
7516 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
7517 				       struct fib_notifier_info *info)
7518 {
7519 	struct fib_entry_notifier_info *fen_info;
7520 	struct fib_nh_notifier_info *fnh_info;
7521 
7522 	switch (fib_work->event) {
7523 	case FIB_EVENT_ENTRY_REPLACE:
7524 	case FIB_EVENT_ENTRY_DEL:
7525 		fen_info = container_of(info, struct fib_entry_notifier_info,
7526 					info);
7527 		fib_work->fen_info = *fen_info;
7528 		/* Take reference on fib_info to prevent it from being
7529 		 * freed while work is queued. Release it afterwards.
7530 		 */
7531 		fib_info_hold(fib_work->fen_info.fi);
7532 		break;
7533 	case FIB_EVENT_NH_ADD:
7534 	case FIB_EVENT_NH_DEL:
7535 		fnh_info = container_of(info, struct fib_nh_notifier_info,
7536 					info);
7537 		fib_work->fnh_info = *fnh_info;
7538 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
7539 		break;
7540 	}
7541 }
7542 
7543 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
7544 				      struct fib_notifier_info *info)
7545 {
7546 	struct fib6_entry_notifier_info *fen6_info;
7547 	int err;
7548 
7549 	switch (fib_work->event) {
7550 	case FIB_EVENT_ENTRY_REPLACE:
7551 	case FIB_EVENT_ENTRY_APPEND:
7552 	case FIB_EVENT_ENTRY_DEL:
7553 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
7554 					 info);
7555 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
7556 						     fen6_info);
7557 		if (err)
7558 			return err;
7559 		break;
7560 	}
7561 
7562 	return 0;
7563 }
7564 
7565 static void
7566 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
7567 			    struct fib_notifier_info *info)
7568 {
7569 	switch (fib_work->event) {
7570 	case FIB_EVENT_ENTRY_REPLACE:
7571 	case FIB_EVENT_ENTRY_ADD:
7572 	case FIB_EVENT_ENTRY_DEL:
7573 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
7574 		mr_cache_hold(fib_work->men_info.mfc);
7575 		break;
7576 	case FIB_EVENT_VIF_ADD:
7577 	case FIB_EVENT_VIF_DEL:
7578 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
7579 		dev_hold(fib_work->ven_info.dev);
7580 		break;
7581 	}
7582 }
7583 
7584 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
7585 					  struct fib_notifier_info *info,
7586 					  struct mlxsw_sp *mlxsw_sp)
7587 {
7588 	struct netlink_ext_ack *extack = info->extack;
7589 	struct fib_rule_notifier_info *fr_info;
7590 	struct fib_rule *rule;
7591 	int err = 0;
7592 
7593 	/* nothing to do at the moment */
7594 	if (event == FIB_EVENT_RULE_DEL)
7595 		return 0;
7596 
7597 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
7598 	rule = fr_info->rule;
7599 
7600 	/* Rule only affects locally generated traffic */
7601 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
7602 		return 0;
7603 
7604 	switch (info->family) {
7605 	case AF_INET:
7606 		if (!fib4_rule_default(rule) && !rule->l3mdev)
7607 			err = -EOPNOTSUPP;
7608 		break;
7609 	case AF_INET6:
7610 		if (!fib6_rule_default(rule) && !rule->l3mdev)
7611 			err = -EOPNOTSUPP;
7612 		break;
7613 	case RTNL_FAMILY_IPMR:
7614 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
7615 			err = -EOPNOTSUPP;
7616 		break;
7617 	case RTNL_FAMILY_IP6MR:
7618 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
7619 			err = -EOPNOTSUPP;
7620 		break;
7621 	}
7622 
7623 	if (err < 0)
7624 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
7625 
7626 	return err;
7627 }
7628 
7629 /* Called with rcu_read_lock() */
7630 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
7631 				     unsigned long event, void *ptr)
7632 {
7633 	struct mlxsw_sp_fib_event_work *fib_work;
7634 	struct fib_notifier_info *info = ptr;
7635 	struct mlxsw_sp_router *router;
7636 	int err;
7637 
7638 	if ((info->family != AF_INET && info->family != AF_INET6 &&
7639 	     info->family != RTNL_FAMILY_IPMR &&
7640 	     info->family != RTNL_FAMILY_IP6MR))
7641 		return NOTIFY_DONE;
7642 
7643 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7644 
7645 	switch (event) {
7646 	case FIB_EVENT_RULE_ADD:
7647 	case FIB_EVENT_RULE_DEL:
7648 		err = mlxsw_sp_router_fib_rule_event(event, info,
7649 						     router->mlxsw_sp);
7650 		return notifier_from_errno(err);
7651 	case FIB_EVENT_ENTRY_ADD:
7652 	case FIB_EVENT_ENTRY_REPLACE:
7653 	case FIB_EVENT_ENTRY_APPEND:
7654 		if (info->family == AF_INET) {
7655 			struct fib_entry_notifier_info *fen_info = ptr;
7656 
7657 			if (fen_info->fi->fib_nh_is_v6) {
7658 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
7659 				return notifier_from_errno(-EINVAL);
7660 			}
7661 		}
7662 		break;
7663 	}
7664 
7665 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
7666 	if (!fib_work)
7667 		return NOTIFY_BAD;
7668 
7669 	fib_work->mlxsw_sp = router->mlxsw_sp;
7670 	fib_work->event = event;
7671 
7672 	switch (info->family) {
7673 	case AF_INET:
7674 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
7675 		mlxsw_sp_router_fib4_event(fib_work, info);
7676 		break;
7677 	case AF_INET6:
7678 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
7679 		err = mlxsw_sp_router_fib6_event(fib_work, info);
7680 		if (err)
7681 			goto err_fib_event;
7682 		break;
7683 	case RTNL_FAMILY_IP6MR:
7684 	case RTNL_FAMILY_IPMR:
7685 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
7686 		mlxsw_sp_router_fibmr_event(fib_work, info);
7687 		break;
7688 	}
7689 
7690 	mlxsw_core_schedule_work(&fib_work->work);
7691 
7692 	return NOTIFY_DONE;
7693 
7694 err_fib_event:
7695 	kfree(fib_work);
7696 	return NOTIFY_BAD;
7697 }
7698 
7699 static struct mlxsw_sp_rif *
7700 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
7701 			 const struct net_device *dev)
7702 {
7703 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7704 	int i;
7705 
7706 	for (i = 0; i < max_rifs; i++)
7707 		if (mlxsw_sp->router->rifs[i] &&
7708 		    mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev))
7709 			return mlxsw_sp->router->rifs[i];
7710 
7711 	return NULL;
7712 }
7713 
7714 bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
7715 			 const struct net_device *dev)
7716 {
7717 	struct mlxsw_sp_rif *rif;
7718 
7719 	mutex_lock(&mlxsw_sp->router->lock);
7720 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7721 	mutex_unlock(&mlxsw_sp->router->lock);
7722 
7723 	return rif;
7724 }
7725 
7726 u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
7727 {
7728 	struct mlxsw_sp_rif *rif;
7729 	u16 vid = 0;
7730 
7731 	mutex_lock(&mlxsw_sp->router->lock);
7732 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7733 	if (!rif)
7734 		goto out;
7735 
7736 	/* We only return the VID for VLAN RIFs. Otherwise we return an
7737 	 * invalid value (0).
7738 	 */
7739 	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
7740 		goto out;
7741 
7742 	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7743 
7744 out:
7745 	mutex_unlock(&mlxsw_sp->router->lock);
7746 	return vid;
7747 }
7748 
7749 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
7750 {
7751 	char ritr_pl[MLXSW_REG_RITR_LEN];
7752 	int err;
7753 
7754 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
7755 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7756 	if (err)
7757 		return err;
7758 
7759 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
7760 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7761 }
7762 
7763 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
7764 					  struct mlxsw_sp_rif *rif)
7765 {
7766 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
7767 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
7768 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
7769 }
7770 
7771 static bool
7772 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
7773 			   unsigned long event)
7774 {
7775 	struct inet6_dev *inet6_dev;
7776 	bool addr_list_empty = true;
7777 	struct in_device *idev;
7778 
7779 	switch (event) {
7780 	case NETDEV_UP:
7781 		return rif == NULL;
7782 	case NETDEV_DOWN:
7783 		rcu_read_lock();
7784 		idev = __in_dev_get_rcu(dev);
7785 		if (idev && idev->ifa_list)
7786 			addr_list_empty = false;
7787 
7788 		inet6_dev = __in6_dev_get(dev);
7789 		if (addr_list_empty && inet6_dev &&
7790 		    !list_empty(&inet6_dev->addr_list))
7791 			addr_list_empty = false;
7792 		rcu_read_unlock();
7793 
7794 		/* macvlans do not have a RIF, but rather piggy back on the
7795 		 * RIF of their lower device.
7796 		 */
7797 		if (netif_is_macvlan(dev) && addr_list_empty)
7798 			return true;
7799 
7800 		if (rif && addr_list_empty &&
7801 		    !netif_is_l3_slave(rif->dev))
7802 			return true;
7803 		/* It is possible we already removed the RIF ourselves
7804 		 * if it was assigned to a netdev that is now a bridge
7805 		 * or LAG slave.
7806 		 */
7807 		return false;
7808 	}
7809 
7810 	return false;
7811 }
7812 
7813 static enum mlxsw_sp_rif_type
7814 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
7815 		      const struct net_device *dev)
7816 {
7817 	enum mlxsw_sp_fid_type type;
7818 
7819 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
7820 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
7821 
7822 	/* Otherwise RIF type is derived from the type of the underlying FID. */
7823 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
7824 		type = MLXSW_SP_FID_TYPE_8021Q;
7825 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
7826 		type = MLXSW_SP_FID_TYPE_8021Q;
7827 	else if (netif_is_bridge_master(dev))
7828 		type = MLXSW_SP_FID_TYPE_8021D;
7829 	else
7830 		type = MLXSW_SP_FID_TYPE_RFID;
7831 
7832 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
7833 }
7834 
7835 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index,
7836 				    u8 rif_entries)
7837 {
7838 	*p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table,
7839 				      rif_entries);
7840 	if (*p_rif_index == 0)
7841 		return -ENOBUFS;
7842 	*p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET;
7843 
7844 	/* RIF indexes must be aligned to the allocation size. */
7845 	WARN_ON_ONCE(*p_rif_index % rif_entries);
7846 
7847 	return 0;
7848 }
7849 
7850 static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7851 				    u8 rif_entries)
7852 {
7853 	gen_pool_free(mlxsw_sp->router->rifs_table,
7854 		      MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries);
7855 }
7856 
7857 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
7858 					       u16 vr_id,
7859 					       struct net_device *l3_dev)
7860 {
7861 	struct mlxsw_sp_rif *rif;
7862 
7863 	rif = kzalloc(rif_size, GFP_KERNEL);
7864 	if (!rif)
7865 		return NULL;
7866 
7867 	INIT_LIST_HEAD(&rif->nexthop_list);
7868 	INIT_LIST_HEAD(&rif->neigh_list);
7869 	if (l3_dev) {
7870 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
7871 		rif->mtu = l3_dev->mtu;
7872 		rif->dev = l3_dev;
7873 	}
7874 	rif->vr_id = vr_id;
7875 	rif->rif_index = rif_index;
7876 
7877 	return rif;
7878 }
7879 
7880 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
7881 					   u16 rif_index)
7882 {
7883 	return mlxsw_sp->router->rifs[rif_index];
7884 }
7885 
7886 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
7887 {
7888 	return rif->rif_index;
7889 }
7890 
7891 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7892 {
7893 	return lb_rif->common.rif_index;
7894 }
7895 
7896 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7897 {
7898 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
7899 	struct mlxsw_sp_vr *ul_vr;
7900 
7901 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
7902 	if (WARN_ON(IS_ERR(ul_vr)))
7903 		return 0;
7904 
7905 	return ul_vr->id;
7906 }
7907 
7908 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
7909 {
7910 	return lb_rif->ul_rif_id;
7911 }
7912 
7913 static bool
7914 mlxsw_sp_router_port_l3_stats_enabled(struct mlxsw_sp_rif *rif)
7915 {
7916 	return mlxsw_sp_rif_counter_valid_get(rif,
7917 					      MLXSW_SP_RIF_COUNTER_EGRESS) &&
7918 	       mlxsw_sp_rif_counter_valid_get(rif,
7919 					      MLXSW_SP_RIF_COUNTER_INGRESS);
7920 }
7921 
7922 static int
7923 mlxsw_sp_router_port_l3_stats_enable(struct mlxsw_sp_rif *rif)
7924 {
7925 	int err;
7926 
7927 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7928 	if (err)
7929 		return err;
7930 
7931 	/* Clear stale data. */
7932 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
7933 					       MLXSW_SP_RIF_COUNTER_INGRESS,
7934 					       NULL);
7935 	if (err)
7936 		goto err_clear_ingress;
7937 
7938 	err = mlxsw_sp_rif_counter_alloc(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7939 	if (err)
7940 		goto err_alloc_egress;
7941 
7942 	/* Clear stale data. */
7943 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
7944 					       MLXSW_SP_RIF_COUNTER_EGRESS,
7945 					       NULL);
7946 	if (err)
7947 		goto err_clear_egress;
7948 
7949 	return 0;
7950 
7951 err_clear_egress:
7952 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7953 err_alloc_egress:
7954 err_clear_ingress:
7955 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7956 	return err;
7957 }
7958 
7959 static void
7960 mlxsw_sp_router_port_l3_stats_disable(struct mlxsw_sp_rif *rif)
7961 {
7962 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_EGRESS);
7963 	mlxsw_sp_rif_counter_free(rif, MLXSW_SP_RIF_COUNTER_INGRESS);
7964 }
7965 
7966 static void
7967 mlxsw_sp_router_port_l3_stats_report_used(struct mlxsw_sp_rif *rif,
7968 					  struct netdev_notifier_offload_xstats_info *info)
7969 {
7970 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
7971 		return;
7972 	netdev_offload_xstats_report_used(info->report_used);
7973 }
7974 
7975 static int
7976 mlxsw_sp_router_port_l3_stats_fetch(struct mlxsw_sp_rif *rif,
7977 				    struct rtnl_hw_stats64 *p_stats)
7978 {
7979 	struct mlxsw_sp_rif_counter_set_basic ingress;
7980 	struct mlxsw_sp_rif_counter_set_basic egress;
7981 	int err;
7982 
7983 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
7984 					       MLXSW_SP_RIF_COUNTER_INGRESS,
7985 					       &ingress);
7986 	if (err)
7987 		return err;
7988 
7989 	err = mlxsw_sp_rif_counter_fetch_clear(rif,
7990 					       MLXSW_SP_RIF_COUNTER_EGRESS,
7991 					       &egress);
7992 	if (err)
7993 		return err;
7994 
7995 #define MLXSW_SP_ROUTER_ALL_GOOD(SET, SFX)		\
7996 		((SET.good_unicast_ ## SFX) +		\
7997 		 (SET.good_multicast_ ## SFX) +		\
7998 		 (SET.good_broadcast_ ## SFX))
7999 
8000 	p_stats->rx_packets = MLXSW_SP_ROUTER_ALL_GOOD(ingress, packets);
8001 	p_stats->tx_packets = MLXSW_SP_ROUTER_ALL_GOOD(egress, packets);
8002 	p_stats->rx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(ingress, bytes);
8003 	p_stats->tx_bytes = MLXSW_SP_ROUTER_ALL_GOOD(egress, bytes);
8004 	p_stats->rx_errors = ingress.error_packets;
8005 	p_stats->tx_errors = egress.error_packets;
8006 	p_stats->rx_dropped = ingress.discard_packets;
8007 	p_stats->tx_dropped = egress.discard_packets;
8008 	p_stats->multicast = ingress.good_multicast_packets +
8009 			     ingress.good_broadcast_packets;
8010 
8011 #undef MLXSW_SP_ROUTER_ALL_GOOD
8012 
8013 	return 0;
8014 }
8015 
8016 static int
8017 mlxsw_sp_router_port_l3_stats_report_delta(struct mlxsw_sp_rif *rif,
8018 					   struct netdev_notifier_offload_xstats_info *info)
8019 {
8020 	struct rtnl_hw_stats64 stats = {};
8021 	int err;
8022 
8023 	if (!mlxsw_sp_router_port_l3_stats_enabled(rif))
8024 		return 0;
8025 
8026 	err = mlxsw_sp_router_port_l3_stats_fetch(rif, &stats);
8027 	if (err)
8028 		return err;
8029 
8030 	netdev_offload_xstats_report_delta(info->report_delta, &stats);
8031 	return 0;
8032 }
8033 
8034 struct mlxsw_sp_router_hwstats_notify_work {
8035 	struct work_struct work;
8036 	struct net_device *dev;
8037 };
8038 
8039 static void mlxsw_sp_router_hwstats_notify_work(struct work_struct *work)
8040 {
8041 	struct mlxsw_sp_router_hwstats_notify_work *hws_work =
8042 		container_of(work, struct mlxsw_sp_router_hwstats_notify_work,
8043 			     work);
8044 
8045 	rtnl_lock();
8046 	rtnl_offload_xstats_notify(hws_work->dev);
8047 	rtnl_unlock();
8048 	dev_put(hws_work->dev);
8049 	kfree(hws_work);
8050 }
8051 
8052 static void
8053 mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev)
8054 {
8055 	struct mlxsw_sp_router_hwstats_notify_work *hws_work;
8056 
8057 	/* To collect notification payload, the core ends up sending another
8058 	 * notifier block message, which would deadlock on the attempt to
8059 	 * acquire the router lock again. Just postpone the notification until
8060 	 * later.
8061 	 */
8062 
8063 	hws_work = kzalloc(sizeof(*hws_work), GFP_KERNEL);
8064 	if (!hws_work)
8065 		return;
8066 
8067 	INIT_WORK(&hws_work->work, mlxsw_sp_router_hwstats_notify_work);
8068 	dev_hold(dev);
8069 	hws_work->dev = dev;
8070 	mlxsw_core_schedule_work(&hws_work->work);
8071 }
8072 
8073 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
8074 {
8075 	return rif->dev->ifindex;
8076 }
8077 
8078 static const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
8079 {
8080 	return rif->dev;
8081 }
8082 
8083 bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif)
8084 {
8085 	return !!mlxsw_sp_rif_dev(rif);
8086 }
8087 
8088 bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif,
8089 			 const struct net_device *dev)
8090 {
8091 	return mlxsw_sp_rif_dev(rif) == dev;
8092 }
8093 
8094 static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif)
8095 {
8096 	struct rtnl_hw_stats64 stats = {};
8097 
8098 	if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats))
8099 		netdev_offload_xstats_push_delta(rif->dev,
8100 						 NETDEV_OFFLOAD_XSTATS_TYPE_L3,
8101 						 &stats);
8102 }
8103 
8104 static struct mlxsw_sp_rif *
8105 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
8106 		    const struct mlxsw_sp_rif_params *params,
8107 		    struct netlink_ext_ack *extack)
8108 {
8109 	u8 rif_entries = params->double_entry ? 2 : 1;
8110 	u32 tb_id = l3mdev_fib_table(params->dev);
8111 	const struct mlxsw_sp_rif_ops *ops;
8112 	struct mlxsw_sp_fid *fid = NULL;
8113 	enum mlxsw_sp_rif_type type;
8114 	struct mlxsw_sp_rif *rif;
8115 	struct mlxsw_sp_vr *vr;
8116 	u16 rif_index;
8117 	int i, err;
8118 
8119 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
8120 	ops = mlxsw_sp->router->rif_ops_arr[type];
8121 
8122 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
8123 	if (IS_ERR(vr))
8124 		return ERR_CAST(vr);
8125 	vr->rif_count++;
8126 
8127 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
8128 	if (err) {
8129 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
8130 		goto err_rif_index_alloc;
8131 	}
8132 
8133 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
8134 	if (!rif) {
8135 		err = -ENOMEM;
8136 		goto err_rif_alloc;
8137 	}
8138 	dev_hold(rif->dev);
8139 	mlxsw_sp->router->rifs[rif_index] = rif;
8140 	rif->mlxsw_sp = mlxsw_sp;
8141 	rif->ops = ops;
8142 	rif->rif_entries = rif_entries;
8143 
8144 	if (ops->fid_get) {
8145 		fid = ops->fid_get(rif, extack);
8146 		if (IS_ERR(fid)) {
8147 			err = PTR_ERR(fid);
8148 			goto err_fid_get;
8149 		}
8150 		rif->fid = fid;
8151 	}
8152 
8153 	if (ops->setup)
8154 		ops->setup(rif, params);
8155 
8156 	err = ops->configure(rif, extack);
8157 	if (err)
8158 		goto err_configure;
8159 
8160 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
8161 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
8162 		if (err)
8163 			goto err_mr_rif_add;
8164 	}
8165 
8166 	if (netdev_offload_xstats_enabled(rif->dev,
8167 					  NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8168 		err = mlxsw_sp_router_port_l3_stats_enable(rif);
8169 		if (err)
8170 			goto err_stats_enable;
8171 		mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
8172 	} else {
8173 		mlxsw_sp_rif_counters_alloc(rif);
8174 	}
8175 
8176 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
8177 	return rif;
8178 
8179 err_stats_enable:
8180 err_mr_rif_add:
8181 	for (i--; i >= 0; i--)
8182 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8183 	ops->deconfigure(rif);
8184 err_configure:
8185 	if (fid)
8186 		mlxsw_sp_fid_put(fid);
8187 err_fid_get:
8188 	mlxsw_sp->router->rifs[rif_index] = NULL;
8189 	dev_put(rif->dev);
8190 	kfree(rif);
8191 err_rif_alloc:
8192 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8193 err_rif_index_alloc:
8194 	vr->rif_count--;
8195 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8196 	return ERR_PTR(err);
8197 }
8198 
8199 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
8200 {
8201 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
8202 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8203 	struct mlxsw_sp_fid *fid = rif->fid;
8204 	u8 rif_entries = rif->rif_entries;
8205 	u16 rif_index = rif->rif_index;
8206 	struct mlxsw_sp_vr *vr;
8207 	int i;
8208 
8209 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
8210 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
8211 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
8212 
8213 	if (netdev_offload_xstats_enabled(rif->dev,
8214 					  NETDEV_OFFLOAD_XSTATS_TYPE_L3)) {
8215 		mlxsw_sp_rif_push_l3_stats(rif);
8216 		mlxsw_sp_router_port_l3_stats_disable(rif);
8217 		mlxsw_sp_router_hwstats_notify_schedule(rif->dev);
8218 	} else {
8219 		mlxsw_sp_rif_counters_free(rif);
8220 	}
8221 
8222 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
8223 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
8224 	ops->deconfigure(rif);
8225 	if (fid)
8226 		/* Loopback RIFs are not associated with a FID. */
8227 		mlxsw_sp_fid_put(fid);
8228 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
8229 	dev_put(rif->dev);
8230 	kfree(rif);
8231 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
8232 	vr->rif_count--;
8233 	mlxsw_sp_vr_put(mlxsw_sp, vr);
8234 }
8235 
8236 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
8237 				 struct net_device *dev)
8238 {
8239 	struct mlxsw_sp_rif *rif;
8240 
8241 	mutex_lock(&mlxsw_sp->router->lock);
8242 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8243 	if (!rif)
8244 		goto out;
8245 	mlxsw_sp_rif_destroy(rif);
8246 out:
8247 	mutex_unlock(&mlxsw_sp->router->lock);
8248 }
8249 
8250 static void
8251 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
8252 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8253 {
8254 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8255 
8256 	params->vid = mlxsw_sp_port_vlan->vid;
8257 	params->lag = mlxsw_sp_port->lagged;
8258 	if (params->lag)
8259 		params->lag_id = mlxsw_sp_port->lag_id;
8260 	else
8261 		params->system_port = mlxsw_sp_port->local_port;
8262 }
8263 
8264 static struct mlxsw_sp_rif_subport *
8265 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
8266 {
8267 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
8268 }
8269 
8270 static struct mlxsw_sp_rif *
8271 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
8272 			 const struct mlxsw_sp_rif_params *params,
8273 			 struct netlink_ext_ack *extack)
8274 {
8275 	struct mlxsw_sp_rif_subport *rif_subport;
8276 	struct mlxsw_sp_rif *rif;
8277 
8278 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
8279 	if (!rif)
8280 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
8281 
8282 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8283 	refcount_inc(&rif_subport->ref_count);
8284 	return rif;
8285 }
8286 
8287 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
8288 {
8289 	struct mlxsw_sp_rif_subport *rif_subport;
8290 
8291 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
8292 	if (!refcount_dec_and_test(&rif_subport->ref_count))
8293 		return;
8294 
8295 	mlxsw_sp_rif_destroy(rif);
8296 }
8297 
8298 static int mlxsw_sp_rif_mac_profile_index_alloc(struct mlxsw_sp *mlxsw_sp,
8299 						struct mlxsw_sp_rif_mac_profile *profile,
8300 						struct netlink_ext_ack *extack)
8301 {
8302 	u8 max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
8303 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8304 	int id;
8305 
8306 	id = idr_alloc(&router->rif_mac_profiles_idr, profile, 0,
8307 		       max_rif_mac_profiles, GFP_KERNEL);
8308 
8309 	if (id >= 0) {
8310 		profile->id = id;
8311 		return 0;
8312 	}
8313 
8314 	if (id == -ENOSPC)
8315 		NL_SET_ERR_MSG_MOD(extack,
8316 				   "Exceeded number of supported router interface MAC profiles");
8317 
8318 	return id;
8319 }
8320 
8321 static struct mlxsw_sp_rif_mac_profile *
8322 mlxsw_sp_rif_mac_profile_index_free(struct mlxsw_sp *mlxsw_sp, u8 mac_profile)
8323 {
8324 	struct mlxsw_sp_rif_mac_profile *profile;
8325 
8326 	profile = idr_remove(&mlxsw_sp->router->rif_mac_profiles_idr,
8327 			     mac_profile);
8328 	WARN_ON(!profile);
8329 	return profile;
8330 }
8331 
8332 static struct mlxsw_sp_rif_mac_profile *
8333 mlxsw_sp_rif_mac_profile_alloc(const char *mac)
8334 {
8335 	struct mlxsw_sp_rif_mac_profile *profile;
8336 
8337 	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
8338 	if (!profile)
8339 		return NULL;
8340 
8341 	ether_addr_copy(profile->mac_prefix, mac);
8342 	refcount_set(&profile->ref_count, 1);
8343 	return profile;
8344 }
8345 
8346 static struct mlxsw_sp_rif_mac_profile *
8347 mlxsw_sp_rif_mac_profile_find(const struct mlxsw_sp *mlxsw_sp, const char *mac)
8348 {
8349 	struct mlxsw_sp_router *router = mlxsw_sp->router;
8350 	struct mlxsw_sp_rif_mac_profile *profile;
8351 	int id;
8352 
8353 	idr_for_each_entry(&router->rif_mac_profiles_idr, profile, id) {
8354 		if (ether_addr_equal_masked(profile->mac_prefix, mac,
8355 					    mlxsw_sp->mac_mask))
8356 			return profile;
8357 	}
8358 
8359 	return NULL;
8360 }
8361 
8362 static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
8363 {
8364 	const struct mlxsw_sp *mlxsw_sp = priv;
8365 
8366 	return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
8367 }
8368 
8369 static u64 mlxsw_sp_rifs_occ_get(void *priv)
8370 {
8371 	const struct mlxsw_sp *mlxsw_sp = priv;
8372 
8373 	return atomic_read(&mlxsw_sp->router->rifs_count);
8374 }
8375 
8376 static struct mlxsw_sp_rif_mac_profile *
8377 mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
8378 				struct netlink_ext_ack *extack)
8379 {
8380 	struct mlxsw_sp_rif_mac_profile *profile;
8381 	int err;
8382 
8383 	profile = mlxsw_sp_rif_mac_profile_alloc(mac);
8384 	if (!profile)
8385 		return ERR_PTR(-ENOMEM);
8386 
8387 	err = mlxsw_sp_rif_mac_profile_index_alloc(mlxsw_sp, profile, extack);
8388 	if (err)
8389 		goto profile_index_alloc_err;
8390 
8391 	atomic_inc(&mlxsw_sp->router->rif_mac_profiles_count);
8392 	return profile;
8393 
8394 profile_index_alloc_err:
8395 	kfree(profile);
8396 	return ERR_PTR(err);
8397 }
8398 
8399 static void mlxsw_sp_rif_mac_profile_destroy(struct mlxsw_sp *mlxsw_sp,
8400 					     u8 mac_profile)
8401 {
8402 	struct mlxsw_sp_rif_mac_profile *profile;
8403 
8404 	atomic_dec(&mlxsw_sp->router->rif_mac_profiles_count);
8405 	profile = mlxsw_sp_rif_mac_profile_index_free(mlxsw_sp, mac_profile);
8406 	kfree(profile);
8407 }
8408 
8409 static int mlxsw_sp_rif_mac_profile_get(struct mlxsw_sp *mlxsw_sp,
8410 					const char *mac, u8 *p_mac_profile,
8411 					struct netlink_ext_ack *extack)
8412 {
8413 	struct mlxsw_sp_rif_mac_profile *profile;
8414 
8415 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, mac);
8416 	if (profile) {
8417 		refcount_inc(&profile->ref_count);
8418 		goto out;
8419 	}
8420 
8421 	profile = mlxsw_sp_rif_mac_profile_create(mlxsw_sp, mac, extack);
8422 	if (IS_ERR(profile))
8423 		return PTR_ERR(profile);
8424 
8425 out:
8426 	*p_mac_profile = profile->id;
8427 	return 0;
8428 }
8429 
8430 static void mlxsw_sp_rif_mac_profile_put(struct mlxsw_sp *mlxsw_sp,
8431 					 u8 mac_profile)
8432 {
8433 	struct mlxsw_sp_rif_mac_profile *profile;
8434 
8435 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8436 			   mac_profile);
8437 	if (WARN_ON(!profile))
8438 		return;
8439 
8440 	if (!refcount_dec_and_test(&profile->ref_count))
8441 		return;
8442 
8443 	mlxsw_sp_rif_mac_profile_destroy(mlxsw_sp, mac_profile);
8444 }
8445 
8446 static bool mlxsw_sp_rif_mac_profile_is_shared(const struct mlxsw_sp_rif *rif)
8447 {
8448 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8449 	struct mlxsw_sp_rif_mac_profile *profile;
8450 
8451 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8452 			   rif->mac_profile_id);
8453 	if (WARN_ON(!profile))
8454 		return false;
8455 
8456 	return refcount_read(&profile->ref_count) > 1;
8457 }
8458 
8459 static int mlxsw_sp_rif_mac_profile_edit(struct mlxsw_sp_rif *rif,
8460 					 const char *new_mac)
8461 {
8462 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
8463 	struct mlxsw_sp_rif_mac_profile *profile;
8464 
8465 	profile = idr_find(&mlxsw_sp->router->rif_mac_profiles_idr,
8466 			   rif->mac_profile_id);
8467 	if (WARN_ON(!profile))
8468 		return -EINVAL;
8469 
8470 	ether_addr_copy(profile->mac_prefix, new_mac);
8471 	return 0;
8472 }
8473 
8474 static int
8475 mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp,
8476 				 struct mlxsw_sp_rif *rif,
8477 				 const char *new_mac,
8478 				 struct netlink_ext_ack *extack)
8479 {
8480 	u8 mac_profile;
8481 	int err;
8482 
8483 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif) &&
8484 	    !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac))
8485 		return mlxsw_sp_rif_mac_profile_edit(rif, new_mac);
8486 
8487 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac,
8488 					   &mac_profile, extack);
8489 	if (err)
8490 		return err;
8491 
8492 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, rif->mac_profile_id);
8493 	rif->mac_profile_id = mac_profile;
8494 	return 0;
8495 }
8496 
8497 static int
8498 __mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8499 				 struct net_device *l3_dev,
8500 				 struct netlink_ext_ack *extack)
8501 {
8502 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8503 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
8504 	struct mlxsw_sp_rif_params params = {
8505 		.dev = l3_dev,
8506 	};
8507 	u16 vid = mlxsw_sp_port_vlan->vid;
8508 	struct mlxsw_sp_rif *rif;
8509 	struct mlxsw_sp_fid *fid;
8510 	int err;
8511 
8512 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
8513 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
8514 	if (IS_ERR(rif))
8515 		return PTR_ERR(rif);
8516 
8517 	/* FID was already created, just take a reference */
8518 	fid = rif->ops->fid_get(rif, extack);
8519 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
8520 	if (err)
8521 		goto err_fid_port_vid_map;
8522 
8523 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
8524 	if (err)
8525 		goto err_port_vid_learning_set;
8526 
8527 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
8528 					BR_STATE_FORWARDING);
8529 	if (err)
8530 		goto err_port_vid_stp_set;
8531 
8532 	mlxsw_sp_port_vlan->fid = fid;
8533 
8534 	return 0;
8535 
8536 err_port_vid_stp_set:
8537 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8538 err_port_vid_learning_set:
8539 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8540 err_fid_port_vid_map:
8541 	mlxsw_sp_fid_put(fid);
8542 	mlxsw_sp_rif_subport_put(rif);
8543 	return err;
8544 }
8545 
8546 static void
8547 __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8548 {
8549 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
8550 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
8551 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
8552 	u16 vid = mlxsw_sp_port_vlan->vid;
8553 
8554 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
8555 		return;
8556 
8557 	mlxsw_sp_port_vlan->fid = NULL;
8558 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
8559 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
8560 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
8561 	mlxsw_sp_fid_put(fid);
8562 	mlxsw_sp_rif_subport_put(rif);
8563 }
8564 
8565 int
8566 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
8567 			       struct net_device *l3_dev,
8568 			       struct netlink_ext_ack *extack)
8569 {
8570 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8571 	struct mlxsw_sp_rif *rif;
8572 	int err = 0;
8573 
8574 	mutex_lock(&mlxsw_sp->router->lock);
8575 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8576 	if (!rif)
8577 		goto out;
8578 
8579 	err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev,
8580 					       extack);
8581 out:
8582 	mutex_unlock(&mlxsw_sp->router->lock);
8583 	return err;
8584 }
8585 
8586 void
8587 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
8588 {
8589 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
8590 
8591 	mutex_lock(&mlxsw_sp->router->lock);
8592 	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8593 	mutex_unlock(&mlxsw_sp->router->lock);
8594 }
8595 
8596 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
8597 					     struct net_device *port_dev,
8598 					     unsigned long event, u16 vid,
8599 					     struct netlink_ext_ack *extack)
8600 {
8601 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
8602 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
8603 
8604 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
8605 	if (WARN_ON(!mlxsw_sp_port_vlan))
8606 		return -EINVAL;
8607 
8608 	switch (event) {
8609 	case NETDEV_UP:
8610 		return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
8611 							l3_dev, extack);
8612 	case NETDEV_DOWN:
8613 		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
8614 		break;
8615 	}
8616 
8617 	return 0;
8618 }
8619 
8620 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
8621 					unsigned long event,
8622 					struct netlink_ext_ack *extack)
8623 {
8624 	if (netif_is_any_bridge_port(port_dev) || netif_is_lag_port(port_dev))
8625 		return 0;
8626 
8627 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
8628 						 MLXSW_SP_DEFAULT_VID, extack);
8629 }
8630 
8631 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
8632 					 struct net_device *lag_dev,
8633 					 unsigned long event, u16 vid,
8634 					 struct netlink_ext_ack *extack)
8635 {
8636 	struct net_device *port_dev;
8637 	struct list_head *iter;
8638 	int err;
8639 
8640 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
8641 		if (mlxsw_sp_port_dev_check(port_dev)) {
8642 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
8643 								port_dev,
8644 								event, vid,
8645 								extack);
8646 			if (err)
8647 				return err;
8648 		}
8649 	}
8650 
8651 	return 0;
8652 }
8653 
8654 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
8655 				       unsigned long event,
8656 				       struct netlink_ext_ack *extack)
8657 {
8658 	if (netif_is_bridge_port(lag_dev))
8659 		return 0;
8660 
8661 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
8662 					     MLXSW_SP_DEFAULT_VID, extack);
8663 }
8664 
8665 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
8666 					  struct net_device *l3_dev,
8667 					  unsigned long event,
8668 					  struct netlink_ext_ack *extack)
8669 {
8670 	struct mlxsw_sp_rif_params params = {
8671 		.dev = l3_dev,
8672 	};
8673 	struct mlxsw_sp_rif *rif;
8674 
8675 	switch (event) {
8676 	case NETDEV_UP:
8677 		if (netif_is_bridge_master(l3_dev) && br_vlan_enabled(l3_dev)) {
8678 			u16 proto;
8679 
8680 			br_vlan_get_proto(l3_dev, &proto);
8681 			if (proto == ETH_P_8021AD) {
8682 				NL_SET_ERR_MSG_MOD(extack, "Adding an IP address to 802.1ad bridge is not supported");
8683 				return -EOPNOTSUPP;
8684 			}
8685 		}
8686 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
8687 		if (IS_ERR(rif))
8688 			return PTR_ERR(rif);
8689 		break;
8690 	case NETDEV_DOWN:
8691 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
8692 		mlxsw_sp_rif_destroy(rif);
8693 		break;
8694 	}
8695 
8696 	return 0;
8697 }
8698 
8699 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
8700 					struct net_device *vlan_dev,
8701 					unsigned long event,
8702 					struct netlink_ext_ack *extack)
8703 {
8704 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
8705 	u16 vid = vlan_dev_vlan_id(vlan_dev);
8706 
8707 	if (netif_is_bridge_port(vlan_dev))
8708 		return 0;
8709 
8710 	if (mlxsw_sp_port_dev_check(real_dev))
8711 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
8712 							 event, vid, extack);
8713 	else if (netif_is_lag_master(real_dev))
8714 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
8715 						     vid, extack);
8716 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
8717 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
8718 						      extack);
8719 
8720 	return 0;
8721 }
8722 
8723 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
8724 {
8725 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
8726 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8727 
8728 	return ether_addr_equal_masked(mac, vrrp4, mask);
8729 }
8730 
8731 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
8732 {
8733 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
8734 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
8735 
8736 	return ether_addr_equal_masked(mac, vrrp6, mask);
8737 }
8738 
8739 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8740 				const u8 *mac, bool adding)
8741 {
8742 	char ritr_pl[MLXSW_REG_RITR_LEN];
8743 	u8 vrrp_id = adding ? mac[5] : 0;
8744 	int err;
8745 
8746 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
8747 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
8748 		return 0;
8749 
8750 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
8751 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8752 	if (err)
8753 		return err;
8754 
8755 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
8756 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
8757 	else
8758 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
8759 
8760 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
8761 }
8762 
8763 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
8764 				    const struct net_device *macvlan_dev,
8765 				    struct netlink_ext_ack *extack)
8766 {
8767 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8768 	struct mlxsw_sp_rif *rif;
8769 	int err;
8770 
8771 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8772 	if (!rif) {
8773 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
8774 		return -EOPNOTSUPP;
8775 	}
8776 
8777 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8778 				  mlxsw_sp_fid_index(rif->fid), true);
8779 	if (err)
8780 		return err;
8781 
8782 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
8783 				   macvlan_dev->dev_addr, true);
8784 	if (err)
8785 		goto err_rif_vrrp_add;
8786 
8787 	/* Make sure the bridge driver does not have this MAC pointing at
8788 	 * some other port.
8789 	 */
8790 	if (rif->ops->fdb_del)
8791 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
8792 
8793 	return 0;
8794 
8795 err_rif_vrrp_add:
8796 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8797 			    mlxsw_sp_fid_index(rif->fid), false);
8798 	return err;
8799 }
8800 
8801 static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8802 				       const struct net_device *macvlan_dev)
8803 {
8804 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
8805 	struct mlxsw_sp_rif *rif;
8806 
8807 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
8808 	/* If we do not have a RIF, then we already took care of
8809 	 * removing the macvlan's MAC during RIF deletion.
8810 	 */
8811 	if (!rif)
8812 		return;
8813 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
8814 			     false);
8815 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
8816 			    mlxsw_sp_fid_index(rif->fid), false);
8817 }
8818 
8819 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
8820 			      const struct net_device *macvlan_dev)
8821 {
8822 	mutex_lock(&mlxsw_sp->router->lock);
8823 	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8824 	mutex_unlock(&mlxsw_sp->router->lock);
8825 }
8826 
8827 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
8828 					   struct net_device *macvlan_dev,
8829 					   unsigned long event,
8830 					   struct netlink_ext_ack *extack)
8831 {
8832 	switch (event) {
8833 	case NETDEV_UP:
8834 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
8835 	case NETDEV_DOWN:
8836 		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
8837 		break;
8838 	}
8839 
8840 	return 0;
8841 }
8842 
8843 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
8844 				     struct net_device *dev,
8845 				     unsigned long event,
8846 				     struct netlink_ext_ack *extack)
8847 {
8848 	if (mlxsw_sp_port_dev_check(dev))
8849 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
8850 	else if (netif_is_lag_master(dev))
8851 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
8852 	else if (netif_is_bridge_master(dev))
8853 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
8854 						      extack);
8855 	else if (is_vlan_dev(dev))
8856 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
8857 						    extack);
8858 	else if (netif_is_macvlan(dev))
8859 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
8860 						       extack);
8861 	else
8862 		return 0;
8863 }
8864 
8865 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
8866 				   unsigned long event, void *ptr)
8867 {
8868 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
8869 	struct net_device *dev = ifa->ifa_dev->dev;
8870 	struct mlxsw_sp_router *router;
8871 	struct mlxsw_sp_rif *rif;
8872 	int err = 0;
8873 
8874 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
8875 	if (event == NETDEV_UP)
8876 		return NOTIFY_DONE;
8877 
8878 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
8879 	mutex_lock(&router->lock);
8880 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
8881 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8882 		goto out;
8883 
8884 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
8885 out:
8886 	mutex_unlock(&router->lock);
8887 	return notifier_from_errno(err);
8888 }
8889 
8890 static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
8891 					 unsigned long event, void *ptr)
8892 {
8893 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
8894 	struct net_device *dev = ivi->ivi_dev->dev;
8895 	struct mlxsw_sp *mlxsw_sp;
8896 	struct mlxsw_sp_rif *rif;
8897 	int err = 0;
8898 
8899 	mlxsw_sp = mlxsw_sp_lower_get(dev);
8900 	if (!mlxsw_sp)
8901 		return NOTIFY_DONE;
8902 
8903 	mutex_lock(&mlxsw_sp->router->lock);
8904 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8905 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8906 		goto out;
8907 
8908 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
8909 out:
8910 	mutex_unlock(&mlxsw_sp->router->lock);
8911 	return notifier_from_errno(err);
8912 }
8913 
8914 struct mlxsw_sp_inet6addr_event_work {
8915 	struct work_struct work;
8916 	struct mlxsw_sp *mlxsw_sp;
8917 	struct net_device *dev;
8918 	unsigned long event;
8919 };
8920 
8921 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
8922 {
8923 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
8924 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
8925 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
8926 	struct net_device *dev = inet6addr_work->dev;
8927 	unsigned long event = inet6addr_work->event;
8928 	struct mlxsw_sp_rif *rif;
8929 
8930 	rtnl_lock();
8931 	mutex_lock(&mlxsw_sp->router->lock);
8932 
8933 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8934 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8935 		goto out;
8936 
8937 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
8938 out:
8939 	mutex_unlock(&mlxsw_sp->router->lock);
8940 	rtnl_unlock();
8941 	dev_put(dev);
8942 	kfree(inet6addr_work);
8943 }
8944 
8945 /* Called with rcu_read_lock() */
8946 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
8947 				    unsigned long event, void *ptr)
8948 {
8949 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
8950 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
8951 	struct net_device *dev = if6->idev->dev;
8952 	struct mlxsw_sp_router *router;
8953 
8954 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
8955 	if (event == NETDEV_UP)
8956 		return NOTIFY_DONE;
8957 
8958 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
8959 	if (!inet6addr_work)
8960 		return NOTIFY_BAD;
8961 
8962 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
8963 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
8964 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
8965 	inet6addr_work->dev = dev;
8966 	inet6addr_work->event = event;
8967 	dev_hold(dev);
8968 	mlxsw_core_schedule_work(&inet6addr_work->work);
8969 
8970 	return NOTIFY_DONE;
8971 }
8972 
8973 static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
8974 					  unsigned long event, void *ptr)
8975 {
8976 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
8977 	struct net_device *dev = i6vi->i6vi_dev->dev;
8978 	struct mlxsw_sp *mlxsw_sp;
8979 	struct mlxsw_sp_rif *rif;
8980 	int err = 0;
8981 
8982 	mlxsw_sp = mlxsw_sp_lower_get(dev);
8983 	if (!mlxsw_sp)
8984 		return NOTIFY_DONE;
8985 
8986 	mutex_lock(&mlxsw_sp->router->lock);
8987 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
8988 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
8989 		goto out;
8990 
8991 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
8992 out:
8993 	mutex_unlock(&mlxsw_sp->router->lock);
8994 	return notifier_from_errno(err);
8995 }
8996 
8997 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
8998 			     const char *mac, int mtu, u8 mac_profile)
8999 {
9000 	char ritr_pl[MLXSW_REG_RITR_LEN];
9001 	int err;
9002 
9003 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
9004 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9005 	if (err)
9006 		return err;
9007 
9008 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
9009 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
9010 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, mac_profile);
9011 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
9012 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9013 }
9014 
9015 static int
9016 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
9017 				  struct mlxsw_sp_rif *rif,
9018 				  struct netlink_ext_ack *extack)
9019 {
9020 	struct net_device *dev = rif->dev;
9021 	u8 old_mac_profile;
9022 	u16 fid_index;
9023 	int err;
9024 
9025 	fid_index = mlxsw_sp_fid_index(rif->fid);
9026 
9027 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
9028 	if (err)
9029 		return err;
9030 
9031 	old_mac_profile = rif->mac_profile_id;
9032 	err = mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, dev->dev_addr,
9033 					       extack);
9034 	if (err)
9035 		goto err_rif_mac_profile_replace;
9036 
9037 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
9038 				dev->mtu, rif->mac_profile_id);
9039 	if (err)
9040 		goto err_rif_edit;
9041 
9042 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
9043 	if (err)
9044 		goto err_rif_fdb_op;
9045 
9046 	if (rif->mtu != dev->mtu) {
9047 		struct mlxsw_sp_vr *vr;
9048 		int i;
9049 
9050 		/* The RIF is relevant only to its mr_table instance, as unlike
9051 		 * unicast routing, in multicast routing a RIF cannot be shared
9052 		 * between several multicast routing tables.
9053 		 */
9054 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
9055 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
9056 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
9057 						   rif, dev->mtu);
9058 	}
9059 
9060 	ether_addr_copy(rif->addr, dev->dev_addr);
9061 	rif->mtu = dev->mtu;
9062 
9063 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
9064 
9065 	return 0;
9066 
9067 err_rif_fdb_op:
9068 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu,
9069 			  old_mac_profile);
9070 err_rif_edit:
9071 	mlxsw_sp_rif_mac_profile_replace(mlxsw_sp, rif, rif->addr, extack);
9072 err_rif_mac_profile_replace:
9073 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
9074 	return err;
9075 }
9076 
9077 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
9078 			    struct netdev_notifier_pre_changeaddr_info *info)
9079 {
9080 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9081 	struct mlxsw_sp_rif_mac_profile *profile;
9082 	struct netlink_ext_ack *extack;
9083 	u8 max_rif_mac_profiles;
9084 	u64 occ;
9085 
9086 	extack = netdev_notifier_info_to_extack(&info->info);
9087 
9088 	profile = mlxsw_sp_rif_mac_profile_find(mlxsw_sp, info->dev_addr);
9089 	if (profile)
9090 		return 0;
9091 
9092 	max_rif_mac_profiles = mlxsw_sp->router->max_rif_mac_profile;
9093 	occ = mlxsw_sp_rif_mac_profiles_occ_get(mlxsw_sp);
9094 	if (occ < max_rif_mac_profiles)
9095 		return 0;
9096 
9097 	if (!mlxsw_sp_rif_mac_profile_is_shared(rif))
9098 		return 0;
9099 
9100 	NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interface MAC profiles");
9101 	return -ENOBUFS;
9102 }
9103 
9104 static bool mlxsw_sp_is_offload_xstats_event(unsigned long event)
9105 {
9106 	switch (event) {
9107 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9108 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9109 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9110 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9111 		return true;
9112 	}
9113 
9114 	return false;
9115 }
9116 
9117 static int
9118 mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif,
9119 					unsigned long event,
9120 					struct netdev_notifier_offload_xstats_info *info)
9121 {
9122 	switch (info->type) {
9123 	case NETDEV_OFFLOAD_XSTATS_TYPE_L3:
9124 		break;
9125 	default:
9126 		return 0;
9127 	}
9128 
9129 	switch (event) {
9130 	case NETDEV_OFFLOAD_XSTATS_ENABLE:
9131 		return mlxsw_sp_router_port_l3_stats_enable(rif);
9132 	case NETDEV_OFFLOAD_XSTATS_DISABLE:
9133 		mlxsw_sp_router_port_l3_stats_disable(rif);
9134 		return 0;
9135 	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
9136 		mlxsw_sp_router_port_l3_stats_report_used(rif, info);
9137 		return 0;
9138 	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
9139 		return mlxsw_sp_router_port_l3_stats_report_delta(rif, info);
9140 	}
9141 
9142 	WARN_ON_ONCE(1);
9143 	return 0;
9144 }
9145 
9146 static int
9147 mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp,
9148 				      struct net_device *dev,
9149 				      unsigned long event,
9150 				      struct netdev_notifier_offload_xstats_info *info)
9151 {
9152 	struct mlxsw_sp_rif *rif;
9153 
9154 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9155 	if (!rif)
9156 		return 0;
9157 
9158 	return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info);
9159 }
9160 
9161 static bool mlxsw_sp_is_router_event(unsigned long event)
9162 {
9163 	switch (event) {
9164 	case NETDEV_PRE_CHANGEADDR:
9165 	case NETDEV_CHANGEADDR:
9166 	case NETDEV_CHANGEMTU:
9167 		return true;
9168 	default:
9169 		return false;
9170 	}
9171 }
9172 
9173 static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
9174 						unsigned long event, void *ptr)
9175 {
9176 	struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
9177 	struct mlxsw_sp *mlxsw_sp;
9178 	struct mlxsw_sp_rif *rif;
9179 
9180 	mlxsw_sp = mlxsw_sp_lower_get(dev);
9181 	if (!mlxsw_sp)
9182 		return 0;
9183 
9184 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
9185 	if (!rif)
9186 		return 0;
9187 
9188 	switch (event) {
9189 	case NETDEV_CHANGEMTU:
9190 	case NETDEV_CHANGEADDR:
9191 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack);
9192 	case NETDEV_PRE_CHANGEADDR:
9193 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
9194 	default:
9195 		WARN_ON_ONCE(1);
9196 		break;
9197 	}
9198 
9199 	return 0;
9200 }
9201 
9202 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
9203 				  struct net_device *l3_dev,
9204 				  struct netlink_ext_ack *extack)
9205 {
9206 	struct mlxsw_sp_rif *rif;
9207 
9208 	/* If netdev is already associated with a RIF, then we need to
9209 	 * destroy it and create a new one with the new virtual router ID.
9210 	 */
9211 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9212 	if (rif)
9213 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
9214 					  extack);
9215 
9216 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
9217 }
9218 
9219 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
9220 				    struct net_device *l3_dev)
9221 {
9222 	struct mlxsw_sp_rif *rif;
9223 
9224 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
9225 	if (!rif)
9226 		return;
9227 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
9228 }
9229 
9230 static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
9231 {
9232 	struct netdev_notifier_changeupper_info *info = ptr;
9233 
9234 	if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
9235 		return false;
9236 	return netif_is_l3_master(info->upper_dev);
9237 }
9238 
9239 static int
9240 mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
9241 			     struct netdev_notifier_changeupper_info *info)
9242 {
9243 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
9244 	int err = 0;
9245 
9246 	/* We do not create a RIF for a macvlan, but only use it to
9247 	 * direct more MAC addresses to the router.
9248 	 */
9249 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
9250 		return 0;
9251 
9252 	switch (event) {
9253 	case NETDEV_PRECHANGEUPPER:
9254 		break;
9255 	case NETDEV_CHANGEUPPER:
9256 		if (info->linking) {
9257 			struct netlink_ext_ack *extack;
9258 
9259 			extack = netdev_notifier_info_to_extack(&info->info);
9260 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
9261 		} else {
9262 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
9263 		}
9264 		break;
9265 	}
9266 
9267 	return err;
9268 }
9269 
9270 static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb,
9271 					   unsigned long event, void *ptr)
9272 {
9273 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
9274 	struct mlxsw_sp_router *router;
9275 	struct mlxsw_sp *mlxsw_sp;
9276 	int err = 0;
9277 
9278 	router = container_of(nb, struct mlxsw_sp_router, netdevice_nb);
9279 	mlxsw_sp = router->mlxsw_sp;
9280 
9281 	mutex_lock(&mlxsw_sp->router->lock);
9282 
9283 	if (mlxsw_sp_is_offload_xstats_event(event))
9284 		err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev,
9285 							    event, ptr);
9286 	else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
9287 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
9288 						       event, ptr);
9289 	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
9290 		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
9291 						       event, ptr);
9292 	else if (mlxsw_sp_is_router_event(event))
9293 		err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr);
9294 	else if (mlxsw_sp_is_vrf_event(event, ptr))
9295 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
9296 
9297 	mutex_unlock(&mlxsw_sp->router->lock);
9298 
9299 	return notifier_from_errno(err);
9300 }
9301 
9302 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
9303 					struct netdev_nested_priv *priv)
9304 {
9305 	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
9306 
9307 	if (!netif_is_macvlan(dev))
9308 		return 0;
9309 
9310 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
9311 				   mlxsw_sp_fid_index(rif->fid), false);
9312 }
9313 
9314 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
9315 {
9316 	struct netdev_nested_priv priv = {
9317 		.data = (void *)rif,
9318 	};
9319 
9320 	if (!netif_is_macvlan_port(rif->dev))
9321 		return 0;
9322 
9323 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
9324 	return netdev_walk_all_upper_dev_rcu(rif->dev,
9325 					     __mlxsw_sp_rif_macvlan_flush, &priv);
9326 }
9327 
9328 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
9329 				       const struct mlxsw_sp_rif_params *params)
9330 {
9331 	struct mlxsw_sp_rif_subport *rif_subport;
9332 
9333 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
9334 	refcount_set(&rif_subport->ref_count, 1);
9335 	rif_subport->vid = params->vid;
9336 	rif_subport->lag = params->lag;
9337 	if (params->lag)
9338 		rif_subport->lag_id = params->lag_id;
9339 	else
9340 		rif_subport->system_port = params->system_port;
9341 }
9342 
9343 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
9344 {
9345 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9346 	struct mlxsw_sp_rif_subport *rif_subport;
9347 	char ritr_pl[MLXSW_REG_RITR_LEN];
9348 	u16 efid;
9349 
9350 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
9351 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
9352 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
9353 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
9354 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9355 	efid = mlxsw_sp_fid_index(rif->fid);
9356 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
9357 				  rif_subport->lag ? rif_subport->lag_id :
9358 						     rif_subport->system_port,
9359 				  efid, 0);
9360 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9361 }
9362 
9363 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif,
9364 					  struct netlink_ext_ack *extack)
9365 {
9366 	u8 mac_profile;
9367 	int err;
9368 
9369 	err = mlxsw_sp_rif_mac_profile_get(rif->mlxsw_sp, rif->addr,
9370 					   &mac_profile, extack);
9371 	if (err)
9372 		return err;
9373 	rif->mac_profile_id = mac_profile;
9374 
9375 	err = mlxsw_sp_rif_subport_op(rif, true);
9376 	if (err)
9377 		goto err_rif_subport_op;
9378 
9379 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9380 				  mlxsw_sp_fid_index(rif->fid), true);
9381 	if (err)
9382 		goto err_rif_fdb_op;
9383 
9384 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9385 	if (err)
9386 		goto err_fid_rif_set;
9387 
9388 	return 0;
9389 
9390 err_fid_rif_set:
9391 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9392 			    mlxsw_sp_fid_index(rif->fid), false);
9393 err_rif_fdb_op:
9394 	mlxsw_sp_rif_subport_op(rif, false);
9395 err_rif_subport_op:
9396 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, mac_profile);
9397 	return err;
9398 }
9399 
9400 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
9401 {
9402 	struct mlxsw_sp_fid *fid = rif->fid;
9403 
9404 	mlxsw_sp_fid_rif_unset(fid);
9405 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9406 			    mlxsw_sp_fid_index(fid), false);
9407 	mlxsw_sp_rif_macvlan_flush(rif);
9408 	mlxsw_sp_rif_subport_op(rif, false);
9409 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9410 }
9411 
9412 static struct mlxsw_sp_fid *
9413 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
9414 			     struct netlink_ext_ack *extack)
9415 {
9416 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
9417 }
9418 
9419 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
9420 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
9421 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
9422 	.setup			= mlxsw_sp_rif_subport_setup,
9423 	.configure		= mlxsw_sp_rif_subport_configure,
9424 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
9425 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
9426 };
9427 
9428 static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable)
9429 {
9430 	enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF;
9431 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9432 	char ritr_pl[MLXSW_REG_RITR_LEN];
9433 
9434 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
9435 			    rif->dev->mtu);
9436 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
9437 	mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id);
9438 	mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid);
9439 
9440 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9441 }
9442 
9443 u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
9444 {
9445 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
9446 }
9447 
9448 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif,
9449 				      struct netlink_ext_ack *extack)
9450 {
9451 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9452 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9453 	u8 mac_profile;
9454 	int err;
9455 
9456 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9457 					   &mac_profile, extack);
9458 	if (err)
9459 		return err;
9460 	rif->mac_profile_id = mac_profile;
9461 
9462 	err = mlxsw_sp_rif_fid_op(rif, fid_index, true);
9463 	if (err)
9464 		goto err_rif_fid_op;
9465 
9466 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9467 				     mlxsw_sp_router_port(mlxsw_sp), true);
9468 	if (err)
9469 		goto err_fid_mc_flood_set;
9470 
9471 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9472 				     mlxsw_sp_router_port(mlxsw_sp), true);
9473 	if (err)
9474 		goto err_fid_bc_flood_set;
9475 
9476 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9477 				  mlxsw_sp_fid_index(rif->fid), true);
9478 	if (err)
9479 		goto err_rif_fdb_op;
9480 
9481 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9482 	if (err)
9483 		goto err_fid_rif_set;
9484 
9485 	return 0;
9486 
9487 err_fid_rif_set:
9488 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9489 			    mlxsw_sp_fid_index(rif->fid), false);
9490 err_rif_fdb_op:
9491 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9492 			       mlxsw_sp_router_port(mlxsw_sp), false);
9493 err_fid_bc_flood_set:
9494 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9495 			       mlxsw_sp_router_port(mlxsw_sp), false);
9496 err_fid_mc_flood_set:
9497 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
9498 err_rif_fid_op:
9499 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9500 	return err;
9501 }
9502 
9503 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
9504 {
9505 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
9506 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9507 	struct mlxsw_sp_fid *fid = rif->fid;
9508 
9509 	mlxsw_sp_fid_rif_unset(fid);
9510 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9511 			    mlxsw_sp_fid_index(fid), false);
9512 	mlxsw_sp_rif_macvlan_flush(rif);
9513 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9514 			       mlxsw_sp_router_port(mlxsw_sp), false);
9515 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9516 			       mlxsw_sp_router_port(mlxsw_sp), false);
9517 	mlxsw_sp_rif_fid_op(rif, fid_index, false);
9518 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9519 }
9520 
9521 static struct mlxsw_sp_fid *
9522 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
9523 			 struct netlink_ext_ack *extack)
9524 {
9525 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
9526 }
9527 
9528 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9529 {
9530 	struct switchdev_notifier_fdb_info info = {};
9531 	struct net_device *dev;
9532 
9533 	dev = br_fdb_find_port(rif->dev, mac, 0);
9534 	if (!dev)
9535 		return;
9536 
9537 	info.addr = mac;
9538 	info.vid = 0;
9539 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9540 				 NULL);
9541 }
9542 
9543 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
9544 	.type			= MLXSW_SP_RIF_TYPE_FID,
9545 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9546 	.configure		= mlxsw_sp_rif_fid_configure,
9547 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
9548 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
9549 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
9550 };
9551 
9552 static struct mlxsw_sp_fid *
9553 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
9554 			  struct netlink_ext_ack *extack)
9555 {
9556 	struct net_device *br_dev;
9557 	u16 vid;
9558 	int err;
9559 
9560 	if (is_vlan_dev(rif->dev)) {
9561 		vid = vlan_dev_vlan_id(rif->dev);
9562 		br_dev = vlan_dev_real_dev(rif->dev);
9563 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
9564 			return ERR_PTR(-EINVAL);
9565 	} else {
9566 		err = br_vlan_get_pvid(rif->dev, &vid);
9567 		if (err < 0 || !vid) {
9568 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
9569 			return ERR_PTR(-EINVAL);
9570 		}
9571 	}
9572 
9573 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
9574 }
9575 
9576 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
9577 {
9578 	struct switchdev_notifier_fdb_info info = {};
9579 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9580 	struct net_device *br_dev;
9581 	struct net_device *dev;
9582 
9583 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
9584 	dev = br_fdb_find_port(br_dev, mac, vid);
9585 	if (!dev)
9586 		return;
9587 
9588 	info.addr = mac;
9589 	info.vid = vid;
9590 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
9591 				 NULL);
9592 }
9593 
9594 static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid,
9595 				bool enable)
9596 {
9597 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9598 	char ritr_pl[MLXSW_REG_RITR_LEN];
9599 
9600 	mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id,
9601 				    rif->dev->mtu, rif->dev->dev_addr,
9602 				    rif->mac_profile_id, vid, efid);
9603 
9604 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9605 }
9606 
9607 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid,
9608 				       struct netlink_ext_ack *extack)
9609 {
9610 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9611 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9612 	u8 mac_profile;
9613 	int err;
9614 
9615 	err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, rif->addr,
9616 					   &mac_profile, extack);
9617 	if (err)
9618 		return err;
9619 	rif->mac_profile_id = mac_profile;
9620 
9621 	err = mlxsw_sp_rif_vlan_op(rif, vid, efid, true);
9622 	if (err)
9623 		goto err_rif_vlan_fid_op;
9624 
9625 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9626 				     mlxsw_sp_router_port(mlxsw_sp), true);
9627 	if (err)
9628 		goto err_fid_mc_flood_set;
9629 
9630 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9631 				     mlxsw_sp_router_port(mlxsw_sp), true);
9632 	if (err)
9633 		goto err_fid_bc_flood_set;
9634 
9635 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9636 				  mlxsw_sp_fid_index(rif->fid), true);
9637 	if (err)
9638 		goto err_rif_fdb_op;
9639 
9640 	err = mlxsw_sp_fid_rif_set(rif->fid, rif);
9641 	if (err)
9642 		goto err_fid_rif_set;
9643 
9644 	return 0;
9645 
9646 err_fid_rif_set:
9647 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9648 			    mlxsw_sp_fid_index(rif->fid), false);
9649 err_rif_fdb_op:
9650 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9651 			       mlxsw_sp_router_port(mlxsw_sp), false);
9652 err_fid_bc_flood_set:
9653 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9654 			       mlxsw_sp_router_port(mlxsw_sp), false);
9655 err_fid_mc_flood_set:
9656 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9657 err_rif_vlan_fid_op:
9658 	mlxsw_sp_rif_mac_profile_put(mlxsw_sp, mac_profile);
9659 	return err;
9660 }
9661 
9662 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
9663 {
9664 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
9665 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9666 
9667 	mlxsw_sp_fid_rif_unset(rif->fid);
9668 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
9669 			    mlxsw_sp_fid_index(rif->fid), false);
9670 	mlxsw_sp_rif_macvlan_flush(rif);
9671 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
9672 			       mlxsw_sp_router_port(mlxsw_sp), false);
9673 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
9674 			       mlxsw_sp_router_port(mlxsw_sp), false);
9675 	mlxsw_sp_rif_vlan_op(rif, vid, 0, false);
9676 	mlxsw_sp_rif_mac_profile_put(rif->mlxsw_sp, rif->mac_profile_id);
9677 }
9678 
9679 static int mlxsw_sp1_rif_vlan_configure(struct mlxsw_sp_rif *rif,
9680 					struct netlink_ext_ack *extack)
9681 {
9682 	return mlxsw_sp_rif_vlan_configure(rif, 0, extack);
9683 }
9684 
9685 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_vlan_ops = {
9686 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
9687 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9688 	.configure		= mlxsw_sp1_rif_vlan_configure,
9689 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
9690 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
9691 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
9692 };
9693 
9694 static int mlxsw_sp2_rif_vlan_configure(struct mlxsw_sp_rif *rif,
9695 					struct netlink_ext_ack *extack)
9696 {
9697 	u16 efid = mlxsw_sp_fid_index(rif->fid);
9698 
9699 	return mlxsw_sp_rif_vlan_configure(rif, efid, extack);
9700 }
9701 
9702 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_vlan_ops = {
9703 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
9704 	.rif_size		= sizeof(struct mlxsw_sp_rif),
9705 	.configure		= mlxsw_sp2_rif_vlan_configure,
9706 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
9707 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
9708 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
9709 };
9710 
9711 static struct mlxsw_sp_rif_ipip_lb *
9712 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
9713 {
9714 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
9715 }
9716 
9717 static void
9718 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
9719 			   const struct mlxsw_sp_rif_params *params)
9720 {
9721 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
9722 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
9723 
9724 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
9725 				 common);
9726 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
9727 	rif_lb->lb_config = params_lb->lb_config;
9728 }
9729 
9730 static int
9731 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
9732 				struct netlink_ext_ack *extack)
9733 {
9734 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9735 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
9736 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9737 	struct mlxsw_sp_vr *ul_vr;
9738 	int err;
9739 
9740 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack);
9741 	if (IS_ERR(ul_vr))
9742 		return PTR_ERR(ul_vr);
9743 
9744 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
9745 	if (err)
9746 		goto err_loopback_op;
9747 
9748 	lb_rif->ul_vr_id = ul_vr->id;
9749 	lb_rif->ul_rif_id = 0;
9750 	++ul_vr->rif_count;
9751 	return 0;
9752 
9753 err_loopback_op:
9754 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
9755 	return err;
9756 }
9757 
9758 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9759 {
9760 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9761 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9762 	struct mlxsw_sp_vr *ul_vr;
9763 
9764 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
9765 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
9766 
9767 	--ul_vr->rif_count;
9768 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
9769 }
9770 
9771 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
9772 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
9773 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
9774 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
9775 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
9776 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
9777 };
9778 
9779 static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
9780 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
9781 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp1_rif_vlan_ops,
9782 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
9783 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
9784 };
9785 
9786 static int
9787 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
9788 {
9789 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9790 	char ritr_pl[MLXSW_REG_RITR_LEN];
9791 
9792 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
9793 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
9794 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
9795 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
9796 
9797 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
9798 }
9799 
9800 static struct mlxsw_sp_rif *
9801 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
9802 		       struct netlink_ext_ack *extack)
9803 {
9804 	struct mlxsw_sp_rif *ul_rif;
9805 	u8 rif_entries = 1;
9806 	u16 rif_index;
9807 	int err;
9808 
9809 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries);
9810 	if (err) {
9811 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
9812 		return ERR_PTR(err);
9813 	}
9814 
9815 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
9816 	if (!ul_rif) {
9817 		err = -ENOMEM;
9818 		goto err_rif_alloc;
9819 	}
9820 
9821 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
9822 	ul_rif->mlxsw_sp = mlxsw_sp;
9823 	ul_rif->rif_entries = rif_entries;
9824 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
9825 	if (err)
9826 		goto ul_rif_op_err;
9827 
9828 	atomic_add(rif_entries, &mlxsw_sp->router->rifs_count);
9829 	return ul_rif;
9830 
9831 ul_rif_op_err:
9832 	mlxsw_sp->router->rifs[rif_index] = NULL;
9833 	kfree(ul_rif);
9834 err_rif_alloc:
9835 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
9836 	return ERR_PTR(err);
9837 }
9838 
9839 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
9840 {
9841 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9842 	u8 rif_entries = ul_rif->rif_entries;
9843 	u16 rif_index = ul_rif->rif_index;
9844 
9845 	atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count);
9846 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
9847 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
9848 	kfree(ul_rif);
9849 	mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries);
9850 }
9851 
9852 static struct mlxsw_sp_rif *
9853 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
9854 		    struct netlink_ext_ack *extack)
9855 {
9856 	struct mlxsw_sp_vr *vr;
9857 	int err;
9858 
9859 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
9860 	if (IS_ERR(vr))
9861 		return ERR_CAST(vr);
9862 
9863 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
9864 		return vr->ul_rif;
9865 
9866 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
9867 	if (IS_ERR(vr->ul_rif)) {
9868 		err = PTR_ERR(vr->ul_rif);
9869 		goto err_ul_rif_create;
9870 	}
9871 
9872 	vr->rif_count++;
9873 	refcount_set(&vr->ul_rif_refcnt, 1);
9874 
9875 	return vr->ul_rif;
9876 
9877 err_ul_rif_create:
9878 	mlxsw_sp_vr_put(mlxsw_sp, vr);
9879 	return ERR_PTR(err);
9880 }
9881 
9882 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
9883 {
9884 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
9885 	struct mlxsw_sp_vr *vr;
9886 
9887 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
9888 
9889 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
9890 		return;
9891 
9892 	vr->rif_count--;
9893 	mlxsw_sp_ul_rif_destroy(ul_rif);
9894 	mlxsw_sp_vr_put(mlxsw_sp, vr);
9895 }
9896 
9897 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
9898 			       u16 *ul_rif_index)
9899 {
9900 	struct mlxsw_sp_rif *ul_rif;
9901 	int err = 0;
9902 
9903 	mutex_lock(&mlxsw_sp->router->lock);
9904 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
9905 	if (IS_ERR(ul_rif)) {
9906 		err = PTR_ERR(ul_rif);
9907 		goto out;
9908 	}
9909 	*ul_rif_index = ul_rif->rif_index;
9910 out:
9911 	mutex_unlock(&mlxsw_sp->router->lock);
9912 	return err;
9913 }
9914 
9915 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
9916 {
9917 	struct mlxsw_sp_rif *ul_rif;
9918 
9919 	mutex_lock(&mlxsw_sp->router->lock);
9920 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
9921 	if (WARN_ON(!ul_rif))
9922 		goto out;
9923 
9924 	mlxsw_sp_ul_rif_put(ul_rif);
9925 out:
9926 	mutex_unlock(&mlxsw_sp->router->lock);
9927 }
9928 
9929 static int
9930 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif,
9931 				struct netlink_ext_ack *extack)
9932 {
9933 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9934 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
9935 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9936 	struct mlxsw_sp_rif *ul_rif;
9937 	int err;
9938 
9939 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack);
9940 	if (IS_ERR(ul_rif))
9941 		return PTR_ERR(ul_rif);
9942 
9943 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
9944 	if (err)
9945 		goto err_loopback_op;
9946 
9947 	lb_rif->ul_vr_id = 0;
9948 	lb_rif->ul_rif_id = ul_rif->rif_index;
9949 
9950 	return 0;
9951 
9952 err_loopback_op:
9953 	mlxsw_sp_ul_rif_put(ul_rif);
9954 	return err;
9955 }
9956 
9957 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
9958 {
9959 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
9960 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
9961 	struct mlxsw_sp_rif *ul_rif;
9962 
9963 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
9964 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
9965 	mlxsw_sp_ul_rif_put(ul_rif);
9966 }
9967 
9968 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
9969 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
9970 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
9971 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
9972 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
9973 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
9974 };
9975 
9976 static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
9977 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
9978 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp2_rif_vlan_ops,
9979 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
9980 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
9981 };
9982 
9983 static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp)
9984 {
9985 	struct gen_pool *rifs_table;
9986 	int err;
9987 
9988 	rifs_table = gen_pool_create(0, -1);
9989 	if (!rifs_table)
9990 		return -ENOMEM;
9991 
9992 	gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align,
9993 			  NULL);
9994 
9995 	err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET,
9996 			   MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1);
9997 	if (err)
9998 		goto err_gen_pool_add;
9999 
10000 	mlxsw_sp->router->rifs_table = rifs_table;
10001 
10002 	return 0;
10003 
10004 err_gen_pool_add:
10005 	gen_pool_destroy(rifs_table);
10006 	return err;
10007 }
10008 
10009 static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp)
10010 {
10011 	gen_pool_destroy(mlxsw_sp->router->rifs_table);
10012 }
10013 
10014 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
10015 {
10016 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10017 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10018 	struct mlxsw_core *core = mlxsw_sp->core;
10019 	int err;
10020 
10021 	if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES))
10022 		return -EIO;
10023 	mlxsw_sp->router->max_rif_mac_profile =
10024 		MLXSW_CORE_RES_GET(core, MAX_RIF_MAC_PROFILES);
10025 
10026 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
10027 					 sizeof(struct mlxsw_sp_rif *),
10028 					 GFP_KERNEL);
10029 	if (!mlxsw_sp->router->rifs)
10030 		return -ENOMEM;
10031 
10032 	err = mlxsw_sp_rifs_table_init(mlxsw_sp);
10033 	if (err)
10034 		goto err_rifs_table_init;
10035 
10036 	idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
10037 	atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
10038 	atomic_set(&mlxsw_sp->router->rifs_count, 0);
10039 	devl_resource_occ_get_register(devlink,
10040 				       MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
10041 				       mlxsw_sp_rif_mac_profiles_occ_get,
10042 				       mlxsw_sp);
10043 	devl_resource_occ_get_register(devlink,
10044 				       MLXSW_SP_RESOURCE_RIFS,
10045 				       mlxsw_sp_rifs_occ_get,
10046 				       mlxsw_sp);
10047 
10048 	return 0;
10049 
10050 err_rifs_table_init:
10051 	kfree(mlxsw_sp->router->rifs);
10052 	return err;
10053 }
10054 
10055 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
10056 {
10057 	int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10058 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
10059 	int i;
10060 
10061 	WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
10062 	for (i = 0; i < max_rifs; i++)
10063 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
10064 
10065 	devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
10066 	devl_resource_occ_get_unregister(devlink,
10067 					 MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
10068 	WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
10069 	idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr);
10070 	mlxsw_sp_rifs_table_fini(mlxsw_sp);
10071 	kfree(mlxsw_sp->router->rifs);
10072 }
10073 
10074 static int
10075 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
10076 {
10077 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
10078 
10079 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
10080 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
10081 }
10082 
10083 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
10084 {
10085 	int err;
10086 
10087 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
10088 
10089 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
10090 	if (err)
10091 		return err;
10092 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
10093 	if (err)
10094 		return err;
10095 
10096 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
10097 }
10098 
10099 static int mlxsw_sp1_ipips_init(struct mlxsw_sp *mlxsw_sp)
10100 {
10101 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp1_ipip_ops_arr;
10102 	return mlxsw_sp_ipips_init(mlxsw_sp);
10103 }
10104 
10105 static int mlxsw_sp2_ipips_init(struct mlxsw_sp *mlxsw_sp)
10106 {
10107 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp2_ipip_ops_arr;
10108 	return mlxsw_sp_ipips_init(mlxsw_sp);
10109 }
10110 
10111 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
10112 {
10113 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
10114 }
10115 
10116 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
10117 {
10118 	struct mlxsw_sp_router *router;
10119 
10120 	/* Flush pending FIB notifications and then flush the device's
10121 	 * table before requesting another dump. The FIB notification
10122 	 * block is unregistered, so no need to take RTNL.
10123 	 */
10124 	mlxsw_core_flush_owq();
10125 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
10126 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
10127 }
10128 
10129 #ifdef CONFIG_IP_ROUTE_MULTIPATH
10130 struct mlxsw_sp_mp_hash_config {
10131 	DECLARE_BITMAP(headers, __MLXSW_REG_RECR2_HEADER_CNT);
10132 	DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
10133 	DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
10134 	DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
10135 	bool inc_parsing_depth;
10136 };
10137 
10138 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
10139 	bitmap_set(_headers, MLXSW_REG_RECR2_##_header, 1)
10140 
10141 #define MLXSW_SP_MP_HASH_FIELD_SET(_fields, _field) \
10142 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, 1)
10143 
10144 #define MLXSW_SP_MP_HASH_FIELD_RANGE_SET(_fields, _field, _nr) \
10145 	bitmap_set(_fields, MLXSW_REG_RECR2_##_field, _nr)
10146 
10147 static void mlxsw_sp_mp_hash_inner_l3(struct mlxsw_sp_mp_hash_config *config)
10148 {
10149 	unsigned long *inner_headers = config->inner_headers;
10150 	unsigned long *inner_fields = config->inner_fields;
10151 
10152 	/* IPv4 inner */
10153 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10154 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10155 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10156 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10157 	/* IPv6 inner */
10158 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10159 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10160 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10161 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10162 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10163 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10164 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10165 	MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10166 }
10167 
10168 static void mlxsw_sp_mp4_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10169 {
10170 	unsigned long *headers = config->headers;
10171 	unsigned long *fields = config->fields;
10172 
10173 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10174 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10175 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10176 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10177 }
10178 
10179 static void
10180 mlxsw_sp_mp_hash_inner_custom(struct mlxsw_sp_mp_hash_config *config,
10181 			      u32 hash_fields)
10182 {
10183 	unsigned long *inner_headers = config->inner_headers;
10184 	unsigned long *inner_fields = config->inner_fields;
10185 
10186 	/* IPv4 Inner */
10187 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_NOT_TCP_NOT_UDP);
10188 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV4_EN_TCP_UDP);
10189 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
10190 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_SIP0, 4);
10191 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
10192 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV4_DIP0, 4);
10193 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10194 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV4_PROTOCOL);
10195 	/* IPv6 inner */
10196 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_NOT_TCP_NOT_UDP);
10197 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, IPV6_EN_TCP_UDP);
10198 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) {
10199 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_SIP0_7);
10200 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_SIP8, 8);
10201 	}
10202 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) {
10203 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_DIP0_7);
10204 		MLXSW_SP_MP_HASH_FIELD_RANGE_SET(inner_fields, INNER_IPV6_DIP8, 8);
10205 	}
10206 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
10207 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_NEXT_HEADER);
10208 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
10209 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_IPV6_FLOW_LABEL);
10210 	/* L4 inner */
10211 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV4);
10212 	MLXSW_SP_MP_HASH_HEADER_SET(inner_headers, TCP_UDP_EN_IPV6);
10213 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
10214 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_SPORT);
10215 	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
10216 		MLXSW_SP_MP_HASH_FIELD_SET(inner_fields, INNER_TCP_UDP_DPORT);
10217 }
10218 
10219 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp,
10220 				   struct mlxsw_sp_mp_hash_config *config)
10221 {
10222 	struct net *net = mlxsw_sp_net(mlxsw_sp);
10223 	unsigned long *headers = config->headers;
10224 	unsigned long *fields = config->fields;
10225 	u32 hash_fields;
10226 
10227 	switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
10228 	case 0:
10229 		mlxsw_sp_mp4_hash_outer_addr(config);
10230 		break;
10231 	case 1:
10232 		mlxsw_sp_mp4_hash_outer_addr(config);
10233 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10234 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10235 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10236 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10237 		break;
10238 	case 2:
10239 		/* Outer */
10240 		mlxsw_sp_mp4_hash_outer_addr(config);
10241 		/* Inner */
10242 		mlxsw_sp_mp_hash_inner_l3(config);
10243 		break;
10244 	case 3:
10245 		hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
10246 		/* Outer */
10247 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
10248 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
10249 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV4);
10250 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
10251 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_SIP0, 4);
10252 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
10253 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV4_DIP0, 4);
10254 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10255 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV4_PROTOCOL);
10256 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10257 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10258 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10259 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10260 		/* Inner */
10261 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10262 		break;
10263 	}
10264 }
10265 
10266 static void mlxsw_sp_mp6_hash_outer_addr(struct mlxsw_sp_mp_hash_config *config)
10267 {
10268 	unsigned long *headers = config->headers;
10269 	unsigned long *fields = config->fields;
10270 
10271 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10272 	MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10273 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10274 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10275 	MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10276 	MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10277 }
10278 
10279 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
10280 				   struct mlxsw_sp_mp_hash_config *config)
10281 {
10282 	u32 hash_fields = ip6_multipath_hash_fields(mlxsw_sp_net(mlxsw_sp));
10283 	unsigned long *headers = config->headers;
10284 	unsigned long *fields = config->fields;
10285 
10286 	switch (ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp))) {
10287 	case 0:
10288 		mlxsw_sp_mp6_hash_outer_addr(config);
10289 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10290 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10291 		break;
10292 	case 1:
10293 		mlxsw_sp_mp6_hash_outer_addr(config);
10294 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10295 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10296 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10297 		MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10298 		break;
10299 	case 2:
10300 		/* Outer */
10301 		mlxsw_sp_mp6_hash_outer_addr(config);
10302 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10303 		MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10304 		/* Inner */
10305 		mlxsw_sp_mp_hash_inner_l3(config);
10306 		config->inc_parsing_depth = true;
10307 		break;
10308 	case 3:
10309 		/* Outer */
10310 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_NOT_TCP_NOT_UDP);
10311 		MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV6_EN_TCP_UDP);
10312 		MLXSW_SP_MP_HASH_HEADER_SET(headers, TCP_UDP_EN_IPV6);
10313 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) {
10314 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_SIP0_7);
10315 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_SIP8, 8);
10316 		}
10317 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) {
10318 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_DIP0_7);
10319 			MLXSW_SP_MP_HASH_FIELD_RANGE_SET(fields, IPV6_DIP8, 8);
10320 		}
10321 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
10322 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_NEXT_HEADER);
10323 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
10324 			MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
10325 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
10326 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_SPORT);
10327 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
10328 			MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
10329 		/* Inner */
10330 		mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
10331 		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
10332 			config->inc_parsing_depth = true;
10333 		break;
10334 	}
10335 }
10336 
10337 static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
10338 						 bool old_inc_parsing_depth,
10339 						 bool new_inc_parsing_depth)
10340 {
10341 	int err;
10342 
10343 	if (!old_inc_parsing_depth && new_inc_parsing_depth) {
10344 		err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
10345 		if (err)
10346 			return err;
10347 		mlxsw_sp->router->inc_parsing_depth = true;
10348 	} else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
10349 		mlxsw_sp_parsing_depth_dec(mlxsw_sp);
10350 		mlxsw_sp->router->inc_parsing_depth = false;
10351 	}
10352 
10353 	return 0;
10354 }
10355 
10356 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
10357 {
10358 	bool old_inc_parsing_depth, new_inc_parsing_depth;
10359 	struct mlxsw_sp_mp_hash_config config = {};
10360 	char recr2_pl[MLXSW_REG_RECR2_LEN];
10361 	unsigned long bit;
10362 	u32 seed;
10363 	int err;
10364 
10365 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
10366 	mlxsw_reg_recr2_pack(recr2_pl, seed);
10367 	mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
10368 	mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
10369 
10370 	old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10371 	new_inc_parsing_depth = config.inc_parsing_depth;
10372 	err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
10373 						    old_inc_parsing_depth,
10374 						    new_inc_parsing_depth);
10375 	if (err)
10376 		return err;
10377 
10378 	for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
10379 		mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
10380 	for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
10381 		mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, bit, 1);
10382 	for_each_set_bit(bit, config.inner_headers, __MLXSW_REG_RECR2_HEADER_CNT)
10383 		mlxsw_reg_recr2_inner_header_enables_set(recr2_pl, bit, 1);
10384 	for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
10385 		mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
10386 
10387 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
10388 	if (err)
10389 		goto err_reg_write;
10390 
10391 	return 0;
10392 
10393 err_reg_write:
10394 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
10395 					      old_inc_parsing_depth);
10396 	return err;
10397 }
10398 
10399 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
10400 {
10401 	bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
10402 
10403 	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
10404 					      false);
10405 }
10406 #else
10407 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
10408 {
10409 	return 0;
10410 }
10411 
10412 static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
10413 {
10414 }
10415 #endif
10416 
10417 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
10418 {
10419 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
10420 	unsigned int i;
10421 
10422 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
10423 
10424 	/* HW is determining switch priority based on DSCP-bits, but the
10425 	 * kernel is still doing that based on the ToS. Since there's a
10426 	 * mismatch in bits we need to make sure to translate the right
10427 	 * value ToS would observe, skipping the 2 least-significant ECN bits.
10428 	 */
10429 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
10430 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
10431 
10432 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
10433 }
10434 
10435 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
10436 {
10437 	struct net *net = mlxsw_sp_net(mlxsw_sp);
10438 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
10439 	u64 max_rifs;
10440 	bool usp;
10441 
10442 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
10443 		return -EIO;
10444 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
10445 	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
10446 
10447 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
10448 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
10449 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
10450 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
10451 }
10452 
10453 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
10454 {
10455 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
10456 
10457 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
10458 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
10459 }
10460 
10461 static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp)
10462 {
10463 	u16 lb_rif_index;
10464 	int err;
10465 
10466 	/* Create a generic loopback RIF associated with the main table
10467 	 * (default VRF). Any table can be used, but the main table exists
10468 	 * anyway, so we do not waste resources.
10469 	 */
10470 	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN,
10471 					 &lb_rif_index);
10472 	if (err)
10473 		return err;
10474 
10475 	mlxsw_sp->router->lb_rif_index = lb_rif_index;
10476 
10477 	return 0;
10478 }
10479 
10480 static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
10481 {
10482 	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
10483 }
10484 
10485 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
10486 {
10487 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
10488 
10489 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
10490 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
10491 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10492 
10493 	return 0;
10494 }
10495 
10496 const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
10497 	.init = mlxsw_sp1_router_init,
10498 	.ipips_init = mlxsw_sp1_ipips_init,
10499 };
10500 
10501 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
10502 {
10503 	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
10504 
10505 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
10506 	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
10507 	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
10508 
10509 	return 0;
10510 }
10511 
10512 const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
10513 	.init = mlxsw_sp2_router_init,
10514 	.ipips_init = mlxsw_sp2_ipips_init,
10515 };
10516 
10517 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
10518 			 struct netlink_ext_ack *extack)
10519 {
10520 	struct mlxsw_sp_router *router;
10521 	struct notifier_block *nb;
10522 	int err;
10523 
10524 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
10525 	if (!router)
10526 		return -ENOMEM;
10527 	mutex_init(&router->lock);
10528 	mlxsw_sp->router = router;
10529 	router->mlxsw_sp = mlxsw_sp;
10530 
10531 	err = mlxsw_sp->router_ops->init(mlxsw_sp);
10532 	if (err)
10533 		goto err_router_ops_init;
10534 
10535 	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
10536 	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
10537 			  mlxsw_sp_nh_grp_activity_work);
10538 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
10539 	err = __mlxsw_sp_router_init(mlxsw_sp);
10540 	if (err)
10541 		goto err_router_init;
10542 
10543 	err = mlxsw_sp_rifs_init(mlxsw_sp);
10544 	if (err)
10545 		goto err_rifs_init;
10546 
10547 	err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp);
10548 	if (err)
10549 		goto err_ipips_init;
10550 
10551 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
10552 			      &mlxsw_sp_nexthop_ht_params);
10553 	if (err)
10554 		goto err_nexthop_ht_init;
10555 
10556 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
10557 			      &mlxsw_sp_nexthop_group_ht_params);
10558 	if (err)
10559 		goto err_nexthop_group_ht_init;
10560 
10561 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
10562 	err = mlxsw_sp_lpm_init(mlxsw_sp);
10563 	if (err)
10564 		goto err_lpm_init;
10565 
10566 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
10567 	if (err)
10568 		goto err_mr_init;
10569 
10570 	err = mlxsw_sp_vrs_init(mlxsw_sp);
10571 	if (err)
10572 		goto err_vrs_init;
10573 
10574 	err = mlxsw_sp_lb_rif_init(mlxsw_sp);
10575 	if (err)
10576 		goto err_lb_rif_init;
10577 
10578 	err = mlxsw_sp_neigh_init(mlxsw_sp);
10579 	if (err)
10580 		goto err_neigh_init;
10581 
10582 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
10583 	if (err)
10584 		goto err_mp_hash_init;
10585 
10586 	err = mlxsw_sp_dscp_init(mlxsw_sp);
10587 	if (err)
10588 		goto err_dscp_init;
10589 
10590 	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
10591 	err = register_inetaddr_notifier(&router->inetaddr_nb);
10592 	if (err)
10593 		goto err_register_inetaddr_notifier;
10594 
10595 	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
10596 	err = register_inet6addr_notifier(&router->inet6addr_nb);
10597 	if (err)
10598 		goto err_register_inet6addr_notifier;
10599 
10600 	router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event;
10601 	err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
10602 	if (err)
10603 		goto err_register_inetaddr_valid_notifier;
10604 
10605 	nb = &router->inet6addr_valid_nb;
10606 	nb->notifier_call = mlxsw_sp_inet6addr_valid_event;
10607 	err = register_inet6addr_validator_notifier(nb);
10608 	if (err)
10609 		goto err_register_inet6addr_valid_notifier;
10610 
10611 	mlxsw_sp->router->netevent_nb.notifier_call =
10612 		mlxsw_sp_router_netevent_event;
10613 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
10614 	if (err)
10615 		goto err_register_netevent_notifier;
10616 
10617 	mlxsw_sp->router->nexthop_nb.notifier_call =
10618 		mlxsw_sp_nexthop_obj_event;
10619 	err = register_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10620 					&mlxsw_sp->router->nexthop_nb,
10621 					extack);
10622 	if (err)
10623 		goto err_register_nexthop_notifier;
10624 
10625 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
10626 	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
10627 				    &mlxsw_sp->router->fib_nb,
10628 				    mlxsw_sp_router_fib_dump_flush, extack);
10629 	if (err)
10630 		goto err_register_fib_notifier;
10631 
10632 	mlxsw_sp->router->netdevice_nb.notifier_call =
10633 		mlxsw_sp_router_netdevice_event;
10634 	err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
10635 					      &mlxsw_sp->router->netdevice_nb);
10636 	if (err)
10637 		goto err_register_netdev_notifier;
10638 
10639 	return 0;
10640 
10641 err_register_netdev_notifier:
10642 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
10643 				&mlxsw_sp->router->fib_nb);
10644 err_register_fib_notifier:
10645 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10646 				    &mlxsw_sp->router->nexthop_nb);
10647 err_register_nexthop_notifier:
10648 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
10649 err_register_netevent_notifier:
10650 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
10651 err_register_inet6addr_valid_notifier:
10652 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
10653 err_register_inetaddr_valid_notifier:
10654 	unregister_inet6addr_notifier(&router->inet6addr_nb);
10655 err_register_inet6addr_notifier:
10656 	unregister_inetaddr_notifier(&router->inetaddr_nb);
10657 err_register_inetaddr_notifier:
10658 	mlxsw_core_flush_owq();
10659 err_dscp_init:
10660 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
10661 err_mp_hash_init:
10662 	mlxsw_sp_neigh_fini(mlxsw_sp);
10663 err_neigh_init:
10664 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
10665 err_lb_rif_init:
10666 	mlxsw_sp_vrs_fini(mlxsw_sp);
10667 err_vrs_init:
10668 	mlxsw_sp_mr_fini(mlxsw_sp);
10669 err_mr_init:
10670 	mlxsw_sp_lpm_fini(mlxsw_sp);
10671 err_lpm_init:
10672 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
10673 err_nexthop_group_ht_init:
10674 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
10675 err_nexthop_ht_init:
10676 	mlxsw_sp_ipips_fini(mlxsw_sp);
10677 err_ipips_init:
10678 	mlxsw_sp_rifs_fini(mlxsw_sp);
10679 err_rifs_init:
10680 	__mlxsw_sp_router_fini(mlxsw_sp);
10681 err_router_init:
10682 	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
10683 err_router_ops_init:
10684 	mutex_destroy(&mlxsw_sp->router->lock);
10685 	kfree(mlxsw_sp->router);
10686 	return err;
10687 }
10688 
10689 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
10690 {
10691 	struct mlxsw_sp_router *router = mlxsw_sp->router;
10692 
10693 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
10694 					  &router->netdevice_nb);
10695 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb);
10696 	unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp),
10697 				    &router->nexthop_nb);
10698 	unregister_netevent_notifier(&router->netevent_nb);
10699 	unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb);
10700 	unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb);
10701 	unregister_inet6addr_notifier(&router->inet6addr_nb);
10702 	unregister_inetaddr_notifier(&router->inetaddr_nb);
10703 	mlxsw_core_flush_owq();
10704 	mlxsw_sp_mp_hash_fini(mlxsw_sp);
10705 	mlxsw_sp_neigh_fini(mlxsw_sp);
10706 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
10707 	mlxsw_sp_vrs_fini(mlxsw_sp);
10708 	mlxsw_sp_mr_fini(mlxsw_sp);
10709 	mlxsw_sp_lpm_fini(mlxsw_sp);
10710 	rhashtable_destroy(&router->nexthop_group_ht);
10711 	rhashtable_destroy(&router->nexthop_ht);
10712 	mlxsw_sp_ipips_fini(mlxsw_sp);
10713 	mlxsw_sp_rifs_fini(mlxsw_sp);
10714 	__mlxsw_sp_router_fini(mlxsw_sp);
10715 	cancel_delayed_work_sync(&router->nh_grp_activity_dw);
10716 	mutex_destroy(&router->lock);
10717 	kfree(router);
10718 }
10719