1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 
74 struct mlxsw_sp_vr;
75 struct mlxsw_sp_lpm_tree;
76 struct mlxsw_sp_rif_ops;
77 
78 struct mlxsw_sp_router {
79 	struct mlxsw_sp *mlxsw_sp;
80 	struct mlxsw_sp_rif **rifs;
81 	struct mlxsw_sp_vr *vrs;
82 	struct rhashtable neigh_ht;
83 	struct rhashtable nexthop_group_ht;
84 	struct rhashtable nexthop_ht;
85 	struct list_head nexthop_list;
86 	struct {
87 		struct mlxsw_sp_lpm_tree *trees;
88 		unsigned int tree_count;
89 	} lpm;
90 	struct {
91 		struct delayed_work dw;
92 		unsigned long interval;	/* ms */
93 	} neighs_update;
94 	struct delayed_work nexthop_probe_dw;
95 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
96 	struct list_head nexthop_neighs_list;
97 	struct list_head ipip_list;
98 	bool aborted;
99 	struct notifier_block fib_nb;
100 	struct notifier_block netevent_nb;
101 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
102 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
103 };
104 
105 struct mlxsw_sp_rif {
106 	struct list_head nexthop_list;
107 	struct list_head neigh_list;
108 	struct net_device *dev;
109 	struct mlxsw_sp_fid *fid;
110 	unsigned char addr[ETH_ALEN];
111 	int mtu;
112 	u16 rif_index;
113 	u16 vr_id;
114 	const struct mlxsw_sp_rif_ops *ops;
115 	struct mlxsw_sp *mlxsw_sp;
116 
117 	unsigned int counter_ingress;
118 	bool counter_ingress_valid;
119 	unsigned int counter_egress;
120 	bool counter_egress_valid;
121 };
122 
123 struct mlxsw_sp_rif_params {
124 	struct net_device *dev;
125 	union {
126 		u16 system_port;
127 		u16 lag_id;
128 	};
129 	u16 vid;
130 	bool lag;
131 };
132 
133 struct mlxsw_sp_rif_subport {
134 	struct mlxsw_sp_rif common;
135 	union {
136 		u16 system_port;
137 		u16 lag_id;
138 	};
139 	u16 vid;
140 	bool lag;
141 };
142 
143 struct mlxsw_sp_rif_ipip_lb {
144 	struct mlxsw_sp_rif common;
145 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
146 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
147 };
148 
149 struct mlxsw_sp_rif_params_ipip_lb {
150 	struct mlxsw_sp_rif_params common;
151 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
152 };
153 
154 struct mlxsw_sp_rif_ops {
155 	enum mlxsw_sp_rif_type type;
156 	size_t rif_size;
157 
158 	void (*setup)(struct mlxsw_sp_rif *rif,
159 		      const struct mlxsw_sp_rif_params *params);
160 	int (*configure)(struct mlxsw_sp_rif *rif);
161 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
162 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
163 };
164 
165 static unsigned int *
166 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
167 			   enum mlxsw_sp_rif_counter_dir dir)
168 {
169 	switch (dir) {
170 	case MLXSW_SP_RIF_COUNTER_EGRESS:
171 		return &rif->counter_egress;
172 	case MLXSW_SP_RIF_COUNTER_INGRESS:
173 		return &rif->counter_ingress;
174 	}
175 	return NULL;
176 }
177 
178 static bool
179 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
180 			       enum mlxsw_sp_rif_counter_dir dir)
181 {
182 	switch (dir) {
183 	case MLXSW_SP_RIF_COUNTER_EGRESS:
184 		return rif->counter_egress_valid;
185 	case MLXSW_SP_RIF_COUNTER_INGRESS:
186 		return rif->counter_ingress_valid;
187 	}
188 	return false;
189 }
190 
191 static void
192 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
193 			       enum mlxsw_sp_rif_counter_dir dir,
194 			       bool valid)
195 {
196 	switch (dir) {
197 	case MLXSW_SP_RIF_COUNTER_EGRESS:
198 		rif->counter_egress_valid = valid;
199 		break;
200 	case MLXSW_SP_RIF_COUNTER_INGRESS:
201 		rif->counter_ingress_valid = valid;
202 		break;
203 	}
204 }
205 
206 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
207 				     unsigned int counter_index, bool enable,
208 				     enum mlxsw_sp_rif_counter_dir dir)
209 {
210 	char ritr_pl[MLXSW_REG_RITR_LEN];
211 	bool is_egress = false;
212 	int err;
213 
214 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
215 		is_egress = true;
216 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
217 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
218 	if (err)
219 		return err;
220 
221 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
222 				    is_egress);
223 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
224 }
225 
226 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
227 				   struct mlxsw_sp_rif *rif,
228 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
229 {
230 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
231 	unsigned int *p_counter_index;
232 	bool valid;
233 	int err;
234 
235 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
236 	if (!valid)
237 		return -EINVAL;
238 
239 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
240 	if (!p_counter_index)
241 		return -EINVAL;
242 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
243 			     MLXSW_REG_RICNT_OPCODE_NOP);
244 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
245 	if (err)
246 		return err;
247 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
248 	return 0;
249 }
250 
251 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
252 				      unsigned int counter_index)
253 {
254 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
255 
256 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
257 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
258 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
259 }
260 
261 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
262 			       struct mlxsw_sp_rif *rif,
263 			       enum mlxsw_sp_rif_counter_dir dir)
264 {
265 	unsigned int *p_counter_index;
266 	int err;
267 
268 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
269 	if (!p_counter_index)
270 		return -EINVAL;
271 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
272 				     p_counter_index);
273 	if (err)
274 		return err;
275 
276 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
277 	if (err)
278 		goto err_counter_clear;
279 
280 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
281 					*p_counter_index, true, dir);
282 	if (err)
283 		goto err_counter_edit;
284 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
285 	return 0;
286 
287 err_counter_edit:
288 err_counter_clear:
289 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
290 			      *p_counter_index);
291 	return err;
292 }
293 
294 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
295 			       struct mlxsw_sp_rif *rif,
296 			       enum mlxsw_sp_rif_counter_dir dir)
297 {
298 	unsigned int *p_counter_index;
299 
300 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
301 		return;
302 
303 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
304 	if (WARN_ON(!p_counter_index))
305 		return;
306 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
307 				  *p_counter_index, false, dir);
308 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
309 			      *p_counter_index);
310 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
311 }
312 
313 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
314 {
315 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
316 	struct devlink *devlink;
317 
318 	devlink = priv_to_devlink(mlxsw_sp->core);
319 	if (!devlink_dpipe_table_counter_enabled(devlink,
320 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
321 		return;
322 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
323 }
324 
325 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
326 {
327 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328 
329 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
330 }
331 
332 static struct mlxsw_sp_rif *
333 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
334 			 const struct net_device *dev);
335 
336 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
337 
338 struct mlxsw_sp_prefix_usage {
339 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
340 };
341 
342 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
343 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
344 
345 static bool
346 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
347 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
348 {
349 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
350 }
351 
352 static bool
353 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
354 {
355 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
356 
357 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
358 }
359 
360 static void
361 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
362 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
363 {
364 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
365 }
366 
367 static void
368 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
369 			  unsigned char prefix_len)
370 {
371 	set_bit(prefix_len, prefix_usage->b);
372 }
373 
374 static void
375 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
376 			    unsigned char prefix_len)
377 {
378 	clear_bit(prefix_len, prefix_usage->b);
379 }
380 
381 struct mlxsw_sp_fib_key {
382 	unsigned char addr[sizeof(struct in6_addr)];
383 	unsigned char prefix_len;
384 };
385 
386 enum mlxsw_sp_fib_entry_type {
387 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
388 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
389 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
390 
391 	/* This is a special case of local delivery, where a packet should be
392 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
393 	 * because that's a type of next hop, not of FIB entry. (There can be
394 	 * several next hops in a REMOTE entry, and some of them may be
395 	 * encapsulating entries.)
396 	 */
397 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
398 };
399 
400 struct mlxsw_sp_nexthop_group;
401 struct mlxsw_sp_fib;
402 
403 struct mlxsw_sp_fib_node {
404 	struct list_head entry_list;
405 	struct list_head list;
406 	struct rhash_head ht_node;
407 	struct mlxsw_sp_fib *fib;
408 	struct mlxsw_sp_fib_key key;
409 };
410 
411 struct mlxsw_sp_fib_entry_decap {
412 	struct mlxsw_sp_ipip_entry *ipip_entry;
413 	u32 tunnel_index;
414 };
415 
416 struct mlxsw_sp_fib_entry {
417 	struct list_head list;
418 	struct mlxsw_sp_fib_node *fib_node;
419 	enum mlxsw_sp_fib_entry_type type;
420 	struct list_head nexthop_group_node;
421 	struct mlxsw_sp_nexthop_group *nh_group;
422 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
423 };
424 
425 struct mlxsw_sp_fib4_entry {
426 	struct mlxsw_sp_fib_entry common;
427 	u32 tb_id;
428 	u32 prio;
429 	u8 tos;
430 	u8 type;
431 };
432 
433 struct mlxsw_sp_fib6_entry {
434 	struct mlxsw_sp_fib_entry common;
435 	struct list_head rt6_list;
436 	unsigned int nrt6;
437 };
438 
439 struct mlxsw_sp_rt6 {
440 	struct list_head list;
441 	struct rt6_info *rt;
442 };
443 
444 struct mlxsw_sp_lpm_tree {
445 	u8 id; /* tree ID */
446 	unsigned int ref_count;
447 	enum mlxsw_sp_l3proto proto;
448 	struct mlxsw_sp_prefix_usage prefix_usage;
449 };
450 
451 struct mlxsw_sp_fib {
452 	struct rhashtable ht;
453 	struct list_head node_list;
454 	struct mlxsw_sp_vr *vr;
455 	struct mlxsw_sp_lpm_tree *lpm_tree;
456 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
457 	struct mlxsw_sp_prefix_usage prefix_usage;
458 	enum mlxsw_sp_l3proto proto;
459 };
460 
461 struct mlxsw_sp_vr {
462 	u16 id; /* virtual router ID */
463 	u32 tb_id; /* kernel fib table id */
464 	unsigned int rif_count;
465 	struct mlxsw_sp_fib *fib4;
466 	struct mlxsw_sp_fib *fib6;
467 	struct mlxsw_sp_mr_table *mr4_table;
468 };
469 
470 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
471 
472 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
473 						enum mlxsw_sp_l3proto proto)
474 {
475 	struct mlxsw_sp_fib *fib;
476 	int err;
477 
478 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
479 	if (!fib)
480 		return ERR_PTR(-ENOMEM);
481 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
482 	if (err)
483 		goto err_rhashtable_init;
484 	INIT_LIST_HEAD(&fib->node_list);
485 	fib->proto = proto;
486 	fib->vr = vr;
487 	return fib;
488 
489 err_rhashtable_init:
490 	kfree(fib);
491 	return ERR_PTR(err);
492 }
493 
494 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
495 {
496 	WARN_ON(!list_empty(&fib->node_list));
497 	WARN_ON(fib->lpm_tree);
498 	rhashtable_destroy(&fib->ht);
499 	kfree(fib);
500 }
501 
502 static struct mlxsw_sp_lpm_tree *
503 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
504 {
505 	static struct mlxsw_sp_lpm_tree *lpm_tree;
506 	int i;
507 
508 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
509 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
510 		if (lpm_tree->ref_count == 0)
511 			return lpm_tree;
512 	}
513 	return NULL;
514 }
515 
516 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
517 				   struct mlxsw_sp_lpm_tree *lpm_tree)
518 {
519 	char ralta_pl[MLXSW_REG_RALTA_LEN];
520 
521 	mlxsw_reg_ralta_pack(ralta_pl, true,
522 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
523 			     lpm_tree->id);
524 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
525 }
526 
527 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
528 				   struct mlxsw_sp_lpm_tree *lpm_tree)
529 {
530 	char ralta_pl[MLXSW_REG_RALTA_LEN];
531 
532 	mlxsw_reg_ralta_pack(ralta_pl, false,
533 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
534 			     lpm_tree->id);
535 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
536 }
537 
538 static int
539 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
540 				  struct mlxsw_sp_prefix_usage *prefix_usage,
541 				  struct mlxsw_sp_lpm_tree *lpm_tree)
542 {
543 	char ralst_pl[MLXSW_REG_RALST_LEN];
544 	u8 root_bin = 0;
545 	u8 prefix;
546 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
547 
548 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
549 		root_bin = prefix;
550 
551 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
552 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
553 		if (prefix == 0)
554 			continue;
555 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
556 					 MLXSW_REG_RALST_BIN_NO_CHILD);
557 		last_prefix = prefix;
558 	}
559 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
560 }
561 
562 static struct mlxsw_sp_lpm_tree *
563 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
564 			 struct mlxsw_sp_prefix_usage *prefix_usage,
565 			 enum mlxsw_sp_l3proto proto)
566 {
567 	struct mlxsw_sp_lpm_tree *lpm_tree;
568 	int err;
569 
570 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
571 	if (!lpm_tree)
572 		return ERR_PTR(-EBUSY);
573 	lpm_tree->proto = proto;
574 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
575 	if (err)
576 		return ERR_PTR(err);
577 
578 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
579 						lpm_tree);
580 	if (err)
581 		goto err_left_struct_set;
582 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
583 	       sizeof(lpm_tree->prefix_usage));
584 	return lpm_tree;
585 
586 err_left_struct_set:
587 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
588 	return ERR_PTR(err);
589 }
590 
591 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
592 				      struct mlxsw_sp_lpm_tree *lpm_tree)
593 {
594 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
595 }
596 
597 static struct mlxsw_sp_lpm_tree *
598 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
599 		      struct mlxsw_sp_prefix_usage *prefix_usage,
600 		      enum mlxsw_sp_l3proto proto)
601 {
602 	struct mlxsw_sp_lpm_tree *lpm_tree;
603 	int i;
604 
605 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
606 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
607 		if (lpm_tree->ref_count != 0 &&
608 		    lpm_tree->proto == proto &&
609 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
610 					     prefix_usage))
611 			return lpm_tree;
612 	}
613 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
614 }
615 
616 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
617 {
618 	lpm_tree->ref_count++;
619 }
620 
621 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
622 				  struct mlxsw_sp_lpm_tree *lpm_tree)
623 {
624 	if (--lpm_tree->ref_count == 0)
625 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
626 }
627 
628 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
629 
630 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
631 {
632 	struct mlxsw_sp_lpm_tree *lpm_tree;
633 	u64 max_trees;
634 	int i;
635 
636 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
637 		return -EIO;
638 
639 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
640 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
641 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
642 					     sizeof(struct mlxsw_sp_lpm_tree),
643 					     GFP_KERNEL);
644 	if (!mlxsw_sp->router->lpm.trees)
645 		return -ENOMEM;
646 
647 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
648 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
649 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
650 	}
651 
652 	return 0;
653 }
654 
655 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
656 {
657 	kfree(mlxsw_sp->router->lpm.trees);
658 }
659 
660 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
661 {
662 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
663 }
664 
665 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
666 {
667 	struct mlxsw_sp_vr *vr;
668 	int i;
669 
670 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
671 		vr = &mlxsw_sp->router->vrs[i];
672 		if (!mlxsw_sp_vr_is_used(vr))
673 			return vr;
674 	}
675 	return NULL;
676 }
677 
678 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
679 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
680 {
681 	char raltb_pl[MLXSW_REG_RALTB_LEN];
682 
683 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
684 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
685 			     tree_id);
686 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
687 }
688 
689 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
690 				       const struct mlxsw_sp_fib *fib)
691 {
692 	char raltb_pl[MLXSW_REG_RALTB_LEN];
693 
694 	/* Bind to tree 0 which is default */
695 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
696 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
697 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
698 }
699 
700 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
701 {
702 	/* For our purpose, squash main, default and local tables into one */
703 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
704 		tb_id = RT_TABLE_MAIN;
705 	return tb_id;
706 }
707 
708 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
709 					    u32 tb_id)
710 {
711 	struct mlxsw_sp_vr *vr;
712 	int i;
713 
714 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
715 
716 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
717 		vr = &mlxsw_sp->router->vrs[i];
718 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
719 			return vr;
720 	}
721 	return NULL;
722 }
723 
724 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
725 					    enum mlxsw_sp_l3proto proto)
726 {
727 	switch (proto) {
728 	case MLXSW_SP_L3_PROTO_IPV4:
729 		return vr->fib4;
730 	case MLXSW_SP_L3_PROTO_IPV6:
731 		return vr->fib6;
732 	}
733 	return NULL;
734 }
735 
736 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
737 					      u32 tb_id,
738 					      struct netlink_ext_ack *extack)
739 {
740 	struct mlxsw_sp_vr *vr;
741 	int err;
742 
743 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
744 	if (!vr) {
745 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
746 		return ERR_PTR(-EBUSY);
747 	}
748 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
749 	if (IS_ERR(vr->fib4))
750 		return ERR_CAST(vr->fib4);
751 	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
752 	if (IS_ERR(vr->fib6)) {
753 		err = PTR_ERR(vr->fib6);
754 		goto err_fib6_create;
755 	}
756 	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
757 						 MLXSW_SP_L3_PROTO_IPV4);
758 	if (IS_ERR(vr->mr4_table)) {
759 		err = PTR_ERR(vr->mr4_table);
760 		goto err_mr_table_create;
761 	}
762 	vr->tb_id = tb_id;
763 	return vr;
764 
765 err_mr_table_create:
766 	mlxsw_sp_fib_destroy(vr->fib6);
767 	vr->fib6 = NULL;
768 err_fib6_create:
769 	mlxsw_sp_fib_destroy(vr->fib4);
770 	vr->fib4 = NULL;
771 	return ERR_PTR(err);
772 }
773 
774 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
775 {
776 	mlxsw_sp_mr_table_destroy(vr->mr4_table);
777 	vr->mr4_table = NULL;
778 	mlxsw_sp_fib_destroy(vr->fib6);
779 	vr->fib6 = NULL;
780 	mlxsw_sp_fib_destroy(vr->fib4);
781 	vr->fib4 = NULL;
782 }
783 
784 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
785 					   struct netlink_ext_ack *extack)
786 {
787 	struct mlxsw_sp_vr *vr;
788 
789 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
790 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
791 	if (!vr)
792 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
793 	return vr;
794 }
795 
796 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
797 {
798 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
799 	    list_empty(&vr->fib6->node_list) &&
800 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
801 		mlxsw_sp_vr_destroy(vr);
802 }
803 
804 static bool
805 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
806 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
807 {
808 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
809 
810 	if (!mlxsw_sp_vr_is_used(vr))
811 		return false;
812 	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
813 		return true;
814 	return false;
815 }
816 
817 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
818 					struct mlxsw_sp_fib *fib,
819 					struct mlxsw_sp_lpm_tree *new_tree)
820 {
821 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
822 	int err;
823 
824 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
825 	if (err)
826 		return err;
827 	fib->lpm_tree = new_tree;
828 	mlxsw_sp_lpm_tree_hold(new_tree);
829 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
830 	return 0;
831 }
832 
833 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
834 					 struct mlxsw_sp_fib *fib,
835 					 struct mlxsw_sp_lpm_tree *new_tree)
836 {
837 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
838 	enum mlxsw_sp_l3proto proto = fib->proto;
839 	u8 old_id, new_id = new_tree->id;
840 	struct mlxsw_sp_vr *vr;
841 	int i, err;
842 
843 	if (!old_tree)
844 		goto no_replace;
845 	old_id = old_tree->id;
846 
847 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
848 		vr = &mlxsw_sp->router->vrs[i];
849 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
850 			continue;
851 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
852 						   mlxsw_sp_vr_fib(vr, proto),
853 						   new_tree);
854 		if (err)
855 			goto err_tree_replace;
856 	}
857 
858 	return 0;
859 
860 err_tree_replace:
861 	for (i--; i >= 0; i--) {
862 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
863 			continue;
864 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
865 					     mlxsw_sp_vr_fib(vr, proto),
866 					     old_tree);
867 	}
868 	return err;
869 
870 no_replace:
871 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
872 	if (err)
873 		return err;
874 	fib->lpm_tree = new_tree;
875 	mlxsw_sp_lpm_tree_hold(new_tree);
876 	return 0;
877 }
878 
879 static void
880 mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
881 		      enum mlxsw_sp_l3proto proto,
882 		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
883 {
884 	int i;
885 
886 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
887 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
888 		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
889 		unsigned char prefix;
890 
891 		if (!mlxsw_sp_vr_is_used(vr))
892 			continue;
893 		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
894 			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
895 	}
896 }
897 
898 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
899 {
900 	struct mlxsw_sp_vr *vr;
901 	u64 max_vrs;
902 	int i;
903 
904 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
905 		return -EIO;
906 
907 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
908 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
909 					GFP_KERNEL);
910 	if (!mlxsw_sp->router->vrs)
911 		return -ENOMEM;
912 
913 	for (i = 0; i < max_vrs; i++) {
914 		vr = &mlxsw_sp->router->vrs[i];
915 		vr->id = i;
916 	}
917 
918 	return 0;
919 }
920 
921 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
922 
923 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
924 {
925 	/* At this stage we're guaranteed not to have new incoming
926 	 * FIB notifications and the work queue is free from FIBs
927 	 * sitting on top of mlxsw netdevs. However, we can still
928 	 * have other FIBs queued. Flush the queue before flushing
929 	 * the device's tables. No need for locks, as we're the only
930 	 * writer.
931 	 */
932 	mlxsw_core_flush_owq();
933 	mlxsw_sp_router_fib_flush(mlxsw_sp);
934 	kfree(mlxsw_sp->router->vrs);
935 }
936 
937 static struct net_device *
938 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
939 {
940 	struct ip_tunnel *tun = netdev_priv(ol_dev);
941 	struct net *net = dev_net(ol_dev);
942 
943 	return __dev_get_by_index(net, tun->parms.link);
944 }
945 
946 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
947 {
948 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
949 
950 	if (d)
951 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
952 	else
953 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
954 }
955 
956 static struct mlxsw_sp_rif *
957 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
958 		    const struct mlxsw_sp_rif_params *params,
959 		    struct netlink_ext_ack *extack);
960 
961 static struct mlxsw_sp_rif_ipip_lb *
962 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
963 				enum mlxsw_sp_ipip_type ipipt,
964 				struct net_device *ol_dev,
965 				struct netlink_ext_ack *extack)
966 {
967 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
968 	const struct mlxsw_sp_ipip_ops *ipip_ops;
969 	struct mlxsw_sp_rif *rif;
970 
971 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
972 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
973 		.common.dev = ol_dev,
974 		.common.lag = false,
975 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
976 	};
977 
978 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
979 	if (IS_ERR(rif))
980 		return ERR_CAST(rif);
981 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
982 }
983 
984 static struct mlxsw_sp_ipip_entry *
985 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
986 			  enum mlxsw_sp_ipip_type ipipt,
987 			  struct net_device *ol_dev)
988 {
989 	struct mlxsw_sp_ipip_entry *ipip_entry;
990 	struct mlxsw_sp_ipip_entry *ret = NULL;
991 
992 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
993 	if (!ipip_entry)
994 		return ERR_PTR(-ENOMEM);
995 
996 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
997 							    ol_dev, NULL);
998 	if (IS_ERR(ipip_entry->ol_lb)) {
999 		ret = ERR_CAST(ipip_entry->ol_lb);
1000 		goto err_ol_ipip_lb_create;
1001 	}
1002 
1003 	ipip_entry->ipipt = ipipt;
1004 	ipip_entry->ol_dev = ol_dev;
1005 	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
1006 
1007 	return ipip_entry;
1008 
1009 err_ol_ipip_lb_create:
1010 	kfree(ipip_entry);
1011 	return ret;
1012 }
1013 
1014 static void
1015 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1016 {
1017 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1018 	kfree(ipip_entry);
1019 }
1020 
1021 static bool
1022 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1023 				  const enum mlxsw_sp_l3proto ul_proto,
1024 				  union mlxsw_sp_l3addr saddr,
1025 				  u32 ul_tb_id,
1026 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1027 {
1028 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1029 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1030 	union mlxsw_sp_l3addr tun_saddr;
1031 
1032 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1033 		return false;
1034 
1035 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1036 	return tun_ul_tb_id == ul_tb_id &&
1037 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1038 }
1039 
1040 static int
1041 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1042 			      struct mlxsw_sp_fib_entry *fib_entry,
1043 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1044 {
1045 	u32 tunnel_index;
1046 	int err;
1047 
1048 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1049 	if (err)
1050 		return err;
1051 
1052 	ipip_entry->decap_fib_entry = fib_entry;
1053 	fib_entry->decap.ipip_entry = ipip_entry;
1054 	fib_entry->decap.tunnel_index = tunnel_index;
1055 	return 0;
1056 }
1057 
1058 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1059 					  struct mlxsw_sp_fib_entry *fib_entry)
1060 {
1061 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1062 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1063 	fib_entry->decap.ipip_entry = NULL;
1064 	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1065 }
1066 
1067 static struct mlxsw_sp_fib_node *
1068 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1069 			 size_t addr_len, unsigned char prefix_len);
1070 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1071 				     struct mlxsw_sp_fib_entry *fib_entry);
1072 
1073 static void
1074 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1075 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1076 {
1077 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1078 
1079 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1080 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1081 
1082 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1083 }
1084 
1085 static void
1086 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1087 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1088 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1089 {
1090 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1091 					  ipip_entry))
1092 		return;
1093 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1094 
1095 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1096 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1097 }
1098 
1099 /* Given an IPIP entry, find the corresponding decap route. */
1100 static struct mlxsw_sp_fib_entry *
1101 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1102 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1103 {
1104 	static struct mlxsw_sp_fib_node *fib_node;
1105 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1106 	struct mlxsw_sp_fib_entry *fib_entry;
1107 	unsigned char saddr_prefix_len;
1108 	union mlxsw_sp_l3addr saddr;
1109 	struct mlxsw_sp_fib *ul_fib;
1110 	struct mlxsw_sp_vr *ul_vr;
1111 	const void *saddrp;
1112 	size_t saddr_len;
1113 	u32 ul_tb_id;
1114 	u32 saddr4;
1115 
1116 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1117 
1118 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1119 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1120 	if (!ul_vr)
1121 		return NULL;
1122 
1123 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1124 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1125 					   ipip_entry->ol_dev);
1126 
1127 	switch (ipip_ops->ul_proto) {
1128 	case MLXSW_SP_L3_PROTO_IPV4:
1129 		saddr4 = be32_to_cpu(saddr.addr4);
1130 		saddrp = &saddr4;
1131 		saddr_len = 4;
1132 		saddr_prefix_len = 32;
1133 		break;
1134 	case MLXSW_SP_L3_PROTO_IPV6:
1135 		WARN_ON(1);
1136 		return NULL;
1137 	}
1138 
1139 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1140 					    saddr_prefix_len);
1141 	if (!fib_node || list_empty(&fib_node->entry_list))
1142 		return NULL;
1143 
1144 	fib_entry = list_first_entry(&fib_node->entry_list,
1145 				     struct mlxsw_sp_fib_entry, list);
1146 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1147 		return NULL;
1148 
1149 	return fib_entry;
1150 }
1151 
1152 static struct mlxsw_sp_ipip_entry *
1153 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1154 			   enum mlxsw_sp_ipip_type ipipt,
1155 			   struct net_device *ol_dev)
1156 {
1157 	struct mlxsw_sp_ipip_entry *ipip_entry;
1158 
1159 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1160 	if (IS_ERR(ipip_entry))
1161 		return ipip_entry;
1162 
1163 	list_add_tail(&ipip_entry->ipip_list_node,
1164 		      &mlxsw_sp->router->ipip_list);
1165 
1166 	return ipip_entry;
1167 }
1168 
1169 static void
1170 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1171 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1172 {
1173 	list_del(&ipip_entry->ipip_list_node);
1174 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1175 }
1176 
1177 static bool
1178 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1179 				  const struct net_device *ul_dev,
1180 				  enum mlxsw_sp_l3proto ul_proto,
1181 				  union mlxsw_sp_l3addr ul_dip,
1182 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1183 {
1184 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1185 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1186 	struct net_device *ipip_ul_dev;
1187 
1188 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1189 		return false;
1190 
1191 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1192 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1193 						 ul_tb_id, ipip_entry) &&
1194 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1195 }
1196 
1197 /* Given decap parameters, find the corresponding IPIP entry. */
1198 static struct mlxsw_sp_ipip_entry *
1199 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1200 				  const struct net_device *ul_dev,
1201 				  enum mlxsw_sp_l3proto ul_proto,
1202 				  union mlxsw_sp_l3addr ul_dip)
1203 {
1204 	struct mlxsw_sp_ipip_entry *ipip_entry;
1205 
1206 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1207 			    ipip_list_node)
1208 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1209 						      ul_proto, ul_dip,
1210 						      ipip_entry))
1211 			return ipip_entry;
1212 
1213 	return NULL;
1214 }
1215 
1216 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1217 				      const struct net_device *dev,
1218 				      enum mlxsw_sp_ipip_type *p_type)
1219 {
1220 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1221 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1222 	enum mlxsw_sp_ipip_type ipipt;
1223 
1224 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1225 		ipip_ops = router->ipip_ops_arr[ipipt];
1226 		if (dev->type == ipip_ops->dev_type) {
1227 			if (p_type)
1228 				*p_type = ipipt;
1229 			return true;
1230 		}
1231 	}
1232 	return false;
1233 }
1234 
1235 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1236 				const struct net_device *dev)
1237 {
1238 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1239 }
1240 
1241 static struct mlxsw_sp_ipip_entry *
1242 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1243 				   const struct net_device *ol_dev)
1244 {
1245 	struct mlxsw_sp_ipip_entry *ipip_entry;
1246 
1247 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1248 			    ipip_list_node)
1249 		if (ipip_entry->ol_dev == ol_dev)
1250 			return ipip_entry;
1251 
1252 	return NULL;
1253 }
1254 
1255 static struct mlxsw_sp_ipip_entry *
1256 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1257 				   const struct net_device *ul_dev,
1258 				   struct mlxsw_sp_ipip_entry *start)
1259 {
1260 	struct mlxsw_sp_ipip_entry *ipip_entry;
1261 
1262 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1263 					ipip_list_node);
1264 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1265 				     ipip_list_node) {
1266 		struct net_device *ipip_ul_dev =
1267 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1268 
1269 		if (ipip_ul_dev == ul_dev)
1270 			return ipip_entry;
1271 	}
1272 
1273 	return NULL;
1274 }
1275 
1276 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1277 				const struct net_device *dev)
1278 {
1279 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1280 }
1281 
1282 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1283 						const struct net_device *ol_dev,
1284 						enum mlxsw_sp_ipip_type ipipt)
1285 {
1286 	const struct mlxsw_sp_ipip_ops *ops
1287 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1288 
1289 	/* For deciding whether decap should be offloaded, we don't care about
1290 	 * overlay protocol, so ask whether either one is supported.
1291 	 */
1292 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1293 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1294 }
1295 
1296 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1297 						struct net_device *ol_dev)
1298 {
1299 	struct mlxsw_sp_ipip_entry *ipip_entry;
1300 	enum mlxsw_sp_l3proto ul_proto;
1301 	enum mlxsw_sp_ipip_type ipipt;
1302 	union mlxsw_sp_l3addr saddr;
1303 	u32 ul_tb_id;
1304 
1305 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1306 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1307 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1308 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1309 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1310 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1311 							  saddr, ul_tb_id,
1312 							  NULL)) {
1313 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1314 								ol_dev);
1315 			if (IS_ERR(ipip_entry))
1316 				return PTR_ERR(ipip_entry);
1317 		}
1318 	}
1319 
1320 	return 0;
1321 }
1322 
1323 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1324 						   struct net_device *ol_dev)
1325 {
1326 	struct mlxsw_sp_ipip_entry *ipip_entry;
1327 
1328 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1329 	if (ipip_entry)
1330 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1331 }
1332 
1333 static void
1334 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1335 				struct mlxsw_sp_ipip_entry *ipip_entry)
1336 {
1337 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1338 
1339 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1340 	if (decap_fib_entry)
1341 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1342 						  decap_fib_entry);
1343 }
1344 
1345 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1346 						struct net_device *ol_dev)
1347 {
1348 	struct mlxsw_sp_ipip_entry *ipip_entry;
1349 
1350 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1351 	if (ipip_entry)
1352 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1353 }
1354 
1355 static void
1356 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1357 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1358 {
1359 	if (ipip_entry->decap_fib_entry)
1360 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1361 }
1362 
1363 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1364 						  struct net_device *ol_dev)
1365 {
1366 	struct mlxsw_sp_ipip_entry *ipip_entry;
1367 
1368 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1369 	if (ipip_entry)
1370 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1371 }
1372 
1373 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1374 					struct mlxsw_sp_rif *rif);
1375 static int
1376 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1377 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1378 				 bool keep_encap,
1379 				 struct netlink_ext_ack *extack)
1380 {
1381 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1382 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1383 
1384 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1385 						     ipip_entry->ipipt,
1386 						     ipip_entry->ol_dev,
1387 						     extack);
1388 	if (IS_ERR(new_lb_rif))
1389 		return PTR_ERR(new_lb_rif);
1390 	ipip_entry->ol_lb = new_lb_rif;
1391 
1392 	if (keep_encap) {
1393 		list_splice_init(&old_lb_rif->common.nexthop_list,
1394 				 &new_lb_rif->common.nexthop_list);
1395 		mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common);
1396 	}
1397 
1398 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1399 
1400 	return 0;
1401 }
1402 
1403 /**
1404  * Update the offload related to an IPIP entry. This always updates decap, and
1405  * in addition to that it also:
1406  * @recreate_loopback: recreates the associated loopback RIF
1407  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1408  *              relevant when recreate_loopback is true.
1409  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1410  *                   is only relevant when recreate_loopback is false.
1411  */
1412 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1413 					struct mlxsw_sp_ipip_entry *ipip_entry,
1414 					bool recreate_loopback,
1415 					bool keep_encap,
1416 					bool update_nexthops,
1417 					struct netlink_ext_ack *extack)
1418 {
1419 	int err;
1420 
1421 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1422 	 * recreate it. That creates a window of opportunity where RALUE and
1423 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1424 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1425 	 * of RALUE, demote the decap route back.
1426 	 */
1427 	if (ipip_entry->decap_fib_entry)
1428 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1429 
1430 	if (recreate_loopback) {
1431 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1432 						       keep_encap, extack);
1433 		if (err)
1434 			return err;
1435 	} else if (update_nexthops) {
1436 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1437 					    &ipip_entry->ol_lb->common);
1438 	}
1439 
1440 	if (ipip_entry->ol_dev->flags & IFF_UP)
1441 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1442 
1443 	return 0;
1444 }
1445 
1446 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1447 						struct net_device *ol_dev,
1448 						struct netlink_ext_ack *extack)
1449 {
1450 	struct mlxsw_sp_ipip_entry *ipip_entry =
1451 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1452 
1453 	if (!ipip_entry)
1454 		return 0;
1455 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1456 						   true, false, false, extack);
1457 }
1458 
1459 static int
1460 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1461 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1462 				     struct net_device *ul_dev,
1463 				     struct netlink_ext_ack *extack)
1464 {
1465 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1466 						   true, true, false, extack);
1467 }
1468 
1469 static int
1470 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1471 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1472 				    struct net_device *ul_dev)
1473 {
1474 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1475 						   false, false, true, NULL);
1476 }
1477 
1478 static int
1479 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1480 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1481 				      struct net_device *ul_dev)
1482 {
1483 	/* A down underlay device causes encapsulated packets to not be
1484 	 * forwarded, but decap still works. So refresh next hops without
1485 	 * touching anything else.
1486 	 */
1487 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1488 						   false, false, true, NULL);
1489 }
1490 
1491 static int
1492 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1493 					struct net_device *ol_dev,
1494 					struct netlink_ext_ack *extack)
1495 {
1496 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1497 	struct mlxsw_sp_ipip_entry *ipip_entry;
1498 	int err;
1499 
1500 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1501 	if (!ipip_entry)
1502 		/* A change might make a tunnel eligible for offloading, but
1503 		 * that is currently not implemented. What falls to slow path
1504 		 * stays there.
1505 		 */
1506 		return 0;
1507 
1508 	/* A change might make a tunnel not eligible for offloading. */
1509 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1510 						 ipip_entry->ipipt)) {
1511 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1512 		return 0;
1513 	}
1514 
1515 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1516 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1517 	return err;
1518 }
1519 
1520 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1521 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1522 {
1523 	struct net_device *ol_dev = ipip_entry->ol_dev;
1524 
1525 	if (ol_dev->flags & IFF_UP)
1526 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1527 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1528 }
1529 
1530 /* The configuration where several tunnels have the same local address in the
1531  * same underlay table needs special treatment in the HW. That is currently not
1532  * implemented in the driver. This function finds and demotes the first tunnel
1533  * with a given source address, except the one passed in in the argument
1534  * `except'.
1535  */
1536 bool
1537 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1538 				     enum mlxsw_sp_l3proto ul_proto,
1539 				     union mlxsw_sp_l3addr saddr,
1540 				     u32 ul_tb_id,
1541 				     const struct mlxsw_sp_ipip_entry *except)
1542 {
1543 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1544 
1545 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1546 				 ipip_list_node) {
1547 		if (ipip_entry != except &&
1548 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1549 						      ul_tb_id, ipip_entry)) {
1550 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1551 			return true;
1552 		}
1553 	}
1554 
1555 	return false;
1556 }
1557 
1558 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1559 						     struct net_device *ul_dev)
1560 {
1561 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1562 
1563 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1564 				 ipip_list_node) {
1565 		struct net_device *ipip_ul_dev =
1566 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1567 
1568 		if (ipip_ul_dev == ul_dev)
1569 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1570 	}
1571 }
1572 
1573 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1574 				     struct net_device *ol_dev,
1575 				     unsigned long event,
1576 				     struct netdev_notifier_info *info)
1577 {
1578 	struct netdev_notifier_changeupper_info *chup;
1579 	struct netlink_ext_ack *extack;
1580 
1581 	switch (event) {
1582 	case NETDEV_REGISTER:
1583 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1584 	case NETDEV_UNREGISTER:
1585 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1586 		return 0;
1587 	case NETDEV_UP:
1588 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1589 		return 0;
1590 	case NETDEV_DOWN:
1591 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1592 		return 0;
1593 	case NETDEV_CHANGEUPPER:
1594 		chup = container_of(info, typeof(*chup), info);
1595 		extack = info->extack;
1596 		if (netif_is_l3_master(chup->upper_dev))
1597 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1598 								    ol_dev,
1599 								    extack);
1600 		return 0;
1601 	case NETDEV_CHANGE:
1602 		extack = info->extack;
1603 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1604 							       ol_dev, extack);
1605 	}
1606 	return 0;
1607 }
1608 
1609 static int
1610 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1611 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1612 				   struct net_device *ul_dev,
1613 				   unsigned long event,
1614 				   struct netdev_notifier_info *info)
1615 {
1616 	struct netdev_notifier_changeupper_info *chup;
1617 	struct netlink_ext_ack *extack;
1618 
1619 	switch (event) {
1620 	case NETDEV_CHANGEUPPER:
1621 		chup = container_of(info, typeof(*chup), info);
1622 		extack = info->extack;
1623 		if (netif_is_l3_master(chup->upper_dev))
1624 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1625 								    ipip_entry,
1626 								    ul_dev,
1627 								    extack);
1628 		break;
1629 
1630 	case NETDEV_UP:
1631 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1632 							   ul_dev);
1633 	case NETDEV_DOWN:
1634 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1635 							     ipip_entry,
1636 							     ul_dev);
1637 	}
1638 	return 0;
1639 }
1640 
1641 int
1642 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1643 				 struct net_device *ul_dev,
1644 				 unsigned long event,
1645 				 struct netdev_notifier_info *info)
1646 {
1647 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1648 	int err;
1649 
1650 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1651 								ul_dev,
1652 								ipip_entry))) {
1653 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1654 							 ul_dev, event, info);
1655 		if (err) {
1656 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1657 								 ul_dev);
1658 			return err;
1659 		}
1660 	}
1661 
1662 	return 0;
1663 }
1664 
1665 struct mlxsw_sp_neigh_key {
1666 	struct neighbour *n;
1667 };
1668 
1669 struct mlxsw_sp_neigh_entry {
1670 	struct list_head rif_list_node;
1671 	struct rhash_head ht_node;
1672 	struct mlxsw_sp_neigh_key key;
1673 	u16 rif;
1674 	bool connected;
1675 	unsigned char ha[ETH_ALEN];
1676 	struct list_head nexthop_list; /* list of nexthops using
1677 					* this neigh entry
1678 					*/
1679 	struct list_head nexthop_neighs_list_node;
1680 	unsigned int counter_index;
1681 	bool counter_valid;
1682 };
1683 
1684 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1685 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1686 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1687 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1688 };
1689 
1690 struct mlxsw_sp_neigh_entry *
1691 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1692 			struct mlxsw_sp_neigh_entry *neigh_entry)
1693 {
1694 	if (!neigh_entry) {
1695 		if (list_empty(&rif->neigh_list))
1696 			return NULL;
1697 		else
1698 			return list_first_entry(&rif->neigh_list,
1699 						typeof(*neigh_entry),
1700 						rif_list_node);
1701 	}
1702 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1703 		return NULL;
1704 	return list_next_entry(neigh_entry, rif_list_node);
1705 }
1706 
1707 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1708 {
1709 	return neigh_entry->key.n->tbl->family;
1710 }
1711 
1712 unsigned char *
1713 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1714 {
1715 	return neigh_entry->ha;
1716 }
1717 
1718 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1719 {
1720 	struct neighbour *n;
1721 
1722 	n = neigh_entry->key.n;
1723 	return ntohl(*((__be32 *) n->primary_key));
1724 }
1725 
1726 struct in6_addr *
1727 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1728 {
1729 	struct neighbour *n;
1730 
1731 	n = neigh_entry->key.n;
1732 	return (struct in6_addr *) &n->primary_key;
1733 }
1734 
1735 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1736 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1737 			       u64 *p_counter)
1738 {
1739 	if (!neigh_entry->counter_valid)
1740 		return -EINVAL;
1741 
1742 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1743 					 p_counter, NULL);
1744 }
1745 
1746 static struct mlxsw_sp_neigh_entry *
1747 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1748 			   u16 rif)
1749 {
1750 	struct mlxsw_sp_neigh_entry *neigh_entry;
1751 
1752 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1753 	if (!neigh_entry)
1754 		return NULL;
1755 
1756 	neigh_entry->key.n = n;
1757 	neigh_entry->rif = rif;
1758 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1759 
1760 	return neigh_entry;
1761 }
1762 
1763 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1764 {
1765 	kfree(neigh_entry);
1766 }
1767 
1768 static int
1769 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1770 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1771 {
1772 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1773 				      &neigh_entry->ht_node,
1774 				      mlxsw_sp_neigh_ht_params);
1775 }
1776 
1777 static void
1778 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1779 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1780 {
1781 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1782 			       &neigh_entry->ht_node,
1783 			       mlxsw_sp_neigh_ht_params);
1784 }
1785 
1786 static bool
1787 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1788 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1789 {
1790 	struct devlink *devlink;
1791 	const char *table_name;
1792 
1793 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1794 	case AF_INET:
1795 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1796 		break;
1797 	case AF_INET6:
1798 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1799 		break;
1800 	default:
1801 		WARN_ON(1);
1802 		return false;
1803 	}
1804 
1805 	devlink = priv_to_devlink(mlxsw_sp->core);
1806 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1807 }
1808 
1809 static void
1810 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1811 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1812 {
1813 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1814 		return;
1815 
1816 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1817 		return;
1818 
1819 	neigh_entry->counter_valid = true;
1820 }
1821 
1822 static void
1823 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1824 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1825 {
1826 	if (!neigh_entry->counter_valid)
1827 		return;
1828 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1829 				   neigh_entry->counter_index);
1830 	neigh_entry->counter_valid = false;
1831 }
1832 
1833 static struct mlxsw_sp_neigh_entry *
1834 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1835 {
1836 	struct mlxsw_sp_neigh_entry *neigh_entry;
1837 	struct mlxsw_sp_rif *rif;
1838 	int err;
1839 
1840 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1841 	if (!rif)
1842 		return ERR_PTR(-EINVAL);
1843 
1844 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1845 	if (!neigh_entry)
1846 		return ERR_PTR(-ENOMEM);
1847 
1848 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1849 	if (err)
1850 		goto err_neigh_entry_insert;
1851 
1852 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1853 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1854 
1855 	return neigh_entry;
1856 
1857 err_neigh_entry_insert:
1858 	mlxsw_sp_neigh_entry_free(neigh_entry);
1859 	return ERR_PTR(err);
1860 }
1861 
1862 static void
1863 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1864 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1865 {
1866 	list_del(&neigh_entry->rif_list_node);
1867 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1868 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1869 	mlxsw_sp_neigh_entry_free(neigh_entry);
1870 }
1871 
1872 static struct mlxsw_sp_neigh_entry *
1873 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1874 {
1875 	struct mlxsw_sp_neigh_key key;
1876 
1877 	key.n = n;
1878 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1879 				      &key, mlxsw_sp_neigh_ht_params);
1880 }
1881 
1882 static void
1883 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1884 {
1885 	unsigned long interval;
1886 
1887 #if IS_ENABLED(CONFIG_IPV6)
1888 	interval = min_t(unsigned long,
1889 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1890 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1891 #else
1892 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1893 #endif
1894 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1895 }
1896 
1897 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1898 						   char *rauhtd_pl,
1899 						   int ent_index)
1900 {
1901 	struct net_device *dev;
1902 	struct neighbour *n;
1903 	__be32 dipn;
1904 	u32 dip;
1905 	u16 rif;
1906 
1907 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1908 
1909 	if (!mlxsw_sp->router->rifs[rif]) {
1910 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1911 		return;
1912 	}
1913 
1914 	dipn = htonl(dip);
1915 	dev = mlxsw_sp->router->rifs[rif]->dev;
1916 	n = neigh_lookup(&arp_tbl, &dipn, dev);
1917 	if (!n) {
1918 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
1919 			   &dip);
1920 		return;
1921 	}
1922 
1923 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1924 	neigh_event_send(n, NULL);
1925 	neigh_release(n);
1926 }
1927 
1928 #if IS_ENABLED(CONFIG_IPV6)
1929 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1930 						   char *rauhtd_pl,
1931 						   int rec_index)
1932 {
1933 	struct net_device *dev;
1934 	struct neighbour *n;
1935 	struct in6_addr dip;
1936 	u16 rif;
1937 
1938 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1939 					 (char *) &dip);
1940 
1941 	if (!mlxsw_sp->router->rifs[rif]) {
1942 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1943 		return;
1944 	}
1945 
1946 	dev = mlxsw_sp->router->rifs[rif]->dev;
1947 	n = neigh_lookup(&nd_tbl, &dip, dev);
1948 	if (!n) {
1949 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
1950 			   &dip);
1951 		return;
1952 	}
1953 
1954 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1955 	neigh_event_send(n, NULL);
1956 	neigh_release(n);
1957 }
1958 #else
1959 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1960 						   char *rauhtd_pl,
1961 						   int rec_index)
1962 {
1963 }
1964 #endif
1965 
1966 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1967 						   char *rauhtd_pl,
1968 						   int rec_index)
1969 {
1970 	u8 num_entries;
1971 	int i;
1972 
1973 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1974 								rec_index);
1975 	/* Hardware starts counting at 0, so add 1. */
1976 	num_entries++;
1977 
1978 	/* Each record consists of several neighbour entries. */
1979 	for (i = 0; i < num_entries; i++) {
1980 		int ent_index;
1981 
1982 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
1983 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
1984 						       ent_index);
1985 	}
1986 
1987 }
1988 
1989 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1990 						   char *rauhtd_pl,
1991 						   int rec_index)
1992 {
1993 	/* One record contains one entry. */
1994 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
1995 					       rec_index);
1996 }
1997 
1998 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
1999 					      char *rauhtd_pl, int rec_index)
2000 {
2001 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2002 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2003 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2004 						       rec_index);
2005 		break;
2006 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2007 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2008 						       rec_index);
2009 		break;
2010 	}
2011 }
2012 
2013 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2014 {
2015 	u8 num_rec, last_rec_index, num_entries;
2016 
2017 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2018 	last_rec_index = num_rec - 1;
2019 
2020 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2021 		return false;
2022 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2023 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2024 		return true;
2025 
2026 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2027 								last_rec_index);
2028 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2029 		return true;
2030 	return false;
2031 }
2032 
2033 static int
2034 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2035 				       char *rauhtd_pl,
2036 				       enum mlxsw_reg_rauhtd_type type)
2037 {
2038 	int i, num_rec;
2039 	int err;
2040 
2041 	/* Make sure the neighbour's netdev isn't removed in the
2042 	 * process.
2043 	 */
2044 	rtnl_lock();
2045 	do {
2046 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2047 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2048 				      rauhtd_pl);
2049 		if (err) {
2050 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2051 			break;
2052 		}
2053 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2054 		for (i = 0; i < num_rec; i++)
2055 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2056 							  i);
2057 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2058 	rtnl_unlock();
2059 
2060 	return err;
2061 }
2062 
2063 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2064 {
2065 	enum mlxsw_reg_rauhtd_type type;
2066 	char *rauhtd_pl;
2067 	int err;
2068 
2069 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2070 	if (!rauhtd_pl)
2071 		return -ENOMEM;
2072 
2073 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2074 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2075 	if (err)
2076 		goto out;
2077 
2078 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2079 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2080 out:
2081 	kfree(rauhtd_pl);
2082 	return err;
2083 }
2084 
2085 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2086 {
2087 	struct mlxsw_sp_neigh_entry *neigh_entry;
2088 
2089 	/* Take RTNL mutex here to prevent lists from changes */
2090 	rtnl_lock();
2091 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2092 			    nexthop_neighs_list_node)
2093 		/* If this neigh have nexthops, make the kernel think this neigh
2094 		 * is active regardless of the traffic.
2095 		 */
2096 		neigh_event_send(neigh_entry->key.n, NULL);
2097 	rtnl_unlock();
2098 }
2099 
2100 static void
2101 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2102 {
2103 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2104 
2105 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2106 			       msecs_to_jiffies(interval));
2107 }
2108 
2109 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2110 {
2111 	struct mlxsw_sp_router *router;
2112 	int err;
2113 
2114 	router = container_of(work, struct mlxsw_sp_router,
2115 			      neighs_update.dw.work);
2116 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2117 	if (err)
2118 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2119 
2120 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2121 
2122 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2123 }
2124 
2125 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2126 {
2127 	struct mlxsw_sp_neigh_entry *neigh_entry;
2128 	struct mlxsw_sp_router *router;
2129 
2130 	router = container_of(work, struct mlxsw_sp_router,
2131 			      nexthop_probe_dw.work);
2132 	/* Iterate over nexthop neighbours, find those who are unresolved and
2133 	 * send arp on them. This solves the chicken-egg problem when
2134 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2135 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2136 	 * using different nexthop.
2137 	 *
2138 	 * Take RTNL mutex here to prevent lists from changes.
2139 	 */
2140 	rtnl_lock();
2141 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2142 			    nexthop_neighs_list_node)
2143 		if (!neigh_entry->connected)
2144 			neigh_event_send(neigh_entry->key.n, NULL);
2145 	rtnl_unlock();
2146 
2147 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2148 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2149 }
2150 
2151 static void
2152 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2153 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2154 			      bool removing);
2155 
2156 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2157 {
2158 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2159 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2160 }
2161 
2162 static void
2163 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2164 				struct mlxsw_sp_neigh_entry *neigh_entry,
2165 				enum mlxsw_reg_rauht_op op)
2166 {
2167 	struct neighbour *n = neigh_entry->key.n;
2168 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2169 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2170 
2171 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2172 			      dip);
2173 	if (neigh_entry->counter_valid)
2174 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2175 					     neigh_entry->counter_index);
2176 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2177 }
2178 
2179 static void
2180 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2181 				struct mlxsw_sp_neigh_entry *neigh_entry,
2182 				enum mlxsw_reg_rauht_op op)
2183 {
2184 	struct neighbour *n = neigh_entry->key.n;
2185 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2186 	const char *dip = n->primary_key;
2187 
2188 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2189 			      dip);
2190 	if (neigh_entry->counter_valid)
2191 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2192 					     neigh_entry->counter_index);
2193 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2194 }
2195 
2196 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2197 {
2198 	struct neighbour *n = neigh_entry->key.n;
2199 
2200 	/* Packets with a link-local destination address are trapped
2201 	 * after LPM lookup and never reach the neighbour table, so
2202 	 * there is no need to program such neighbours to the device.
2203 	 */
2204 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2205 	    IPV6_ADDR_LINKLOCAL)
2206 		return true;
2207 	return false;
2208 }
2209 
2210 static void
2211 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2212 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2213 			    bool adding)
2214 {
2215 	if (!adding && !neigh_entry->connected)
2216 		return;
2217 	neigh_entry->connected = adding;
2218 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2219 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2220 						mlxsw_sp_rauht_op(adding));
2221 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2222 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2223 			return;
2224 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2225 						mlxsw_sp_rauht_op(adding));
2226 	} else {
2227 		WARN_ON_ONCE(1);
2228 	}
2229 }
2230 
2231 void
2232 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2233 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2234 				    bool adding)
2235 {
2236 	if (adding)
2237 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2238 	else
2239 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2240 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2241 }
2242 
2243 struct mlxsw_sp_netevent_work {
2244 	struct work_struct work;
2245 	struct mlxsw_sp *mlxsw_sp;
2246 	struct neighbour *n;
2247 };
2248 
2249 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2250 {
2251 	struct mlxsw_sp_netevent_work *net_work =
2252 		container_of(work, struct mlxsw_sp_netevent_work, work);
2253 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2254 	struct mlxsw_sp_neigh_entry *neigh_entry;
2255 	struct neighbour *n = net_work->n;
2256 	unsigned char ha[ETH_ALEN];
2257 	bool entry_connected;
2258 	u8 nud_state, dead;
2259 
2260 	/* If these parameters are changed after we release the lock,
2261 	 * then we are guaranteed to receive another event letting us
2262 	 * know about it.
2263 	 */
2264 	read_lock_bh(&n->lock);
2265 	memcpy(ha, n->ha, ETH_ALEN);
2266 	nud_state = n->nud_state;
2267 	dead = n->dead;
2268 	read_unlock_bh(&n->lock);
2269 
2270 	rtnl_lock();
2271 	entry_connected = nud_state & NUD_VALID && !dead;
2272 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2273 	if (!entry_connected && !neigh_entry)
2274 		goto out;
2275 	if (!neigh_entry) {
2276 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2277 		if (IS_ERR(neigh_entry))
2278 			goto out;
2279 	}
2280 
2281 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2282 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2283 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2284 
2285 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2286 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2287 
2288 out:
2289 	rtnl_unlock();
2290 	neigh_release(n);
2291 	kfree(net_work);
2292 }
2293 
2294 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2295 
2296 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2297 {
2298 	struct mlxsw_sp_netevent_work *net_work =
2299 		container_of(work, struct mlxsw_sp_netevent_work, work);
2300 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2301 
2302 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2303 	kfree(net_work);
2304 }
2305 
2306 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2307 					  unsigned long event, void *ptr)
2308 {
2309 	struct mlxsw_sp_netevent_work *net_work;
2310 	struct mlxsw_sp_port *mlxsw_sp_port;
2311 	struct mlxsw_sp_router *router;
2312 	struct mlxsw_sp *mlxsw_sp;
2313 	unsigned long interval;
2314 	struct neigh_parms *p;
2315 	struct neighbour *n;
2316 	struct net *net;
2317 
2318 	switch (event) {
2319 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2320 		p = ptr;
2321 
2322 		/* We don't care about changes in the default table. */
2323 		if (!p->dev || (p->tbl->family != AF_INET &&
2324 				p->tbl->family != AF_INET6))
2325 			return NOTIFY_DONE;
2326 
2327 		/* We are in atomic context and can't take RTNL mutex,
2328 		 * so use RCU variant to walk the device chain.
2329 		 */
2330 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2331 		if (!mlxsw_sp_port)
2332 			return NOTIFY_DONE;
2333 
2334 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2335 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2336 		mlxsw_sp->router->neighs_update.interval = interval;
2337 
2338 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2339 		break;
2340 	case NETEVENT_NEIGH_UPDATE:
2341 		n = ptr;
2342 
2343 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2344 			return NOTIFY_DONE;
2345 
2346 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2347 		if (!mlxsw_sp_port)
2348 			return NOTIFY_DONE;
2349 
2350 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2351 		if (!net_work) {
2352 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2353 			return NOTIFY_BAD;
2354 		}
2355 
2356 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2357 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2358 		net_work->n = n;
2359 
2360 		/* Take a reference to ensure the neighbour won't be
2361 		 * destructed until we drop the reference in delayed
2362 		 * work.
2363 		 */
2364 		neigh_clone(n);
2365 		mlxsw_core_schedule_work(&net_work->work);
2366 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2367 		break;
2368 	case NETEVENT_MULTIPATH_HASH_UPDATE:
2369 		net = ptr;
2370 
2371 		if (!net_eq(net, &init_net))
2372 			return NOTIFY_DONE;
2373 
2374 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2375 		if (!net_work)
2376 			return NOTIFY_BAD;
2377 
2378 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2379 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2380 		net_work->mlxsw_sp = router->mlxsw_sp;
2381 		mlxsw_core_schedule_work(&net_work->work);
2382 		break;
2383 	}
2384 
2385 	return NOTIFY_DONE;
2386 }
2387 
2388 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2389 {
2390 	int err;
2391 
2392 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2393 			      &mlxsw_sp_neigh_ht_params);
2394 	if (err)
2395 		return err;
2396 
2397 	/* Initialize the polling interval according to the default
2398 	 * table.
2399 	 */
2400 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2401 
2402 	/* Create the delayed works for the activity_update */
2403 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2404 			  mlxsw_sp_router_neighs_update_work);
2405 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2406 			  mlxsw_sp_router_probe_unresolved_nexthops);
2407 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2408 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2409 	return 0;
2410 }
2411 
2412 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2413 {
2414 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2415 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2416 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2417 }
2418 
2419 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
2420 				    const struct mlxsw_sp_rif *rif)
2421 {
2422 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2423 
2424 	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
2425 			     rif->rif_index, rif->addr);
2426 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2427 }
2428 
2429 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2430 					 struct mlxsw_sp_rif *rif)
2431 {
2432 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2433 
2434 	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
2435 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2436 				 rif_list_node)
2437 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2438 }
2439 
2440 enum mlxsw_sp_nexthop_type {
2441 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2442 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2443 };
2444 
2445 struct mlxsw_sp_nexthop_key {
2446 	struct fib_nh *fib_nh;
2447 };
2448 
2449 struct mlxsw_sp_nexthop {
2450 	struct list_head neigh_list_node; /* member of neigh entry list */
2451 	struct list_head rif_list_node;
2452 	struct list_head router_list_node;
2453 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2454 						* this belongs to
2455 						*/
2456 	struct rhash_head ht_node;
2457 	struct mlxsw_sp_nexthop_key key;
2458 	unsigned char gw_addr[sizeof(struct in6_addr)];
2459 	int ifindex;
2460 	int nh_weight;
2461 	int norm_nh_weight;
2462 	int num_adj_entries;
2463 	struct mlxsw_sp_rif *rif;
2464 	u8 should_offload:1, /* set indicates this neigh is connected and
2465 			      * should be put to KVD linear area of this group.
2466 			      */
2467 	   offloaded:1, /* set in case the neigh is actually put into
2468 			 * KVD linear area of this group.
2469 			 */
2470 	   update:1; /* set indicates that MAC of this neigh should be
2471 		      * updated in HW
2472 		      */
2473 	enum mlxsw_sp_nexthop_type type;
2474 	union {
2475 		struct mlxsw_sp_neigh_entry *neigh_entry;
2476 		struct mlxsw_sp_ipip_entry *ipip_entry;
2477 	};
2478 	unsigned int counter_index;
2479 	bool counter_valid;
2480 };
2481 
2482 struct mlxsw_sp_nexthop_group {
2483 	void *priv;
2484 	struct rhash_head ht_node;
2485 	struct list_head fib_list; /* list of fib entries that use this group */
2486 	struct neigh_table *neigh_tbl;
2487 	u8 adj_index_valid:1,
2488 	   gateway:1; /* routes using the group use a gateway */
2489 	u32 adj_index;
2490 	u16 ecmp_size;
2491 	u16 count;
2492 	int sum_norm_weight;
2493 	struct mlxsw_sp_nexthop nexthops[0];
2494 #define nh_rif	nexthops[0].rif
2495 };
2496 
2497 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2498 				    struct mlxsw_sp_nexthop *nh)
2499 {
2500 	struct devlink *devlink;
2501 
2502 	devlink = priv_to_devlink(mlxsw_sp->core);
2503 	if (!devlink_dpipe_table_counter_enabled(devlink,
2504 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2505 		return;
2506 
2507 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2508 		return;
2509 
2510 	nh->counter_valid = true;
2511 }
2512 
2513 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2514 				   struct mlxsw_sp_nexthop *nh)
2515 {
2516 	if (!nh->counter_valid)
2517 		return;
2518 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2519 	nh->counter_valid = false;
2520 }
2521 
2522 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2523 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2524 {
2525 	if (!nh->counter_valid)
2526 		return -EINVAL;
2527 
2528 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2529 					 p_counter, NULL);
2530 }
2531 
2532 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2533 					       struct mlxsw_sp_nexthop *nh)
2534 {
2535 	if (!nh) {
2536 		if (list_empty(&router->nexthop_list))
2537 			return NULL;
2538 		else
2539 			return list_first_entry(&router->nexthop_list,
2540 						typeof(*nh), router_list_node);
2541 	}
2542 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2543 		return NULL;
2544 	return list_next_entry(nh, router_list_node);
2545 }
2546 
2547 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2548 {
2549 	return nh->offloaded;
2550 }
2551 
2552 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2553 {
2554 	if (!nh->offloaded)
2555 		return NULL;
2556 	return nh->neigh_entry->ha;
2557 }
2558 
2559 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2560 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2561 {
2562 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2563 	u32 adj_hash_index = 0;
2564 	int i;
2565 
2566 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2567 		return -EINVAL;
2568 
2569 	*p_adj_index = nh_grp->adj_index;
2570 	*p_adj_size = nh_grp->ecmp_size;
2571 
2572 	for (i = 0; i < nh_grp->count; i++) {
2573 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2574 
2575 		if (nh_iter == nh)
2576 			break;
2577 		if (nh_iter->offloaded)
2578 			adj_hash_index += nh_iter->num_adj_entries;
2579 	}
2580 
2581 	*p_adj_hash_index = adj_hash_index;
2582 	return 0;
2583 }
2584 
2585 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2586 {
2587 	return nh->rif;
2588 }
2589 
2590 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2591 {
2592 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2593 	int i;
2594 
2595 	for (i = 0; i < nh_grp->count; i++) {
2596 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2597 
2598 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2599 			return true;
2600 	}
2601 	return false;
2602 }
2603 
2604 static struct fib_info *
2605 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2606 {
2607 	return nh_grp->priv;
2608 }
2609 
2610 struct mlxsw_sp_nexthop_group_cmp_arg {
2611 	enum mlxsw_sp_l3proto proto;
2612 	union {
2613 		struct fib_info *fi;
2614 		struct mlxsw_sp_fib6_entry *fib6_entry;
2615 	};
2616 };
2617 
2618 static bool
2619 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2620 				    const struct in6_addr *gw, int ifindex)
2621 {
2622 	int i;
2623 
2624 	for (i = 0; i < nh_grp->count; i++) {
2625 		const struct mlxsw_sp_nexthop *nh;
2626 
2627 		nh = &nh_grp->nexthops[i];
2628 		if (nh->ifindex == ifindex &&
2629 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2630 			return true;
2631 	}
2632 
2633 	return false;
2634 }
2635 
2636 static bool
2637 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2638 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2639 {
2640 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2641 
2642 	if (nh_grp->count != fib6_entry->nrt6)
2643 		return false;
2644 
2645 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2646 		struct in6_addr *gw;
2647 		int ifindex;
2648 
2649 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2650 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2651 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2652 			return false;
2653 	}
2654 
2655 	return true;
2656 }
2657 
2658 static int
2659 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2660 {
2661 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2662 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2663 
2664 	switch (cmp_arg->proto) {
2665 	case MLXSW_SP_L3_PROTO_IPV4:
2666 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2667 	case MLXSW_SP_L3_PROTO_IPV6:
2668 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2669 						    cmp_arg->fib6_entry);
2670 	default:
2671 		WARN_ON(1);
2672 		return 1;
2673 	}
2674 }
2675 
2676 static int
2677 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2678 {
2679 	return nh_grp->neigh_tbl->family;
2680 }
2681 
2682 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2683 {
2684 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2685 	const struct mlxsw_sp_nexthop *nh;
2686 	struct fib_info *fi;
2687 	unsigned int val;
2688 	int i;
2689 
2690 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2691 	case AF_INET:
2692 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2693 		return jhash(&fi, sizeof(fi), seed);
2694 	case AF_INET6:
2695 		val = nh_grp->count;
2696 		for (i = 0; i < nh_grp->count; i++) {
2697 			nh = &nh_grp->nexthops[i];
2698 			val ^= nh->ifindex;
2699 		}
2700 		return jhash(&val, sizeof(val), seed);
2701 	default:
2702 		WARN_ON(1);
2703 		return 0;
2704 	}
2705 }
2706 
2707 static u32
2708 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2709 {
2710 	unsigned int val = fib6_entry->nrt6;
2711 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2712 	struct net_device *dev;
2713 
2714 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2715 		dev = mlxsw_sp_rt6->rt->dst.dev;
2716 		val ^= dev->ifindex;
2717 	}
2718 
2719 	return jhash(&val, sizeof(val), seed);
2720 }
2721 
2722 static u32
2723 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2724 {
2725 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2726 
2727 	switch (cmp_arg->proto) {
2728 	case MLXSW_SP_L3_PROTO_IPV4:
2729 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2730 	case MLXSW_SP_L3_PROTO_IPV6:
2731 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2732 	default:
2733 		WARN_ON(1);
2734 		return 0;
2735 	}
2736 }
2737 
2738 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2739 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2740 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2741 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2742 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2743 };
2744 
2745 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2746 					 struct mlxsw_sp_nexthop_group *nh_grp)
2747 {
2748 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2749 	    !nh_grp->gateway)
2750 		return 0;
2751 
2752 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2753 				      &nh_grp->ht_node,
2754 				      mlxsw_sp_nexthop_group_ht_params);
2755 }
2756 
2757 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2758 					  struct mlxsw_sp_nexthop_group *nh_grp)
2759 {
2760 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2761 	    !nh_grp->gateway)
2762 		return;
2763 
2764 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2765 			       &nh_grp->ht_node,
2766 			       mlxsw_sp_nexthop_group_ht_params);
2767 }
2768 
2769 static struct mlxsw_sp_nexthop_group *
2770 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2771 			       struct fib_info *fi)
2772 {
2773 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2774 
2775 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2776 	cmp_arg.fi = fi;
2777 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2778 				      &cmp_arg,
2779 				      mlxsw_sp_nexthop_group_ht_params);
2780 }
2781 
2782 static struct mlxsw_sp_nexthop_group *
2783 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2784 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2785 {
2786 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2787 
2788 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2789 	cmp_arg.fib6_entry = fib6_entry;
2790 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2791 				      &cmp_arg,
2792 				      mlxsw_sp_nexthop_group_ht_params);
2793 }
2794 
2795 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2796 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2797 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2798 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2799 };
2800 
2801 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2802 				   struct mlxsw_sp_nexthop *nh)
2803 {
2804 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2805 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2806 }
2807 
2808 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2809 				    struct mlxsw_sp_nexthop *nh)
2810 {
2811 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2812 			       mlxsw_sp_nexthop_ht_params);
2813 }
2814 
2815 static struct mlxsw_sp_nexthop *
2816 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2817 			struct mlxsw_sp_nexthop_key key)
2818 {
2819 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2820 				      mlxsw_sp_nexthop_ht_params);
2821 }
2822 
2823 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2824 					     const struct mlxsw_sp_fib *fib,
2825 					     u32 adj_index, u16 ecmp_size,
2826 					     u32 new_adj_index,
2827 					     u16 new_ecmp_size)
2828 {
2829 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2830 
2831 	mlxsw_reg_raleu_pack(raleu_pl,
2832 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2833 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2834 			     new_ecmp_size);
2835 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2836 }
2837 
2838 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2839 					  struct mlxsw_sp_nexthop_group *nh_grp,
2840 					  u32 old_adj_index, u16 old_ecmp_size)
2841 {
2842 	struct mlxsw_sp_fib_entry *fib_entry;
2843 	struct mlxsw_sp_fib *fib = NULL;
2844 	int err;
2845 
2846 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2847 		if (fib == fib_entry->fib_node->fib)
2848 			continue;
2849 		fib = fib_entry->fib_node->fib;
2850 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2851 							old_adj_index,
2852 							old_ecmp_size,
2853 							nh_grp->adj_index,
2854 							nh_grp->ecmp_size);
2855 		if (err)
2856 			return err;
2857 	}
2858 	return 0;
2859 }
2860 
2861 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2862 				     struct mlxsw_sp_nexthop *nh)
2863 {
2864 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2865 	char ratr_pl[MLXSW_REG_RATR_LEN];
2866 
2867 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2868 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2869 			    adj_index, neigh_entry->rif);
2870 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2871 	if (nh->counter_valid)
2872 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2873 	else
2874 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2875 
2876 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2877 }
2878 
2879 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2880 			    struct mlxsw_sp_nexthop *nh)
2881 {
2882 	int i;
2883 
2884 	for (i = 0; i < nh->num_adj_entries; i++) {
2885 		int err;
2886 
2887 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2888 		if (err)
2889 			return err;
2890 	}
2891 
2892 	return 0;
2893 }
2894 
2895 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2896 					  u32 adj_index,
2897 					  struct mlxsw_sp_nexthop *nh)
2898 {
2899 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2900 
2901 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2902 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2903 }
2904 
2905 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2906 					u32 adj_index,
2907 					struct mlxsw_sp_nexthop *nh)
2908 {
2909 	int i;
2910 
2911 	for (i = 0; i < nh->num_adj_entries; i++) {
2912 		int err;
2913 
2914 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2915 						     nh);
2916 		if (err)
2917 			return err;
2918 	}
2919 
2920 	return 0;
2921 }
2922 
2923 static int
2924 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2925 			      struct mlxsw_sp_nexthop_group *nh_grp,
2926 			      bool reallocate)
2927 {
2928 	u32 adj_index = nh_grp->adj_index; /* base */
2929 	struct mlxsw_sp_nexthop *nh;
2930 	int i;
2931 	int err;
2932 
2933 	for (i = 0; i < nh_grp->count; i++) {
2934 		nh = &nh_grp->nexthops[i];
2935 
2936 		if (!nh->should_offload) {
2937 			nh->offloaded = 0;
2938 			continue;
2939 		}
2940 
2941 		if (nh->update || reallocate) {
2942 			switch (nh->type) {
2943 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2944 				err = mlxsw_sp_nexthop_update
2945 					    (mlxsw_sp, adj_index, nh);
2946 				break;
2947 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2948 				err = mlxsw_sp_nexthop_ipip_update
2949 					    (mlxsw_sp, adj_index, nh);
2950 				break;
2951 			}
2952 			if (err)
2953 				return err;
2954 			nh->update = 0;
2955 			nh->offloaded = 1;
2956 		}
2957 		adj_index += nh->num_adj_entries;
2958 	}
2959 	return 0;
2960 }
2961 
2962 static bool
2963 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2964 				 const struct mlxsw_sp_fib_entry *fib_entry);
2965 
2966 static int
2967 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2968 				    struct mlxsw_sp_nexthop_group *nh_grp)
2969 {
2970 	struct mlxsw_sp_fib_entry *fib_entry;
2971 	int err;
2972 
2973 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2974 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2975 						      fib_entry))
2976 			continue;
2977 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2978 		if (err)
2979 			return err;
2980 	}
2981 	return 0;
2982 }
2983 
2984 static void
2985 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2986 				   enum mlxsw_reg_ralue_op op, int err);
2987 
2988 static void
2989 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
2990 {
2991 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
2992 	struct mlxsw_sp_fib_entry *fib_entry;
2993 
2994 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2995 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2996 						      fib_entry))
2997 			continue;
2998 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2999 	}
3000 }
3001 
3002 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3003 {
3004 	/* Valid sizes for an adjacency group are:
3005 	 * 1-64, 512, 1024, 2048 and 4096.
3006 	 */
3007 	if (*p_adj_grp_size <= 64)
3008 		return;
3009 	else if (*p_adj_grp_size <= 512)
3010 		*p_adj_grp_size = 512;
3011 	else if (*p_adj_grp_size <= 1024)
3012 		*p_adj_grp_size = 1024;
3013 	else if (*p_adj_grp_size <= 2048)
3014 		*p_adj_grp_size = 2048;
3015 	else
3016 		*p_adj_grp_size = 4096;
3017 }
3018 
3019 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3020 					     unsigned int alloc_size)
3021 {
3022 	if (alloc_size >= 4096)
3023 		*p_adj_grp_size = 4096;
3024 	else if (alloc_size >= 2048)
3025 		*p_adj_grp_size = 2048;
3026 	else if (alloc_size >= 1024)
3027 		*p_adj_grp_size = 1024;
3028 	else if (alloc_size >= 512)
3029 		*p_adj_grp_size = 512;
3030 }
3031 
3032 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3033 				     u16 *p_adj_grp_size)
3034 {
3035 	unsigned int alloc_size;
3036 	int err;
3037 
3038 	/* Round up the requested group size to the next size supported
3039 	 * by the device and make sure the request can be satisfied.
3040 	 */
3041 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3042 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3043 					     &alloc_size);
3044 	if (err)
3045 		return err;
3046 	/* It is possible the allocation results in more allocated
3047 	 * entries than requested. Try to use as much of them as
3048 	 * possible.
3049 	 */
3050 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3051 
3052 	return 0;
3053 }
3054 
3055 static void
3056 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3057 {
3058 	int i, g = 0, sum_norm_weight = 0;
3059 	struct mlxsw_sp_nexthop *nh;
3060 
3061 	for (i = 0; i < nh_grp->count; i++) {
3062 		nh = &nh_grp->nexthops[i];
3063 
3064 		if (!nh->should_offload)
3065 			continue;
3066 		if (g > 0)
3067 			g = gcd(nh->nh_weight, g);
3068 		else
3069 			g = nh->nh_weight;
3070 	}
3071 
3072 	for (i = 0; i < nh_grp->count; i++) {
3073 		nh = &nh_grp->nexthops[i];
3074 
3075 		if (!nh->should_offload)
3076 			continue;
3077 		nh->norm_nh_weight = nh->nh_weight / g;
3078 		sum_norm_weight += nh->norm_nh_weight;
3079 	}
3080 
3081 	nh_grp->sum_norm_weight = sum_norm_weight;
3082 }
3083 
3084 static void
3085 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3086 {
3087 	int total = nh_grp->sum_norm_weight;
3088 	u16 ecmp_size = nh_grp->ecmp_size;
3089 	int i, weight = 0, lower_bound = 0;
3090 
3091 	for (i = 0; i < nh_grp->count; i++) {
3092 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3093 		int upper_bound;
3094 
3095 		if (!nh->should_offload)
3096 			continue;
3097 		weight += nh->norm_nh_weight;
3098 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3099 		nh->num_adj_entries = upper_bound - lower_bound;
3100 		lower_bound = upper_bound;
3101 	}
3102 }
3103 
3104 static void
3105 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3106 			       struct mlxsw_sp_nexthop_group *nh_grp)
3107 {
3108 	u16 ecmp_size, old_ecmp_size;
3109 	struct mlxsw_sp_nexthop *nh;
3110 	bool offload_change = false;
3111 	u32 adj_index;
3112 	bool old_adj_index_valid;
3113 	u32 old_adj_index;
3114 	int i;
3115 	int err;
3116 
3117 	if (!nh_grp->gateway) {
3118 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3119 		return;
3120 	}
3121 
3122 	for (i = 0; i < nh_grp->count; i++) {
3123 		nh = &nh_grp->nexthops[i];
3124 
3125 		if (nh->should_offload != nh->offloaded) {
3126 			offload_change = true;
3127 			if (nh->should_offload)
3128 				nh->update = 1;
3129 		}
3130 	}
3131 	if (!offload_change) {
3132 		/* Nothing was added or removed, so no need to reallocate. Just
3133 		 * update MAC on existing adjacency indexes.
3134 		 */
3135 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3136 		if (err) {
3137 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3138 			goto set_trap;
3139 		}
3140 		return;
3141 	}
3142 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3143 	if (!nh_grp->sum_norm_weight)
3144 		/* No neigh of this group is connected so we just set
3145 		 * the trap and let everthing flow through kernel.
3146 		 */
3147 		goto set_trap;
3148 
3149 	ecmp_size = nh_grp->sum_norm_weight;
3150 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3151 	if (err)
3152 		/* No valid allocation size available. */
3153 		goto set_trap;
3154 
3155 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3156 	if (err) {
3157 		/* We ran out of KVD linear space, just set the
3158 		 * trap and let everything flow through kernel.
3159 		 */
3160 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3161 		goto set_trap;
3162 	}
3163 	old_adj_index_valid = nh_grp->adj_index_valid;
3164 	old_adj_index = nh_grp->adj_index;
3165 	old_ecmp_size = nh_grp->ecmp_size;
3166 	nh_grp->adj_index_valid = 1;
3167 	nh_grp->adj_index = adj_index;
3168 	nh_grp->ecmp_size = ecmp_size;
3169 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3170 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3171 	if (err) {
3172 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3173 		goto set_trap;
3174 	}
3175 
3176 	if (!old_adj_index_valid) {
3177 		/* The trap was set for fib entries, so we have to call
3178 		 * fib entry update to unset it and use adjacency index.
3179 		 */
3180 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3181 		if (err) {
3182 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3183 			goto set_trap;
3184 		}
3185 		return;
3186 	}
3187 
3188 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3189 					     old_adj_index, old_ecmp_size);
3190 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3191 	if (err) {
3192 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3193 		goto set_trap;
3194 	}
3195 
3196 	/* Offload state within the group changed, so update the flags. */
3197 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3198 
3199 	return;
3200 
3201 set_trap:
3202 	old_adj_index_valid = nh_grp->adj_index_valid;
3203 	nh_grp->adj_index_valid = 0;
3204 	for (i = 0; i < nh_grp->count; i++) {
3205 		nh = &nh_grp->nexthops[i];
3206 		nh->offloaded = 0;
3207 	}
3208 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3209 	if (err)
3210 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3211 	if (old_adj_index_valid)
3212 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3213 }
3214 
3215 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3216 					    bool removing)
3217 {
3218 	if (!removing)
3219 		nh->should_offload = 1;
3220 	else if (nh->offloaded)
3221 		nh->should_offload = 0;
3222 	nh->update = 1;
3223 }
3224 
3225 static void
3226 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3227 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3228 			      bool removing)
3229 {
3230 	struct mlxsw_sp_nexthop *nh;
3231 
3232 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3233 			    neigh_list_node) {
3234 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3235 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3236 	}
3237 }
3238 
3239 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3240 				      struct mlxsw_sp_rif *rif)
3241 {
3242 	if (nh->rif)
3243 		return;
3244 
3245 	nh->rif = rif;
3246 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3247 }
3248 
3249 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3250 {
3251 	if (!nh->rif)
3252 		return;
3253 
3254 	list_del(&nh->rif_list_node);
3255 	nh->rif = NULL;
3256 }
3257 
3258 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3259 				       struct mlxsw_sp_nexthop *nh)
3260 {
3261 	struct mlxsw_sp_neigh_entry *neigh_entry;
3262 	struct neighbour *n;
3263 	u8 nud_state, dead;
3264 	int err;
3265 
3266 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3267 		return 0;
3268 
3269 	/* Take a reference of neigh here ensuring that neigh would
3270 	 * not be destructed before the nexthop entry is finished.
3271 	 * The reference is taken either in neigh_lookup() or
3272 	 * in neigh_create() in case n is not found.
3273 	 */
3274 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3275 	if (!n) {
3276 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3277 				 nh->rif->dev);
3278 		if (IS_ERR(n))
3279 			return PTR_ERR(n);
3280 		neigh_event_send(n, NULL);
3281 	}
3282 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3283 	if (!neigh_entry) {
3284 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3285 		if (IS_ERR(neigh_entry)) {
3286 			err = -EINVAL;
3287 			goto err_neigh_entry_create;
3288 		}
3289 	}
3290 
3291 	/* If that is the first nexthop connected to that neigh, add to
3292 	 * nexthop_neighs_list
3293 	 */
3294 	if (list_empty(&neigh_entry->nexthop_list))
3295 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3296 			      &mlxsw_sp->router->nexthop_neighs_list);
3297 
3298 	nh->neigh_entry = neigh_entry;
3299 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3300 	read_lock_bh(&n->lock);
3301 	nud_state = n->nud_state;
3302 	dead = n->dead;
3303 	read_unlock_bh(&n->lock);
3304 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3305 
3306 	return 0;
3307 
3308 err_neigh_entry_create:
3309 	neigh_release(n);
3310 	return err;
3311 }
3312 
3313 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3314 					struct mlxsw_sp_nexthop *nh)
3315 {
3316 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3317 	struct neighbour *n;
3318 
3319 	if (!neigh_entry)
3320 		return;
3321 	n = neigh_entry->key.n;
3322 
3323 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3324 	list_del(&nh->neigh_list_node);
3325 	nh->neigh_entry = NULL;
3326 
3327 	/* If that is the last nexthop connected to that neigh, remove from
3328 	 * nexthop_neighs_list
3329 	 */
3330 	if (list_empty(&neigh_entry->nexthop_list))
3331 		list_del(&neigh_entry->nexthop_neighs_list_node);
3332 
3333 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3334 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3335 
3336 	neigh_release(n);
3337 }
3338 
3339 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3340 {
3341 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3342 
3343 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3344 }
3345 
3346 static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3347 				      struct mlxsw_sp_nexthop *nh,
3348 				      struct net_device *ol_dev)
3349 {
3350 	bool removing;
3351 
3352 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3353 		return 0;
3354 
3355 	nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
3356 	if (!nh->ipip_entry)
3357 		return -ENOENT;
3358 
3359 	removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev);
3360 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3361 	return 0;
3362 }
3363 
3364 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3365 				       struct mlxsw_sp_nexthop *nh)
3366 {
3367 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3368 
3369 	if (!ipip_entry)
3370 		return;
3371 
3372 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3373 	nh->ipip_entry = NULL;
3374 }
3375 
3376 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3377 					const struct fib_nh *fib_nh,
3378 					enum mlxsw_sp_ipip_type *p_ipipt)
3379 {
3380 	struct net_device *dev = fib_nh->nh_dev;
3381 
3382 	return dev &&
3383 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3384 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3385 }
3386 
3387 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3388 				       struct mlxsw_sp_nexthop *nh)
3389 {
3390 	switch (nh->type) {
3391 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3392 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3393 		mlxsw_sp_nexthop_rif_fini(nh);
3394 		break;
3395 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3396 		mlxsw_sp_nexthop_rif_fini(nh);
3397 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3398 		break;
3399 	}
3400 }
3401 
3402 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3403 				       struct mlxsw_sp_nexthop *nh,
3404 				       struct fib_nh *fib_nh)
3405 {
3406 	struct mlxsw_sp_router *router = mlxsw_sp->router;
3407 	struct net_device *dev = fib_nh->nh_dev;
3408 	enum mlxsw_sp_ipip_type ipipt;
3409 	struct mlxsw_sp_rif *rif;
3410 	int err;
3411 
3412 	if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) &&
3413 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
3414 						     MLXSW_SP_L3_PROTO_IPV4)) {
3415 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3416 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
3417 		if (err)
3418 			return err;
3419 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
3420 		return 0;
3421 	}
3422 
3423 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3424 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3425 	if (!rif)
3426 		return 0;
3427 
3428 	mlxsw_sp_nexthop_rif_init(nh, rif);
3429 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3430 	if (err)
3431 		goto err_neigh_init;
3432 
3433 	return 0;
3434 
3435 err_neigh_init:
3436 	mlxsw_sp_nexthop_rif_fini(nh);
3437 	return err;
3438 }
3439 
3440 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3441 					struct mlxsw_sp_nexthop *nh)
3442 {
3443 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3444 }
3445 
3446 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3447 				  struct mlxsw_sp_nexthop_group *nh_grp,
3448 				  struct mlxsw_sp_nexthop *nh,
3449 				  struct fib_nh *fib_nh)
3450 {
3451 	struct net_device *dev = fib_nh->nh_dev;
3452 	struct in_device *in_dev;
3453 	int err;
3454 
3455 	nh->nh_grp = nh_grp;
3456 	nh->key.fib_nh = fib_nh;
3457 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3458 	nh->nh_weight = fib_nh->nh_weight;
3459 #else
3460 	nh->nh_weight = 1;
3461 #endif
3462 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3463 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3464 	if (err)
3465 		return err;
3466 
3467 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3468 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3469 
3470 	if (!dev)
3471 		return 0;
3472 
3473 	in_dev = __in_dev_get_rtnl(dev);
3474 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3475 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3476 		return 0;
3477 
3478 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3479 	if (err)
3480 		goto err_nexthop_neigh_init;
3481 
3482 	return 0;
3483 
3484 err_nexthop_neigh_init:
3485 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3486 	return err;
3487 }
3488 
3489 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3490 				   struct mlxsw_sp_nexthop *nh)
3491 {
3492 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3493 	list_del(&nh->router_list_node);
3494 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3495 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3496 }
3497 
3498 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3499 				    unsigned long event, struct fib_nh *fib_nh)
3500 {
3501 	struct mlxsw_sp_nexthop_key key;
3502 	struct mlxsw_sp_nexthop *nh;
3503 
3504 	if (mlxsw_sp->router->aborted)
3505 		return;
3506 
3507 	key.fib_nh = fib_nh;
3508 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3509 	if (WARN_ON_ONCE(!nh))
3510 		return;
3511 
3512 	switch (event) {
3513 	case FIB_EVENT_NH_ADD:
3514 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3515 		break;
3516 	case FIB_EVENT_NH_DEL:
3517 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3518 		break;
3519 	}
3520 
3521 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3522 }
3523 
3524 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3525 					struct mlxsw_sp_rif *rif)
3526 {
3527 	struct mlxsw_sp_nexthop *nh;
3528 	bool removing;
3529 
3530 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3531 		switch (nh->type) {
3532 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3533 			removing = false;
3534 			break;
3535 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3536 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3537 			break;
3538 		default:
3539 			WARN_ON(1);
3540 			continue;
3541 		}
3542 
3543 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3544 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3545 	}
3546 }
3547 
3548 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3549 					   struct mlxsw_sp_rif *rif)
3550 {
3551 	struct mlxsw_sp_nexthop *nh, *tmp;
3552 
3553 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3554 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3555 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3556 	}
3557 }
3558 
3559 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3560 				   const struct fib_info *fi)
3561 {
3562 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3563 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3564 }
3565 
3566 static struct mlxsw_sp_nexthop_group *
3567 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3568 {
3569 	struct mlxsw_sp_nexthop_group *nh_grp;
3570 	struct mlxsw_sp_nexthop *nh;
3571 	struct fib_nh *fib_nh;
3572 	size_t alloc_size;
3573 	int i;
3574 	int err;
3575 
3576 	alloc_size = sizeof(*nh_grp) +
3577 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3578 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3579 	if (!nh_grp)
3580 		return ERR_PTR(-ENOMEM);
3581 	nh_grp->priv = fi;
3582 	INIT_LIST_HEAD(&nh_grp->fib_list);
3583 	nh_grp->neigh_tbl = &arp_tbl;
3584 
3585 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3586 	nh_grp->count = fi->fib_nhs;
3587 	fib_info_hold(fi);
3588 	for (i = 0; i < nh_grp->count; i++) {
3589 		nh = &nh_grp->nexthops[i];
3590 		fib_nh = &fi->fib_nh[i];
3591 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3592 		if (err)
3593 			goto err_nexthop4_init;
3594 	}
3595 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3596 	if (err)
3597 		goto err_nexthop_group_insert;
3598 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3599 	return nh_grp;
3600 
3601 err_nexthop_group_insert:
3602 err_nexthop4_init:
3603 	for (i--; i >= 0; i--) {
3604 		nh = &nh_grp->nexthops[i];
3605 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3606 	}
3607 	fib_info_put(fi);
3608 	kfree(nh_grp);
3609 	return ERR_PTR(err);
3610 }
3611 
3612 static void
3613 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3614 				struct mlxsw_sp_nexthop_group *nh_grp)
3615 {
3616 	struct mlxsw_sp_nexthop *nh;
3617 	int i;
3618 
3619 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3620 	for (i = 0; i < nh_grp->count; i++) {
3621 		nh = &nh_grp->nexthops[i];
3622 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3623 	}
3624 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3625 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3626 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3627 	kfree(nh_grp);
3628 }
3629 
3630 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3631 				       struct mlxsw_sp_fib_entry *fib_entry,
3632 				       struct fib_info *fi)
3633 {
3634 	struct mlxsw_sp_nexthop_group *nh_grp;
3635 
3636 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3637 	if (!nh_grp) {
3638 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3639 		if (IS_ERR(nh_grp))
3640 			return PTR_ERR(nh_grp);
3641 	}
3642 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3643 	fib_entry->nh_group = nh_grp;
3644 	return 0;
3645 }
3646 
3647 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3648 					struct mlxsw_sp_fib_entry *fib_entry)
3649 {
3650 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3651 
3652 	list_del(&fib_entry->nexthop_group_node);
3653 	if (!list_empty(&nh_grp->fib_list))
3654 		return;
3655 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3656 }
3657 
3658 static bool
3659 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3660 {
3661 	struct mlxsw_sp_fib4_entry *fib4_entry;
3662 
3663 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3664 				  common);
3665 	return !fib4_entry->tos;
3666 }
3667 
3668 static bool
3669 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3670 {
3671 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3672 
3673 	switch (fib_entry->fib_node->fib->proto) {
3674 	case MLXSW_SP_L3_PROTO_IPV4:
3675 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3676 			return false;
3677 		break;
3678 	case MLXSW_SP_L3_PROTO_IPV6:
3679 		break;
3680 	}
3681 
3682 	switch (fib_entry->type) {
3683 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3684 		return !!nh_group->adj_index_valid;
3685 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3686 		return !!nh_group->nh_rif;
3687 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3688 		return true;
3689 	default:
3690 		return false;
3691 	}
3692 }
3693 
3694 static struct mlxsw_sp_nexthop *
3695 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3696 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3697 {
3698 	int i;
3699 
3700 	for (i = 0; i < nh_grp->count; i++) {
3701 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3702 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3703 
3704 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3705 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3706 				    &rt->rt6i_gateway))
3707 			return nh;
3708 		continue;
3709 	}
3710 
3711 	return NULL;
3712 }
3713 
3714 static void
3715 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3716 {
3717 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3718 	int i;
3719 
3720 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3721 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3722 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3723 		return;
3724 	}
3725 
3726 	for (i = 0; i < nh_grp->count; i++) {
3727 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3728 
3729 		if (nh->offloaded)
3730 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3731 		else
3732 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3733 	}
3734 }
3735 
3736 static void
3737 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3738 {
3739 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3740 	int i;
3741 
3742 	for (i = 0; i < nh_grp->count; i++) {
3743 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3744 
3745 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3746 	}
3747 }
3748 
3749 static void
3750 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3751 {
3752 	struct mlxsw_sp_fib6_entry *fib6_entry;
3753 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3754 
3755 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3756 				  common);
3757 
3758 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3759 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3760 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3761 		return;
3762 	}
3763 
3764 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3765 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3766 		struct mlxsw_sp_nexthop *nh;
3767 
3768 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3769 		if (nh && nh->offloaded)
3770 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3771 		else
3772 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3773 	}
3774 }
3775 
3776 static void
3777 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3778 {
3779 	struct mlxsw_sp_fib6_entry *fib6_entry;
3780 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3781 
3782 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3783 				  common);
3784 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3785 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3786 
3787 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3788 	}
3789 }
3790 
3791 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3792 {
3793 	switch (fib_entry->fib_node->fib->proto) {
3794 	case MLXSW_SP_L3_PROTO_IPV4:
3795 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3796 		break;
3797 	case MLXSW_SP_L3_PROTO_IPV6:
3798 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3799 		break;
3800 	}
3801 }
3802 
3803 static void
3804 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3805 {
3806 	switch (fib_entry->fib_node->fib->proto) {
3807 	case MLXSW_SP_L3_PROTO_IPV4:
3808 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3809 		break;
3810 	case MLXSW_SP_L3_PROTO_IPV6:
3811 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3812 		break;
3813 	}
3814 }
3815 
3816 static void
3817 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3818 				   enum mlxsw_reg_ralue_op op, int err)
3819 {
3820 	switch (op) {
3821 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3822 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3823 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3824 		if (err)
3825 			return;
3826 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3827 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3828 		else
3829 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3830 		return;
3831 	default:
3832 		return;
3833 	}
3834 }
3835 
3836 static void
3837 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3838 			      const struct mlxsw_sp_fib_entry *fib_entry,
3839 			      enum mlxsw_reg_ralue_op op)
3840 {
3841 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3842 	enum mlxsw_reg_ralxx_protocol proto;
3843 	u32 *p_dip;
3844 
3845 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3846 
3847 	switch (fib->proto) {
3848 	case MLXSW_SP_L3_PROTO_IPV4:
3849 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3850 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3851 				      fib_entry->fib_node->key.prefix_len,
3852 				      *p_dip);
3853 		break;
3854 	case MLXSW_SP_L3_PROTO_IPV6:
3855 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3856 				      fib_entry->fib_node->key.prefix_len,
3857 				      fib_entry->fib_node->key.addr);
3858 		break;
3859 	}
3860 }
3861 
3862 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3863 					struct mlxsw_sp_fib_entry *fib_entry,
3864 					enum mlxsw_reg_ralue_op op)
3865 {
3866 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3867 	enum mlxsw_reg_ralue_trap_action trap_action;
3868 	u16 trap_id = 0;
3869 	u32 adjacency_index = 0;
3870 	u16 ecmp_size = 0;
3871 
3872 	/* In case the nexthop group adjacency index is valid, use it
3873 	 * with provided ECMP size. Otherwise, setup trap and pass
3874 	 * traffic to kernel.
3875 	 */
3876 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3877 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3878 		adjacency_index = fib_entry->nh_group->adj_index;
3879 		ecmp_size = fib_entry->nh_group->ecmp_size;
3880 	} else {
3881 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3882 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3883 	}
3884 
3885 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3886 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3887 					adjacency_index, ecmp_size);
3888 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3889 }
3890 
3891 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3892 				       struct mlxsw_sp_fib_entry *fib_entry,
3893 				       enum mlxsw_reg_ralue_op op)
3894 {
3895 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3896 	enum mlxsw_reg_ralue_trap_action trap_action;
3897 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3898 	u16 trap_id = 0;
3899 	u16 rif_index = 0;
3900 
3901 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3902 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3903 		rif_index = rif->rif_index;
3904 	} else {
3905 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3906 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3907 	}
3908 
3909 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3910 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3911 				       rif_index);
3912 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3913 }
3914 
3915 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3916 				      struct mlxsw_sp_fib_entry *fib_entry,
3917 				      enum mlxsw_reg_ralue_op op)
3918 {
3919 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3920 
3921 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3922 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3923 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3924 }
3925 
3926 static int
3927 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3928 				 struct mlxsw_sp_fib_entry *fib_entry,
3929 				 enum mlxsw_reg_ralue_op op)
3930 {
3931 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3932 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3933 
3934 	if (WARN_ON(!ipip_entry))
3935 		return -EINVAL;
3936 
3937 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3938 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3939 				      fib_entry->decap.tunnel_index);
3940 }
3941 
3942 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3943 				   struct mlxsw_sp_fib_entry *fib_entry,
3944 				   enum mlxsw_reg_ralue_op op)
3945 {
3946 	switch (fib_entry->type) {
3947 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3948 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3949 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3950 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3951 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3952 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3953 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3954 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3955 							fib_entry, op);
3956 	}
3957 	return -EINVAL;
3958 }
3959 
3960 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3961 				 struct mlxsw_sp_fib_entry *fib_entry,
3962 				 enum mlxsw_reg_ralue_op op)
3963 {
3964 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3965 
3966 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3967 
3968 	return err;
3969 }
3970 
3971 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3972 				     struct mlxsw_sp_fib_entry *fib_entry)
3973 {
3974 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3975 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
3976 }
3977 
3978 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
3979 				  struct mlxsw_sp_fib_entry *fib_entry)
3980 {
3981 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3982 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
3983 }
3984 
3985 static int
3986 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
3987 			     const struct fib_entry_notifier_info *fen_info,
3988 			     struct mlxsw_sp_fib_entry *fib_entry)
3989 {
3990 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
3991 	struct net_device *dev = fen_info->fi->fib_dev;
3992 	struct mlxsw_sp_ipip_entry *ipip_entry;
3993 	struct fib_info *fi = fen_info->fi;
3994 
3995 	switch (fen_info->type) {
3996 	case RTN_LOCAL:
3997 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
3998 						 MLXSW_SP_L3_PROTO_IPV4, dip);
3999 		if (ipip_entry) {
4000 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4001 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4002 							     fib_entry,
4003 							     ipip_entry);
4004 		}
4005 		/* fall through */
4006 	case RTN_BROADCAST:
4007 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4008 		return 0;
4009 	case RTN_UNREACHABLE: /* fall through */
4010 	case RTN_BLACKHOLE: /* fall through */
4011 	case RTN_PROHIBIT:
4012 		/* Packets hitting these routes need to be trapped, but
4013 		 * can do so with a lower priority than packets directed
4014 		 * at the host, so use action type local instead of trap.
4015 		 */
4016 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4017 		return 0;
4018 	case RTN_UNICAST:
4019 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4020 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4021 		else
4022 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4023 		return 0;
4024 	default:
4025 		return -EINVAL;
4026 	}
4027 }
4028 
4029 static struct mlxsw_sp_fib4_entry *
4030 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4031 			   struct mlxsw_sp_fib_node *fib_node,
4032 			   const struct fib_entry_notifier_info *fen_info)
4033 {
4034 	struct mlxsw_sp_fib4_entry *fib4_entry;
4035 	struct mlxsw_sp_fib_entry *fib_entry;
4036 	int err;
4037 
4038 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4039 	if (!fib4_entry)
4040 		return ERR_PTR(-ENOMEM);
4041 	fib_entry = &fib4_entry->common;
4042 
4043 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4044 	if (err)
4045 		goto err_fib4_entry_type_set;
4046 
4047 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4048 	if (err)
4049 		goto err_nexthop4_group_get;
4050 
4051 	fib4_entry->prio = fen_info->fi->fib_priority;
4052 	fib4_entry->tb_id = fen_info->tb_id;
4053 	fib4_entry->type = fen_info->type;
4054 	fib4_entry->tos = fen_info->tos;
4055 
4056 	fib_entry->fib_node = fib_node;
4057 
4058 	return fib4_entry;
4059 
4060 err_nexthop4_group_get:
4061 err_fib4_entry_type_set:
4062 	kfree(fib4_entry);
4063 	return ERR_PTR(err);
4064 }
4065 
4066 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4067 					struct mlxsw_sp_fib4_entry *fib4_entry)
4068 {
4069 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4070 	kfree(fib4_entry);
4071 }
4072 
4073 static struct mlxsw_sp_fib4_entry *
4074 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4075 			   const struct fib_entry_notifier_info *fen_info)
4076 {
4077 	struct mlxsw_sp_fib4_entry *fib4_entry;
4078 	struct mlxsw_sp_fib_node *fib_node;
4079 	struct mlxsw_sp_fib *fib;
4080 	struct mlxsw_sp_vr *vr;
4081 
4082 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4083 	if (!vr)
4084 		return NULL;
4085 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4086 
4087 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4088 					    sizeof(fen_info->dst),
4089 					    fen_info->dst_len);
4090 	if (!fib_node)
4091 		return NULL;
4092 
4093 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4094 		if (fib4_entry->tb_id == fen_info->tb_id &&
4095 		    fib4_entry->tos == fen_info->tos &&
4096 		    fib4_entry->type == fen_info->type &&
4097 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4098 		    fen_info->fi) {
4099 			return fib4_entry;
4100 		}
4101 	}
4102 
4103 	return NULL;
4104 }
4105 
4106 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4107 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4108 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4109 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4110 	.automatic_shrinking = true,
4111 };
4112 
4113 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4114 				    struct mlxsw_sp_fib_node *fib_node)
4115 {
4116 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4117 				      mlxsw_sp_fib_ht_params);
4118 }
4119 
4120 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4121 				     struct mlxsw_sp_fib_node *fib_node)
4122 {
4123 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4124 			       mlxsw_sp_fib_ht_params);
4125 }
4126 
4127 static struct mlxsw_sp_fib_node *
4128 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4129 			 size_t addr_len, unsigned char prefix_len)
4130 {
4131 	struct mlxsw_sp_fib_key key;
4132 
4133 	memset(&key, 0, sizeof(key));
4134 	memcpy(key.addr, addr, addr_len);
4135 	key.prefix_len = prefix_len;
4136 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4137 }
4138 
4139 static struct mlxsw_sp_fib_node *
4140 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4141 			 size_t addr_len, unsigned char prefix_len)
4142 {
4143 	struct mlxsw_sp_fib_node *fib_node;
4144 
4145 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4146 	if (!fib_node)
4147 		return NULL;
4148 
4149 	INIT_LIST_HEAD(&fib_node->entry_list);
4150 	list_add(&fib_node->list, &fib->node_list);
4151 	memcpy(fib_node->key.addr, addr, addr_len);
4152 	fib_node->key.prefix_len = prefix_len;
4153 
4154 	return fib_node;
4155 }
4156 
4157 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4158 {
4159 	list_del(&fib_node->list);
4160 	WARN_ON(!list_empty(&fib_node->entry_list));
4161 	kfree(fib_node);
4162 }
4163 
4164 static bool
4165 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4166 				 const struct mlxsw_sp_fib_entry *fib_entry)
4167 {
4168 	return list_first_entry(&fib_node->entry_list,
4169 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4170 }
4171 
4172 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4173 				      struct mlxsw_sp_fib *fib,
4174 				      struct mlxsw_sp_fib_node *fib_node)
4175 {
4176 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
4177 	struct mlxsw_sp_lpm_tree *lpm_tree;
4178 	int err;
4179 
4180 	/* Since the tree is shared between all virtual routers we must
4181 	 * make sure it contains all the required prefix lengths. This
4182 	 * can be computed by either adding the new prefix length to the
4183 	 * existing prefix usage of a bound tree, or by aggregating the
4184 	 * prefix lengths across all virtual routers and adding the new
4185 	 * one as well.
4186 	 */
4187 	if (fib->lpm_tree)
4188 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
4189 					  &fib->lpm_tree->prefix_usage);
4190 	else
4191 		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
4192 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4193 
4194 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4195 					 fib->proto);
4196 	if (IS_ERR(lpm_tree))
4197 		return PTR_ERR(lpm_tree);
4198 
4199 	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
4200 		return 0;
4201 
4202 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4203 	if (err)
4204 		return err;
4205 
4206 	return 0;
4207 }
4208 
4209 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4210 					 struct mlxsw_sp_fib *fib)
4211 {
4212 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
4213 		return;
4214 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
4215 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
4216 	fib->lpm_tree = NULL;
4217 }
4218 
4219 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
4220 {
4221 	unsigned char prefix_len = fib_node->key.prefix_len;
4222 	struct mlxsw_sp_fib *fib = fib_node->fib;
4223 
4224 	if (fib->prefix_ref_count[prefix_len]++ == 0)
4225 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
4226 }
4227 
4228 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
4229 {
4230 	unsigned char prefix_len = fib_node->key.prefix_len;
4231 	struct mlxsw_sp_fib *fib = fib_node->fib;
4232 
4233 	if (--fib->prefix_ref_count[prefix_len] == 0)
4234 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
4235 }
4236 
4237 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4238 				  struct mlxsw_sp_fib_node *fib_node,
4239 				  struct mlxsw_sp_fib *fib)
4240 {
4241 	int err;
4242 
4243 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4244 	if (err)
4245 		return err;
4246 	fib_node->fib = fib;
4247 
4248 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
4249 	if (err)
4250 		goto err_fib_lpm_tree_link;
4251 
4252 	mlxsw_sp_fib_node_prefix_inc(fib_node);
4253 
4254 	return 0;
4255 
4256 err_fib_lpm_tree_link:
4257 	fib_node->fib = NULL;
4258 	mlxsw_sp_fib_node_remove(fib, fib_node);
4259 	return err;
4260 }
4261 
4262 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4263 				   struct mlxsw_sp_fib_node *fib_node)
4264 {
4265 	struct mlxsw_sp_fib *fib = fib_node->fib;
4266 
4267 	mlxsw_sp_fib_node_prefix_dec(fib_node);
4268 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
4269 	fib_node->fib = NULL;
4270 	mlxsw_sp_fib_node_remove(fib, fib_node);
4271 }
4272 
4273 static struct mlxsw_sp_fib_node *
4274 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4275 		      size_t addr_len, unsigned char prefix_len,
4276 		      enum mlxsw_sp_l3proto proto)
4277 {
4278 	struct mlxsw_sp_fib_node *fib_node;
4279 	struct mlxsw_sp_fib *fib;
4280 	struct mlxsw_sp_vr *vr;
4281 	int err;
4282 
4283 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4284 	if (IS_ERR(vr))
4285 		return ERR_CAST(vr);
4286 	fib = mlxsw_sp_vr_fib(vr, proto);
4287 
4288 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4289 	if (fib_node)
4290 		return fib_node;
4291 
4292 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4293 	if (!fib_node) {
4294 		err = -ENOMEM;
4295 		goto err_fib_node_create;
4296 	}
4297 
4298 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4299 	if (err)
4300 		goto err_fib_node_init;
4301 
4302 	return fib_node;
4303 
4304 err_fib_node_init:
4305 	mlxsw_sp_fib_node_destroy(fib_node);
4306 err_fib_node_create:
4307 	mlxsw_sp_vr_put(vr);
4308 	return ERR_PTR(err);
4309 }
4310 
4311 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4312 				  struct mlxsw_sp_fib_node *fib_node)
4313 {
4314 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4315 
4316 	if (!list_empty(&fib_node->entry_list))
4317 		return;
4318 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4319 	mlxsw_sp_fib_node_destroy(fib_node);
4320 	mlxsw_sp_vr_put(vr);
4321 }
4322 
4323 static struct mlxsw_sp_fib4_entry *
4324 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4325 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4326 {
4327 	struct mlxsw_sp_fib4_entry *fib4_entry;
4328 
4329 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4330 		if (fib4_entry->tb_id > new4_entry->tb_id)
4331 			continue;
4332 		if (fib4_entry->tb_id != new4_entry->tb_id)
4333 			break;
4334 		if (fib4_entry->tos > new4_entry->tos)
4335 			continue;
4336 		if (fib4_entry->prio >= new4_entry->prio ||
4337 		    fib4_entry->tos < new4_entry->tos)
4338 			return fib4_entry;
4339 	}
4340 
4341 	return NULL;
4342 }
4343 
4344 static int
4345 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4346 			       struct mlxsw_sp_fib4_entry *new4_entry)
4347 {
4348 	struct mlxsw_sp_fib_node *fib_node;
4349 
4350 	if (WARN_ON(!fib4_entry))
4351 		return -EINVAL;
4352 
4353 	fib_node = fib4_entry->common.fib_node;
4354 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4355 				 common.list) {
4356 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4357 		    fib4_entry->tos != new4_entry->tos ||
4358 		    fib4_entry->prio != new4_entry->prio)
4359 			break;
4360 	}
4361 
4362 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4363 	return 0;
4364 }
4365 
4366 static int
4367 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4368 			       bool replace, bool append)
4369 {
4370 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4371 	struct mlxsw_sp_fib4_entry *fib4_entry;
4372 
4373 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4374 
4375 	if (append)
4376 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4377 	if (replace && WARN_ON(!fib4_entry))
4378 		return -EINVAL;
4379 
4380 	/* Insert new entry before replaced one, so that we can later
4381 	 * remove the second.
4382 	 */
4383 	if (fib4_entry) {
4384 		list_add_tail(&new4_entry->common.list,
4385 			      &fib4_entry->common.list);
4386 	} else {
4387 		struct mlxsw_sp_fib4_entry *last;
4388 
4389 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4390 			if (new4_entry->tb_id > last->tb_id)
4391 				break;
4392 			fib4_entry = last;
4393 		}
4394 
4395 		if (fib4_entry)
4396 			list_add(&new4_entry->common.list,
4397 				 &fib4_entry->common.list);
4398 		else
4399 			list_add(&new4_entry->common.list,
4400 				 &fib_node->entry_list);
4401 	}
4402 
4403 	return 0;
4404 }
4405 
4406 static void
4407 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4408 {
4409 	list_del(&fib4_entry->common.list);
4410 }
4411 
4412 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4413 				       struct mlxsw_sp_fib_entry *fib_entry)
4414 {
4415 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4416 
4417 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4418 		return 0;
4419 
4420 	/* To prevent packet loss, overwrite the previously offloaded
4421 	 * entry.
4422 	 */
4423 	if (!list_is_singular(&fib_node->entry_list)) {
4424 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4425 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4426 
4427 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4428 	}
4429 
4430 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4431 }
4432 
4433 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4434 					struct mlxsw_sp_fib_entry *fib_entry)
4435 {
4436 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4437 
4438 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4439 		return;
4440 
4441 	/* Promote the next entry by overwriting the deleted entry */
4442 	if (!list_is_singular(&fib_node->entry_list)) {
4443 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4444 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4445 
4446 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4447 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4448 		return;
4449 	}
4450 
4451 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4452 }
4453 
4454 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4455 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4456 					 bool replace, bool append)
4457 {
4458 	int err;
4459 
4460 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4461 	if (err)
4462 		return err;
4463 
4464 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4465 	if (err)
4466 		goto err_fib_node_entry_add;
4467 
4468 	return 0;
4469 
4470 err_fib_node_entry_add:
4471 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4472 	return err;
4473 }
4474 
4475 static void
4476 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4477 				struct mlxsw_sp_fib4_entry *fib4_entry)
4478 {
4479 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4480 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4481 
4482 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4483 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4484 }
4485 
4486 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4487 					struct mlxsw_sp_fib4_entry *fib4_entry,
4488 					bool replace)
4489 {
4490 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4491 	struct mlxsw_sp_fib4_entry *replaced;
4492 
4493 	if (!replace)
4494 		return;
4495 
4496 	/* We inserted the new entry before replaced one */
4497 	replaced = list_next_entry(fib4_entry, common.list);
4498 
4499 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4500 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4501 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4502 }
4503 
4504 static int
4505 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4506 			 const struct fib_entry_notifier_info *fen_info,
4507 			 bool replace, bool append)
4508 {
4509 	struct mlxsw_sp_fib4_entry *fib4_entry;
4510 	struct mlxsw_sp_fib_node *fib_node;
4511 	int err;
4512 
4513 	if (mlxsw_sp->router->aborted)
4514 		return 0;
4515 
4516 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4517 					 &fen_info->dst, sizeof(fen_info->dst),
4518 					 fen_info->dst_len,
4519 					 MLXSW_SP_L3_PROTO_IPV4);
4520 	if (IS_ERR(fib_node)) {
4521 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4522 		return PTR_ERR(fib_node);
4523 	}
4524 
4525 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4526 	if (IS_ERR(fib4_entry)) {
4527 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4528 		err = PTR_ERR(fib4_entry);
4529 		goto err_fib4_entry_create;
4530 	}
4531 
4532 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4533 					    append);
4534 	if (err) {
4535 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4536 		goto err_fib4_node_entry_link;
4537 	}
4538 
4539 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4540 
4541 	return 0;
4542 
4543 err_fib4_node_entry_link:
4544 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4545 err_fib4_entry_create:
4546 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4547 	return err;
4548 }
4549 
4550 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4551 				     struct fib_entry_notifier_info *fen_info)
4552 {
4553 	struct mlxsw_sp_fib4_entry *fib4_entry;
4554 	struct mlxsw_sp_fib_node *fib_node;
4555 
4556 	if (mlxsw_sp->router->aborted)
4557 		return;
4558 
4559 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4560 	if (WARN_ON(!fib4_entry))
4561 		return;
4562 	fib_node = fib4_entry->common.fib_node;
4563 
4564 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4565 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4566 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4567 }
4568 
4569 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4570 {
4571 	/* Packets with link-local destination IP arriving to the router
4572 	 * are trapped to the CPU, so no need to program specific routes
4573 	 * for them.
4574 	 */
4575 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4576 		return true;
4577 
4578 	/* Multicast routes aren't supported, so ignore them. Neighbour
4579 	 * Discovery packets are specifically trapped.
4580 	 */
4581 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4582 		return true;
4583 
4584 	/* Cloned routes are irrelevant in the forwarding path. */
4585 	if (rt->rt6i_flags & RTF_CACHE)
4586 		return true;
4587 
4588 	return false;
4589 }
4590 
4591 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4592 {
4593 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4594 
4595 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4596 	if (!mlxsw_sp_rt6)
4597 		return ERR_PTR(-ENOMEM);
4598 
4599 	/* In case of route replace, replaced route is deleted with
4600 	 * no notification. Take reference to prevent accessing freed
4601 	 * memory.
4602 	 */
4603 	mlxsw_sp_rt6->rt = rt;
4604 	rt6_hold(rt);
4605 
4606 	return mlxsw_sp_rt6;
4607 }
4608 
4609 #if IS_ENABLED(CONFIG_IPV6)
4610 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4611 {
4612 	rt6_release(rt);
4613 }
4614 #else
4615 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4616 {
4617 }
4618 #endif
4619 
4620 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4621 {
4622 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4623 	kfree(mlxsw_sp_rt6);
4624 }
4625 
4626 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4627 {
4628 	/* RTF_CACHE routes are ignored */
4629 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4630 }
4631 
4632 static struct rt6_info *
4633 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4634 {
4635 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4636 				list)->rt;
4637 }
4638 
4639 static struct mlxsw_sp_fib6_entry *
4640 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4641 				 const struct rt6_info *nrt, bool replace)
4642 {
4643 	struct mlxsw_sp_fib6_entry *fib6_entry;
4644 
4645 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4646 		return NULL;
4647 
4648 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4649 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4650 
4651 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4652 		 * virtual router.
4653 		 */
4654 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4655 			continue;
4656 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4657 			break;
4658 		if (rt->rt6i_metric < nrt->rt6i_metric)
4659 			continue;
4660 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4661 		    mlxsw_sp_fib6_rt_can_mp(rt))
4662 			return fib6_entry;
4663 		if (rt->rt6i_metric > nrt->rt6i_metric)
4664 			break;
4665 	}
4666 
4667 	return NULL;
4668 }
4669 
4670 static struct mlxsw_sp_rt6 *
4671 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4672 			    const struct rt6_info *rt)
4673 {
4674 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4675 
4676 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4677 		if (mlxsw_sp_rt6->rt == rt)
4678 			return mlxsw_sp_rt6;
4679 	}
4680 
4681 	return NULL;
4682 }
4683 
4684 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4685 					const struct rt6_info *rt,
4686 					enum mlxsw_sp_ipip_type *ret)
4687 {
4688 	return rt->dst.dev &&
4689 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4690 }
4691 
4692 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4693 				       struct mlxsw_sp_nexthop_group *nh_grp,
4694 				       struct mlxsw_sp_nexthop *nh,
4695 				       const struct rt6_info *rt)
4696 {
4697 	struct mlxsw_sp_router *router = mlxsw_sp->router;
4698 	struct net_device *dev = rt->dst.dev;
4699 	enum mlxsw_sp_ipip_type ipipt;
4700 	struct mlxsw_sp_rif *rif;
4701 	int err;
4702 
4703 	if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
4704 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
4705 						     MLXSW_SP_L3_PROTO_IPV6)) {
4706 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4707 		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
4708 		if (err)
4709 			return err;
4710 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
4711 		return 0;
4712 	}
4713 
4714 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4715 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4716 	if (!rif)
4717 		return 0;
4718 	mlxsw_sp_nexthop_rif_init(nh, rif);
4719 
4720 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4721 	if (err)
4722 		goto err_nexthop_neigh_init;
4723 
4724 	return 0;
4725 
4726 err_nexthop_neigh_init:
4727 	mlxsw_sp_nexthop_rif_fini(nh);
4728 	return err;
4729 }
4730 
4731 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4732 					struct mlxsw_sp_nexthop *nh)
4733 {
4734 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4735 }
4736 
4737 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4738 				  struct mlxsw_sp_nexthop_group *nh_grp,
4739 				  struct mlxsw_sp_nexthop *nh,
4740 				  const struct rt6_info *rt)
4741 {
4742 	struct net_device *dev = rt->dst.dev;
4743 
4744 	nh->nh_grp = nh_grp;
4745 	nh->nh_weight = 1;
4746 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4747 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4748 
4749 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4750 
4751 	if (!dev)
4752 		return 0;
4753 	nh->ifindex = dev->ifindex;
4754 
4755 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4756 }
4757 
4758 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4759 				   struct mlxsw_sp_nexthop *nh)
4760 {
4761 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4762 	list_del(&nh->router_list_node);
4763 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4764 }
4765 
4766 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4767 				    const struct rt6_info *rt)
4768 {
4769 	return rt->rt6i_flags & RTF_GATEWAY ||
4770 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4771 }
4772 
4773 static struct mlxsw_sp_nexthop_group *
4774 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4775 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4776 {
4777 	struct mlxsw_sp_nexthop_group *nh_grp;
4778 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4779 	struct mlxsw_sp_nexthop *nh;
4780 	size_t alloc_size;
4781 	int i = 0;
4782 	int err;
4783 
4784 	alloc_size = sizeof(*nh_grp) +
4785 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4786 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4787 	if (!nh_grp)
4788 		return ERR_PTR(-ENOMEM);
4789 	INIT_LIST_HEAD(&nh_grp->fib_list);
4790 #if IS_ENABLED(CONFIG_IPV6)
4791 	nh_grp->neigh_tbl = &nd_tbl;
4792 #endif
4793 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4794 					struct mlxsw_sp_rt6, list);
4795 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4796 	nh_grp->count = fib6_entry->nrt6;
4797 	for (i = 0; i < nh_grp->count; i++) {
4798 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4799 
4800 		nh = &nh_grp->nexthops[i];
4801 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4802 		if (err)
4803 			goto err_nexthop6_init;
4804 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4805 	}
4806 
4807 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4808 	if (err)
4809 		goto err_nexthop_group_insert;
4810 
4811 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4812 	return nh_grp;
4813 
4814 err_nexthop_group_insert:
4815 err_nexthop6_init:
4816 	for (i--; i >= 0; i--) {
4817 		nh = &nh_grp->nexthops[i];
4818 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4819 	}
4820 	kfree(nh_grp);
4821 	return ERR_PTR(err);
4822 }
4823 
4824 static void
4825 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4826 				struct mlxsw_sp_nexthop_group *nh_grp)
4827 {
4828 	struct mlxsw_sp_nexthop *nh;
4829 	int i = nh_grp->count;
4830 
4831 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4832 	for (i--; i >= 0; i--) {
4833 		nh = &nh_grp->nexthops[i];
4834 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4835 	}
4836 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4837 	WARN_ON(nh_grp->adj_index_valid);
4838 	kfree(nh_grp);
4839 }
4840 
4841 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4842 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4843 {
4844 	struct mlxsw_sp_nexthop_group *nh_grp;
4845 
4846 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4847 	if (!nh_grp) {
4848 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4849 		if (IS_ERR(nh_grp))
4850 			return PTR_ERR(nh_grp);
4851 	}
4852 
4853 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4854 		      &nh_grp->fib_list);
4855 	fib6_entry->common.nh_group = nh_grp;
4856 
4857 	return 0;
4858 }
4859 
4860 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4861 					struct mlxsw_sp_fib_entry *fib_entry)
4862 {
4863 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4864 
4865 	list_del(&fib_entry->nexthop_group_node);
4866 	if (!list_empty(&nh_grp->fib_list))
4867 		return;
4868 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4869 }
4870 
4871 static int
4872 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4873 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4874 {
4875 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4876 	int err;
4877 
4878 	fib6_entry->common.nh_group = NULL;
4879 	list_del(&fib6_entry->common.nexthop_group_node);
4880 
4881 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4882 	if (err)
4883 		goto err_nexthop6_group_get;
4884 
4885 	/* In case this entry is offloaded, then the adjacency index
4886 	 * currently associated with it in the device's table is that
4887 	 * of the old group. Start using the new one instead.
4888 	 */
4889 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4890 	if (err)
4891 		goto err_fib_node_entry_add;
4892 
4893 	if (list_empty(&old_nh_grp->fib_list))
4894 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4895 
4896 	return 0;
4897 
4898 err_fib_node_entry_add:
4899 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4900 err_nexthop6_group_get:
4901 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4902 		      &old_nh_grp->fib_list);
4903 	fib6_entry->common.nh_group = old_nh_grp;
4904 	return err;
4905 }
4906 
4907 static int
4908 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4909 				struct mlxsw_sp_fib6_entry *fib6_entry,
4910 				struct rt6_info *rt)
4911 {
4912 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4913 	int err;
4914 
4915 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4916 	if (IS_ERR(mlxsw_sp_rt6))
4917 		return PTR_ERR(mlxsw_sp_rt6);
4918 
4919 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4920 	fib6_entry->nrt6++;
4921 
4922 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4923 	if (err)
4924 		goto err_nexthop6_group_update;
4925 
4926 	return 0;
4927 
4928 err_nexthop6_group_update:
4929 	fib6_entry->nrt6--;
4930 	list_del(&mlxsw_sp_rt6->list);
4931 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4932 	return err;
4933 }
4934 
4935 static void
4936 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4937 				struct mlxsw_sp_fib6_entry *fib6_entry,
4938 				struct rt6_info *rt)
4939 {
4940 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4941 
4942 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4943 	if (WARN_ON(!mlxsw_sp_rt6))
4944 		return;
4945 
4946 	fib6_entry->nrt6--;
4947 	list_del(&mlxsw_sp_rt6->list);
4948 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4949 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4950 }
4951 
4952 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4953 					 struct mlxsw_sp_fib_entry *fib_entry,
4954 					 const struct rt6_info *rt)
4955 {
4956 	/* Packets hitting RTF_REJECT routes need to be discarded by the
4957 	 * stack. We can rely on their destination device not having a
4958 	 * RIF (it's the loopback device) and can thus use action type
4959 	 * local, which will cause them to be trapped with a lower
4960 	 * priority than packets that need to be locally received.
4961 	 */
4962 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4963 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4964 	else if (rt->rt6i_flags & RTF_REJECT)
4965 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4966 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4967 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4968 	else
4969 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4970 }
4971 
4972 static void
4973 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4974 {
4975 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4976 
4977 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
4978 				 list) {
4979 		fib6_entry->nrt6--;
4980 		list_del(&mlxsw_sp_rt6->list);
4981 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4982 	}
4983 }
4984 
4985 static struct mlxsw_sp_fib6_entry *
4986 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
4987 			   struct mlxsw_sp_fib_node *fib_node,
4988 			   struct rt6_info *rt)
4989 {
4990 	struct mlxsw_sp_fib6_entry *fib6_entry;
4991 	struct mlxsw_sp_fib_entry *fib_entry;
4992 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4993 	int err;
4994 
4995 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
4996 	if (!fib6_entry)
4997 		return ERR_PTR(-ENOMEM);
4998 	fib_entry = &fib6_entry->common;
4999 
5000 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5001 	if (IS_ERR(mlxsw_sp_rt6)) {
5002 		err = PTR_ERR(mlxsw_sp_rt6);
5003 		goto err_rt6_create;
5004 	}
5005 
5006 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5007 
5008 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5009 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5010 	fib6_entry->nrt6 = 1;
5011 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5012 	if (err)
5013 		goto err_nexthop6_group_get;
5014 
5015 	fib_entry->fib_node = fib_node;
5016 
5017 	return fib6_entry;
5018 
5019 err_nexthop6_group_get:
5020 	list_del(&mlxsw_sp_rt6->list);
5021 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5022 err_rt6_create:
5023 	kfree(fib6_entry);
5024 	return ERR_PTR(err);
5025 }
5026 
5027 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5028 					struct mlxsw_sp_fib6_entry *fib6_entry)
5029 {
5030 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5031 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5032 	WARN_ON(fib6_entry->nrt6);
5033 	kfree(fib6_entry);
5034 }
5035 
5036 static struct mlxsw_sp_fib6_entry *
5037 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5038 			      const struct rt6_info *nrt, bool replace)
5039 {
5040 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5041 
5042 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5043 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5044 
5045 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
5046 			continue;
5047 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
5048 			break;
5049 		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
5050 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5051 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5052 				return fib6_entry;
5053 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5054 				fallback = fallback ?: fib6_entry;
5055 		}
5056 		if (rt->rt6i_metric > nrt->rt6i_metric)
5057 			return fallback ?: fib6_entry;
5058 	}
5059 
5060 	return fallback;
5061 }
5062 
5063 static int
5064 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5065 			       bool replace)
5066 {
5067 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5068 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5069 	struct mlxsw_sp_fib6_entry *fib6_entry;
5070 
5071 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5072 
5073 	if (replace && WARN_ON(!fib6_entry))
5074 		return -EINVAL;
5075 
5076 	if (fib6_entry) {
5077 		list_add_tail(&new6_entry->common.list,
5078 			      &fib6_entry->common.list);
5079 	} else {
5080 		struct mlxsw_sp_fib6_entry *last;
5081 
5082 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5083 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5084 
5085 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
5086 				break;
5087 			fib6_entry = last;
5088 		}
5089 
5090 		if (fib6_entry)
5091 			list_add(&new6_entry->common.list,
5092 				 &fib6_entry->common.list);
5093 		else
5094 			list_add(&new6_entry->common.list,
5095 				 &fib_node->entry_list);
5096 	}
5097 
5098 	return 0;
5099 }
5100 
5101 static void
5102 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5103 {
5104 	list_del(&fib6_entry->common.list);
5105 }
5106 
5107 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5108 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5109 					 bool replace)
5110 {
5111 	int err;
5112 
5113 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5114 	if (err)
5115 		return err;
5116 
5117 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5118 	if (err)
5119 		goto err_fib_node_entry_add;
5120 
5121 	return 0;
5122 
5123 err_fib_node_entry_add:
5124 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5125 	return err;
5126 }
5127 
5128 static void
5129 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5130 				struct mlxsw_sp_fib6_entry *fib6_entry)
5131 {
5132 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5133 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5134 }
5135 
5136 static struct mlxsw_sp_fib6_entry *
5137 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5138 			   const struct rt6_info *rt)
5139 {
5140 	struct mlxsw_sp_fib6_entry *fib6_entry;
5141 	struct mlxsw_sp_fib_node *fib_node;
5142 	struct mlxsw_sp_fib *fib;
5143 	struct mlxsw_sp_vr *vr;
5144 
5145 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
5146 	if (!vr)
5147 		return NULL;
5148 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5149 
5150 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
5151 					    sizeof(rt->rt6i_dst.addr),
5152 					    rt->rt6i_dst.plen);
5153 	if (!fib_node)
5154 		return NULL;
5155 
5156 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5157 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5158 
5159 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
5160 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
5161 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5162 			return fib6_entry;
5163 	}
5164 
5165 	return NULL;
5166 }
5167 
5168 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5169 					struct mlxsw_sp_fib6_entry *fib6_entry,
5170 					bool replace)
5171 {
5172 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5173 	struct mlxsw_sp_fib6_entry *replaced;
5174 
5175 	if (!replace)
5176 		return;
5177 
5178 	replaced = list_next_entry(fib6_entry, common.list);
5179 
5180 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5181 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5182 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5183 }
5184 
5185 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5186 				    struct rt6_info *rt, bool replace)
5187 {
5188 	struct mlxsw_sp_fib6_entry *fib6_entry;
5189 	struct mlxsw_sp_fib_node *fib_node;
5190 	int err;
5191 
5192 	if (mlxsw_sp->router->aborted)
5193 		return 0;
5194 
5195 	if (rt->rt6i_src.plen)
5196 		return -EINVAL;
5197 
5198 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5199 		return 0;
5200 
5201 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
5202 					 &rt->rt6i_dst.addr,
5203 					 sizeof(rt->rt6i_dst.addr),
5204 					 rt->rt6i_dst.plen,
5205 					 MLXSW_SP_L3_PROTO_IPV6);
5206 	if (IS_ERR(fib_node))
5207 		return PTR_ERR(fib_node);
5208 
5209 	/* Before creating a new entry, try to append route to an existing
5210 	 * multipath entry.
5211 	 */
5212 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5213 	if (fib6_entry) {
5214 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5215 		if (err)
5216 			goto err_fib6_entry_nexthop_add;
5217 		return 0;
5218 	}
5219 
5220 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5221 	if (IS_ERR(fib6_entry)) {
5222 		err = PTR_ERR(fib6_entry);
5223 		goto err_fib6_entry_create;
5224 	}
5225 
5226 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5227 	if (err)
5228 		goto err_fib6_node_entry_link;
5229 
5230 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5231 
5232 	return 0;
5233 
5234 err_fib6_node_entry_link:
5235 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5236 err_fib6_entry_create:
5237 err_fib6_entry_nexthop_add:
5238 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5239 	return err;
5240 }
5241 
5242 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5243 				     struct rt6_info *rt)
5244 {
5245 	struct mlxsw_sp_fib6_entry *fib6_entry;
5246 	struct mlxsw_sp_fib_node *fib_node;
5247 
5248 	if (mlxsw_sp->router->aborted)
5249 		return;
5250 
5251 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5252 		return;
5253 
5254 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5255 	if (WARN_ON(!fib6_entry))
5256 		return;
5257 
5258 	/* If route is part of a multipath entry, but not the last one
5259 	 * removed, then only reduce its nexthop group.
5260 	 */
5261 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5262 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5263 		return;
5264 	}
5265 
5266 	fib_node = fib6_entry->common.fib_node;
5267 
5268 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5269 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5270 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5271 }
5272 
5273 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5274 					    enum mlxsw_reg_ralxx_protocol proto,
5275 					    u8 tree_id)
5276 {
5277 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5278 	char ralst_pl[MLXSW_REG_RALST_LEN];
5279 	int i, err;
5280 
5281 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5282 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5283 	if (err)
5284 		return err;
5285 
5286 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5287 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5288 	if (err)
5289 		return err;
5290 
5291 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5292 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5293 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5294 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5295 
5296 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5297 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5298 				      raltb_pl);
5299 		if (err)
5300 			return err;
5301 
5302 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5303 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5304 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5305 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5306 				      ralue_pl);
5307 		if (err)
5308 			return err;
5309 	}
5310 
5311 	return 0;
5312 }
5313 
5314 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5315 				     struct mfc_entry_notifier_info *men_info,
5316 				     bool replace)
5317 {
5318 	struct mlxsw_sp_vr *vr;
5319 
5320 	if (mlxsw_sp->router->aborted)
5321 		return 0;
5322 
5323 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5324 	if (IS_ERR(vr))
5325 		return PTR_ERR(vr);
5326 
5327 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5328 }
5329 
5330 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5331 				      struct mfc_entry_notifier_info *men_info)
5332 {
5333 	struct mlxsw_sp_vr *vr;
5334 
5335 	if (mlxsw_sp->router->aborted)
5336 		return;
5337 
5338 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5339 	if (WARN_ON(!vr))
5340 		return;
5341 
5342 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5343 	mlxsw_sp_vr_put(vr);
5344 }
5345 
5346 static int
5347 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5348 			      struct vif_entry_notifier_info *ven_info)
5349 {
5350 	struct mlxsw_sp_rif *rif;
5351 	struct mlxsw_sp_vr *vr;
5352 
5353 	if (mlxsw_sp->router->aborted)
5354 		return 0;
5355 
5356 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5357 	if (IS_ERR(vr))
5358 		return PTR_ERR(vr);
5359 
5360 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5361 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5362 				   ven_info->vif_index,
5363 				   ven_info->vif_flags, rif);
5364 }
5365 
5366 static void
5367 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5368 			      struct vif_entry_notifier_info *ven_info)
5369 {
5370 	struct mlxsw_sp_vr *vr;
5371 
5372 	if (mlxsw_sp->router->aborted)
5373 		return;
5374 
5375 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5376 	if (WARN_ON(!vr))
5377 		return;
5378 
5379 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5380 	mlxsw_sp_vr_put(vr);
5381 }
5382 
5383 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5384 {
5385 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5386 	int err;
5387 
5388 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5389 					       MLXSW_SP_LPM_TREE_MIN);
5390 	if (err)
5391 		return err;
5392 
5393 	/* The multicast router code does not need an abort trap as by default,
5394 	 * packets that don't match any routes are trapped to the CPU.
5395 	 */
5396 
5397 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5398 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5399 						MLXSW_SP_LPM_TREE_MIN + 1);
5400 }
5401 
5402 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5403 				     struct mlxsw_sp_fib_node *fib_node)
5404 {
5405 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5406 
5407 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5408 				 common.list) {
5409 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5410 
5411 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5412 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5413 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5414 		/* Break when entry list is empty and node was freed.
5415 		 * Otherwise, we'll access freed memory in the next
5416 		 * iteration.
5417 		 */
5418 		if (do_break)
5419 			break;
5420 	}
5421 }
5422 
5423 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5424 				     struct mlxsw_sp_fib_node *fib_node)
5425 {
5426 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5427 
5428 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5429 				 common.list) {
5430 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5431 
5432 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5433 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5434 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5435 		if (do_break)
5436 			break;
5437 	}
5438 }
5439 
5440 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5441 				    struct mlxsw_sp_fib_node *fib_node)
5442 {
5443 	switch (fib_node->fib->proto) {
5444 	case MLXSW_SP_L3_PROTO_IPV4:
5445 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5446 		break;
5447 	case MLXSW_SP_L3_PROTO_IPV6:
5448 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5449 		break;
5450 	}
5451 }
5452 
5453 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5454 				  struct mlxsw_sp_vr *vr,
5455 				  enum mlxsw_sp_l3proto proto)
5456 {
5457 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5458 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5459 
5460 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5461 		bool do_break = &tmp->list == &fib->node_list;
5462 
5463 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5464 		if (do_break)
5465 			break;
5466 	}
5467 }
5468 
5469 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5470 {
5471 	int i;
5472 
5473 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5474 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5475 
5476 		if (!mlxsw_sp_vr_is_used(vr))
5477 			continue;
5478 
5479 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5480 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5481 
5482 		/* If virtual router was only used for IPv4, then it's no
5483 		 * longer used.
5484 		 */
5485 		if (!mlxsw_sp_vr_is_used(vr))
5486 			continue;
5487 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5488 	}
5489 }
5490 
5491 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5492 {
5493 	int err;
5494 
5495 	if (mlxsw_sp->router->aborted)
5496 		return;
5497 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5498 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5499 	mlxsw_sp->router->aborted = true;
5500 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5501 	if (err)
5502 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5503 }
5504 
5505 struct mlxsw_sp_fib_event_work {
5506 	struct work_struct work;
5507 	union {
5508 		struct fib6_entry_notifier_info fen6_info;
5509 		struct fib_entry_notifier_info fen_info;
5510 		struct fib_rule_notifier_info fr_info;
5511 		struct fib_nh_notifier_info fnh_info;
5512 		struct mfc_entry_notifier_info men_info;
5513 		struct vif_entry_notifier_info ven_info;
5514 	};
5515 	struct mlxsw_sp *mlxsw_sp;
5516 	unsigned long event;
5517 };
5518 
5519 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5520 {
5521 	struct mlxsw_sp_fib_event_work *fib_work =
5522 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5523 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5524 	bool replace, append;
5525 	int err;
5526 
5527 	/* Protect internal structures from changes */
5528 	rtnl_lock();
5529 	switch (fib_work->event) {
5530 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5531 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5532 	case FIB_EVENT_ENTRY_ADD:
5533 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5534 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5535 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5536 					       replace, append);
5537 		if (err)
5538 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5539 		fib_info_put(fib_work->fen_info.fi);
5540 		break;
5541 	case FIB_EVENT_ENTRY_DEL:
5542 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5543 		fib_info_put(fib_work->fen_info.fi);
5544 		break;
5545 	case FIB_EVENT_RULE_ADD:
5546 		/* if we get here, a rule was added that we do not support.
5547 		 * just do the fib_abort
5548 		 */
5549 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5550 		break;
5551 	case FIB_EVENT_NH_ADD: /* fall through */
5552 	case FIB_EVENT_NH_DEL:
5553 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5554 					fib_work->fnh_info.fib_nh);
5555 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5556 		break;
5557 	}
5558 	rtnl_unlock();
5559 	kfree(fib_work);
5560 }
5561 
5562 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5563 {
5564 	struct mlxsw_sp_fib_event_work *fib_work =
5565 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5566 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5567 	bool replace;
5568 	int err;
5569 
5570 	rtnl_lock();
5571 	switch (fib_work->event) {
5572 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5573 	case FIB_EVENT_ENTRY_ADD:
5574 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5575 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5576 					       fib_work->fen6_info.rt, replace);
5577 		if (err)
5578 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5579 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5580 		break;
5581 	case FIB_EVENT_ENTRY_DEL:
5582 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5583 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5584 		break;
5585 	case FIB_EVENT_RULE_ADD:
5586 		/* if we get here, a rule was added that we do not support.
5587 		 * just do the fib_abort
5588 		 */
5589 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5590 		break;
5591 	}
5592 	rtnl_unlock();
5593 	kfree(fib_work);
5594 }
5595 
5596 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5597 {
5598 	struct mlxsw_sp_fib_event_work *fib_work =
5599 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5600 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5601 	bool replace;
5602 	int err;
5603 
5604 	rtnl_lock();
5605 	switch (fib_work->event) {
5606 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5607 	case FIB_EVENT_ENTRY_ADD:
5608 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5609 
5610 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5611 						replace);
5612 		if (err)
5613 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5614 		ipmr_cache_put(fib_work->men_info.mfc);
5615 		break;
5616 	case FIB_EVENT_ENTRY_DEL:
5617 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5618 		ipmr_cache_put(fib_work->men_info.mfc);
5619 		break;
5620 	case FIB_EVENT_VIF_ADD:
5621 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5622 						    &fib_work->ven_info);
5623 		if (err)
5624 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5625 		dev_put(fib_work->ven_info.dev);
5626 		break;
5627 	case FIB_EVENT_VIF_DEL:
5628 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5629 					      &fib_work->ven_info);
5630 		dev_put(fib_work->ven_info.dev);
5631 		break;
5632 	case FIB_EVENT_RULE_ADD:
5633 		/* if we get here, a rule was added that we do not support.
5634 		 * just do the fib_abort
5635 		 */
5636 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5637 		break;
5638 	}
5639 	rtnl_unlock();
5640 	kfree(fib_work);
5641 }
5642 
5643 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5644 				       struct fib_notifier_info *info)
5645 {
5646 	struct fib_entry_notifier_info *fen_info;
5647 	struct fib_nh_notifier_info *fnh_info;
5648 
5649 	switch (fib_work->event) {
5650 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5651 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5652 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5653 	case FIB_EVENT_ENTRY_DEL:
5654 		fen_info = container_of(info, struct fib_entry_notifier_info,
5655 					info);
5656 		fib_work->fen_info = *fen_info;
5657 		/* Take reference on fib_info to prevent it from being
5658 		 * freed while work is queued. Release it afterwards.
5659 		 */
5660 		fib_info_hold(fib_work->fen_info.fi);
5661 		break;
5662 	case FIB_EVENT_NH_ADD: /* fall through */
5663 	case FIB_EVENT_NH_DEL:
5664 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5665 					info);
5666 		fib_work->fnh_info = *fnh_info;
5667 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5668 		break;
5669 	}
5670 }
5671 
5672 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5673 				       struct fib_notifier_info *info)
5674 {
5675 	struct fib6_entry_notifier_info *fen6_info;
5676 
5677 	switch (fib_work->event) {
5678 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5679 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5680 	case FIB_EVENT_ENTRY_DEL:
5681 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5682 					 info);
5683 		fib_work->fen6_info = *fen6_info;
5684 		rt6_hold(fib_work->fen6_info.rt);
5685 		break;
5686 	}
5687 }
5688 
5689 static void
5690 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5691 			    struct fib_notifier_info *info)
5692 {
5693 	switch (fib_work->event) {
5694 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5695 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5696 	case FIB_EVENT_ENTRY_DEL:
5697 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5698 		ipmr_cache_hold(fib_work->men_info.mfc);
5699 		break;
5700 	case FIB_EVENT_VIF_ADD: /* fall through */
5701 	case FIB_EVENT_VIF_DEL:
5702 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5703 		dev_hold(fib_work->ven_info.dev);
5704 		break;
5705 	}
5706 }
5707 
5708 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5709 					  struct fib_notifier_info *info,
5710 					  struct mlxsw_sp *mlxsw_sp)
5711 {
5712 	struct netlink_ext_ack *extack = info->extack;
5713 	struct fib_rule_notifier_info *fr_info;
5714 	struct fib_rule *rule;
5715 	int err = 0;
5716 
5717 	/* nothing to do at the moment */
5718 	if (event == FIB_EVENT_RULE_DEL)
5719 		return 0;
5720 
5721 	if (mlxsw_sp->router->aborted)
5722 		return 0;
5723 
5724 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5725 	rule = fr_info->rule;
5726 
5727 	switch (info->family) {
5728 	case AF_INET:
5729 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5730 			err = -1;
5731 		break;
5732 	case AF_INET6:
5733 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5734 			err = -1;
5735 		break;
5736 	case RTNL_FAMILY_IPMR:
5737 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5738 			err = -1;
5739 		break;
5740 	}
5741 
5742 	if (err < 0)
5743 		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
5744 
5745 	return err;
5746 }
5747 
5748 /* Called with rcu_read_lock() */
5749 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5750 				     unsigned long event, void *ptr)
5751 {
5752 	struct mlxsw_sp_fib_event_work *fib_work;
5753 	struct fib_notifier_info *info = ptr;
5754 	struct mlxsw_sp_router *router;
5755 	int err;
5756 
5757 	if (!net_eq(info->net, &init_net) ||
5758 	    (info->family != AF_INET && info->family != AF_INET6 &&
5759 	     info->family != RTNL_FAMILY_IPMR))
5760 		return NOTIFY_DONE;
5761 
5762 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5763 
5764 	switch (event) {
5765 	case FIB_EVENT_RULE_ADD: /* fall through */
5766 	case FIB_EVENT_RULE_DEL:
5767 		err = mlxsw_sp_router_fib_rule_event(event, info,
5768 						     router->mlxsw_sp);
5769 		if (!err)
5770 			return NOTIFY_DONE;
5771 	}
5772 
5773 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5774 	if (WARN_ON(!fib_work))
5775 		return NOTIFY_BAD;
5776 
5777 	fib_work->mlxsw_sp = router->mlxsw_sp;
5778 	fib_work->event = event;
5779 
5780 	switch (info->family) {
5781 	case AF_INET:
5782 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5783 		mlxsw_sp_router_fib4_event(fib_work, info);
5784 		break;
5785 	case AF_INET6:
5786 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5787 		mlxsw_sp_router_fib6_event(fib_work, info);
5788 		break;
5789 	case RTNL_FAMILY_IPMR:
5790 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5791 		mlxsw_sp_router_fibmr_event(fib_work, info);
5792 		break;
5793 	}
5794 
5795 	mlxsw_core_schedule_work(&fib_work->work);
5796 
5797 	return NOTIFY_DONE;
5798 }
5799 
5800 static struct mlxsw_sp_rif *
5801 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5802 			 const struct net_device *dev)
5803 {
5804 	int i;
5805 
5806 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5807 		if (mlxsw_sp->router->rifs[i] &&
5808 		    mlxsw_sp->router->rifs[i]->dev == dev)
5809 			return mlxsw_sp->router->rifs[i];
5810 
5811 	return NULL;
5812 }
5813 
5814 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5815 {
5816 	char ritr_pl[MLXSW_REG_RITR_LEN];
5817 	int err;
5818 
5819 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5820 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5821 	if (WARN_ON_ONCE(err))
5822 		return err;
5823 
5824 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5825 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5826 }
5827 
5828 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5829 					  struct mlxsw_sp_rif *rif)
5830 {
5831 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5832 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5833 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5834 }
5835 
5836 static bool
5837 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5838 			   unsigned long event)
5839 {
5840 	struct inet6_dev *inet6_dev;
5841 	bool addr_list_empty = true;
5842 	struct in_device *idev;
5843 
5844 	switch (event) {
5845 	case NETDEV_UP:
5846 		return rif == NULL;
5847 	case NETDEV_DOWN:
5848 		idev = __in_dev_get_rtnl(dev);
5849 		if (idev && idev->ifa_list)
5850 			addr_list_empty = false;
5851 
5852 		inet6_dev = __in6_dev_get(dev);
5853 		if (addr_list_empty && inet6_dev &&
5854 		    !list_empty(&inet6_dev->addr_list))
5855 			addr_list_empty = false;
5856 
5857 		if (rif && addr_list_empty &&
5858 		    !netif_is_l3_slave(rif->dev))
5859 			return true;
5860 		/* It is possible we already removed the RIF ourselves
5861 		 * if it was assigned to a netdev that is now a bridge
5862 		 * or LAG slave.
5863 		 */
5864 		return false;
5865 	}
5866 
5867 	return false;
5868 }
5869 
5870 static enum mlxsw_sp_rif_type
5871 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5872 		      const struct net_device *dev)
5873 {
5874 	enum mlxsw_sp_fid_type type;
5875 
5876 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5877 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5878 
5879 	/* Otherwise RIF type is derived from the type of the underlying FID. */
5880 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5881 		type = MLXSW_SP_FID_TYPE_8021Q;
5882 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5883 		type = MLXSW_SP_FID_TYPE_8021Q;
5884 	else if (netif_is_bridge_master(dev))
5885 		type = MLXSW_SP_FID_TYPE_8021D;
5886 	else
5887 		type = MLXSW_SP_FID_TYPE_RFID;
5888 
5889 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5890 }
5891 
5892 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5893 {
5894 	int i;
5895 
5896 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5897 		if (!mlxsw_sp->router->rifs[i]) {
5898 			*p_rif_index = i;
5899 			return 0;
5900 		}
5901 	}
5902 
5903 	return -ENOBUFS;
5904 }
5905 
5906 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5907 					       u16 vr_id,
5908 					       struct net_device *l3_dev)
5909 {
5910 	struct mlxsw_sp_rif *rif;
5911 
5912 	rif = kzalloc(rif_size, GFP_KERNEL);
5913 	if (!rif)
5914 		return NULL;
5915 
5916 	INIT_LIST_HEAD(&rif->nexthop_list);
5917 	INIT_LIST_HEAD(&rif->neigh_list);
5918 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5919 	rif->mtu = l3_dev->mtu;
5920 	rif->vr_id = vr_id;
5921 	rif->dev = l3_dev;
5922 	rif->rif_index = rif_index;
5923 
5924 	return rif;
5925 }
5926 
5927 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5928 					   u16 rif_index)
5929 {
5930 	return mlxsw_sp->router->rifs[rif_index];
5931 }
5932 
5933 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5934 {
5935 	return rif->rif_index;
5936 }
5937 
5938 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5939 {
5940 	return lb_rif->common.rif_index;
5941 }
5942 
5943 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5944 {
5945 	return lb_rif->ul_vr_id;
5946 }
5947 
5948 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5949 {
5950 	return rif->dev->ifindex;
5951 }
5952 
5953 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
5954 {
5955 	return rif->dev;
5956 }
5957 
5958 static struct mlxsw_sp_rif *
5959 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
5960 		    const struct mlxsw_sp_rif_params *params,
5961 		    struct netlink_ext_ack *extack)
5962 {
5963 	u32 tb_id = l3mdev_fib_table(params->dev);
5964 	const struct mlxsw_sp_rif_ops *ops;
5965 	struct mlxsw_sp_fid *fid = NULL;
5966 	enum mlxsw_sp_rif_type type;
5967 	struct mlxsw_sp_rif *rif;
5968 	struct mlxsw_sp_vr *vr;
5969 	u16 rif_index;
5970 	int err;
5971 
5972 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
5973 	ops = mlxsw_sp->router->rif_ops_arr[type];
5974 
5975 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
5976 	if (IS_ERR(vr))
5977 		return ERR_CAST(vr);
5978 	vr->rif_count++;
5979 
5980 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
5981 	if (err) {
5982 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
5983 		goto err_rif_index_alloc;
5984 	}
5985 
5986 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
5987 	if (!rif) {
5988 		err = -ENOMEM;
5989 		goto err_rif_alloc;
5990 	}
5991 	rif->mlxsw_sp = mlxsw_sp;
5992 	rif->ops = ops;
5993 
5994 	if (ops->fid_get) {
5995 		fid = ops->fid_get(rif);
5996 		if (IS_ERR(fid)) {
5997 			err = PTR_ERR(fid);
5998 			goto err_fid_get;
5999 		}
6000 		rif->fid = fid;
6001 	}
6002 
6003 	if (ops->setup)
6004 		ops->setup(rif, params);
6005 
6006 	err = ops->configure(rif);
6007 	if (err)
6008 		goto err_configure;
6009 
6010 	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
6011 	if (err)
6012 		goto err_mr_rif_add;
6013 
6014 	mlxsw_sp_rif_counters_alloc(rif);
6015 	mlxsw_sp->router->rifs[rif_index] = rif;
6016 
6017 	return rif;
6018 
6019 err_mr_rif_add:
6020 	ops->deconfigure(rif);
6021 err_configure:
6022 	if (fid)
6023 		mlxsw_sp_fid_put(fid);
6024 err_fid_get:
6025 	kfree(rif);
6026 err_rif_alloc:
6027 err_rif_index_alloc:
6028 	vr->rif_count--;
6029 	mlxsw_sp_vr_put(vr);
6030 	return ERR_PTR(err);
6031 }
6032 
6033 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6034 {
6035 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6036 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6037 	struct mlxsw_sp_fid *fid = rif->fid;
6038 	struct mlxsw_sp_vr *vr;
6039 
6040 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6041 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6042 
6043 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6044 	mlxsw_sp_rif_counters_free(rif);
6045 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6046 	ops->deconfigure(rif);
6047 	if (fid)
6048 		/* Loopback RIFs are not associated with a FID. */
6049 		mlxsw_sp_fid_put(fid);
6050 	kfree(rif);
6051 	vr->rif_count--;
6052 	mlxsw_sp_vr_put(vr);
6053 }
6054 
6055 static void
6056 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6057 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6058 {
6059 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6060 
6061 	params->vid = mlxsw_sp_port_vlan->vid;
6062 	params->lag = mlxsw_sp_port->lagged;
6063 	if (params->lag)
6064 		params->lag_id = mlxsw_sp_port->lag_id;
6065 	else
6066 		params->system_port = mlxsw_sp_port->local_port;
6067 }
6068 
6069 static int
6070 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6071 			       struct net_device *l3_dev,
6072 			       struct netlink_ext_ack *extack)
6073 {
6074 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6075 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6076 	u16 vid = mlxsw_sp_port_vlan->vid;
6077 	struct mlxsw_sp_rif *rif;
6078 	struct mlxsw_sp_fid *fid;
6079 	int err;
6080 
6081 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6082 	if (!rif) {
6083 		struct mlxsw_sp_rif_params params = {
6084 			.dev = l3_dev,
6085 		};
6086 
6087 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6088 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6089 		if (IS_ERR(rif))
6090 			return PTR_ERR(rif);
6091 	}
6092 
6093 	/* FID was already created, just take a reference */
6094 	fid = rif->ops->fid_get(rif);
6095 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6096 	if (err)
6097 		goto err_fid_port_vid_map;
6098 
6099 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6100 	if (err)
6101 		goto err_port_vid_learning_set;
6102 
6103 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6104 					BR_STATE_FORWARDING);
6105 	if (err)
6106 		goto err_port_vid_stp_set;
6107 
6108 	mlxsw_sp_port_vlan->fid = fid;
6109 
6110 	return 0;
6111 
6112 err_port_vid_stp_set:
6113 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6114 err_port_vid_learning_set:
6115 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6116 err_fid_port_vid_map:
6117 	mlxsw_sp_fid_put(fid);
6118 	return err;
6119 }
6120 
6121 void
6122 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6123 {
6124 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6125 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6126 	u16 vid = mlxsw_sp_port_vlan->vid;
6127 
6128 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6129 		return;
6130 
6131 	mlxsw_sp_port_vlan->fid = NULL;
6132 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6133 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6134 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6135 	/* If router port holds the last reference on the rFID, then the
6136 	 * associated Sub-port RIF will be destroyed.
6137 	 */
6138 	mlxsw_sp_fid_put(fid);
6139 }
6140 
6141 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6142 					     struct net_device *port_dev,
6143 					     unsigned long event, u16 vid,
6144 					     struct netlink_ext_ack *extack)
6145 {
6146 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6147 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6148 
6149 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6150 	if (WARN_ON(!mlxsw_sp_port_vlan))
6151 		return -EINVAL;
6152 
6153 	switch (event) {
6154 	case NETDEV_UP:
6155 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6156 						      l3_dev, extack);
6157 	case NETDEV_DOWN:
6158 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6159 		break;
6160 	}
6161 
6162 	return 0;
6163 }
6164 
6165 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6166 					unsigned long event,
6167 					struct netlink_ext_ack *extack)
6168 {
6169 	if (netif_is_bridge_port(port_dev) ||
6170 	    netif_is_lag_port(port_dev) ||
6171 	    netif_is_ovs_port(port_dev))
6172 		return 0;
6173 
6174 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6175 						 extack);
6176 }
6177 
6178 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6179 					 struct net_device *lag_dev,
6180 					 unsigned long event, u16 vid,
6181 					 struct netlink_ext_ack *extack)
6182 {
6183 	struct net_device *port_dev;
6184 	struct list_head *iter;
6185 	int err;
6186 
6187 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6188 		if (mlxsw_sp_port_dev_check(port_dev)) {
6189 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6190 								port_dev,
6191 								event, vid,
6192 								extack);
6193 			if (err)
6194 				return err;
6195 		}
6196 	}
6197 
6198 	return 0;
6199 }
6200 
6201 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6202 				       unsigned long event,
6203 				       struct netlink_ext_ack *extack)
6204 {
6205 	if (netif_is_bridge_port(lag_dev))
6206 		return 0;
6207 
6208 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6209 					     extack);
6210 }
6211 
6212 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6213 					  unsigned long event,
6214 					  struct netlink_ext_ack *extack)
6215 {
6216 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6217 	struct mlxsw_sp_rif_params params = {
6218 		.dev = l3_dev,
6219 	};
6220 	struct mlxsw_sp_rif *rif;
6221 
6222 	switch (event) {
6223 	case NETDEV_UP:
6224 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6225 		if (IS_ERR(rif))
6226 			return PTR_ERR(rif);
6227 		break;
6228 	case NETDEV_DOWN:
6229 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6230 		mlxsw_sp_rif_destroy(rif);
6231 		break;
6232 	}
6233 
6234 	return 0;
6235 }
6236 
6237 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6238 					unsigned long event,
6239 					struct netlink_ext_ack *extack)
6240 {
6241 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6242 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6243 
6244 	if (netif_is_bridge_port(vlan_dev))
6245 		return 0;
6246 
6247 	if (mlxsw_sp_port_dev_check(real_dev))
6248 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6249 							 event, vid, extack);
6250 	else if (netif_is_lag_master(real_dev))
6251 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6252 						     vid, extack);
6253 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6254 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6255 
6256 	return 0;
6257 }
6258 
6259 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6260 				     unsigned long event,
6261 				     struct netlink_ext_ack *extack)
6262 {
6263 	if (mlxsw_sp_port_dev_check(dev))
6264 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6265 	else if (netif_is_lag_master(dev))
6266 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6267 	else if (netif_is_bridge_master(dev))
6268 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6269 	else if (is_vlan_dev(dev))
6270 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6271 	else
6272 		return 0;
6273 }
6274 
6275 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6276 			    unsigned long event, void *ptr)
6277 {
6278 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6279 	struct net_device *dev = ifa->ifa_dev->dev;
6280 	struct mlxsw_sp *mlxsw_sp;
6281 	struct mlxsw_sp_rif *rif;
6282 	int err = 0;
6283 
6284 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6285 	if (event == NETDEV_UP)
6286 		goto out;
6287 
6288 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6289 	if (!mlxsw_sp)
6290 		goto out;
6291 
6292 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6293 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6294 		goto out;
6295 
6296 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6297 out:
6298 	return notifier_from_errno(err);
6299 }
6300 
6301 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6302 				  unsigned long event, void *ptr)
6303 {
6304 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6305 	struct net_device *dev = ivi->ivi_dev->dev;
6306 	struct mlxsw_sp *mlxsw_sp;
6307 	struct mlxsw_sp_rif *rif;
6308 	int err = 0;
6309 
6310 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6311 	if (!mlxsw_sp)
6312 		goto out;
6313 
6314 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6315 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6316 		goto out;
6317 
6318 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6319 out:
6320 	return notifier_from_errno(err);
6321 }
6322 
6323 struct mlxsw_sp_inet6addr_event_work {
6324 	struct work_struct work;
6325 	struct net_device *dev;
6326 	unsigned long event;
6327 };
6328 
6329 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6330 {
6331 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6332 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6333 	struct net_device *dev = inet6addr_work->dev;
6334 	unsigned long event = inet6addr_work->event;
6335 	struct mlxsw_sp *mlxsw_sp;
6336 	struct mlxsw_sp_rif *rif;
6337 
6338 	rtnl_lock();
6339 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6340 	if (!mlxsw_sp)
6341 		goto out;
6342 
6343 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6344 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6345 		goto out;
6346 
6347 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6348 out:
6349 	rtnl_unlock();
6350 	dev_put(dev);
6351 	kfree(inet6addr_work);
6352 }
6353 
6354 /* Called with rcu_read_lock() */
6355 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6356 			     unsigned long event, void *ptr)
6357 {
6358 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6359 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6360 	struct net_device *dev = if6->idev->dev;
6361 
6362 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6363 	if (event == NETDEV_UP)
6364 		return NOTIFY_DONE;
6365 
6366 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6367 		return NOTIFY_DONE;
6368 
6369 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6370 	if (!inet6addr_work)
6371 		return NOTIFY_BAD;
6372 
6373 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6374 	inet6addr_work->dev = dev;
6375 	inet6addr_work->event = event;
6376 	dev_hold(dev);
6377 	mlxsw_core_schedule_work(&inet6addr_work->work);
6378 
6379 	return NOTIFY_DONE;
6380 }
6381 
6382 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6383 				   unsigned long event, void *ptr)
6384 {
6385 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6386 	struct net_device *dev = i6vi->i6vi_dev->dev;
6387 	struct mlxsw_sp *mlxsw_sp;
6388 	struct mlxsw_sp_rif *rif;
6389 	int err = 0;
6390 
6391 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6392 	if (!mlxsw_sp)
6393 		goto out;
6394 
6395 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6396 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6397 		goto out;
6398 
6399 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6400 out:
6401 	return notifier_from_errno(err);
6402 }
6403 
6404 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6405 			     const char *mac, int mtu)
6406 {
6407 	char ritr_pl[MLXSW_REG_RITR_LEN];
6408 	int err;
6409 
6410 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6411 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6412 	if (err)
6413 		return err;
6414 
6415 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6416 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6417 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6418 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6419 }
6420 
6421 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6422 {
6423 	struct mlxsw_sp *mlxsw_sp;
6424 	struct mlxsw_sp_rif *rif;
6425 	u16 fid_index;
6426 	int err;
6427 
6428 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6429 	if (!mlxsw_sp)
6430 		return 0;
6431 
6432 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6433 	if (!rif)
6434 		return 0;
6435 	fid_index = mlxsw_sp_fid_index(rif->fid);
6436 
6437 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6438 	if (err)
6439 		return err;
6440 
6441 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6442 				dev->mtu);
6443 	if (err)
6444 		goto err_rif_edit;
6445 
6446 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6447 	if (err)
6448 		goto err_rif_fdb_op;
6449 
6450 	if (rif->mtu != dev->mtu) {
6451 		struct mlxsw_sp_vr *vr;
6452 
6453 		/* The RIF is relevant only to its mr_table instance, as unlike
6454 		 * unicast routing, in multicast routing a RIF cannot be shared
6455 		 * between several multicast routing tables.
6456 		 */
6457 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6458 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6459 	}
6460 
6461 	ether_addr_copy(rif->addr, dev->dev_addr);
6462 	rif->mtu = dev->mtu;
6463 
6464 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6465 
6466 	return 0;
6467 
6468 err_rif_fdb_op:
6469 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6470 err_rif_edit:
6471 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6472 	return err;
6473 }
6474 
6475 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6476 				  struct net_device *l3_dev,
6477 				  struct netlink_ext_ack *extack)
6478 {
6479 	struct mlxsw_sp_rif *rif;
6480 
6481 	/* If netdev is already associated with a RIF, then we need to
6482 	 * destroy it and create a new one with the new virtual router ID.
6483 	 */
6484 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6485 	if (rif)
6486 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6487 
6488 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6489 }
6490 
6491 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6492 				    struct net_device *l3_dev)
6493 {
6494 	struct mlxsw_sp_rif *rif;
6495 
6496 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6497 	if (!rif)
6498 		return;
6499 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6500 }
6501 
6502 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6503 				 struct netdev_notifier_changeupper_info *info)
6504 {
6505 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6506 	int err = 0;
6507 
6508 	if (!mlxsw_sp)
6509 		return 0;
6510 
6511 	switch (event) {
6512 	case NETDEV_PRECHANGEUPPER:
6513 		return 0;
6514 	case NETDEV_CHANGEUPPER:
6515 		if (info->linking) {
6516 			struct netlink_ext_ack *extack;
6517 
6518 			extack = netdev_notifier_info_to_extack(&info->info);
6519 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6520 		} else {
6521 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6522 		}
6523 		break;
6524 	}
6525 
6526 	return err;
6527 }
6528 
6529 static struct mlxsw_sp_rif_subport *
6530 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6531 {
6532 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6533 }
6534 
6535 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6536 				       const struct mlxsw_sp_rif_params *params)
6537 {
6538 	struct mlxsw_sp_rif_subport *rif_subport;
6539 
6540 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6541 	rif_subport->vid = params->vid;
6542 	rif_subport->lag = params->lag;
6543 	if (params->lag)
6544 		rif_subport->lag_id = params->lag_id;
6545 	else
6546 		rif_subport->system_port = params->system_port;
6547 }
6548 
6549 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6550 {
6551 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6552 	struct mlxsw_sp_rif_subport *rif_subport;
6553 	char ritr_pl[MLXSW_REG_RITR_LEN];
6554 
6555 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6556 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6557 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6558 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6559 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6560 				  rif_subport->lag ? rif_subport->lag_id :
6561 						     rif_subport->system_port,
6562 				  rif_subport->vid);
6563 
6564 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6565 }
6566 
6567 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6568 {
6569 	int err;
6570 
6571 	err = mlxsw_sp_rif_subport_op(rif, true);
6572 	if (err)
6573 		return err;
6574 
6575 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6576 				  mlxsw_sp_fid_index(rif->fid), true);
6577 	if (err)
6578 		goto err_rif_fdb_op;
6579 
6580 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6581 	return 0;
6582 
6583 err_rif_fdb_op:
6584 	mlxsw_sp_rif_subport_op(rif, false);
6585 	return err;
6586 }
6587 
6588 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6589 {
6590 	struct mlxsw_sp_fid *fid = rif->fid;
6591 
6592 	mlxsw_sp_fid_rif_set(fid, NULL);
6593 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6594 			    mlxsw_sp_fid_index(fid), false);
6595 	mlxsw_sp_rif_subport_op(rif, false);
6596 }
6597 
6598 static struct mlxsw_sp_fid *
6599 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6600 {
6601 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6602 }
6603 
6604 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6605 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6606 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6607 	.setup			= mlxsw_sp_rif_subport_setup,
6608 	.configure		= mlxsw_sp_rif_subport_configure,
6609 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6610 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6611 };
6612 
6613 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6614 				    enum mlxsw_reg_ritr_if_type type,
6615 				    u16 vid_fid, bool enable)
6616 {
6617 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6618 	char ritr_pl[MLXSW_REG_RITR_LEN];
6619 
6620 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6621 			    rif->dev->mtu);
6622 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6623 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6624 
6625 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6626 }
6627 
6628 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6629 {
6630 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6631 }
6632 
6633 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6634 {
6635 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6636 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6637 	int err;
6638 
6639 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6640 	if (err)
6641 		return err;
6642 
6643 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6644 				     mlxsw_sp_router_port(mlxsw_sp), true);
6645 	if (err)
6646 		goto err_fid_mc_flood_set;
6647 
6648 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6649 				     mlxsw_sp_router_port(mlxsw_sp), true);
6650 	if (err)
6651 		goto err_fid_bc_flood_set;
6652 
6653 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6654 				  mlxsw_sp_fid_index(rif->fid), true);
6655 	if (err)
6656 		goto err_rif_fdb_op;
6657 
6658 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6659 	return 0;
6660 
6661 err_rif_fdb_op:
6662 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6663 			       mlxsw_sp_router_port(mlxsw_sp), false);
6664 err_fid_bc_flood_set:
6665 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6666 			       mlxsw_sp_router_port(mlxsw_sp), false);
6667 err_fid_mc_flood_set:
6668 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6669 	return err;
6670 }
6671 
6672 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6673 {
6674 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6675 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6676 	struct mlxsw_sp_fid *fid = rif->fid;
6677 
6678 	mlxsw_sp_fid_rif_set(fid, NULL);
6679 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6680 			    mlxsw_sp_fid_index(fid), false);
6681 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6682 			       mlxsw_sp_router_port(mlxsw_sp), false);
6683 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6684 			       mlxsw_sp_router_port(mlxsw_sp), false);
6685 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6686 }
6687 
6688 static struct mlxsw_sp_fid *
6689 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6690 {
6691 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6692 
6693 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6694 }
6695 
6696 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6697 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6698 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6699 	.configure		= mlxsw_sp_rif_vlan_configure,
6700 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6701 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6702 };
6703 
6704 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6705 {
6706 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6707 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6708 	int err;
6709 
6710 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6711 				       true);
6712 	if (err)
6713 		return err;
6714 
6715 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6716 				     mlxsw_sp_router_port(mlxsw_sp), true);
6717 	if (err)
6718 		goto err_fid_mc_flood_set;
6719 
6720 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6721 				     mlxsw_sp_router_port(mlxsw_sp), true);
6722 	if (err)
6723 		goto err_fid_bc_flood_set;
6724 
6725 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6726 				  mlxsw_sp_fid_index(rif->fid), true);
6727 	if (err)
6728 		goto err_rif_fdb_op;
6729 
6730 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6731 	return 0;
6732 
6733 err_rif_fdb_op:
6734 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6735 			       mlxsw_sp_router_port(mlxsw_sp), false);
6736 err_fid_bc_flood_set:
6737 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6738 			       mlxsw_sp_router_port(mlxsw_sp), false);
6739 err_fid_mc_flood_set:
6740 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6741 	return err;
6742 }
6743 
6744 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6745 {
6746 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6747 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6748 	struct mlxsw_sp_fid *fid = rif->fid;
6749 
6750 	mlxsw_sp_fid_rif_set(fid, NULL);
6751 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6752 			    mlxsw_sp_fid_index(fid), false);
6753 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6754 			       mlxsw_sp_router_port(mlxsw_sp), false);
6755 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6756 			       mlxsw_sp_router_port(mlxsw_sp), false);
6757 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6758 }
6759 
6760 static struct mlxsw_sp_fid *
6761 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6762 {
6763 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6764 }
6765 
6766 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6767 	.type			= MLXSW_SP_RIF_TYPE_FID,
6768 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6769 	.configure		= mlxsw_sp_rif_fid_configure,
6770 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6771 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6772 };
6773 
6774 static struct mlxsw_sp_rif_ipip_lb *
6775 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6776 {
6777 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6778 }
6779 
6780 static void
6781 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6782 			   const struct mlxsw_sp_rif_params *params)
6783 {
6784 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6785 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6786 
6787 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6788 				 common);
6789 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6790 	rif_lb->lb_config = params_lb->lb_config;
6791 }
6792 
6793 static int
6794 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6795 			struct mlxsw_sp_vr *ul_vr, bool enable)
6796 {
6797 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6798 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6799 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6800 	char ritr_pl[MLXSW_REG_RITR_LEN];
6801 	u32 saddr4;
6802 
6803 	switch (lb_cf.ul_protocol) {
6804 	case MLXSW_SP_L3_PROTO_IPV4:
6805 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6806 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6807 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6808 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6809 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6810 			    ul_vr->id, saddr4, lb_cf.okey);
6811 		break;
6812 
6813 	case MLXSW_SP_L3_PROTO_IPV6:
6814 		return -EAFNOSUPPORT;
6815 	}
6816 
6817 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6818 }
6819 
6820 static int
6821 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6822 {
6823 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6824 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6825 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6826 	struct mlxsw_sp_vr *ul_vr;
6827 	int err;
6828 
6829 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6830 	if (IS_ERR(ul_vr))
6831 		return PTR_ERR(ul_vr);
6832 
6833 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6834 	if (err)
6835 		goto err_loopback_op;
6836 
6837 	lb_rif->ul_vr_id = ul_vr->id;
6838 	++ul_vr->rif_count;
6839 	return 0;
6840 
6841 err_loopback_op:
6842 	mlxsw_sp_vr_put(ul_vr);
6843 	return err;
6844 }
6845 
6846 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6847 {
6848 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6849 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6850 	struct mlxsw_sp_vr *ul_vr;
6851 
6852 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6853 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6854 
6855 	--ul_vr->rif_count;
6856 	mlxsw_sp_vr_put(ul_vr);
6857 }
6858 
6859 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6860 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6861 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6862 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6863 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6864 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6865 };
6866 
6867 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6868 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6869 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6870 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6871 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6872 };
6873 
6874 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6875 {
6876 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6877 
6878 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6879 					 sizeof(struct mlxsw_sp_rif *),
6880 					 GFP_KERNEL);
6881 	if (!mlxsw_sp->router->rifs)
6882 		return -ENOMEM;
6883 
6884 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6885 
6886 	return 0;
6887 }
6888 
6889 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6890 {
6891 	int i;
6892 
6893 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6894 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6895 
6896 	kfree(mlxsw_sp->router->rifs);
6897 }
6898 
6899 static int
6900 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6901 {
6902 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6903 
6904 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6905 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6906 }
6907 
6908 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6909 {
6910 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6911 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6912 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6913 }
6914 
6915 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6916 {
6917 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6918 }
6919 
6920 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6921 {
6922 	struct mlxsw_sp_router *router;
6923 
6924 	/* Flush pending FIB notifications and then flush the device's
6925 	 * table before requesting another dump. The FIB notification
6926 	 * block is unregistered, so no need to take RTNL.
6927 	 */
6928 	mlxsw_core_flush_owq();
6929 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6930 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6931 }
6932 
6933 #ifdef CONFIG_IP_ROUTE_MULTIPATH
6934 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6935 {
6936 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
6937 }
6938 
6939 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6940 {
6941 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
6942 }
6943 
6944 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6945 {
6946 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
6947 
6948 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6949 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
6950 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
6951 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
6952 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
6953 	if (only_l3)
6954 		return;
6955 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
6956 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
6957 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
6958 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
6959 }
6960 
6961 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
6962 {
6963 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6964 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
6965 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
6966 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
6967 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
6968 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
6969 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
6970 }
6971 
6972 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
6973 {
6974 	char recr2_pl[MLXSW_REG_RECR2_LEN];
6975 	u32 seed;
6976 
6977 	get_random_bytes(&seed, sizeof(seed));
6978 	mlxsw_reg_recr2_pack(recr2_pl, seed);
6979 	mlxsw_sp_mp4_hash_init(recr2_pl);
6980 	mlxsw_sp_mp6_hash_init(recr2_pl);
6981 
6982 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
6983 }
6984 #else
6985 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
6986 {
6987 	return 0;
6988 }
6989 #endif
6990 
6991 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
6992 {
6993 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
6994 	u64 max_rifs;
6995 	int err;
6996 
6997 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
6998 		return -EIO;
6999 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7000 
7001 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7002 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7003 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7004 	if (err)
7005 		return err;
7006 	return 0;
7007 }
7008 
7009 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7010 {
7011 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7012 
7013 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7014 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7015 }
7016 
7017 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7018 {
7019 	struct mlxsw_sp_router *router;
7020 	int err;
7021 
7022 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7023 	if (!router)
7024 		return -ENOMEM;
7025 	mlxsw_sp->router = router;
7026 	router->mlxsw_sp = mlxsw_sp;
7027 
7028 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7029 	err = __mlxsw_sp_router_init(mlxsw_sp);
7030 	if (err)
7031 		goto err_router_init;
7032 
7033 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7034 	if (err)
7035 		goto err_rifs_init;
7036 
7037 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7038 	if (err)
7039 		goto err_ipips_init;
7040 
7041 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7042 			      &mlxsw_sp_nexthop_ht_params);
7043 	if (err)
7044 		goto err_nexthop_ht_init;
7045 
7046 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7047 			      &mlxsw_sp_nexthop_group_ht_params);
7048 	if (err)
7049 		goto err_nexthop_group_ht_init;
7050 
7051 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7052 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7053 	if (err)
7054 		goto err_lpm_init;
7055 
7056 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7057 	if (err)
7058 		goto err_mr_init;
7059 
7060 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7061 	if (err)
7062 		goto err_vrs_init;
7063 
7064 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7065 	if (err)
7066 		goto err_neigh_init;
7067 
7068 	mlxsw_sp->router->netevent_nb.notifier_call =
7069 		mlxsw_sp_router_netevent_event;
7070 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7071 	if (err)
7072 		goto err_register_netevent_notifier;
7073 
7074 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7075 	if (err)
7076 		goto err_mp_hash_init;
7077 
7078 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7079 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7080 				    mlxsw_sp_router_fib_dump_flush);
7081 	if (err)
7082 		goto err_register_fib_notifier;
7083 
7084 	return 0;
7085 
7086 err_register_fib_notifier:
7087 err_mp_hash_init:
7088 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7089 err_register_netevent_notifier:
7090 	mlxsw_sp_neigh_fini(mlxsw_sp);
7091 err_neigh_init:
7092 	mlxsw_sp_vrs_fini(mlxsw_sp);
7093 err_vrs_init:
7094 	mlxsw_sp_mr_fini(mlxsw_sp);
7095 err_mr_init:
7096 	mlxsw_sp_lpm_fini(mlxsw_sp);
7097 err_lpm_init:
7098 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7099 err_nexthop_group_ht_init:
7100 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7101 err_nexthop_ht_init:
7102 	mlxsw_sp_ipips_fini(mlxsw_sp);
7103 err_ipips_init:
7104 	mlxsw_sp_rifs_fini(mlxsw_sp);
7105 err_rifs_init:
7106 	__mlxsw_sp_router_fini(mlxsw_sp);
7107 err_router_init:
7108 	kfree(mlxsw_sp->router);
7109 	return err;
7110 }
7111 
7112 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7113 {
7114 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7115 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7116 	mlxsw_sp_neigh_fini(mlxsw_sp);
7117 	mlxsw_sp_vrs_fini(mlxsw_sp);
7118 	mlxsw_sp_mr_fini(mlxsw_sp);
7119 	mlxsw_sp_lpm_fini(mlxsw_sp);
7120 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7121 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7122 	mlxsw_sp_ipips_fini(mlxsw_sp);
7123 	mlxsw_sp_rifs_fini(mlxsw_sp);
7124 	__mlxsw_sp_router_fini(mlxsw_sp);
7125 	kfree(mlxsw_sp->router);
7126 }
7127