xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision c127f98ba9aba1818a6ca3a1da5a24653a10d966)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 
74 struct mlxsw_sp_vr;
75 struct mlxsw_sp_lpm_tree;
76 struct mlxsw_sp_rif_ops;
77 
/* Router state, reached through mlxsw_sp->router. */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;	/* back-pointer to the owning instance */
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;	/* array of MAX_VRS virtual routers */
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		struct mlxsw_sp_lpm_tree *trees; /* array of tree_count trees */
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	/* Operation tables; ipip_ops_arr is indexed by enum mlxsw_sp_ipip_type. */
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
104 
/* Router interface (RIF). Type-specific RIF structures embed this one as
 * their 'common' member.
 */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;	/* index used when addressing the RITR register */
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	/* Counter indexes; each is meaningful only while its *_valid flag
	 * is set.
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
122 
/* Parameters handed to mlxsw_sp_rif_create(). */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;	/* presumably selects lag_id over system_port - confirm */
};
132 
/* Sub-port RIF: the generic RIF plus port/LAG and VID identification. */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;	/* presumably selects lag_id over system_port - confirm */
};
142 
/* IP-in-IP loopback RIF. Obtained from a generic RIF pointer with
 * container_of() on the 'common' member.
 */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};
148 
/* Creation parameters for an IP-in-IP loopback RIF; &common is what gets
 * passed to mlxsw_sp_rif_create().
 */
struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
153 
/* Per-RIF-type operations table. */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	/* NOTE(review): presumably the size of the type-specific RIF
	 * structure to allocate - confirm against mlxsw_sp_rif_create().
	 */
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
164 
165 static unsigned int *
166 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
167 			   enum mlxsw_sp_rif_counter_dir dir)
168 {
169 	switch (dir) {
170 	case MLXSW_SP_RIF_COUNTER_EGRESS:
171 		return &rif->counter_egress;
172 	case MLXSW_SP_RIF_COUNTER_INGRESS:
173 		return &rif->counter_ingress;
174 	}
175 	return NULL;
176 }
177 
178 static bool
179 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
180 			       enum mlxsw_sp_rif_counter_dir dir)
181 {
182 	switch (dir) {
183 	case MLXSW_SP_RIF_COUNTER_EGRESS:
184 		return rif->counter_egress_valid;
185 	case MLXSW_SP_RIF_COUNTER_INGRESS:
186 		return rif->counter_ingress_valid;
187 	}
188 	return false;
189 }
190 
191 static void
192 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
193 			       enum mlxsw_sp_rif_counter_dir dir,
194 			       bool valid)
195 {
196 	switch (dir) {
197 	case MLXSW_SP_RIF_COUNTER_EGRESS:
198 		rif->counter_egress_valid = valid;
199 		break;
200 	case MLXSW_SP_RIF_COUNTER_INGRESS:
201 		rif->counter_ingress_valid = valid;
202 		break;
203 	}
204 }
205 
206 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
207 				     unsigned int counter_index, bool enable,
208 				     enum mlxsw_sp_rif_counter_dir dir)
209 {
210 	char ritr_pl[MLXSW_REG_RITR_LEN];
211 	bool is_egress = false;
212 	int err;
213 
214 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
215 		is_egress = true;
216 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
217 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
218 	if (err)
219 		return err;
220 
221 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
222 				    is_egress);
223 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
224 }
225 
226 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
227 				   struct mlxsw_sp_rif *rif,
228 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
229 {
230 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
231 	unsigned int *p_counter_index;
232 	bool valid;
233 	int err;
234 
235 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
236 	if (!valid)
237 		return -EINVAL;
238 
239 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
240 	if (!p_counter_index)
241 		return -EINVAL;
242 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
243 			     MLXSW_REG_RICNT_OPCODE_NOP);
244 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
245 	if (err)
246 		return err;
247 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
248 	return 0;
249 }
250 
251 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
252 				      unsigned int counter_index)
253 {
254 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
255 
256 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
257 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
258 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
259 }
260 
261 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
262 			       struct mlxsw_sp_rif *rif,
263 			       enum mlxsw_sp_rif_counter_dir dir)
264 {
265 	unsigned int *p_counter_index;
266 	int err;
267 
268 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
269 	if (!p_counter_index)
270 		return -EINVAL;
271 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
272 				     p_counter_index);
273 	if (err)
274 		return err;
275 
276 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
277 	if (err)
278 		goto err_counter_clear;
279 
280 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
281 					*p_counter_index, true, dir);
282 	if (err)
283 		goto err_counter_edit;
284 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
285 	return 0;
286 
287 err_counter_edit:
288 err_counter_clear:
289 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
290 			      *p_counter_index);
291 	return err;
292 }
293 
294 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
295 			       struct mlxsw_sp_rif *rif,
296 			       enum mlxsw_sp_rif_counter_dir dir)
297 {
298 	unsigned int *p_counter_index;
299 
300 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
301 		return;
302 
303 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
304 	if (WARN_ON(!p_counter_index))
305 		return;
306 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
307 				  *p_counter_index, false, dir);
308 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
309 			      *p_counter_index);
310 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
311 }
312 
313 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
314 {
315 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
316 	struct devlink *devlink;
317 
318 	devlink = priv_to_devlink(mlxsw_sp->core);
319 	if (!devlink_dpipe_table_counter_enabled(devlink,
320 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
321 		return;
322 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
323 }
324 
325 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
326 {
327 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328 
329 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
330 }
331 
332 static struct mlxsw_sp_rif *
333 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
334 			 const struct net_device *dev);
335 
/* Number of distinct prefix lengths: one per bit of an IPv6 address, plus
 * one for prefix length 0.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per prefix length. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every bit that is set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
344 
345 static bool
346 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
347 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
348 {
349 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
350 }
351 
352 static bool
353 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
354 {
355 	struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
356 
357 	return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
358 }
359 
360 static void
361 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
362 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
363 {
364 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
365 }
366 
367 static void
368 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
369 			  unsigned char prefix_len)
370 {
371 	set_bit(prefix_len, prefix_usage->b);
372 }
373 
374 static void
375 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
376 			    unsigned char prefix_len)
377 {
378 	clear_bit(prefix_len, prefix_usage->b);
379 }
380 
/* FIB node key: an address buffer sized for an IPv6 address together with
 * a prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};
385 
/* Type of a FIB entry, stored in struct mlxsw_sp_fib_entry::type. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};
399 
400 struct mlxsw_sp_nexthop_group;
401 struct mlxsw_sp_fib;
402 
/* A node of a FIB, identified by its key. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;	/* FIB this node belongs to */
	struct mlxsw_sp_fib_key key;
};
410 
/* Decap state of a FIB entry: the IPIP entry it decapsulates for and the
 * tunnel index allocated from KVDL.
 */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};
415 
/* Protocol-independent part of a FIB entry; embedded as 'common' in the
 * IPv4 and IPv6 entry structures.
 */
struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};
424 
/* IPv4 FIB entry: the common entry plus IPv4 route attributes. */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};
432 
/* IPv6 FIB entry: the common entry plus a list of routes.
 * NOTE(review): nrt6 presumably counts the elements of rt6_list - confirm.
 */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};
438 
/* List element wrapping a pointer to a kernel IPv6 route (struct rt6_info). */
struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};
443 
/* An LPM tree slot. A slot whose ref_count is zero is considered unused and
 * may be picked for a new tree.
 */
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	struct mlxsw_sp_prefix_usage prefix_usage;
};
450 
/* A FIB of one L3 protocol belonging to one virtual router. */
struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;		/* owning virtual router */
	struct mlxsw_sp_lpm_tree *lpm_tree; /* bound tree, NULL when none */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
	enum mlxsw_sp_l3proto proto;
};
460 
/* A virtual router slot. The slot counts as used when any of fib4, fib6 or
 * mr4_table is non-NULL.
 */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};
469 
470 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
471 
472 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
473 						enum mlxsw_sp_l3proto proto)
474 {
475 	struct mlxsw_sp_fib *fib;
476 	int err;
477 
478 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
479 	if (!fib)
480 		return ERR_PTR(-ENOMEM);
481 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
482 	if (err)
483 		goto err_rhashtable_init;
484 	INIT_LIST_HEAD(&fib->node_list);
485 	fib->proto = proto;
486 	fib->vr = vr;
487 	return fib;
488 
489 err_rhashtable_init:
490 	kfree(fib);
491 	return ERR_PTR(err);
492 }
493 
494 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
495 {
496 	WARN_ON(!list_empty(&fib->node_list));
497 	WARN_ON(fib->lpm_tree);
498 	rhashtable_destroy(&fib->ht);
499 	kfree(fib);
500 }
501 
502 static struct mlxsw_sp_lpm_tree *
503 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
504 {
505 	static struct mlxsw_sp_lpm_tree *lpm_tree;
506 	int i;
507 
508 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
509 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
510 		if (lpm_tree->ref_count == 0)
511 			return lpm_tree;
512 	}
513 	return NULL;
514 }
515 
516 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
517 				   struct mlxsw_sp_lpm_tree *lpm_tree)
518 {
519 	char ralta_pl[MLXSW_REG_RALTA_LEN];
520 
521 	mlxsw_reg_ralta_pack(ralta_pl, true,
522 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
523 			     lpm_tree->id);
524 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
525 }
526 
527 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
528 				   struct mlxsw_sp_lpm_tree *lpm_tree)
529 {
530 	char ralta_pl[MLXSW_REG_RALTA_LEN];
531 
532 	mlxsw_reg_ralta_pack(ralta_pl, false,
533 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
534 			     lpm_tree->id);
535 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
536 }
537 
538 static int
539 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
540 				  struct mlxsw_sp_prefix_usage *prefix_usage,
541 				  struct mlxsw_sp_lpm_tree *lpm_tree)
542 {
543 	char ralst_pl[MLXSW_REG_RALST_LEN];
544 	u8 root_bin = 0;
545 	u8 prefix;
546 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
547 
548 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
549 		root_bin = prefix;
550 
551 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
552 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
553 		if (prefix == 0)
554 			continue;
555 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
556 					 MLXSW_REG_RALST_BIN_NO_CHILD);
557 		last_prefix = prefix;
558 	}
559 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
560 }
561 
562 static struct mlxsw_sp_lpm_tree *
563 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
564 			 struct mlxsw_sp_prefix_usage *prefix_usage,
565 			 enum mlxsw_sp_l3proto proto)
566 {
567 	struct mlxsw_sp_lpm_tree *lpm_tree;
568 	int err;
569 
570 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
571 	if (!lpm_tree)
572 		return ERR_PTR(-EBUSY);
573 	lpm_tree->proto = proto;
574 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
575 	if (err)
576 		return ERR_PTR(err);
577 
578 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
579 						lpm_tree);
580 	if (err)
581 		goto err_left_struct_set;
582 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
583 	       sizeof(lpm_tree->prefix_usage));
584 	return lpm_tree;
585 
586 err_left_struct_set:
587 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
588 	return ERR_PTR(err);
589 }
590 
/* Counterpart of mlxsw_sp_lpm_tree_create(): free @lpm_tree in the device. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
596 
597 static struct mlxsw_sp_lpm_tree *
598 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
599 		      struct mlxsw_sp_prefix_usage *prefix_usage,
600 		      enum mlxsw_sp_l3proto proto)
601 {
602 	struct mlxsw_sp_lpm_tree *lpm_tree;
603 	int i;
604 
605 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
606 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
607 		if (lpm_tree->ref_count != 0 &&
608 		    lpm_tree->proto == proto &&
609 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
610 					     prefix_usage))
611 			return lpm_tree;
612 	}
613 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
614 }
615 
616 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
617 {
618 	lpm_tree->ref_count++;
619 }
620 
621 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
622 				  struct mlxsw_sp_lpm_tree *lpm_tree)
623 {
624 	if (--lpm_tree->ref_count == 0)
625 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
626 }
627 
628 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
629 
630 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
631 {
632 	struct mlxsw_sp_lpm_tree *lpm_tree;
633 	u64 max_trees;
634 	int i;
635 
636 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
637 		return -EIO;
638 
639 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
640 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
641 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
642 					     sizeof(struct mlxsw_sp_lpm_tree),
643 					     GFP_KERNEL);
644 	if (!mlxsw_sp->router->lpm.trees)
645 		return -ENOMEM;
646 
647 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
648 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
649 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
650 	}
651 
652 	return 0;
653 }
654 
655 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
656 {
657 	kfree(mlxsw_sp->router->lpm.trees);
658 }
659 
660 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
661 {
662 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
663 }
664 
665 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
666 {
667 	struct mlxsw_sp_vr *vr;
668 	int i;
669 
670 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
671 		vr = &mlxsw_sp->router->vrs[i];
672 		if (!mlxsw_sp_vr_is_used(vr))
673 			return vr;
674 	}
675 	return NULL;
676 }
677 
678 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
679 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
680 {
681 	char raltb_pl[MLXSW_REG_RALTB_LEN];
682 
683 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
684 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
685 			     tree_id);
686 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
687 }
688 
689 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
690 				       const struct mlxsw_sp_fib *fib)
691 {
692 	char raltb_pl[MLXSW_REG_RALTB_LEN];
693 
694 	/* Bind to tree 0 which is default */
695 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
696 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
697 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
698 }
699 
700 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
701 {
702 	/* For our purpose, squash main, default and local tables into one */
703 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
704 		tb_id = RT_TABLE_MAIN;
705 	return tb_id;
706 }
707 
708 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
709 					    u32 tb_id)
710 {
711 	struct mlxsw_sp_vr *vr;
712 	int i;
713 
714 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
715 
716 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
717 		vr = &mlxsw_sp->router->vrs[i];
718 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
719 			return vr;
720 	}
721 	return NULL;
722 }
723 
724 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
725 					    enum mlxsw_sp_l3proto proto)
726 {
727 	switch (proto) {
728 	case MLXSW_SP_L3_PROTO_IPV4:
729 		return vr->fib4;
730 	case MLXSW_SP_L3_PROTO_IPV6:
731 		return vr->fib6;
732 	}
733 	return NULL;
734 }
735 
736 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
737 					      u32 tb_id,
738 					      struct netlink_ext_ack *extack)
739 {
740 	struct mlxsw_sp_vr *vr;
741 	int err;
742 
743 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
744 	if (!vr) {
745 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
746 		return ERR_PTR(-EBUSY);
747 	}
748 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
749 	if (IS_ERR(vr->fib4))
750 		return ERR_CAST(vr->fib4);
751 	vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
752 	if (IS_ERR(vr->fib6)) {
753 		err = PTR_ERR(vr->fib6);
754 		goto err_fib6_create;
755 	}
756 	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
757 						 MLXSW_SP_L3_PROTO_IPV4);
758 	if (IS_ERR(vr->mr4_table)) {
759 		err = PTR_ERR(vr->mr4_table);
760 		goto err_mr_table_create;
761 	}
762 	vr->tb_id = tb_id;
763 	return vr;
764 
765 err_mr_table_create:
766 	mlxsw_sp_fib_destroy(vr->fib6);
767 	vr->fib6 = NULL;
768 err_fib6_create:
769 	mlxsw_sp_fib_destroy(vr->fib4);
770 	vr->fib4 = NULL;
771 	return ERR_PTR(err);
772 }
773 
774 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
775 {
776 	mlxsw_sp_mr_table_destroy(vr->mr4_table);
777 	vr->mr4_table = NULL;
778 	mlxsw_sp_fib_destroy(vr->fib6);
779 	vr->fib6 = NULL;
780 	mlxsw_sp_fib_destroy(vr->fib4);
781 	vr->fib4 = NULL;
782 }
783 
784 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
785 					   struct netlink_ext_ack *extack)
786 {
787 	struct mlxsw_sp_vr *vr;
788 
789 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
790 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
791 	if (!vr)
792 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
793 	return vr;
794 }
795 
796 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
797 {
798 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
799 	    list_empty(&vr->fib6->node_list) &&
800 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
801 		mlxsw_sp_vr_destroy(vr);
802 }
803 
804 static bool
805 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
806 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
807 {
808 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
809 
810 	if (!mlxsw_sp_vr_is_used(vr))
811 		return false;
812 	if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
813 		return true;
814 	return false;
815 }
816 
817 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
818 					struct mlxsw_sp_fib *fib,
819 					struct mlxsw_sp_lpm_tree *new_tree)
820 {
821 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
822 	int err;
823 
824 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
825 	if (err)
826 		return err;
827 	fib->lpm_tree = new_tree;
828 	mlxsw_sp_lpm_tree_hold(new_tree);
829 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
830 	return 0;
831 }
832 
833 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
834 					 struct mlxsw_sp_fib *fib,
835 					 struct mlxsw_sp_lpm_tree *new_tree)
836 {
837 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
838 	enum mlxsw_sp_l3proto proto = fib->proto;
839 	u8 old_id, new_id = new_tree->id;
840 	struct mlxsw_sp_vr *vr;
841 	int i, err;
842 
843 	if (!old_tree)
844 		goto no_replace;
845 	old_id = old_tree->id;
846 
847 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
848 		vr = &mlxsw_sp->router->vrs[i];
849 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
850 			continue;
851 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
852 						   mlxsw_sp_vr_fib(vr, proto),
853 						   new_tree);
854 		if (err)
855 			goto err_tree_replace;
856 	}
857 
858 	return 0;
859 
860 err_tree_replace:
861 	for (i--; i >= 0; i--) {
862 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
863 			continue;
864 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
865 					     mlxsw_sp_vr_fib(vr, proto),
866 					     old_tree);
867 	}
868 	return err;
869 
870 no_replace:
871 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
872 	if (err)
873 		return err;
874 	fib->lpm_tree = new_tree;
875 	mlxsw_sp_lpm_tree_hold(new_tree);
876 	return 0;
877 }
878 
879 static void
880 mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
881 		      enum mlxsw_sp_l3proto proto,
882 		      struct mlxsw_sp_prefix_usage *req_prefix_usage)
883 {
884 	int i;
885 
886 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
887 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
888 		struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
889 		unsigned char prefix;
890 
891 		if (!mlxsw_sp_vr_is_used(vr))
892 			continue;
893 		mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
894 			mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
895 	}
896 }
897 
898 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
899 {
900 	struct mlxsw_sp_vr *vr;
901 	u64 max_vrs;
902 	int i;
903 
904 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
905 		return -EIO;
906 
907 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
908 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
909 					GFP_KERNEL);
910 	if (!mlxsw_sp->router->vrs)
911 		return -ENOMEM;
912 
913 	for (i = 0; i < max_vrs; i++) {
914 		vr = &mlxsw_sp->router->vrs[i];
915 		vr->id = i;
916 	}
917 
918 	return 0;
919 }
920 
921 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
922 
923 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
924 {
925 	/* At this stage we're guaranteed not to have new incoming
926 	 * FIB notifications and the work queue is free from FIBs
927 	 * sitting on top of mlxsw netdevs. However, we can still
928 	 * have other FIBs queued. Flush the queue before flushing
929 	 * the device's tables. No need for locks, as we're the only
930 	 * writer.
931 	 */
932 	mlxsw_core_flush_owq();
933 	mlxsw_sp_router_fib_flush(mlxsw_sp);
934 	kfree(mlxsw_sp->router->vrs);
935 }
936 
937 static struct net_device *
938 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
939 {
940 	struct ip_tunnel *tun = netdev_priv(ol_dev);
941 	struct net *net = dev_net(ol_dev);
942 
943 	return __dev_get_by_index(net, tun->parms.link);
944 }
945 
946 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
947 {
948 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
949 
950 	if (d)
951 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
952 	else
953 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
954 }
955 
956 static struct mlxsw_sp_rif *
957 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
958 		    const struct mlxsw_sp_rif_params *params,
959 		    struct netlink_ext_ack *extack);
960 
961 static struct mlxsw_sp_rif_ipip_lb *
962 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
963 				enum mlxsw_sp_ipip_type ipipt,
964 				struct net_device *ol_dev,
965 				struct netlink_ext_ack *extack)
966 {
967 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
968 	const struct mlxsw_sp_ipip_ops *ipip_ops;
969 	struct mlxsw_sp_rif *rif;
970 
971 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
972 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
973 		.common.dev = ol_dev,
974 		.common.lag = false,
975 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
976 	};
977 
978 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
979 	if (IS_ERR(rif))
980 		return ERR_CAST(rif);
981 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
982 }
983 
984 static struct mlxsw_sp_ipip_entry *
985 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
986 			  enum mlxsw_sp_ipip_type ipipt,
987 			  struct net_device *ol_dev)
988 {
989 	struct mlxsw_sp_ipip_entry *ipip_entry;
990 	struct mlxsw_sp_ipip_entry *ret = NULL;
991 
992 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
993 	if (!ipip_entry)
994 		return ERR_PTR(-ENOMEM);
995 
996 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
997 							    ol_dev, NULL);
998 	if (IS_ERR(ipip_entry->ol_lb)) {
999 		ret = ERR_CAST(ipip_entry->ol_lb);
1000 		goto err_ol_ipip_lb_create;
1001 	}
1002 
1003 	ipip_entry->ipipt = ipipt;
1004 	ipip_entry->ol_dev = ol_dev;
1005 	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
1006 
1007 	return ipip_entry;
1008 
1009 err_ol_ipip_lb_create:
1010 	kfree(ipip_entry);
1011 	return ret;
1012 }
1013 
1014 static void
1015 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1016 {
1017 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1018 	kfree(ipip_entry);
1019 }
1020 
1021 static bool
1022 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1023 				  const enum mlxsw_sp_l3proto ul_proto,
1024 				  union mlxsw_sp_l3addr saddr,
1025 				  u32 ul_tb_id,
1026 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1027 {
1028 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1029 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1030 	union mlxsw_sp_l3addr tun_saddr;
1031 
1032 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1033 		return false;
1034 
1035 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1036 	return tun_ul_tb_id == ul_tb_id &&
1037 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1038 }
1039 
1040 static int
1041 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1042 			      struct mlxsw_sp_fib_entry *fib_entry,
1043 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1044 {
1045 	u32 tunnel_index;
1046 	int err;
1047 
1048 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1049 	if (err)
1050 		return err;
1051 
1052 	ipip_entry->decap_fib_entry = fib_entry;
1053 	fib_entry->decap.ipip_entry = ipip_entry;
1054 	fib_entry->decap.tunnel_index = tunnel_index;
1055 	return 0;
1056 }
1057 
1058 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1059 					  struct mlxsw_sp_fib_entry *fib_entry)
1060 {
1061 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1062 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1063 	fib_entry->decap.ipip_entry = NULL;
1064 	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1065 }
1066 
1067 static struct mlxsw_sp_fib_node *
1068 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1069 			 size_t addr_len, unsigned char prefix_len);
1070 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1071 				     struct mlxsw_sp_fib_entry *fib_entry);
1072 
1073 static void
1074 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1075 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1076 {
1077 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1078 
1079 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1080 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1081 
1082 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1083 }
1084 
1085 static void
1086 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1087 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1088 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1089 {
1090 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1091 					  ipip_entry))
1092 		return;
1093 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1094 
1095 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1096 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1097 }
1098 
1099 /* Given an IPIP entry, find the corresponding decap route. */
1100 static struct mlxsw_sp_fib_entry *
1101 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1102 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1103 {
1104 	static struct mlxsw_sp_fib_node *fib_node;
1105 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1106 	struct mlxsw_sp_fib_entry *fib_entry;
1107 	unsigned char saddr_prefix_len;
1108 	union mlxsw_sp_l3addr saddr;
1109 	struct mlxsw_sp_fib *ul_fib;
1110 	struct mlxsw_sp_vr *ul_vr;
1111 	const void *saddrp;
1112 	size_t saddr_len;
1113 	u32 ul_tb_id;
1114 	u32 saddr4;
1115 
1116 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1117 
1118 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1119 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1120 	if (!ul_vr)
1121 		return NULL;
1122 
1123 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1124 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1125 					   ipip_entry->ol_dev);
1126 
1127 	switch (ipip_ops->ul_proto) {
1128 	case MLXSW_SP_L3_PROTO_IPV4:
1129 		saddr4 = be32_to_cpu(saddr.addr4);
1130 		saddrp = &saddr4;
1131 		saddr_len = 4;
1132 		saddr_prefix_len = 32;
1133 		break;
1134 	case MLXSW_SP_L3_PROTO_IPV6:
1135 		WARN_ON(1);
1136 		return NULL;
1137 	}
1138 
1139 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1140 					    saddr_prefix_len);
1141 	if (!fib_node || list_empty(&fib_node->entry_list))
1142 		return NULL;
1143 
1144 	fib_entry = list_first_entry(&fib_node->entry_list,
1145 				     struct mlxsw_sp_fib_entry, list);
1146 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1147 		return NULL;
1148 
1149 	return fib_entry;
1150 }
1151 
1152 static struct mlxsw_sp_ipip_entry *
1153 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1154 			   enum mlxsw_sp_ipip_type ipipt,
1155 			   struct net_device *ol_dev)
1156 {
1157 	struct mlxsw_sp_ipip_entry *ipip_entry;
1158 
1159 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1160 	if (IS_ERR(ipip_entry))
1161 		return ipip_entry;
1162 
1163 	list_add_tail(&ipip_entry->ipip_list_node,
1164 		      &mlxsw_sp->router->ipip_list);
1165 
1166 	return ipip_entry;
1167 }
1168 
1169 static void
1170 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1171 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1172 {
1173 	list_del(&ipip_entry->ipip_list_node);
1174 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1175 }
1176 
1177 static bool
1178 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1179 				  const struct net_device *ul_dev,
1180 				  enum mlxsw_sp_l3proto ul_proto,
1181 				  union mlxsw_sp_l3addr ul_dip,
1182 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1183 {
1184 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1185 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1186 	struct net_device *ipip_ul_dev;
1187 
1188 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1189 		return false;
1190 
1191 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1192 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1193 						 ul_tb_id, ipip_entry) &&
1194 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1195 }
1196 
1197 /* Given decap parameters, find the corresponding IPIP entry. */
1198 static struct mlxsw_sp_ipip_entry *
1199 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1200 				  const struct net_device *ul_dev,
1201 				  enum mlxsw_sp_l3proto ul_proto,
1202 				  union mlxsw_sp_l3addr ul_dip)
1203 {
1204 	struct mlxsw_sp_ipip_entry *ipip_entry;
1205 
1206 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1207 			    ipip_list_node)
1208 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1209 						      ul_proto, ul_dip,
1210 						      ipip_entry))
1211 			return ipip_entry;
1212 
1213 	return NULL;
1214 }
1215 
1216 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1217 				      const struct net_device *dev,
1218 				      enum mlxsw_sp_ipip_type *p_type)
1219 {
1220 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1221 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1222 	enum mlxsw_sp_ipip_type ipipt;
1223 
1224 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1225 		ipip_ops = router->ipip_ops_arr[ipipt];
1226 		if (dev->type == ipip_ops->dev_type) {
1227 			if (p_type)
1228 				*p_type = ipipt;
1229 			return true;
1230 		}
1231 	}
1232 	return false;
1233 }
1234 
/* Return true if @dev has the device type of one of the supported IPIP
 * tunnel types. The matching type itself is not reported.
 */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1240 
1241 static struct mlxsw_sp_ipip_entry *
1242 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1243 				   const struct net_device *ol_dev)
1244 {
1245 	struct mlxsw_sp_ipip_entry *ipip_entry;
1246 
1247 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1248 			    ipip_list_node)
1249 		if (ipip_entry->ol_dev == ol_dev)
1250 			return ipip_entry;
1251 
1252 	return NULL;
1253 }
1254 
1255 static struct mlxsw_sp_ipip_entry *
1256 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1257 				   const struct net_device *ul_dev,
1258 				   struct mlxsw_sp_ipip_entry *start)
1259 {
1260 	struct mlxsw_sp_ipip_entry *ipip_entry;
1261 
1262 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1263 					ipip_list_node);
1264 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1265 				     ipip_list_node) {
1266 		struct net_device *ipip_ul_dev =
1267 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1268 
1269 		if (ipip_ul_dev == ul_dev)
1270 			return ipip_entry;
1271 	}
1272 
1273 	return NULL;
1274 }
1275 
1276 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1277 				const struct net_device *dev)
1278 {
1279 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1280 }
1281 
1282 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1283 						const struct net_device *ol_dev,
1284 						enum mlxsw_sp_ipip_type ipipt)
1285 {
1286 	const struct mlxsw_sp_ipip_ops *ops
1287 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1288 
1289 	/* For deciding whether decap should be offloaded, we don't care about
1290 	 * overlay protocol, so ask whether either one is supported.
1291 	 */
1292 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1293 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1294 }
1295 
1296 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1297 						struct net_device *ol_dev)
1298 {
1299 	struct mlxsw_sp_ipip_entry *ipip_entry;
1300 	enum mlxsw_sp_l3proto ul_proto;
1301 	enum mlxsw_sp_ipip_type ipipt;
1302 	union mlxsw_sp_l3addr saddr;
1303 	u32 ul_tb_id;
1304 
1305 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1306 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1307 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1308 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1309 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1310 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1311 							  saddr, ul_tb_id,
1312 							  NULL)) {
1313 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1314 								ol_dev);
1315 			if (IS_ERR(ipip_entry))
1316 				return PTR_ERR(ipip_entry);
1317 		}
1318 	}
1319 
1320 	return 0;
1321 }
1322 
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1332 
/* The overlay device of @ipip_entry is up: if a decap route candidate exists
 * for the entry, promote it.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *candidate;

	candidate = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!candidate)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, candidate);
}
1344 
/* Handle overlay device @ol_dev going up. Devices without an IPIP entry are
 * ignored.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1354 
1355 static void
1356 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1357 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1358 {
1359 	if (ipip_entry->decap_fib_entry)
1360 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1361 }
1362 
/* Handle overlay device @ol_dev going down. Devices without an IPIP entry
 * are ignored.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1372 
1373 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1374 					 struct mlxsw_sp_rif *old_rif,
1375 					 struct mlxsw_sp_rif *new_rif);
1376 static int
1377 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1378 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1379 				 bool keep_encap,
1380 				 struct netlink_ext_ack *extack)
1381 {
1382 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1383 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1384 
1385 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1386 						     ipip_entry->ipipt,
1387 						     ipip_entry->ol_dev,
1388 						     extack);
1389 	if (IS_ERR(new_lb_rif))
1390 		return PTR_ERR(new_lb_rif);
1391 	ipip_entry->ol_lb = new_lb_rif;
1392 
1393 	if (keep_encap)
1394 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1395 					     &new_lb_rif->common);
1396 
1397 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1398 
1399 	return 0;
1400 }
1401 
1402 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1403 					struct mlxsw_sp_rif *rif);
1404 
1405 /**
1406  * Update the offload related to an IPIP entry. This always updates decap, and
1407  * in addition to that it also:
1408  * @recreate_loopback: recreates the associated loopback RIF
1409  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1410  *              relevant when recreate_loopback is true.
1411  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1412  *                   is only relevant when recreate_loopback is false.
1413  */
1414 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1415 					struct mlxsw_sp_ipip_entry *ipip_entry,
1416 					bool recreate_loopback,
1417 					bool keep_encap,
1418 					bool update_nexthops,
1419 					struct netlink_ext_ack *extack)
1420 {
1421 	int err;
1422 
1423 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1424 	 * recreate it. That creates a window of opportunity where RALUE and
1425 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1426 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1427 	 * of RALUE, demote the decap route back.
1428 	 */
1429 	if (ipip_entry->decap_fib_entry)
1430 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1431 
1432 	if (recreate_loopback) {
1433 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1434 						       keep_encap, extack);
1435 		if (err)
1436 			return err;
1437 	} else if (update_nexthops) {
1438 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1439 					    &ipip_entry->ol_lb->common);
1440 	}
1441 
1442 	if (ipip_entry->ol_dev->flags & IFF_UP)
1443 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1444 
1445 	return 0;
1446 }
1447 
1448 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1449 						struct net_device *ol_dev,
1450 						struct netlink_ext_ack *extack)
1451 {
1452 	struct mlxsw_sp_ipip_entry *ipip_entry =
1453 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1454 	enum mlxsw_sp_l3proto ul_proto;
1455 	union mlxsw_sp_l3addr saddr;
1456 	u32 ul_tb_id;
1457 
1458 	if (!ipip_entry)
1459 		return 0;
1460 
1461 	/* For flat configuration cases, moving overlay to a different VRF might
1462 	 * cause local address conflict, and the conflicting tunnels need to be
1463 	 * demoted.
1464 	 */
1465 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1466 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1467 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1468 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1469 						 saddr, ul_tb_id,
1470 						 ipip_entry)) {
1471 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1472 		return 0;
1473 	}
1474 
1475 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1476 						   true, false, false, extack);
1477 }
1478 
1479 static int
1480 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1481 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1482 				     struct net_device *ul_dev,
1483 				     struct netlink_ext_ack *extack)
1484 {
1485 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1486 						   true, true, false, extack);
1487 }
1488 
1489 static int
1490 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1491 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1492 				    struct net_device *ul_dev)
1493 {
1494 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1495 						   false, false, true, NULL);
1496 }
1497 
1498 static int
1499 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1500 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1501 				      struct net_device *ul_dev)
1502 {
1503 	/* A down underlay device causes encapsulated packets to not be
1504 	 * forwarded, but decap still works. So refresh next hops without
1505 	 * touching anything else.
1506 	 */
1507 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1508 						   false, false, true, NULL);
1509 }
1510 
1511 static int
1512 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1513 					struct net_device *ol_dev,
1514 					struct netlink_ext_ack *extack)
1515 {
1516 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1517 	struct mlxsw_sp_ipip_entry *ipip_entry;
1518 	int err;
1519 
1520 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1521 	if (!ipip_entry)
1522 		/* A change might make a tunnel eligible for offloading, but
1523 		 * that is currently not implemented. What falls to slow path
1524 		 * stays there.
1525 		 */
1526 		return 0;
1527 
1528 	/* A change might make a tunnel not eligible for offloading. */
1529 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1530 						 ipip_entry->ipipt)) {
1531 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1532 		return 0;
1533 	}
1534 
1535 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1536 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1537 	return err;
1538 }
1539 
1540 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1541 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1542 {
1543 	struct net_device *ol_dev = ipip_entry->ol_dev;
1544 
1545 	if (ol_dev->flags & IFF_UP)
1546 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1547 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1548 }
1549 
1550 /* The configuration where several tunnels have the same local address in the
1551  * same underlay table needs special treatment in the HW. That is currently not
1552  * implemented in the driver. This function finds and demotes the first tunnel
1553  * with a given source address, except the one passed in in the argument
1554  * `except'.
1555  */
1556 bool
1557 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1558 				     enum mlxsw_sp_l3proto ul_proto,
1559 				     union mlxsw_sp_l3addr saddr,
1560 				     u32 ul_tb_id,
1561 				     const struct mlxsw_sp_ipip_entry *except)
1562 {
1563 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1564 
1565 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1566 				 ipip_list_node) {
1567 		if (ipip_entry != except &&
1568 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1569 						      ul_tb_id, ipip_entry)) {
1570 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1571 			return true;
1572 		}
1573 	}
1574 
1575 	return false;
1576 }
1577 
1578 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1579 						     struct net_device *ul_dev)
1580 {
1581 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1582 
1583 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1584 				 ipip_list_node) {
1585 		struct net_device *ipip_ul_dev =
1586 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1587 
1588 		if (ipip_ul_dev == ul_dev)
1589 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1590 	}
1591 }
1592 
1593 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1594 				     struct net_device *ol_dev,
1595 				     unsigned long event,
1596 				     struct netdev_notifier_info *info)
1597 {
1598 	struct netdev_notifier_changeupper_info *chup;
1599 	struct netlink_ext_ack *extack;
1600 
1601 	switch (event) {
1602 	case NETDEV_REGISTER:
1603 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1604 	case NETDEV_UNREGISTER:
1605 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1606 		return 0;
1607 	case NETDEV_UP:
1608 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1609 		return 0;
1610 	case NETDEV_DOWN:
1611 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1612 		return 0;
1613 	case NETDEV_CHANGEUPPER:
1614 		chup = container_of(info, typeof(*chup), info);
1615 		extack = info->extack;
1616 		if (netif_is_l3_master(chup->upper_dev))
1617 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1618 								    ol_dev,
1619 								    extack);
1620 		return 0;
1621 	case NETDEV_CHANGE:
1622 		extack = info->extack;
1623 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1624 							       ol_dev, extack);
1625 	}
1626 	return 0;
1627 }
1628 
1629 static int
1630 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1631 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1632 				   struct net_device *ul_dev,
1633 				   unsigned long event,
1634 				   struct netdev_notifier_info *info)
1635 {
1636 	struct netdev_notifier_changeupper_info *chup;
1637 	struct netlink_ext_ack *extack;
1638 
1639 	switch (event) {
1640 	case NETDEV_CHANGEUPPER:
1641 		chup = container_of(info, typeof(*chup), info);
1642 		extack = info->extack;
1643 		if (netif_is_l3_master(chup->upper_dev))
1644 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1645 								    ipip_entry,
1646 								    ul_dev,
1647 								    extack);
1648 		break;
1649 
1650 	case NETDEV_UP:
1651 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1652 							   ul_dev);
1653 	case NETDEV_DOWN:
1654 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1655 							     ipip_entry,
1656 							     ul_dev);
1657 	}
1658 	return 0;
1659 }
1660 
1661 int
1662 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1663 				 struct net_device *ul_dev,
1664 				 unsigned long event,
1665 				 struct netdev_notifier_info *info)
1666 {
1667 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1668 	int err;
1669 
1670 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1671 								ul_dev,
1672 								ipip_entry))) {
1673 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1674 							 ul_dev, event, info);
1675 		if (err) {
1676 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1677 								 ul_dev);
1678 			return err;
1679 		}
1680 	}
1681 
1682 	return 0;
1683 }
1684 
/* Hash table key of a neighbour entry: the kernel neighbour it mirrors. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1688 
/* Driver-side state kept for one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node; /* member of the RIF's neigh_list */
	struct rhash_head ht_node; /* linkage in the router's neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif; /* index of the RIF the entry was created for */
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index; /* only meaningful if counter_valid */
	bool counter_valid; /* a flow counter is currently allocated */
};
1703 
/* rhashtable parameters for the neighbour table: entries are keyed by their
 * embedded struct mlxsw_sp_neigh_key and linked through ht_node.
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1709 
1710 struct mlxsw_sp_neigh_entry *
1711 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1712 			struct mlxsw_sp_neigh_entry *neigh_entry)
1713 {
1714 	if (!neigh_entry) {
1715 		if (list_empty(&rif->neigh_list))
1716 			return NULL;
1717 		else
1718 			return list_first_entry(&rif->neigh_list,
1719 						typeof(*neigh_entry),
1720 						rif_list_node);
1721 	}
1722 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1723 		return NULL;
1724 	return list_next_entry(neigh_entry, rif_list_node);
1725 }
1726 
1727 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1728 {
1729 	return neigh_entry->key.n->tbl->family;
1730 }
1731 
1732 unsigned char *
1733 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1734 {
1735 	return neigh_entry->ha;
1736 }
1737 
1738 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1739 {
1740 	struct neighbour *n;
1741 
1742 	n = neigh_entry->key.n;
1743 	return ntohl(*((__be32 *) n->primary_key));
1744 }
1745 
1746 struct in6_addr *
1747 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1748 {
1749 	struct neighbour *n;
1750 
1751 	n = neigh_entry->key.n;
1752 	return (struct in6_addr *) &n->primary_key;
1753 }
1754 
1755 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1756 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1757 			       u64 *p_counter)
1758 {
1759 	if (!neigh_entry->counter_valid)
1760 		return -EINVAL;
1761 
1762 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1763 					 p_counter, NULL);
1764 }
1765 
1766 static struct mlxsw_sp_neigh_entry *
1767 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1768 			   u16 rif)
1769 {
1770 	struct mlxsw_sp_neigh_entry *neigh_entry;
1771 
1772 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1773 	if (!neigh_entry)
1774 		return NULL;
1775 
1776 	neigh_entry->key.n = n;
1777 	neigh_entry->rif = rif;
1778 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1779 
1780 	return neigh_entry;
1781 }
1782 
/* Free the memory of @neigh_entry. Nothing else is torn down here. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1787 
1788 static int
1789 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1790 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1791 {
1792 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1793 				      &neigh_entry->ht_node,
1794 				      mlxsw_sp_neigh_ht_params);
1795 }
1796 
1797 static void
1798 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1799 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1800 {
1801 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1802 			       &neigh_entry->ht_node,
1803 			       mlxsw_sp_neigh_ht_params);
1804 }
1805 
1806 static bool
1807 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1808 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1809 {
1810 	struct devlink *devlink;
1811 	const char *table_name;
1812 
1813 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1814 	case AF_INET:
1815 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1816 		break;
1817 	case AF_INET6:
1818 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1819 		break;
1820 	default:
1821 		WARN_ON(1);
1822 		return false;
1823 	}
1824 
1825 	devlink = priv_to_devlink(mlxsw_sp->core);
1826 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1827 }
1828 
1829 static void
1830 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1831 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1832 {
1833 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1834 		return;
1835 
1836 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1837 		return;
1838 
1839 	neigh_entry->counter_valid = true;
1840 }
1841 
1842 static void
1843 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1844 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1845 {
1846 	if (!neigh_entry->counter_valid)
1847 		return;
1848 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1849 				   neigh_entry->counter_index);
1850 	neigh_entry->counter_valid = false;
1851 }
1852 
1853 static struct mlxsw_sp_neigh_entry *
1854 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1855 {
1856 	struct mlxsw_sp_neigh_entry *neigh_entry;
1857 	struct mlxsw_sp_rif *rif;
1858 	int err;
1859 
1860 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1861 	if (!rif)
1862 		return ERR_PTR(-EINVAL);
1863 
1864 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1865 	if (!neigh_entry)
1866 		return ERR_PTR(-ENOMEM);
1867 
1868 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1869 	if (err)
1870 		goto err_neigh_entry_insert;
1871 
1872 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1873 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1874 
1875 	return neigh_entry;
1876 
1877 err_neigh_entry_insert:
1878 	mlxsw_sp_neigh_entry_free(neigh_entry);
1879 	return ERR_PTR(err);
1880 }
1881 
/* Tear down @neigh_entry in the reverse order of
 * mlxsw_sp_neigh_entry_create(): unlink it from the RIF's neighbour list,
 * release its counter (if any), remove it from the hash table and free it.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
1891 
1892 static struct mlxsw_sp_neigh_entry *
1893 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1894 {
1895 	struct mlxsw_sp_neigh_key key;
1896 
1897 	key.n = n;
1898 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1899 				      &key, mlxsw_sp_neigh_ht_params);
1900 }
1901 
1902 static void
1903 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1904 {
1905 	unsigned long interval;
1906 
1907 #if IS_ENABLED(CONFIG_IPV6)
1908 	interval = min_t(unsigned long,
1909 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1910 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1911 #else
1912 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1913 #endif
1914 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1915 }
1916 
1917 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1918 						   char *rauhtd_pl,
1919 						   int ent_index)
1920 {
1921 	struct net_device *dev;
1922 	struct neighbour *n;
1923 	__be32 dipn;
1924 	u32 dip;
1925 	u16 rif;
1926 
1927 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1928 
1929 	if (!mlxsw_sp->router->rifs[rif]) {
1930 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1931 		return;
1932 	}
1933 
1934 	dipn = htonl(dip);
1935 	dev = mlxsw_sp->router->rifs[rif]->dev;
1936 	n = neigh_lookup(&arp_tbl, &dipn, dev);
1937 	if (!n) {
1938 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
1939 			   &dip);
1940 		return;
1941 	}
1942 
1943 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1944 	neigh_event_send(n, NULL);
1945 	neigh_release(n);
1946 }
1947 
1948 #if IS_ENABLED(CONFIG_IPV6)
1949 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1950 						   char *rauhtd_pl,
1951 						   int rec_index)
1952 {
1953 	struct net_device *dev;
1954 	struct neighbour *n;
1955 	struct in6_addr dip;
1956 	u16 rif;
1957 
1958 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1959 					 (char *) &dip);
1960 
1961 	if (!mlxsw_sp->router->rifs[rif]) {
1962 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1963 		return;
1964 	}
1965 
1966 	dev = mlxsw_sp->router->rifs[rif]->dev;
1967 	n = neigh_lookup(&nd_tbl, &dip, dev);
1968 	if (!n) {
1969 		netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
1970 			   &dip);
1971 		return;
1972 	}
1973 
1974 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
1975 	neigh_event_send(n, NULL);
1976 	neigh_release(n);
1977 }
1978 #else
1979 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1980 						   char *rauhtd_pl,
1981 						   int rec_index)
1982 {
1983 }
1984 #endif
1985 
1986 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1987 						   char *rauhtd_pl,
1988 						   int rec_index)
1989 {
1990 	u8 num_entries;
1991 	int i;
1992 
1993 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
1994 								rec_index);
1995 	/* Hardware starts counting at 0, so add 1. */
1996 	num_entries++;
1997 
1998 	/* Each record consists of several neighbour entries. */
1999 	for (i = 0; i < num_entries; i++) {
2000 		int ent_index;
2001 
2002 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2003 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2004 						       ent_index);
2005 	}
2006 
2007 }
2008 
/* Process one IPv6 record of a RAUHTD dump. An IPv6 record carries a
 * single entry, so the record index is passed on as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2017 
2018 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2019 					      char *rauhtd_pl, int rec_index)
2020 {
2021 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2022 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2023 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2024 						       rec_index);
2025 		break;
2026 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2027 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2028 						       rec_index);
2029 		break;
2030 	}
2031 }
2032 
2033 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2034 {
2035 	u8 num_rec, last_rec_index, num_entries;
2036 
2037 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2038 	last_rec_index = num_rec - 1;
2039 
2040 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2041 		return false;
2042 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2043 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2044 		return true;
2045 
2046 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2047 								last_rec_index);
2048 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2049 		return true;
2050 	return false;
2051 }
2052 
2053 static int
2054 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2055 				       char *rauhtd_pl,
2056 				       enum mlxsw_reg_rauhtd_type type)
2057 {
2058 	int i, num_rec;
2059 	int err;
2060 
2061 	/* Make sure the neighbour's netdev isn't removed in the
2062 	 * process.
2063 	 */
2064 	rtnl_lock();
2065 	do {
2066 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2067 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2068 				      rauhtd_pl);
2069 		if (err) {
2070 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2071 			break;
2072 		}
2073 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2074 		for (i = 0; i < num_rec; i++)
2075 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2076 							  i);
2077 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2078 	rtnl_unlock();
2079 
2080 	return err;
2081 }
2082 
2083 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2084 {
2085 	enum mlxsw_reg_rauhtd_type type;
2086 	char *rauhtd_pl;
2087 	int err;
2088 
2089 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2090 	if (!rauhtd_pl)
2091 		return -ENOMEM;
2092 
2093 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2094 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2095 	if (err)
2096 		goto out;
2097 
2098 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2099 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2100 out:
2101 	kfree(rauhtd_pl);
2102 	return err;
2103 }
2104 
2105 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2106 {
2107 	struct mlxsw_sp_neigh_entry *neigh_entry;
2108 
2109 	/* Take RTNL mutex here to prevent lists from changes */
2110 	rtnl_lock();
2111 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2112 			    nexthop_neighs_list_node)
2113 		/* If this neigh have nexthops, make the kernel think this neigh
2114 		 * is active regardless of the traffic.
2115 		 */
2116 		neigh_event_send(neigh_entry->key.n, NULL);
2117 	rtnl_unlock();
2118 }
2119 
2120 static void
2121 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2122 {
2123 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2124 
2125 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2126 			       msecs_to_jiffies(interval));
2127 }
2128 
2129 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2130 {
2131 	struct mlxsw_sp_router *router;
2132 	int err;
2133 
2134 	router = container_of(work, struct mlxsw_sp_router,
2135 			      neighs_update.dw.work);
2136 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2137 	if (err)
2138 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2139 
2140 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2141 
2142 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2143 }
2144 
2145 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2146 {
2147 	struct mlxsw_sp_neigh_entry *neigh_entry;
2148 	struct mlxsw_sp_router *router;
2149 
2150 	router = container_of(work, struct mlxsw_sp_router,
2151 			      nexthop_probe_dw.work);
2152 	/* Iterate over nexthop neighbours, find those who are unresolved and
2153 	 * send arp on them. This solves the chicken-egg problem when
2154 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2155 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2156 	 * using different nexthop.
2157 	 *
2158 	 * Take RTNL mutex here to prevent lists from changes.
2159 	 */
2160 	rtnl_lock();
2161 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2162 			    nexthop_neighs_list_node)
2163 		if (!neigh_entry->connected)
2164 			neigh_event_send(neigh_entry->key.n, NULL);
2165 	rtnl_unlock();
2166 
2167 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2168 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2169 }
2170 
2171 static void
2172 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2173 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2174 			      bool removing);
2175 
2176 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2177 {
2178 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2179 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2180 }
2181 
2182 static void
2183 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2184 				struct mlxsw_sp_neigh_entry *neigh_entry,
2185 				enum mlxsw_reg_rauht_op op)
2186 {
2187 	struct neighbour *n = neigh_entry->key.n;
2188 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2189 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2190 
2191 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2192 			      dip);
2193 	if (neigh_entry->counter_valid)
2194 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2195 					     neigh_entry->counter_index);
2196 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2197 }
2198 
2199 static void
2200 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2201 				struct mlxsw_sp_neigh_entry *neigh_entry,
2202 				enum mlxsw_reg_rauht_op op)
2203 {
2204 	struct neighbour *n = neigh_entry->key.n;
2205 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2206 	const char *dip = n->primary_key;
2207 
2208 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2209 			      dip);
2210 	if (neigh_entry->counter_valid)
2211 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2212 					     neigh_entry->counter_index);
2213 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2214 }
2215 
2216 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2217 {
2218 	struct neighbour *n = neigh_entry->key.n;
2219 
2220 	/* Packets with a link-local destination address are trapped
2221 	 * after LPM lookup and never reach the neighbour table, so
2222 	 * there is no need to program such neighbours to the device.
2223 	 */
2224 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2225 	    IPV6_ADDR_LINKLOCAL)
2226 		return true;
2227 	return false;
2228 }
2229 
2230 static void
2231 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2232 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2233 			    bool adding)
2234 {
2235 	if (!adding && !neigh_entry->connected)
2236 		return;
2237 	neigh_entry->connected = adding;
2238 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2239 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2240 						mlxsw_sp_rauht_op(adding));
2241 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2242 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2243 			return;
2244 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2245 						mlxsw_sp_rauht_op(adding));
2246 	} else {
2247 		WARN_ON_ONCE(1);
2248 	}
2249 }
2250 
2251 void
2252 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2253 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2254 				    bool adding)
2255 {
2256 	if (adding)
2257 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2258 	else
2259 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2260 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2261 }
2262 
/* Context of a netevent whose handling is deferred to a work item. It is
 * allocated in the notifier and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n; /* only set for neighbour update events */
};
2268 
2269 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2270 {
2271 	struct mlxsw_sp_netevent_work *net_work =
2272 		container_of(work, struct mlxsw_sp_netevent_work, work);
2273 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2274 	struct mlxsw_sp_neigh_entry *neigh_entry;
2275 	struct neighbour *n = net_work->n;
2276 	unsigned char ha[ETH_ALEN];
2277 	bool entry_connected;
2278 	u8 nud_state, dead;
2279 
2280 	/* If these parameters are changed after we release the lock,
2281 	 * then we are guaranteed to receive another event letting us
2282 	 * know about it.
2283 	 */
2284 	read_lock_bh(&n->lock);
2285 	memcpy(ha, n->ha, ETH_ALEN);
2286 	nud_state = n->nud_state;
2287 	dead = n->dead;
2288 	read_unlock_bh(&n->lock);
2289 
2290 	rtnl_lock();
2291 	entry_connected = nud_state & NUD_VALID && !dead;
2292 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2293 	if (!entry_connected && !neigh_entry)
2294 		goto out;
2295 	if (!neigh_entry) {
2296 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2297 		if (IS_ERR(neigh_entry))
2298 			goto out;
2299 	}
2300 
2301 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2302 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2303 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2304 
2305 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2306 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2307 
2308 out:
2309 	rtnl_unlock();
2310 	neigh_release(n);
2311 	kfree(net_work);
2312 }
2313 
2314 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2315 
2316 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2317 {
2318 	struct mlxsw_sp_netevent_work *net_work =
2319 		container_of(work, struct mlxsw_sp_netevent_work, work);
2320 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2321 
2322 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2323 	kfree(net_work);
2324 }
2325 
2326 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2327 					  unsigned long event, void *ptr)
2328 {
2329 	struct mlxsw_sp_netevent_work *net_work;
2330 	struct mlxsw_sp_port *mlxsw_sp_port;
2331 	struct mlxsw_sp_router *router;
2332 	struct mlxsw_sp *mlxsw_sp;
2333 	unsigned long interval;
2334 	struct neigh_parms *p;
2335 	struct neighbour *n;
2336 	struct net *net;
2337 
2338 	switch (event) {
2339 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2340 		p = ptr;
2341 
2342 		/* We don't care about changes in the default table. */
2343 		if (!p->dev || (p->tbl->family != AF_INET &&
2344 				p->tbl->family != AF_INET6))
2345 			return NOTIFY_DONE;
2346 
2347 		/* We are in atomic context and can't take RTNL mutex,
2348 		 * so use RCU variant to walk the device chain.
2349 		 */
2350 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2351 		if (!mlxsw_sp_port)
2352 			return NOTIFY_DONE;
2353 
2354 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2355 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2356 		mlxsw_sp->router->neighs_update.interval = interval;
2357 
2358 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2359 		break;
2360 	case NETEVENT_NEIGH_UPDATE:
2361 		n = ptr;
2362 
2363 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2364 			return NOTIFY_DONE;
2365 
2366 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2367 		if (!mlxsw_sp_port)
2368 			return NOTIFY_DONE;
2369 
2370 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2371 		if (!net_work) {
2372 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2373 			return NOTIFY_BAD;
2374 		}
2375 
2376 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2377 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2378 		net_work->n = n;
2379 
2380 		/* Take a reference to ensure the neighbour won't be
2381 		 * destructed until we drop the reference in delayed
2382 		 * work.
2383 		 */
2384 		neigh_clone(n);
2385 		mlxsw_core_schedule_work(&net_work->work);
2386 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2387 		break;
2388 	case NETEVENT_MULTIPATH_HASH_UPDATE:
2389 		net = ptr;
2390 
2391 		if (!net_eq(net, &init_net))
2392 			return NOTIFY_DONE;
2393 
2394 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2395 		if (!net_work)
2396 			return NOTIFY_BAD;
2397 
2398 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2399 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2400 		net_work->mlxsw_sp = router->mlxsw_sp;
2401 		mlxsw_core_schedule_work(&net_work->work);
2402 		break;
2403 	}
2404 
2405 	return NOTIFY_DONE;
2406 }
2407 
2408 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2409 {
2410 	int err;
2411 
2412 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2413 			      &mlxsw_sp_neigh_ht_params);
2414 	if (err)
2415 		return err;
2416 
2417 	/* Initialize the polling interval according to the default
2418 	 * table.
2419 	 */
2420 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2421 
2422 	/* Create the delayed works for the activity_update */
2423 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2424 			  mlxsw_sp_router_neighs_update_work);
2425 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2426 			  mlxsw_sp_router_probe_unresolved_nexthops);
2427 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2428 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2429 	return 0;
2430 }
2431 
2432 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2433 {
2434 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2435 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2436 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2437 }
2438 
2439 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2440 					 struct mlxsw_sp_rif *rif)
2441 {
2442 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2443 
2444 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2445 				 rif_list_node) {
2446 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2447 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2448 	}
2449 }
2450 
/* Kind of a nexthop; it selects which adjacency update routine is used and
 * which member of the union in struct mlxsw_sp_nexthop is meaningful.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,
	MLXSW_SP_NEXTHOP_TYPE_IPIP,
};
2455 
/* Hash table key of a nexthop (see mlxsw_sp_nexthop_ht_params). */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2459 
/* A single nexthop; nexthops live in the nexthops array of their group. */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node; /* member of the router's
					    * nexthop_list
					    */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)]; /* sized to hold an
							  * IPv6 address
							  */
	int ifindex;
	int nh_weight;
	int norm_nh_weight;
	int num_adj_entries; /* number of consecutive adjacency entries
			      * written for this nexthop
			      */
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	/* The members share storage; which one is used depends on type. */
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index; /* only meaningful if counter_valid */
	bool counter_valid;
};
2492 
2493 struct mlxsw_sp_nexthop_group {
2494 	void *priv;
2495 	struct rhash_head ht_node;
2496 	struct list_head fib_list; /* list of fib entries that use this group */
2497 	struct neigh_table *neigh_tbl;
2498 	u8 adj_index_valid:1,
2499 	   gateway:1; /* routes using the group use a gateway */
2500 	u32 adj_index;
2501 	u16 ecmp_size;
2502 	u16 count;
2503 	int sum_norm_weight;
2504 	struct mlxsw_sp_nexthop nexthops[0];
2505 #define nh_rif	nexthops[0].rif
2506 };
2507 
2508 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2509 				    struct mlxsw_sp_nexthop *nh)
2510 {
2511 	struct devlink *devlink;
2512 
2513 	devlink = priv_to_devlink(mlxsw_sp->core);
2514 	if (!devlink_dpipe_table_counter_enabled(devlink,
2515 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2516 		return;
2517 
2518 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2519 		return;
2520 
2521 	nh->counter_valid = true;
2522 }
2523 
2524 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2525 				   struct mlxsw_sp_nexthop *nh)
2526 {
2527 	if (!nh->counter_valid)
2528 		return;
2529 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2530 	nh->counter_valid = false;
2531 }
2532 
2533 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2534 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2535 {
2536 	if (!nh->counter_valid)
2537 		return -EINVAL;
2538 
2539 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2540 					 p_counter, NULL);
2541 }
2542 
2543 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2544 					       struct mlxsw_sp_nexthop *nh)
2545 {
2546 	if (!nh) {
2547 		if (list_empty(&router->nexthop_list))
2548 			return NULL;
2549 		else
2550 			return list_first_entry(&router->nexthop_list,
2551 						typeof(*nh), router_list_node);
2552 	}
2553 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2554 		return NULL;
2555 	return list_next_entry(nh, router_list_node);
2556 }
2557 
2558 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2559 {
2560 	return nh->offloaded;
2561 }
2562 
2563 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2564 {
2565 	if (!nh->offloaded)
2566 		return NULL;
2567 	return nh->neigh_entry->ha;
2568 }
2569 
2570 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2571 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2572 {
2573 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2574 	u32 adj_hash_index = 0;
2575 	int i;
2576 
2577 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2578 		return -EINVAL;
2579 
2580 	*p_adj_index = nh_grp->adj_index;
2581 	*p_adj_size = nh_grp->ecmp_size;
2582 
2583 	for (i = 0; i < nh_grp->count; i++) {
2584 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2585 
2586 		if (nh_iter == nh)
2587 			break;
2588 		if (nh_iter->offloaded)
2589 			adj_hash_index += nh_iter->num_adj_entries;
2590 	}
2591 
2592 	*p_adj_hash_index = adj_hash_index;
2593 	return 0;
2594 }
2595 
2596 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2597 {
2598 	return nh->rif;
2599 }
2600 
2601 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2602 {
2603 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2604 	int i;
2605 
2606 	for (i = 0; i < nh_grp->count; i++) {
2607 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2608 
2609 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2610 			return true;
2611 	}
2612 	return false;
2613 }
2614 
2615 static struct fib_info *
2616 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2617 {
2618 	return nh_grp->priv;
2619 }
2620 
/* Lookup key for the nexthop group hash table. @proto selects which member
 * of the union is valid: fi for IPv4, fib6_entry for IPv6.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};
2628 
2629 static bool
2630 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2631 				    const struct in6_addr *gw, int ifindex)
2632 {
2633 	int i;
2634 
2635 	for (i = 0; i < nh_grp->count; i++) {
2636 		const struct mlxsw_sp_nexthop *nh;
2637 
2638 		nh = &nh_grp->nexthops[i];
2639 		if (nh->ifindex == ifindex &&
2640 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2641 			return true;
2642 	}
2643 
2644 	return false;
2645 }
2646 
2647 static bool
2648 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2649 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2650 {
2651 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2652 
2653 	if (nh_grp->count != fib6_entry->nrt6)
2654 		return false;
2655 
2656 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2657 		struct in6_addr *gw;
2658 		int ifindex;
2659 
2660 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2661 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2662 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
2663 			return false;
2664 	}
2665 
2666 	return true;
2667 }
2668 
2669 static int
2670 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2671 {
2672 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2673 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2674 
2675 	switch (cmp_arg->proto) {
2676 	case MLXSW_SP_L3_PROTO_IPV4:
2677 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2678 	case MLXSW_SP_L3_PROTO_IPV6:
2679 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2680 						    cmp_arg->fib6_entry);
2681 	default:
2682 		WARN_ON(1);
2683 		return 1;
2684 	}
2685 }
2686 
2687 static int
2688 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2689 {
2690 	return nh_grp->neigh_tbl->family;
2691 }
2692 
2693 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2694 {
2695 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2696 	const struct mlxsw_sp_nexthop *nh;
2697 	struct fib_info *fi;
2698 	unsigned int val;
2699 	int i;
2700 
2701 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2702 	case AF_INET:
2703 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2704 		return jhash(&fi, sizeof(fi), seed);
2705 	case AF_INET6:
2706 		val = nh_grp->count;
2707 		for (i = 0; i < nh_grp->count; i++) {
2708 			nh = &nh_grp->nexthops[i];
2709 			val ^= nh->ifindex;
2710 		}
2711 		return jhash(&val, sizeof(val), seed);
2712 	default:
2713 		WARN_ON(1);
2714 		return 0;
2715 	}
2716 }
2717 
2718 static u32
2719 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2720 {
2721 	unsigned int val = fib6_entry->nrt6;
2722 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2723 	struct net_device *dev;
2724 
2725 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2726 		dev = mlxsw_sp_rt6->rt->dst.dev;
2727 		val ^= dev->ifindex;
2728 	}
2729 
2730 	return jhash(&val, sizeof(val), seed);
2731 }
2732 
2733 static u32
2734 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2735 {
2736 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2737 
2738 	switch (cmp_arg->proto) {
2739 	case MLXSW_SP_L3_PROTO_IPV4:
2740 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2741 	case MLXSW_SP_L3_PROTO_IPV6:
2742 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2743 	default:
2744 		WARN_ON(1);
2745 		return 0;
2746 	}
2747 }
2748 
/* Nexthop group hash table parameters. Lookup keys and stored objects are
 * of different types, hence the separate key hash, object hash and compare
 * callbacks.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2755 
2756 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2757 					 struct mlxsw_sp_nexthop_group *nh_grp)
2758 {
2759 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2760 	    !nh_grp->gateway)
2761 		return 0;
2762 
2763 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2764 				      &nh_grp->ht_node,
2765 				      mlxsw_sp_nexthop_group_ht_params);
2766 }
2767 
2768 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2769 					  struct mlxsw_sp_nexthop_group *nh_grp)
2770 {
2771 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2772 	    !nh_grp->gateway)
2773 		return;
2774 
2775 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2776 			       &nh_grp->ht_node,
2777 			       mlxsw_sp_nexthop_group_ht_params);
2778 }
2779 
2780 static struct mlxsw_sp_nexthop_group *
2781 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2782 			       struct fib_info *fi)
2783 {
2784 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2785 
2786 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2787 	cmp_arg.fi = fi;
2788 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2789 				      &cmp_arg,
2790 				      mlxsw_sp_nexthop_group_ht_params);
2791 }
2792 
2793 static struct mlxsw_sp_nexthop_group *
2794 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2795 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2796 {
2797 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2798 
2799 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2800 	cmp_arg.fib6_entry = fib6_entry;
2801 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2802 				      &cmp_arg,
2803 				      mlxsw_sp_nexthop_group_ht_params);
2804 }
2805 
/* Nexthop hash table parameters: nexthops are keyed by the embedded
 * struct mlxsw_sp_nexthop_key.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2811 
2812 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2813 				   struct mlxsw_sp_nexthop *nh)
2814 {
2815 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2816 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2817 }
2818 
2819 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2820 				    struct mlxsw_sp_nexthop *nh)
2821 {
2822 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2823 			       mlxsw_sp_nexthop_ht_params);
2824 }
2825 
2826 static struct mlxsw_sp_nexthop *
2827 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2828 			struct mlxsw_sp_nexthop_key key)
2829 {
2830 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2831 				      mlxsw_sp_nexthop_ht_params);
2832 }
2833 
2834 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2835 					     const struct mlxsw_sp_fib *fib,
2836 					     u32 adj_index, u16 ecmp_size,
2837 					     u32 new_adj_index,
2838 					     u16 new_ecmp_size)
2839 {
2840 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2841 
2842 	mlxsw_reg_raleu_pack(raleu_pl,
2843 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2844 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2845 			     new_ecmp_size);
2846 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2847 }
2848 
2849 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2850 					  struct mlxsw_sp_nexthop_group *nh_grp,
2851 					  u32 old_adj_index, u16 old_ecmp_size)
2852 {
2853 	struct mlxsw_sp_fib_entry *fib_entry;
2854 	struct mlxsw_sp_fib *fib = NULL;
2855 	int err;
2856 
2857 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2858 		if (fib == fib_entry->fib_node->fib)
2859 			continue;
2860 		fib = fib_entry->fib_node->fib;
2861 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2862 							old_adj_index,
2863 							old_ecmp_size,
2864 							nh_grp->adj_index,
2865 							nh_grp->ecmp_size);
2866 		if (err)
2867 			return err;
2868 	}
2869 	return 0;
2870 }
2871 
2872 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2873 				     struct mlxsw_sp_nexthop *nh)
2874 {
2875 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2876 	char ratr_pl[MLXSW_REG_RATR_LEN];
2877 
2878 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2879 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2880 			    adj_index, neigh_entry->rif);
2881 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2882 	if (nh->counter_valid)
2883 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2884 	else
2885 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2886 
2887 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2888 }
2889 
2890 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2891 			    struct mlxsw_sp_nexthop *nh)
2892 {
2893 	int i;
2894 
2895 	for (i = 0; i < nh->num_adj_entries; i++) {
2896 		int err;
2897 
2898 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2899 		if (err)
2900 			return err;
2901 	}
2902 
2903 	return 0;
2904 }
2905 
2906 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2907 					  u32 adj_index,
2908 					  struct mlxsw_sp_nexthop *nh)
2909 {
2910 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2911 
2912 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2913 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2914 }
2915 
2916 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2917 					u32 adj_index,
2918 					struct mlxsw_sp_nexthop *nh)
2919 {
2920 	int i;
2921 
2922 	for (i = 0; i < nh->num_adj_entries; i++) {
2923 		int err;
2924 
2925 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2926 						     nh);
2927 		if (err)
2928 			return err;
2929 	}
2930 
2931 	return 0;
2932 }
2933 
2934 static int
2935 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2936 			      struct mlxsw_sp_nexthop_group *nh_grp,
2937 			      bool reallocate)
2938 {
2939 	u32 adj_index = nh_grp->adj_index; /* base */
2940 	struct mlxsw_sp_nexthop *nh;
2941 	int i;
2942 	int err;
2943 
2944 	for (i = 0; i < nh_grp->count; i++) {
2945 		nh = &nh_grp->nexthops[i];
2946 
2947 		if (!nh->should_offload) {
2948 			nh->offloaded = 0;
2949 			continue;
2950 		}
2951 
2952 		if (nh->update || reallocate) {
2953 			switch (nh->type) {
2954 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2955 				err = mlxsw_sp_nexthop_update
2956 					    (mlxsw_sp, adj_index, nh);
2957 				break;
2958 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2959 				err = mlxsw_sp_nexthop_ipip_update
2960 					    (mlxsw_sp, adj_index, nh);
2961 				break;
2962 			}
2963 			if (err)
2964 				return err;
2965 			nh->update = 0;
2966 			nh->offloaded = 1;
2967 		}
2968 		adj_index += nh->num_adj_entries;
2969 	}
2970 	return 0;
2971 }
2972 
2973 static bool
2974 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2975 				 const struct mlxsw_sp_fib_entry *fib_entry);
2976 
2977 static int
2978 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
2979 				    struct mlxsw_sp_nexthop_group *nh_grp)
2980 {
2981 	struct mlxsw_sp_fib_entry *fib_entry;
2982 	int err;
2983 
2984 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2985 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
2986 						      fib_entry))
2987 			continue;
2988 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2989 		if (err)
2990 			return err;
2991 	}
2992 	return 0;
2993 }
2994 
2995 static void
2996 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
2997 				   enum mlxsw_reg_ralue_op op, int err);
2998 
2999 static void
3000 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3001 {
3002 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3003 	struct mlxsw_sp_fib_entry *fib_entry;
3004 
3005 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3006 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3007 						      fib_entry))
3008 			continue;
3009 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3010 	}
3011 }
3012 
/* Round *@p_adj_grp_size up to a valid adjacency group size.
 *
 * Valid sizes are 1-64, 512, 1024, 2048 and 4096. Values up to 64 are left
 * untouched; anything above 2048 becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 size = *p_adj_grp_size;

	if (size <= 64)
		return;

	if (size > 2048)
		size = 4096;
	else if (size > 1024)
		size = 2048;
	else if (size > 512)
		size = 1024;
	else
		size = 512;

	*p_adj_grp_size = size;
}
3029 
/* Set *@p_adj_grp_size to the largest of 512, 1024, 2048 and 4096 that does
 * not exceed @alloc_size. When @alloc_size is below 512 the value is left
 * unchanged.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size < 512)
		return;

	if (alloc_size < 1024)
		*p_adj_grp_size = 512;
	else if (alloc_size < 2048)
		*p_adj_grp_size = 1024;
	else if (alloc_size < 4096)
		*p_adj_grp_size = 2048;
	else
		*p_adj_grp_size = 4096;
}
3042 
3043 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3044 				     u16 *p_adj_grp_size)
3045 {
3046 	unsigned int alloc_size;
3047 	int err;
3048 
3049 	/* Round up the requested group size to the next size supported
3050 	 * by the device and make sure the request can be satisfied.
3051 	 */
3052 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3053 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3054 					     &alloc_size);
3055 	if (err)
3056 		return err;
3057 	/* It is possible the allocation results in more allocated
3058 	 * entries than requested. Try to use as much of them as
3059 	 * possible.
3060 	 */
3061 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3062 
3063 	return 0;
3064 }
3065 
3066 static void
3067 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3068 {
3069 	int i, g = 0, sum_norm_weight = 0;
3070 	struct mlxsw_sp_nexthop *nh;
3071 
3072 	for (i = 0; i < nh_grp->count; i++) {
3073 		nh = &nh_grp->nexthops[i];
3074 
3075 		if (!nh->should_offload)
3076 			continue;
3077 		if (g > 0)
3078 			g = gcd(nh->nh_weight, g);
3079 		else
3080 			g = nh->nh_weight;
3081 	}
3082 
3083 	for (i = 0; i < nh_grp->count; i++) {
3084 		nh = &nh_grp->nexthops[i];
3085 
3086 		if (!nh->should_offload)
3087 			continue;
3088 		nh->norm_nh_weight = nh->nh_weight / g;
3089 		sum_norm_weight += nh->norm_nh_weight;
3090 	}
3091 
3092 	nh_grp->sum_norm_weight = sum_norm_weight;
3093 }
3094 
3095 static void
3096 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3097 {
3098 	int total = nh_grp->sum_norm_weight;
3099 	u16 ecmp_size = nh_grp->ecmp_size;
3100 	int i, weight = 0, lower_bound = 0;
3101 
3102 	for (i = 0; i < nh_grp->count; i++) {
3103 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3104 		int upper_bound;
3105 
3106 		if (!nh->should_offload)
3107 			continue;
3108 		weight += nh->norm_nh_weight;
3109 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3110 		nh->num_adj_entries = upper_bound - lower_bound;
3111 		lower_bound = upper_bound;
3112 	}
3113 }
3114 
/* Bring the device state of @nh_grp in line with the desired offload state
 * of its nexthops.
 *
 * For non-gateway groups only the FIB entries using the group are updated.
 * For gateway groups, if no nexthop changed its offload state the existing
 * adjacency entries are updated in place. Otherwise a new adjacency block
 * is allocated, populated, and the FIB entries are pointed at it. On any
 * failure the function falls back to the set_trap path, which invalidates
 * the group's adjacency index and updates the FIB entries accordingly.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	if (!nh_grp->gateway) {
		/* No adjacency entries to manage; the return value of the
		 * FIB entries update is not checked here.
		 */
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect nexthops whose desired state differs from the current one;
	 * those that are about to be offloaded must be (re)written.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	/* The sum of normalized weights is the requested group size; it is
	 * then adjusted to a size the device and allocator support.
	 */
	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the previous adjacency block before switching the group
	 * over to the newly allocated one.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		/* NOTE(review): at this point nh_grp->adj_index already holds
		 * the new index, so set_trap frees the new block. If
		 * old_adj_index_valid was set, the old block does not appear
		 * to be freed on this path - confirm.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* A previous block existed: migrate users from the old index/size to
	 * the new one, then release the old block regardless of the result.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fallback: mark the group as having no valid adjacency index and
	 * no offloaded nexthop, update the FIB entries, and free the block
	 * the group currently points at if it was valid.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
3225 
3226 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3227 					    bool removing)
3228 {
3229 	if (!removing)
3230 		nh->should_offload = 1;
3231 	else if (nh->offloaded)
3232 		nh->should_offload = 0;
3233 	nh->update = 1;
3234 }
3235 
3236 static void
3237 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3238 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3239 			      bool removing)
3240 {
3241 	struct mlxsw_sp_nexthop *nh;
3242 
3243 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3244 			    neigh_list_node) {
3245 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3246 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3247 	}
3248 }
3249 
3250 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3251 				      struct mlxsw_sp_rif *rif)
3252 {
3253 	if (nh->rif)
3254 		return;
3255 
3256 	nh->rif = rif;
3257 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3258 }
3259 
3260 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3261 {
3262 	if (!nh->rif)
3263 		return;
3264 
3265 	list_del(&nh->rif_list_node);
3266 	nh->rif = NULL;
3267 }
3268 
3269 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3270 				       struct mlxsw_sp_nexthop *nh)
3271 {
3272 	struct mlxsw_sp_neigh_entry *neigh_entry;
3273 	struct neighbour *n;
3274 	u8 nud_state, dead;
3275 	int err;
3276 
3277 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3278 		return 0;
3279 
3280 	/* Take a reference of neigh here ensuring that neigh would
3281 	 * not be destructed before the nexthop entry is finished.
3282 	 * The reference is taken either in neigh_lookup() or
3283 	 * in neigh_create() in case n is not found.
3284 	 */
3285 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3286 	if (!n) {
3287 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3288 				 nh->rif->dev);
3289 		if (IS_ERR(n))
3290 			return PTR_ERR(n);
3291 		neigh_event_send(n, NULL);
3292 	}
3293 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3294 	if (!neigh_entry) {
3295 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3296 		if (IS_ERR(neigh_entry)) {
3297 			err = -EINVAL;
3298 			goto err_neigh_entry_create;
3299 		}
3300 	}
3301 
3302 	/* If that is the first nexthop connected to that neigh, add to
3303 	 * nexthop_neighs_list
3304 	 */
3305 	if (list_empty(&neigh_entry->nexthop_list))
3306 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3307 			      &mlxsw_sp->router->nexthop_neighs_list);
3308 
3309 	nh->neigh_entry = neigh_entry;
3310 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3311 	read_lock_bh(&n->lock);
3312 	nud_state = n->nud_state;
3313 	dead = n->dead;
3314 	read_unlock_bh(&n->lock);
3315 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3316 
3317 	return 0;
3318 
3319 err_neigh_entry_create:
3320 	neigh_release(n);
3321 	return err;
3322 }
3323 
3324 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3325 					struct mlxsw_sp_nexthop *nh)
3326 {
3327 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3328 	struct neighbour *n;
3329 
3330 	if (!neigh_entry)
3331 		return;
3332 	n = neigh_entry->key.n;
3333 
3334 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3335 	list_del(&nh->neigh_list_node);
3336 	nh->neigh_entry = NULL;
3337 
3338 	/* If that is the last nexthop connected to that neigh, remove from
3339 	 * nexthop_neighs_list
3340 	 */
3341 	if (list_empty(&neigh_entry->nexthop_list))
3342 		list_del(&neigh_entry->nexthop_neighs_list_node);
3343 
3344 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3345 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3346 
3347 	neigh_release(n);
3348 }
3349 
3350 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3351 {
3352 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3353 
3354 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3355 }
3356 
3357 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3358 				       struct mlxsw_sp_nexthop *nh,
3359 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3360 {
3361 	bool removing;
3362 
3363 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3364 		return;
3365 
3366 	nh->ipip_entry = ipip_entry;
3367 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3368 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3369 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3370 }
3371 
3372 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3373 				       struct mlxsw_sp_nexthop *nh)
3374 {
3375 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3376 
3377 	if (!ipip_entry)
3378 		return;
3379 
3380 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3381 	nh->ipip_entry = NULL;
3382 }
3383 
3384 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3385 					const struct fib_nh *fib_nh,
3386 					enum mlxsw_sp_ipip_type *p_ipipt)
3387 {
3388 	struct net_device *dev = fib_nh->nh_dev;
3389 
3390 	return dev &&
3391 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3392 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3393 }
3394 
3395 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3396 				       struct mlxsw_sp_nexthop *nh)
3397 {
3398 	switch (nh->type) {
3399 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3400 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3401 		mlxsw_sp_nexthop_rif_fini(nh);
3402 		break;
3403 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3404 		mlxsw_sp_nexthop_rif_fini(nh);
3405 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3406 		break;
3407 	}
3408 }
3409 
3410 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3411 				       struct mlxsw_sp_nexthop *nh,
3412 				       struct fib_nh *fib_nh)
3413 {
3414 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3415 	struct net_device *dev = fib_nh->nh_dev;
3416 	struct mlxsw_sp_ipip_entry *ipip_entry;
3417 	struct mlxsw_sp_rif *rif;
3418 	int err;
3419 
3420 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3421 	if (ipip_entry) {
3422 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3423 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3424 					  MLXSW_SP_L3_PROTO_IPV4)) {
3425 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3426 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3427 			return 0;
3428 		}
3429 	}
3430 
3431 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3432 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3433 	if (!rif)
3434 		return 0;
3435 
3436 	mlxsw_sp_nexthop_rif_init(nh, rif);
3437 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3438 	if (err)
3439 		goto err_neigh_init;
3440 
3441 	return 0;
3442 
3443 err_neigh_init:
3444 	mlxsw_sp_nexthop_rif_fini(nh);
3445 	return err;
3446 }
3447 
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(): simply delegates to
 * the protocol-independent type teardown.
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3453 
3454 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3455 				  struct mlxsw_sp_nexthop_group *nh_grp,
3456 				  struct mlxsw_sp_nexthop *nh,
3457 				  struct fib_nh *fib_nh)
3458 {
3459 	struct net_device *dev = fib_nh->nh_dev;
3460 	struct in_device *in_dev;
3461 	int err;
3462 
3463 	nh->nh_grp = nh_grp;
3464 	nh->key.fib_nh = fib_nh;
3465 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3466 	nh->nh_weight = fib_nh->nh_weight;
3467 #else
3468 	nh->nh_weight = 1;
3469 #endif
3470 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3471 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3472 	if (err)
3473 		return err;
3474 
3475 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3476 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3477 
3478 	if (!dev)
3479 		return 0;
3480 
3481 	in_dev = __in_dev_get_rtnl(dev);
3482 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3483 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3484 		return 0;
3485 
3486 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3487 	if (err)
3488 		goto err_nexthop_neigh_init;
3489 
3490 	return 0;
3491 
3492 err_nexthop_neigh_init:
3493 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3494 	return err;
3495 }
3496 
3497 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3498 				   struct mlxsw_sp_nexthop *nh)
3499 {
3500 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3501 	list_del(&nh->router_list_node);
3502 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3503 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3504 }
3505 
3506 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3507 				    unsigned long event, struct fib_nh *fib_nh)
3508 {
3509 	struct mlxsw_sp_nexthop_key key;
3510 	struct mlxsw_sp_nexthop *nh;
3511 
3512 	if (mlxsw_sp->router->aborted)
3513 		return;
3514 
3515 	key.fib_nh = fib_nh;
3516 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3517 	if (WARN_ON_ONCE(!nh))
3518 		return;
3519 
3520 	switch (event) {
3521 	case FIB_EVENT_NH_ADD:
3522 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3523 		break;
3524 	case FIB_EVENT_NH_DEL:
3525 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3526 		break;
3527 	}
3528 
3529 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3530 }
3531 
3532 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3533 					struct mlxsw_sp_rif *rif)
3534 {
3535 	struct mlxsw_sp_nexthop *nh;
3536 	bool removing;
3537 
3538 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3539 		switch (nh->type) {
3540 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3541 			removing = false;
3542 			break;
3543 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3544 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3545 			break;
3546 		default:
3547 			WARN_ON(1);
3548 			continue;
3549 		}
3550 
3551 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3552 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3553 	}
3554 }
3555 
3556 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3557 					 struct mlxsw_sp_rif *old_rif,
3558 					 struct mlxsw_sp_rif *new_rif)
3559 {
3560 	struct mlxsw_sp_nexthop *nh;
3561 
3562 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3563 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3564 		nh->rif = new_rif;
3565 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3566 }
3567 
3568 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3569 					   struct mlxsw_sp_rif *rif)
3570 {
3571 	struct mlxsw_sp_nexthop *nh, *tmp;
3572 
3573 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3574 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3575 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3576 	}
3577 }
3578 
3579 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3580 				   const struct fib_info *fi)
3581 {
3582 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3583 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3584 }
3585 
3586 static struct mlxsw_sp_nexthop_group *
3587 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3588 {
3589 	struct mlxsw_sp_nexthop_group *nh_grp;
3590 	struct mlxsw_sp_nexthop *nh;
3591 	struct fib_nh *fib_nh;
3592 	size_t alloc_size;
3593 	int i;
3594 	int err;
3595 
3596 	alloc_size = sizeof(*nh_grp) +
3597 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3598 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3599 	if (!nh_grp)
3600 		return ERR_PTR(-ENOMEM);
3601 	nh_grp->priv = fi;
3602 	INIT_LIST_HEAD(&nh_grp->fib_list);
3603 	nh_grp->neigh_tbl = &arp_tbl;
3604 
3605 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3606 	nh_grp->count = fi->fib_nhs;
3607 	fib_info_hold(fi);
3608 	for (i = 0; i < nh_grp->count; i++) {
3609 		nh = &nh_grp->nexthops[i];
3610 		fib_nh = &fi->fib_nh[i];
3611 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3612 		if (err)
3613 			goto err_nexthop4_init;
3614 	}
3615 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3616 	if (err)
3617 		goto err_nexthop_group_insert;
3618 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3619 	return nh_grp;
3620 
3621 err_nexthop_group_insert:
3622 err_nexthop4_init:
3623 	for (i--; i >= 0; i--) {
3624 		nh = &nh_grp->nexthops[i];
3625 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3626 	}
3627 	fib_info_put(fi);
3628 	kfree(nh_grp);
3629 	return ERR_PTR(err);
3630 }
3631 
3632 static void
3633 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3634 				struct mlxsw_sp_nexthop_group *nh_grp)
3635 {
3636 	struct mlxsw_sp_nexthop *nh;
3637 	int i;
3638 
3639 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3640 	for (i = 0; i < nh_grp->count; i++) {
3641 		nh = &nh_grp->nexthops[i];
3642 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3643 	}
3644 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3645 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3646 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3647 	kfree(nh_grp);
3648 }
3649 
3650 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3651 				       struct mlxsw_sp_fib_entry *fib_entry,
3652 				       struct fib_info *fi)
3653 {
3654 	struct mlxsw_sp_nexthop_group *nh_grp;
3655 
3656 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3657 	if (!nh_grp) {
3658 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3659 		if (IS_ERR(nh_grp))
3660 			return PTR_ERR(nh_grp);
3661 	}
3662 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3663 	fib_entry->nh_group = nh_grp;
3664 	return 0;
3665 }
3666 
3667 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3668 					struct mlxsw_sp_fib_entry *fib_entry)
3669 {
3670 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3671 
3672 	list_del(&fib_entry->nexthop_group_node);
3673 	if (!list_empty(&nh_grp->fib_list))
3674 		return;
3675 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3676 }
3677 
3678 static bool
3679 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3680 {
3681 	struct mlxsw_sp_fib4_entry *fib4_entry;
3682 
3683 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3684 				  common);
3685 	return !fib4_entry->tos;
3686 }
3687 
3688 static bool
3689 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3690 {
3691 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3692 
3693 	switch (fib_entry->fib_node->fib->proto) {
3694 	case MLXSW_SP_L3_PROTO_IPV4:
3695 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3696 			return false;
3697 		break;
3698 	case MLXSW_SP_L3_PROTO_IPV6:
3699 		break;
3700 	}
3701 
3702 	switch (fib_entry->type) {
3703 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3704 		return !!nh_group->adj_index_valid;
3705 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3706 		return !!nh_group->nh_rif;
3707 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3708 		return true;
3709 	default:
3710 		return false;
3711 	}
3712 }
3713 
3714 static struct mlxsw_sp_nexthop *
3715 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3716 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3717 {
3718 	int i;
3719 
3720 	for (i = 0; i < nh_grp->count; i++) {
3721 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3722 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3723 
3724 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3725 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3726 				    &rt->rt6i_gateway))
3727 			return nh;
3728 		continue;
3729 	}
3730 
3731 	return NULL;
3732 }
3733 
3734 static void
3735 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3736 {
3737 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3738 	int i;
3739 
3740 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3741 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3742 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3743 		return;
3744 	}
3745 
3746 	for (i = 0; i < nh_grp->count; i++) {
3747 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3748 
3749 		if (nh->offloaded)
3750 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3751 		else
3752 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3753 	}
3754 }
3755 
3756 static void
3757 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3758 {
3759 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3760 	int i;
3761 
3762 	for (i = 0; i < nh_grp->count; i++) {
3763 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3764 
3765 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3766 	}
3767 }
3768 
3769 static void
3770 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3771 {
3772 	struct mlxsw_sp_fib6_entry *fib6_entry;
3773 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3774 
3775 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3776 				  common);
3777 
3778 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3779 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3780 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3781 		return;
3782 	}
3783 
3784 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3785 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3786 		struct mlxsw_sp_nexthop *nh;
3787 
3788 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3789 		if (nh && nh->offloaded)
3790 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3791 		else
3792 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3793 	}
3794 }
3795 
3796 static void
3797 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3798 {
3799 	struct mlxsw_sp_fib6_entry *fib6_entry;
3800 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3801 
3802 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3803 				  common);
3804 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3805 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3806 
3807 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3808 	}
3809 }
3810 
3811 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3812 {
3813 	switch (fib_entry->fib_node->fib->proto) {
3814 	case MLXSW_SP_L3_PROTO_IPV4:
3815 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3816 		break;
3817 	case MLXSW_SP_L3_PROTO_IPV6:
3818 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3819 		break;
3820 	}
3821 }
3822 
3823 static void
3824 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3825 {
3826 	switch (fib_entry->fib_node->fib->proto) {
3827 	case MLXSW_SP_L3_PROTO_IPV4:
3828 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3829 		break;
3830 	case MLXSW_SP_L3_PROTO_IPV6:
3831 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3832 		break;
3833 	}
3834 }
3835 
3836 static void
3837 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3838 				   enum mlxsw_reg_ralue_op op, int err)
3839 {
3840 	switch (op) {
3841 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3842 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3843 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3844 		if (err)
3845 			return;
3846 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3847 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3848 		else
3849 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3850 		return;
3851 	default:
3852 		return;
3853 	}
3854 }
3855 
3856 static void
3857 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3858 			      const struct mlxsw_sp_fib_entry *fib_entry,
3859 			      enum mlxsw_reg_ralue_op op)
3860 {
3861 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3862 	enum mlxsw_reg_ralxx_protocol proto;
3863 	u32 *p_dip;
3864 
3865 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3866 
3867 	switch (fib->proto) {
3868 	case MLXSW_SP_L3_PROTO_IPV4:
3869 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3870 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3871 				      fib_entry->fib_node->key.prefix_len,
3872 				      *p_dip);
3873 		break;
3874 	case MLXSW_SP_L3_PROTO_IPV6:
3875 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3876 				      fib_entry->fib_node->key.prefix_len,
3877 				      fib_entry->fib_node->key.addr);
3878 		break;
3879 	}
3880 }
3881 
3882 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3883 					struct mlxsw_sp_fib_entry *fib_entry,
3884 					enum mlxsw_reg_ralue_op op)
3885 {
3886 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3887 	enum mlxsw_reg_ralue_trap_action trap_action;
3888 	u16 trap_id = 0;
3889 	u32 adjacency_index = 0;
3890 	u16 ecmp_size = 0;
3891 
3892 	/* In case the nexthop group adjacency index is valid, use it
3893 	 * with provided ECMP size. Otherwise, setup trap and pass
3894 	 * traffic to kernel.
3895 	 */
3896 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3897 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3898 		adjacency_index = fib_entry->nh_group->adj_index;
3899 		ecmp_size = fib_entry->nh_group->ecmp_size;
3900 	} else {
3901 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3902 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3903 	}
3904 
3905 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3906 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3907 					adjacency_index, ecmp_size);
3908 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3909 }
3910 
3911 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3912 				       struct mlxsw_sp_fib_entry *fib_entry,
3913 				       enum mlxsw_reg_ralue_op op)
3914 {
3915 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3916 	enum mlxsw_reg_ralue_trap_action trap_action;
3917 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3918 	u16 trap_id = 0;
3919 	u16 rif_index = 0;
3920 
3921 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3922 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3923 		rif_index = rif->rif_index;
3924 	} else {
3925 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3926 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3927 	}
3928 
3929 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3930 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3931 				       rif_index);
3932 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3933 }
3934 
3935 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3936 				      struct mlxsw_sp_fib_entry *fib_entry,
3937 				      enum mlxsw_reg_ralue_op op)
3938 {
3939 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3940 
3941 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3942 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3943 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3944 }
3945 
3946 static int
3947 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3948 				 struct mlxsw_sp_fib_entry *fib_entry,
3949 				 enum mlxsw_reg_ralue_op op)
3950 {
3951 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3952 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3953 
3954 	if (WARN_ON(!ipip_entry))
3955 		return -EINVAL;
3956 
3957 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3958 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3959 				      fib_entry->decap.tunnel_index);
3960 }
3961 
3962 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3963 				   struct mlxsw_sp_fib_entry *fib_entry,
3964 				   enum mlxsw_reg_ralue_op op)
3965 {
3966 	switch (fib_entry->type) {
3967 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3968 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
3969 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3970 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
3971 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
3972 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
3973 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3974 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
3975 							fib_entry, op);
3976 	}
3977 	return -EINVAL;
3978 }
3979 
3980 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
3981 				 struct mlxsw_sp_fib_entry *fib_entry,
3982 				 enum mlxsw_reg_ralue_op op)
3983 {
3984 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
3985 
3986 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
3987 
3988 	return err;
3989 }
3990 
3991 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
3992 				     struct mlxsw_sp_fib_entry *fib_entry)
3993 {
3994 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
3995 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
3996 }
3997 
3998 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
3999 				  struct mlxsw_sp_fib_entry *fib_entry)
4000 {
4001 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4002 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4003 }
4004 
4005 static int
4006 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4007 			     const struct fib_entry_notifier_info *fen_info,
4008 			     struct mlxsw_sp_fib_entry *fib_entry)
4009 {
4010 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4011 	struct net_device *dev = fen_info->fi->fib_dev;
4012 	struct mlxsw_sp_ipip_entry *ipip_entry;
4013 	struct fib_info *fi = fen_info->fi;
4014 
4015 	switch (fen_info->type) {
4016 	case RTN_LOCAL:
4017 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4018 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4019 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4020 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4021 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4022 							     fib_entry,
4023 							     ipip_entry);
4024 		}
4025 		/* fall through */
4026 	case RTN_BROADCAST:
4027 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4028 		return 0;
4029 	case RTN_UNREACHABLE: /* fall through */
4030 	case RTN_BLACKHOLE: /* fall through */
4031 	case RTN_PROHIBIT:
4032 		/* Packets hitting these routes need to be trapped, but
4033 		 * can do so with a lower priority than packets directed
4034 		 * at the host, so use action type local instead of trap.
4035 		 */
4036 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4037 		return 0;
4038 	case RTN_UNICAST:
4039 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4040 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4041 		else
4042 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4043 		return 0;
4044 	default:
4045 		return -EINVAL;
4046 	}
4047 }
4048 
4049 static struct mlxsw_sp_fib4_entry *
4050 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4051 			   struct mlxsw_sp_fib_node *fib_node,
4052 			   const struct fib_entry_notifier_info *fen_info)
4053 {
4054 	struct mlxsw_sp_fib4_entry *fib4_entry;
4055 	struct mlxsw_sp_fib_entry *fib_entry;
4056 	int err;
4057 
4058 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4059 	if (!fib4_entry)
4060 		return ERR_PTR(-ENOMEM);
4061 	fib_entry = &fib4_entry->common;
4062 
4063 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4064 	if (err)
4065 		goto err_fib4_entry_type_set;
4066 
4067 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4068 	if (err)
4069 		goto err_nexthop4_group_get;
4070 
4071 	fib4_entry->prio = fen_info->fi->fib_priority;
4072 	fib4_entry->tb_id = fen_info->tb_id;
4073 	fib4_entry->type = fen_info->type;
4074 	fib4_entry->tos = fen_info->tos;
4075 
4076 	fib_entry->fib_node = fib_node;
4077 
4078 	return fib4_entry;
4079 
4080 err_nexthop4_group_get:
4081 err_fib4_entry_type_set:
4082 	kfree(fib4_entry);
4083 	return ERR_PTR(err);
4084 }
4085 
4086 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4087 					struct mlxsw_sp_fib4_entry *fib4_entry)
4088 {
4089 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4090 	kfree(fib4_entry);
4091 }
4092 
4093 static struct mlxsw_sp_fib4_entry *
4094 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4095 			   const struct fib_entry_notifier_info *fen_info)
4096 {
4097 	struct mlxsw_sp_fib4_entry *fib4_entry;
4098 	struct mlxsw_sp_fib_node *fib_node;
4099 	struct mlxsw_sp_fib *fib;
4100 	struct mlxsw_sp_vr *vr;
4101 
4102 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4103 	if (!vr)
4104 		return NULL;
4105 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4106 
4107 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4108 					    sizeof(fen_info->dst),
4109 					    fen_info->dst_len);
4110 	if (!fib_node)
4111 		return NULL;
4112 
4113 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4114 		if (fib4_entry->tb_id == fen_info->tb_id &&
4115 		    fib4_entry->tos == fen_info->tos &&
4116 		    fib4_entry->type == fen_info->type &&
4117 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4118 		    fen_info->fi) {
4119 			return fib4_entry;
4120 		}
4121 	}
4122 
4123 	return NULL;
4124 }
4125 
4126 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4127 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4128 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4129 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4130 	.automatic_shrinking = true,
4131 };
4132 
4133 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4134 				    struct mlxsw_sp_fib_node *fib_node)
4135 {
4136 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4137 				      mlxsw_sp_fib_ht_params);
4138 }
4139 
4140 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4141 				     struct mlxsw_sp_fib_node *fib_node)
4142 {
4143 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4144 			       mlxsw_sp_fib_ht_params);
4145 }
4146 
4147 static struct mlxsw_sp_fib_node *
4148 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4149 			 size_t addr_len, unsigned char prefix_len)
4150 {
4151 	struct mlxsw_sp_fib_key key;
4152 
4153 	memset(&key, 0, sizeof(key));
4154 	memcpy(key.addr, addr, addr_len);
4155 	key.prefix_len = prefix_len;
4156 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4157 }
4158 
4159 static struct mlxsw_sp_fib_node *
4160 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4161 			 size_t addr_len, unsigned char prefix_len)
4162 {
4163 	struct mlxsw_sp_fib_node *fib_node;
4164 
4165 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4166 	if (!fib_node)
4167 		return NULL;
4168 
4169 	INIT_LIST_HEAD(&fib_node->entry_list);
4170 	list_add(&fib_node->list, &fib->node_list);
4171 	memcpy(fib_node->key.addr, addr, addr_len);
4172 	fib_node->key.prefix_len = prefix_len;
4173 
4174 	return fib_node;
4175 }
4176 
4177 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4178 {
4179 	list_del(&fib_node->list);
4180 	WARN_ON(!list_empty(&fib_node->entry_list));
4181 	kfree(fib_node);
4182 }
4183 
4184 static bool
4185 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4186 				 const struct mlxsw_sp_fib_entry *fib_entry)
4187 {
4188 	return list_first_entry(&fib_node->entry_list,
4189 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4190 }
4191 
4192 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4193 				      struct mlxsw_sp_fib *fib,
4194 				      struct mlxsw_sp_fib_node *fib_node)
4195 {
4196 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
4197 	struct mlxsw_sp_lpm_tree *lpm_tree;
4198 	int err;
4199 
4200 	/* Since the tree is shared between all virtual routers we must
4201 	 * make sure it contains all the required prefix lengths. This
4202 	 * can be computed by either adding the new prefix length to the
4203 	 * existing prefix usage of a bound tree, or by aggregating the
4204 	 * prefix lengths across all virtual routers and adding the new
4205 	 * one as well.
4206 	 */
4207 	if (fib->lpm_tree)
4208 		mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
4209 					  &fib->lpm_tree->prefix_usage);
4210 	else
4211 		mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
4212 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4213 
4214 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4215 					 fib->proto);
4216 	if (IS_ERR(lpm_tree))
4217 		return PTR_ERR(lpm_tree);
4218 
4219 	if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
4220 		return 0;
4221 
4222 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4223 	if (err)
4224 		return err;
4225 
4226 	return 0;
4227 }
4228 
4229 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4230 					 struct mlxsw_sp_fib *fib)
4231 {
4232 	if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
4233 		return;
4234 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
4235 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
4236 	fib->lpm_tree = NULL;
4237 }
4238 
4239 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
4240 {
4241 	unsigned char prefix_len = fib_node->key.prefix_len;
4242 	struct mlxsw_sp_fib *fib = fib_node->fib;
4243 
4244 	if (fib->prefix_ref_count[prefix_len]++ == 0)
4245 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
4246 }
4247 
4248 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
4249 {
4250 	unsigned char prefix_len = fib_node->key.prefix_len;
4251 	struct mlxsw_sp_fib *fib = fib_node->fib;
4252 
4253 	if (--fib->prefix_ref_count[prefix_len] == 0)
4254 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
4255 }
4256 
4257 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4258 				  struct mlxsw_sp_fib_node *fib_node,
4259 				  struct mlxsw_sp_fib *fib)
4260 {
4261 	int err;
4262 
4263 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4264 	if (err)
4265 		return err;
4266 	fib_node->fib = fib;
4267 
4268 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
4269 	if (err)
4270 		goto err_fib_lpm_tree_link;
4271 
4272 	mlxsw_sp_fib_node_prefix_inc(fib_node);
4273 
4274 	return 0;
4275 
4276 err_fib_lpm_tree_link:
4277 	fib_node->fib = NULL;
4278 	mlxsw_sp_fib_node_remove(fib, fib_node);
4279 	return err;
4280 }
4281 
4282 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4283 				   struct mlxsw_sp_fib_node *fib_node)
4284 {
4285 	struct mlxsw_sp_fib *fib = fib_node->fib;
4286 
4287 	mlxsw_sp_fib_node_prefix_dec(fib_node);
4288 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
4289 	fib_node->fib = NULL;
4290 	mlxsw_sp_fib_node_remove(fib, fib_node);
4291 }
4292 
4293 static struct mlxsw_sp_fib_node *
4294 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4295 		      size_t addr_len, unsigned char prefix_len,
4296 		      enum mlxsw_sp_l3proto proto)
4297 {
4298 	struct mlxsw_sp_fib_node *fib_node;
4299 	struct mlxsw_sp_fib *fib;
4300 	struct mlxsw_sp_vr *vr;
4301 	int err;
4302 
4303 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4304 	if (IS_ERR(vr))
4305 		return ERR_CAST(vr);
4306 	fib = mlxsw_sp_vr_fib(vr, proto);
4307 
4308 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4309 	if (fib_node)
4310 		return fib_node;
4311 
4312 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4313 	if (!fib_node) {
4314 		err = -ENOMEM;
4315 		goto err_fib_node_create;
4316 	}
4317 
4318 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4319 	if (err)
4320 		goto err_fib_node_init;
4321 
4322 	return fib_node;
4323 
4324 err_fib_node_init:
4325 	mlxsw_sp_fib_node_destroy(fib_node);
4326 err_fib_node_create:
4327 	mlxsw_sp_vr_put(vr);
4328 	return ERR_PTR(err);
4329 }
4330 
4331 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4332 				  struct mlxsw_sp_fib_node *fib_node)
4333 {
4334 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4335 
4336 	if (!list_empty(&fib_node->entry_list))
4337 		return;
4338 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4339 	mlxsw_sp_fib_node_destroy(fib_node);
4340 	mlxsw_sp_vr_put(vr);
4341 }
4342 
4343 static struct mlxsw_sp_fib4_entry *
4344 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4345 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4346 {
4347 	struct mlxsw_sp_fib4_entry *fib4_entry;
4348 
4349 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4350 		if (fib4_entry->tb_id > new4_entry->tb_id)
4351 			continue;
4352 		if (fib4_entry->tb_id != new4_entry->tb_id)
4353 			break;
4354 		if (fib4_entry->tos > new4_entry->tos)
4355 			continue;
4356 		if (fib4_entry->prio >= new4_entry->prio ||
4357 		    fib4_entry->tos < new4_entry->tos)
4358 			return fib4_entry;
4359 	}
4360 
4361 	return NULL;
4362 }
4363 
4364 static int
4365 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4366 			       struct mlxsw_sp_fib4_entry *new4_entry)
4367 {
4368 	struct mlxsw_sp_fib_node *fib_node;
4369 
4370 	if (WARN_ON(!fib4_entry))
4371 		return -EINVAL;
4372 
4373 	fib_node = fib4_entry->common.fib_node;
4374 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4375 				 common.list) {
4376 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4377 		    fib4_entry->tos != new4_entry->tos ||
4378 		    fib4_entry->prio != new4_entry->prio)
4379 			break;
4380 	}
4381 
4382 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4383 	return 0;
4384 }
4385 
4386 static int
4387 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4388 			       bool replace, bool append)
4389 {
4390 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4391 	struct mlxsw_sp_fib4_entry *fib4_entry;
4392 
4393 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4394 
4395 	if (append)
4396 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4397 	if (replace && WARN_ON(!fib4_entry))
4398 		return -EINVAL;
4399 
4400 	/* Insert new entry before replaced one, so that we can later
4401 	 * remove the second.
4402 	 */
4403 	if (fib4_entry) {
4404 		list_add_tail(&new4_entry->common.list,
4405 			      &fib4_entry->common.list);
4406 	} else {
4407 		struct mlxsw_sp_fib4_entry *last;
4408 
4409 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4410 			if (new4_entry->tb_id > last->tb_id)
4411 				break;
4412 			fib4_entry = last;
4413 		}
4414 
4415 		if (fib4_entry)
4416 			list_add(&new4_entry->common.list,
4417 				 &fib4_entry->common.list);
4418 		else
4419 			list_add(&new4_entry->common.list,
4420 				 &fib_node->entry_list);
4421 	}
4422 
4423 	return 0;
4424 }
4425 
4426 static void
4427 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4428 {
4429 	list_del(&fib4_entry->common.list);
4430 }
4431 
4432 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4433 				       struct mlxsw_sp_fib_entry *fib_entry)
4434 {
4435 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4436 
4437 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4438 		return 0;
4439 
4440 	/* To prevent packet loss, overwrite the previously offloaded
4441 	 * entry.
4442 	 */
4443 	if (!list_is_singular(&fib_node->entry_list)) {
4444 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4445 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4446 
4447 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4448 	}
4449 
4450 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4451 }
4452 
4453 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4454 					struct mlxsw_sp_fib_entry *fib_entry)
4455 {
4456 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4457 
4458 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4459 		return;
4460 
4461 	/* Promote the next entry by overwriting the deleted entry */
4462 	if (!list_is_singular(&fib_node->entry_list)) {
4463 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4464 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4465 
4466 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4467 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4468 		return;
4469 	}
4470 
4471 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4472 }
4473 
4474 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4475 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4476 					 bool replace, bool append)
4477 {
4478 	int err;
4479 
4480 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4481 	if (err)
4482 		return err;
4483 
4484 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4485 	if (err)
4486 		goto err_fib_node_entry_add;
4487 
4488 	return 0;
4489 
4490 err_fib_node_entry_add:
4491 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4492 	return err;
4493 }
4494 
4495 static void
4496 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4497 				struct mlxsw_sp_fib4_entry *fib4_entry)
4498 {
4499 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4500 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4501 
4502 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4503 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4504 }
4505 
4506 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4507 					struct mlxsw_sp_fib4_entry *fib4_entry,
4508 					bool replace)
4509 {
4510 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4511 	struct mlxsw_sp_fib4_entry *replaced;
4512 
4513 	if (!replace)
4514 		return;
4515 
4516 	/* We inserted the new entry before replaced one */
4517 	replaced = list_next_entry(fib4_entry, common.list);
4518 
4519 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4520 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4521 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4522 }
4523 
4524 static int
4525 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4526 			 const struct fib_entry_notifier_info *fen_info,
4527 			 bool replace, bool append)
4528 {
4529 	struct mlxsw_sp_fib4_entry *fib4_entry;
4530 	struct mlxsw_sp_fib_node *fib_node;
4531 	int err;
4532 
4533 	if (mlxsw_sp->router->aborted)
4534 		return 0;
4535 
4536 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4537 					 &fen_info->dst, sizeof(fen_info->dst),
4538 					 fen_info->dst_len,
4539 					 MLXSW_SP_L3_PROTO_IPV4);
4540 	if (IS_ERR(fib_node)) {
4541 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4542 		return PTR_ERR(fib_node);
4543 	}
4544 
4545 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4546 	if (IS_ERR(fib4_entry)) {
4547 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4548 		err = PTR_ERR(fib4_entry);
4549 		goto err_fib4_entry_create;
4550 	}
4551 
4552 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4553 					    append);
4554 	if (err) {
4555 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4556 		goto err_fib4_node_entry_link;
4557 	}
4558 
4559 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4560 
4561 	return 0;
4562 
4563 err_fib4_node_entry_link:
4564 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4565 err_fib4_entry_create:
4566 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4567 	return err;
4568 }
4569 
4570 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4571 				     struct fib_entry_notifier_info *fen_info)
4572 {
4573 	struct mlxsw_sp_fib4_entry *fib4_entry;
4574 	struct mlxsw_sp_fib_node *fib_node;
4575 
4576 	if (mlxsw_sp->router->aborted)
4577 		return;
4578 
4579 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4580 	if (WARN_ON(!fib4_entry))
4581 		return;
4582 	fib_node = fib4_entry->common.fib_node;
4583 
4584 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4585 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4586 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4587 }
4588 
4589 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4590 {
4591 	/* Packets with link-local destination IP arriving to the router
4592 	 * are trapped to the CPU, so no need to program specific routes
4593 	 * for them.
4594 	 */
4595 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4596 		return true;
4597 
4598 	/* Multicast routes aren't supported, so ignore them. Neighbour
4599 	 * Discovery packets are specifically trapped.
4600 	 */
4601 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4602 		return true;
4603 
4604 	/* Cloned routes are irrelevant in the forwarding path. */
4605 	if (rt->rt6i_flags & RTF_CACHE)
4606 		return true;
4607 
4608 	return false;
4609 }
4610 
4611 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4612 {
4613 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4614 
4615 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4616 	if (!mlxsw_sp_rt6)
4617 		return ERR_PTR(-ENOMEM);
4618 
4619 	/* In case of route replace, replaced route is deleted with
4620 	 * no notification. Take reference to prevent accessing freed
4621 	 * memory.
4622 	 */
4623 	mlxsw_sp_rt6->rt = rt;
4624 	rt6_hold(rt);
4625 
4626 	return mlxsw_sp_rt6;
4627 }
4628 
4629 #if IS_ENABLED(CONFIG_IPV6)
4630 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4631 {
4632 	rt6_release(rt);
4633 }
4634 #else
4635 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4636 {
4637 }
4638 #endif
4639 
4640 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4641 {
4642 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4643 	kfree(mlxsw_sp_rt6);
4644 }
4645 
4646 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4647 {
4648 	/* RTF_CACHE routes are ignored */
4649 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4650 }
4651 
4652 static struct rt6_info *
4653 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4654 {
4655 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4656 				list)->rt;
4657 }
4658 
4659 static struct mlxsw_sp_fib6_entry *
4660 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4661 				 const struct rt6_info *nrt, bool replace)
4662 {
4663 	struct mlxsw_sp_fib6_entry *fib6_entry;
4664 
4665 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4666 		return NULL;
4667 
4668 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4669 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4670 
4671 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4672 		 * virtual router.
4673 		 */
4674 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4675 			continue;
4676 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4677 			break;
4678 		if (rt->rt6i_metric < nrt->rt6i_metric)
4679 			continue;
4680 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4681 		    mlxsw_sp_fib6_rt_can_mp(rt))
4682 			return fib6_entry;
4683 		if (rt->rt6i_metric > nrt->rt6i_metric)
4684 			break;
4685 	}
4686 
4687 	return NULL;
4688 }
4689 
4690 static struct mlxsw_sp_rt6 *
4691 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4692 			    const struct rt6_info *rt)
4693 {
4694 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4695 
4696 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4697 		if (mlxsw_sp_rt6->rt == rt)
4698 			return mlxsw_sp_rt6;
4699 	}
4700 
4701 	return NULL;
4702 }
4703 
4704 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4705 					const struct rt6_info *rt,
4706 					enum mlxsw_sp_ipip_type *ret)
4707 {
4708 	return rt->dst.dev &&
4709 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4710 }
4711 
4712 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4713 				       struct mlxsw_sp_nexthop_group *nh_grp,
4714 				       struct mlxsw_sp_nexthop *nh,
4715 				       const struct rt6_info *rt)
4716 {
4717 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4718 	struct mlxsw_sp_ipip_entry *ipip_entry;
4719 	struct net_device *dev = rt->dst.dev;
4720 	struct mlxsw_sp_rif *rif;
4721 	int err;
4722 
4723 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4724 	if (ipip_entry) {
4725 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4726 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4727 					  MLXSW_SP_L3_PROTO_IPV6)) {
4728 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4729 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4730 			return 0;
4731 		}
4732 	}
4733 
4734 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4735 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4736 	if (!rif)
4737 		return 0;
4738 	mlxsw_sp_nexthop_rif_init(nh, rif);
4739 
4740 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4741 	if (err)
4742 		goto err_nexthop_neigh_init;
4743 
4744 	return 0;
4745 
4746 err_nexthop_neigh_init:
4747 	mlxsw_sp_nexthop_rif_fini(nh);
4748 	return err;
4749 }
4750 
/* Tear down the type-specific state of @nh by delegating to the common
 * mlxsw_sp_nexthop_type_fini() helper.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4756 
4757 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4758 				  struct mlxsw_sp_nexthop_group *nh_grp,
4759 				  struct mlxsw_sp_nexthop *nh,
4760 				  const struct rt6_info *rt)
4761 {
4762 	struct net_device *dev = rt->dst.dev;
4763 
4764 	nh->nh_grp = nh_grp;
4765 	nh->nh_weight = 1;
4766 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4767 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4768 
4769 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4770 
4771 	if (!dev)
4772 		return 0;
4773 	nh->ifindex = dev->ifindex;
4774 
4775 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4776 }
4777 
4778 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4779 				   struct mlxsw_sp_nexthop *nh)
4780 {
4781 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4782 	list_del(&nh->router_list_node);
4783 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4784 }
4785 
4786 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4787 				    const struct rt6_info *rt)
4788 {
4789 	return rt->rt6i_flags & RTF_GATEWAY ||
4790 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4791 }
4792 
4793 static struct mlxsw_sp_nexthop_group *
4794 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4795 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4796 {
4797 	struct mlxsw_sp_nexthop_group *nh_grp;
4798 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4799 	struct mlxsw_sp_nexthop *nh;
4800 	size_t alloc_size;
4801 	int i = 0;
4802 	int err;
4803 
4804 	alloc_size = sizeof(*nh_grp) +
4805 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4806 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4807 	if (!nh_grp)
4808 		return ERR_PTR(-ENOMEM);
4809 	INIT_LIST_HEAD(&nh_grp->fib_list);
4810 #if IS_ENABLED(CONFIG_IPV6)
4811 	nh_grp->neigh_tbl = &nd_tbl;
4812 #endif
4813 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4814 					struct mlxsw_sp_rt6, list);
4815 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4816 	nh_grp->count = fib6_entry->nrt6;
4817 	for (i = 0; i < nh_grp->count; i++) {
4818 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4819 
4820 		nh = &nh_grp->nexthops[i];
4821 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4822 		if (err)
4823 			goto err_nexthop6_init;
4824 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4825 	}
4826 
4827 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4828 	if (err)
4829 		goto err_nexthop_group_insert;
4830 
4831 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4832 	return nh_grp;
4833 
4834 err_nexthop_group_insert:
4835 err_nexthop6_init:
4836 	for (i--; i >= 0; i--) {
4837 		nh = &nh_grp->nexthops[i];
4838 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4839 	}
4840 	kfree(nh_grp);
4841 	return ERR_PTR(err);
4842 }
4843 
4844 static void
4845 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4846 				struct mlxsw_sp_nexthop_group *nh_grp)
4847 {
4848 	struct mlxsw_sp_nexthop *nh;
4849 	int i = nh_grp->count;
4850 
4851 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4852 	for (i--; i >= 0; i--) {
4853 		nh = &nh_grp->nexthops[i];
4854 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4855 	}
4856 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4857 	WARN_ON(nh_grp->adj_index_valid);
4858 	kfree(nh_grp);
4859 }
4860 
4861 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4862 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4863 {
4864 	struct mlxsw_sp_nexthop_group *nh_grp;
4865 
4866 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4867 	if (!nh_grp) {
4868 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4869 		if (IS_ERR(nh_grp))
4870 			return PTR_ERR(nh_grp);
4871 	}
4872 
4873 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4874 		      &nh_grp->fib_list);
4875 	fib6_entry->common.nh_group = nh_grp;
4876 
4877 	return 0;
4878 }
4879 
4880 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4881 					struct mlxsw_sp_fib_entry *fib_entry)
4882 {
4883 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4884 
4885 	list_del(&fib_entry->nexthop_group_node);
4886 	if (!list_empty(&nh_grp->fib_list))
4887 		return;
4888 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4889 }
4890 
4891 static int
4892 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4893 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4894 {
4895 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4896 	int err;
4897 
4898 	fib6_entry->common.nh_group = NULL;
4899 	list_del(&fib6_entry->common.nexthop_group_node);
4900 
4901 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4902 	if (err)
4903 		goto err_nexthop6_group_get;
4904 
4905 	/* In case this entry is offloaded, then the adjacency index
4906 	 * currently associated with it in the device's table is that
4907 	 * of the old group. Start using the new one instead.
4908 	 */
4909 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4910 	if (err)
4911 		goto err_fib_node_entry_add;
4912 
4913 	if (list_empty(&old_nh_grp->fib_list))
4914 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4915 
4916 	return 0;
4917 
4918 err_fib_node_entry_add:
4919 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4920 err_nexthop6_group_get:
4921 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4922 		      &old_nh_grp->fib_list);
4923 	fib6_entry->common.nh_group = old_nh_grp;
4924 	return err;
4925 }
4926 
4927 static int
4928 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4929 				struct mlxsw_sp_fib6_entry *fib6_entry,
4930 				struct rt6_info *rt)
4931 {
4932 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4933 	int err;
4934 
4935 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4936 	if (IS_ERR(mlxsw_sp_rt6))
4937 		return PTR_ERR(mlxsw_sp_rt6);
4938 
4939 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4940 	fib6_entry->nrt6++;
4941 
4942 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4943 	if (err)
4944 		goto err_nexthop6_group_update;
4945 
4946 	return 0;
4947 
4948 err_nexthop6_group_update:
4949 	fib6_entry->nrt6--;
4950 	list_del(&mlxsw_sp_rt6->list);
4951 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4952 	return err;
4953 }
4954 
4955 static void
4956 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4957 				struct mlxsw_sp_fib6_entry *fib6_entry,
4958 				struct rt6_info *rt)
4959 {
4960 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4961 
4962 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4963 	if (WARN_ON(!mlxsw_sp_rt6))
4964 		return;
4965 
4966 	fib6_entry->nrt6--;
4967 	list_del(&mlxsw_sp_rt6->list);
4968 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4969 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4970 }
4971 
4972 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4973 					 struct mlxsw_sp_fib_entry *fib_entry,
4974 					 const struct rt6_info *rt)
4975 {
4976 	/* Packets hitting RTF_REJECT routes need to be discarded by the
4977 	 * stack. We can rely on their destination device not having a
4978 	 * RIF (it's the loopback device) and can thus use action type
4979 	 * local, which will cause them to be trapped with a lower
4980 	 * priority than packets that need to be locally received.
4981 	 */
4982 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
4983 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4984 	else if (rt->rt6i_flags & RTF_REJECT)
4985 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4986 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
4987 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4988 	else
4989 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4990 }
4991 
4992 static void
4993 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
4994 {
4995 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
4996 
4997 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
4998 				 list) {
4999 		fib6_entry->nrt6--;
5000 		list_del(&mlxsw_sp_rt6->list);
5001 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5002 	}
5003 }
5004 
5005 static struct mlxsw_sp_fib6_entry *
5006 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5007 			   struct mlxsw_sp_fib_node *fib_node,
5008 			   struct rt6_info *rt)
5009 {
5010 	struct mlxsw_sp_fib6_entry *fib6_entry;
5011 	struct mlxsw_sp_fib_entry *fib_entry;
5012 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5013 	int err;
5014 
5015 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5016 	if (!fib6_entry)
5017 		return ERR_PTR(-ENOMEM);
5018 	fib_entry = &fib6_entry->common;
5019 
5020 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5021 	if (IS_ERR(mlxsw_sp_rt6)) {
5022 		err = PTR_ERR(mlxsw_sp_rt6);
5023 		goto err_rt6_create;
5024 	}
5025 
5026 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5027 
5028 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5029 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5030 	fib6_entry->nrt6 = 1;
5031 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5032 	if (err)
5033 		goto err_nexthop6_group_get;
5034 
5035 	fib_entry->fib_node = fib_node;
5036 
5037 	return fib6_entry;
5038 
5039 err_nexthop6_group_get:
5040 	list_del(&mlxsw_sp_rt6->list);
5041 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5042 err_rt6_create:
5043 	kfree(fib6_entry);
5044 	return ERR_PTR(err);
5045 }
5046 
5047 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5048 					struct mlxsw_sp_fib6_entry *fib6_entry)
5049 {
5050 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5051 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5052 	WARN_ON(fib6_entry->nrt6);
5053 	kfree(fib6_entry);
5054 }
5055 
/* Find the entry of @fib_node relative to which a new entry for route
 * @nrt is to be placed (the caller inserts the new entry right before the
 * returned one).
 *
 * Entries whose table ID is larger than @nrt's are skipped and the walk
 * stops at the first entry whose table ID differs otherwise.
 * NOTE(review): this appears to rely on the entry list being ordered by
 * descending table ID and ascending metric - confirm against the insert
 * path.
 *
 * Return: the matching entry, or NULL if no suitable entry was found.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		/* When replacing, an entry with the same metric and the
		 * same multipath capability as the new route is the one to
		 * replace. If only the new route is multipath capable,
		 * remember the first such same-metric entry as a fallback.
		 */
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		/* First entry with a larger metric: use the fallback if one
		 * was recorded, otherwise this entry.
		 */
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5082 
5083 static int
5084 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5085 			       bool replace)
5086 {
5087 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5088 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5089 	struct mlxsw_sp_fib6_entry *fib6_entry;
5090 
5091 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5092 
5093 	if (replace && WARN_ON(!fib6_entry))
5094 		return -EINVAL;
5095 
5096 	if (fib6_entry) {
5097 		list_add_tail(&new6_entry->common.list,
5098 			      &fib6_entry->common.list);
5099 	} else {
5100 		struct mlxsw_sp_fib6_entry *last;
5101 
5102 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5103 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5104 
5105 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
5106 				break;
5107 			fib6_entry = last;
5108 		}
5109 
5110 		if (fib6_entry)
5111 			list_add(&new6_entry->common.list,
5112 				 &fib6_entry->common.list);
5113 		else
5114 			list_add(&new6_entry->common.list,
5115 				 &fib_node->entry_list);
5116 	}
5117 
5118 	return 0;
5119 }
5120 
5121 static void
5122 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5123 {
5124 	list_del(&fib6_entry->common.list);
5125 }
5126 
5127 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5128 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5129 					 bool replace)
5130 {
5131 	int err;
5132 
5133 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5134 	if (err)
5135 		return err;
5136 
5137 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5138 	if (err)
5139 		goto err_fib_node_entry_add;
5140 
5141 	return 0;
5142 
5143 err_fib_node_entry_add:
5144 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5145 	return err;
5146 }
5147 
5148 static void
5149 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5150 				struct mlxsw_sp_fib6_entry *fib6_entry)
5151 {
5152 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5153 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5154 }
5155 
5156 static struct mlxsw_sp_fib6_entry *
5157 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5158 			   const struct rt6_info *rt)
5159 {
5160 	struct mlxsw_sp_fib6_entry *fib6_entry;
5161 	struct mlxsw_sp_fib_node *fib_node;
5162 	struct mlxsw_sp_fib *fib;
5163 	struct mlxsw_sp_vr *vr;
5164 
5165 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
5166 	if (!vr)
5167 		return NULL;
5168 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5169 
5170 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
5171 					    sizeof(rt->rt6i_dst.addr),
5172 					    rt->rt6i_dst.plen);
5173 	if (!fib_node)
5174 		return NULL;
5175 
5176 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5177 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5178 
5179 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
5180 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
5181 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5182 			return fib6_entry;
5183 	}
5184 
5185 	return NULL;
5186 }
5187 
5188 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5189 					struct mlxsw_sp_fib6_entry *fib6_entry,
5190 					bool replace)
5191 {
5192 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5193 	struct mlxsw_sp_fib6_entry *replaced;
5194 
5195 	if (!replace)
5196 		return;
5197 
5198 	replaced = list_next_entry(fib6_entry, common.list);
5199 
5200 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5201 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5202 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5203 }
5204 
5205 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5206 				    struct rt6_info *rt, bool replace)
5207 {
5208 	struct mlxsw_sp_fib6_entry *fib6_entry;
5209 	struct mlxsw_sp_fib_node *fib_node;
5210 	int err;
5211 
5212 	if (mlxsw_sp->router->aborted)
5213 		return 0;
5214 
5215 	if (rt->rt6i_src.plen)
5216 		return -EINVAL;
5217 
5218 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5219 		return 0;
5220 
5221 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
5222 					 &rt->rt6i_dst.addr,
5223 					 sizeof(rt->rt6i_dst.addr),
5224 					 rt->rt6i_dst.plen,
5225 					 MLXSW_SP_L3_PROTO_IPV6);
5226 	if (IS_ERR(fib_node))
5227 		return PTR_ERR(fib_node);
5228 
5229 	/* Before creating a new entry, try to append route to an existing
5230 	 * multipath entry.
5231 	 */
5232 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5233 	if (fib6_entry) {
5234 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5235 		if (err)
5236 			goto err_fib6_entry_nexthop_add;
5237 		return 0;
5238 	}
5239 
5240 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5241 	if (IS_ERR(fib6_entry)) {
5242 		err = PTR_ERR(fib6_entry);
5243 		goto err_fib6_entry_create;
5244 	}
5245 
5246 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5247 	if (err)
5248 		goto err_fib6_node_entry_link;
5249 
5250 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5251 
5252 	return 0;
5253 
5254 err_fib6_node_entry_link:
5255 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5256 err_fib6_entry_create:
5257 err_fib6_entry_nexthop_add:
5258 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5259 	return err;
5260 }
5261 
5262 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5263 				     struct rt6_info *rt)
5264 {
5265 	struct mlxsw_sp_fib6_entry *fib6_entry;
5266 	struct mlxsw_sp_fib_node *fib_node;
5267 
5268 	if (mlxsw_sp->router->aborted)
5269 		return;
5270 
5271 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5272 		return;
5273 
5274 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5275 	if (WARN_ON(!fib6_entry))
5276 		return;
5277 
5278 	/* If route is part of a multipath entry, but not the last one
5279 	 * removed, then only reduce its nexthop group.
5280 	 */
5281 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5282 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5283 		return;
5284 	}
5285 
5286 	fib_node = fib6_entry->common.fib_node;
5287 
5288 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5289 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5290 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5291 }
5292 
/* Program the abort trap for protocol @proto using LPM tree @tree_id.
 *
 * Writes the RALTA and RALST registers for the tree and then, for every
 * virtual router, writes RALTB followed by a RALUE entry carrying the
 * ip2me action.
 *
 * Return: 0 on success, or the error of the first register write that
 * failed (nothing already written is rolled back).
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	/* NOTE(review): 'true' presumably requests allocation of the tree -
	 * confirm against the RALTA register definition.
	 */
	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	/* NOTE(review): meaning of 0xff is not visible here - confirm
	 * against the RALST register definition.
	 */
	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	/* Apply the tree and the ip2me entry to every virtual router. */
	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* NOTE(review): the trailing 0 is presumably the prefix
		 * length, i.e. a catch-all entry - confirm.
		 */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5333 
5334 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5335 				     struct mfc_entry_notifier_info *men_info,
5336 				     bool replace)
5337 {
5338 	struct mlxsw_sp_vr *vr;
5339 
5340 	if (mlxsw_sp->router->aborted)
5341 		return 0;
5342 
5343 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5344 	if (IS_ERR(vr))
5345 		return PTR_ERR(vr);
5346 
5347 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5348 }
5349 
5350 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5351 				      struct mfc_entry_notifier_info *men_info)
5352 {
5353 	struct mlxsw_sp_vr *vr;
5354 
5355 	if (mlxsw_sp->router->aborted)
5356 		return;
5357 
5358 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5359 	if (WARN_ON(!vr))
5360 		return;
5361 
5362 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5363 	mlxsw_sp_vr_put(vr);
5364 }
5365 
5366 static int
5367 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5368 			      struct vif_entry_notifier_info *ven_info)
5369 {
5370 	struct mlxsw_sp_rif *rif;
5371 	struct mlxsw_sp_vr *vr;
5372 
5373 	if (mlxsw_sp->router->aborted)
5374 		return 0;
5375 
5376 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5377 	if (IS_ERR(vr))
5378 		return PTR_ERR(vr);
5379 
5380 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5381 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5382 				   ven_info->vif_index,
5383 				   ven_info->vif_flags, rif);
5384 }
5385 
5386 static void
5387 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5388 			      struct vif_entry_notifier_info *ven_info)
5389 {
5390 	struct mlxsw_sp_vr *vr;
5391 
5392 	if (mlxsw_sp->router->aborted)
5393 		return;
5394 
5395 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5396 	if (WARN_ON(!vr))
5397 		return;
5398 
5399 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5400 	mlxsw_sp_vr_put(vr);
5401 }
5402 
5403 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5404 {
5405 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5406 	int err;
5407 
5408 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5409 					       MLXSW_SP_LPM_TREE_MIN);
5410 	if (err)
5411 		return err;
5412 
5413 	/* The multicast router code does not need an abort trap as by default,
5414 	 * packets that don't match any routes are trapped to the CPU.
5415 	 */
5416 
5417 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5418 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5419 						MLXSW_SP_LPM_TREE_MIN + 1);
5420 }
5421 
5422 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5423 				     struct mlxsw_sp_fib_node *fib_node)
5424 {
5425 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5426 
5427 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5428 				 common.list) {
5429 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5430 
5431 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5432 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5433 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5434 		/* Break when entry list is empty and node was freed.
5435 		 * Otherwise, we'll access freed memory in the next
5436 		 * iteration.
5437 		 */
5438 		if (do_break)
5439 			break;
5440 	}
5441 }
5442 
5443 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5444 				     struct mlxsw_sp_fib_node *fib_node)
5445 {
5446 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5447 
5448 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5449 				 common.list) {
5450 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5451 
5452 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5453 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5454 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5455 		if (do_break)
5456 			break;
5457 	}
5458 }
5459 
5460 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5461 				    struct mlxsw_sp_fib_node *fib_node)
5462 {
5463 	switch (fib_node->fib->proto) {
5464 	case MLXSW_SP_L3_PROTO_IPV4:
5465 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5466 		break;
5467 	case MLXSW_SP_L3_PROTO_IPV6:
5468 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5469 		break;
5470 	}
5471 }
5472 
5473 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5474 				  struct mlxsw_sp_vr *vr,
5475 				  enum mlxsw_sp_l3proto proto)
5476 {
5477 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5478 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5479 
5480 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5481 		bool do_break = &tmp->list == &fib->node_list;
5482 
5483 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5484 		if (do_break)
5485 			break;
5486 	}
5487 }
5488 
5489 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5490 {
5491 	int i;
5492 
5493 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5494 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5495 
5496 		if (!mlxsw_sp_vr_is_used(vr))
5497 			continue;
5498 
5499 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5500 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5501 
5502 		/* If virtual router was only used for IPv4, then it's no
5503 		 * longer used.
5504 		 */
5505 		if (!mlxsw_sp_vr_is_used(vr))
5506 			continue;
5507 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5508 	}
5509 }
5510 
5511 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5512 {
5513 	int err;
5514 
5515 	if (mlxsw_sp->router->aborted)
5516 		return;
5517 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5518 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5519 	mlxsw_sp->router->aborted = true;
5520 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5521 	if (err)
5522 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5523 }
5524 
/* A FIB notification deferred to process context.
 *
 * Allocated and filled in by mlxsw_sp_router_fib_event() and freed by the
 * per-family work handler once the event was processed.
 */
struct mlxsw_sp_fib_event_work {
	/* Work item; the handler recovers this struct via container_of(). */
	struct work_struct work;
	/* Copy of the notifier payload. Which member is valid depends on
	 * the address family and on 'event'.
	 */
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	/* Driver instance the event is destined for. */
	struct mlxsw_sp *mlxsw_sp;
	/* The FIB_EVENT_* value passed to the notifier. */
	unsigned long event;
};
5538 
5539 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5540 {
5541 	struct mlxsw_sp_fib_event_work *fib_work =
5542 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5543 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5544 	bool replace, append;
5545 	int err;
5546 
5547 	/* Protect internal structures from changes */
5548 	rtnl_lock();
5549 	switch (fib_work->event) {
5550 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5551 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5552 	case FIB_EVENT_ENTRY_ADD:
5553 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5554 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5555 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5556 					       replace, append);
5557 		if (err)
5558 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5559 		fib_info_put(fib_work->fen_info.fi);
5560 		break;
5561 	case FIB_EVENT_ENTRY_DEL:
5562 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5563 		fib_info_put(fib_work->fen_info.fi);
5564 		break;
5565 	case FIB_EVENT_RULE_ADD:
5566 		/* if we get here, a rule was added that we do not support.
5567 		 * just do the fib_abort
5568 		 */
5569 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5570 		break;
5571 	case FIB_EVENT_NH_ADD: /* fall through */
5572 	case FIB_EVENT_NH_DEL:
5573 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5574 					fib_work->fnh_info.fib_nh);
5575 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5576 		break;
5577 	}
5578 	rtnl_unlock();
5579 	kfree(fib_work);
5580 }
5581 
5582 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5583 {
5584 	struct mlxsw_sp_fib_event_work *fib_work =
5585 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5586 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5587 	bool replace;
5588 	int err;
5589 
5590 	rtnl_lock();
5591 	switch (fib_work->event) {
5592 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5593 	case FIB_EVENT_ENTRY_ADD:
5594 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5595 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5596 					       fib_work->fen6_info.rt, replace);
5597 		if (err)
5598 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5599 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5600 		break;
5601 	case FIB_EVENT_ENTRY_DEL:
5602 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5603 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5604 		break;
5605 	case FIB_EVENT_RULE_ADD:
5606 		/* if we get here, a rule was added that we do not support.
5607 		 * just do the fib_abort
5608 		 */
5609 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5610 		break;
5611 	}
5612 	rtnl_unlock();
5613 	kfree(fib_work);
5614 }
5615 
5616 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5617 {
5618 	struct mlxsw_sp_fib_event_work *fib_work =
5619 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5620 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5621 	bool replace;
5622 	int err;
5623 
5624 	rtnl_lock();
5625 	switch (fib_work->event) {
5626 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5627 	case FIB_EVENT_ENTRY_ADD:
5628 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5629 
5630 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5631 						replace);
5632 		if (err)
5633 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5634 		ipmr_cache_put(fib_work->men_info.mfc);
5635 		break;
5636 	case FIB_EVENT_ENTRY_DEL:
5637 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5638 		ipmr_cache_put(fib_work->men_info.mfc);
5639 		break;
5640 	case FIB_EVENT_VIF_ADD:
5641 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5642 						    &fib_work->ven_info);
5643 		if (err)
5644 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5645 		dev_put(fib_work->ven_info.dev);
5646 		break;
5647 	case FIB_EVENT_VIF_DEL:
5648 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5649 					      &fib_work->ven_info);
5650 		dev_put(fib_work->ven_info.dev);
5651 		break;
5652 	case FIB_EVENT_RULE_ADD:
5653 		/* if we get here, a rule was added that we do not support.
5654 		 * just do the fib_abort
5655 		 */
5656 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5657 		break;
5658 	}
5659 	rtnl_unlock();
5660 	kfree(fib_work);
5661 }
5662 
5663 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5664 				       struct fib_notifier_info *info)
5665 {
5666 	struct fib_entry_notifier_info *fen_info;
5667 	struct fib_nh_notifier_info *fnh_info;
5668 
5669 	switch (fib_work->event) {
5670 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5671 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5672 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5673 	case FIB_EVENT_ENTRY_DEL:
5674 		fen_info = container_of(info, struct fib_entry_notifier_info,
5675 					info);
5676 		fib_work->fen_info = *fen_info;
5677 		/* Take reference on fib_info to prevent it from being
5678 		 * freed while work is queued. Release it afterwards.
5679 		 */
5680 		fib_info_hold(fib_work->fen_info.fi);
5681 		break;
5682 	case FIB_EVENT_NH_ADD: /* fall through */
5683 	case FIB_EVENT_NH_DEL:
5684 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5685 					info);
5686 		fib_work->fnh_info = *fnh_info;
5687 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5688 		break;
5689 	}
5690 }
5691 
5692 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5693 				       struct fib_notifier_info *info)
5694 {
5695 	struct fib6_entry_notifier_info *fen6_info;
5696 
5697 	switch (fib_work->event) {
5698 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5699 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5700 	case FIB_EVENT_ENTRY_DEL:
5701 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5702 					 info);
5703 		fib_work->fen6_info = *fen6_info;
5704 		rt6_hold(fib_work->fen6_info.rt);
5705 		break;
5706 	}
5707 }
5708 
5709 static void
5710 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5711 			    struct fib_notifier_info *info)
5712 {
5713 	switch (fib_work->event) {
5714 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5715 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5716 	case FIB_EVENT_ENTRY_DEL:
5717 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5718 		ipmr_cache_hold(fib_work->men_info.mfc);
5719 		break;
5720 	case FIB_EVENT_VIF_ADD: /* fall through */
5721 	case FIB_EVENT_VIF_DEL:
5722 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5723 		dev_hold(fib_work->ven_info.dev);
5724 		break;
5725 	}
5726 }
5727 
5728 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5729 					  struct fib_notifier_info *info,
5730 					  struct mlxsw_sp *mlxsw_sp)
5731 {
5732 	struct netlink_ext_ack *extack = info->extack;
5733 	struct fib_rule_notifier_info *fr_info;
5734 	struct fib_rule *rule;
5735 	int err = 0;
5736 
5737 	/* nothing to do at the moment */
5738 	if (event == FIB_EVENT_RULE_DEL)
5739 		return 0;
5740 
5741 	if (mlxsw_sp->router->aborted)
5742 		return 0;
5743 
5744 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5745 	rule = fr_info->rule;
5746 
5747 	switch (info->family) {
5748 	case AF_INET:
5749 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5750 			err = -1;
5751 		break;
5752 	case AF_INET6:
5753 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5754 			err = -1;
5755 		break;
5756 	case RTNL_FAMILY_IPMR:
5757 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5758 			err = -1;
5759 		break;
5760 	}
5761 
5762 	if (err < 0)
5763 		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
5764 
5765 	return err;
5766 }
5767 
5768 /* Called with rcu_read_lock() */
5769 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5770 				     unsigned long event, void *ptr)
5771 {
5772 	struct mlxsw_sp_fib_event_work *fib_work;
5773 	struct fib_notifier_info *info = ptr;
5774 	struct mlxsw_sp_router *router;
5775 	int err;
5776 
5777 	if (!net_eq(info->net, &init_net) ||
5778 	    (info->family != AF_INET && info->family != AF_INET6 &&
5779 	     info->family != RTNL_FAMILY_IPMR))
5780 		return NOTIFY_DONE;
5781 
5782 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5783 
5784 	switch (event) {
5785 	case FIB_EVENT_RULE_ADD: /* fall through */
5786 	case FIB_EVENT_RULE_DEL:
5787 		err = mlxsw_sp_router_fib_rule_event(event, info,
5788 						     router->mlxsw_sp);
5789 		if (!err)
5790 			return NOTIFY_DONE;
5791 	}
5792 
5793 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5794 	if (WARN_ON(!fib_work))
5795 		return NOTIFY_BAD;
5796 
5797 	fib_work->mlxsw_sp = router->mlxsw_sp;
5798 	fib_work->event = event;
5799 
5800 	switch (info->family) {
5801 	case AF_INET:
5802 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5803 		mlxsw_sp_router_fib4_event(fib_work, info);
5804 		break;
5805 	case AF_INET6:
5806 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5807 		mlxsw_sp_router_fib6_event(fib_work, info);
5808 		break;
5809 	case RTNL_FAMILY_IPMR:
5810 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5811 		mlxsw_sp_router_fibmr_event(fib_work, info);
5812 		break;
5813 	}
5814 
5815 	mlxsw_core_schedule_work(&fib_work->work);
5816 
5817 	return NOTIFY_DONE;
5818 }
5819 
5820 static struct mlxsw_sp_rif *
5821 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5822 			 const struct net_device *dev)
5823 {
5824 	int i;
5825 
5826 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5827 		if (mlxsw_sp->router->rifs[i] &&
5828 		    mlxsw_sp->router->rifs[i]->dev == dev)
5829 			return mlxsw_sp->router->rifs[i];
5830 
5831 	return NULL;
5832 }
5833 
5834 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5835 {
5836 	char ritr_pl[MLXSW_REG_RITR_LEN];
5837 	int err;
5838 
5839 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5840 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5841 	if (WARN_ON_ONCE(err))
5842 		return err;
5843 
5844 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5845 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5846 }
5847 
/* Synchronize router state with the removal of @rif: disable the RIF and
 * then let the nexthop and neighbour code react to it being gone.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	/* The return value is not checked; the RIF is going away anyway. */
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
5855 
5856 static bool
5857 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5858 			   unsigned long event)
5859 {
5860 	struct inet6_dev *inet6_dev;
5861 	bool addr_list_empty = true;
5862 	struct in_device *idev;
5863 
5864 	switch (event) {
5865 	case NETDEV_UP:
5866 		return rif == NULL;
5867 	case NETDEV_DOWN:
5868 		idev = __in_dev_get_rtnl(dev);
5869 		if (idev && idev->ifa_list)
5870 			addr_list_empty = false;
5871 
5872 		inet6_dev = __in6_dev_get(dev);
5873 		if (addr_list_empty && inet6_dev &&
5874 		    !list_empty(&inet6_dev->addr_list))
5875 			addr_list_empty = false;
5876 
5877 		if (rif && addr_list_empty &&
5878 		    !netif_is_l3_slave(rif->dev))
5879 			return true;
5880 		/* It is possible we already removed the RIF ourselves
5881 		 * if it was assigned to a netdev that is now a bridge
5882 		 * or LAG slave.
5883 		 */
5884 		return false;
5885 	}
5886 
5887 	return false;
5888 }
5889 
5890 static enum mlxsw_sp_rif_type
5891 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5892 		      const struct net_device *dev)
5893 {
5894 	enum mlxsw_sp_fid_type type;
5895 
5896 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5897 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5898 
5899 	/* Otherwise RIF type is derived from the type of the underlying FID. */
5900 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5901 		type = MLXSW_SP_FID_TYPE_8021Q;
5902 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5903 		type = MLXSW_SP_FID_TYPE_8021Q;
5904 	else if (netif_is_bridge_master(dev))
5905 		type = MLXSW_SP_FID_TYPE_8021D;
5906 	else
5907 		type = MLXSW_SP_FID_TYPE_RFID;
5908 
5909 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5910 }
5911 
5912 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5913 {
5914 	int i;
5915 
5916 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5917 		if (!mlxsw_sp->router->rifs[i]) {
5918 			*p_rif_index = i;
5919 			return 0;
5920 		}
5921 	}
5922 
5923 	return -ENOBUFS;
5924 }
5925 
5926 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5927 					       u16 vr_id,
5928 					       struct net_device *l3_dev)
5929 {
5930 	struct mlxsw_sp_rif *rif;
5931 
5932 	rif = kzalloc(rif_size, GFP_KERNEL);
5933 	if (!rif)
5934 		return NULL;
5935 
5936 	INIT_LIST_HEAD(&rif->nexthop_list);
5937 	INIT_LIST_HEAD(&rif->neigh_list);
5938 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5939 	rif->mtu = l3_dev->mtu;
5940 	rif->vr_id = vr_id;
5941 	rif->dev = l3_dev;
5942 	rif->rif_index = rif_index;
5943 
5944 	return rif;
5945 }
5946 
5947 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5948 					   u16 rif_index)
5949 {
5950 	return mlxsw_sp->router->rifs[rif_index];
5951 }
5952 
/* Return the index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
5957 
/* Return the RIF index stored in the common part of loopback RIF @lb_rif. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
5962 
/* Return the ul_vr_id field of loopback RIF @lb_rif.
 * NOTE(review): presumably the ID of the underlay virtual router - confirm.
 */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
5967 
5968 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
5969 {
5970 	return rif->dev->ifindex;
5971 }
5972 
/* Return the netdev that @rif is bound to. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
5977 
/* Create a router interface for the netdev in @params.
 *
 * The RIF type (and with it the ops used) is derived from the netdev. The
 * RIF is bound to the virtual router of the netdev's l3mdev FIB table, or
 * of RT_TABLE_MAIN when the table ID is 0. Errors are unwound in reverse
 * order of setup.
 *
 * Return: the new RIF, or an ERR_PTR() on failure.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	/* Account for the RIF right away; undone on every error path. */
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* fid_get is optional; RIF types without it have no FID. */
	if (ops->fid_get) {
		fid = ops->fid_get(rif);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	/* setup is optional as well. */
	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
	if (err)
		goto err_mr_rif_add;

	mlxsw_sp_rif_counters_alloc(rif);
	/* Publish the RIF in the table only once it is fully set up. */
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(vr);
	return ERR_PTR(err);
}
6052 
6053 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6054 {
6055 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6056 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6057 	struct mlxsw_sp_fid *fid = rif->fid;
6058 	struct mlxsw_sp_vr *vr;
6059 
6060 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6061 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6062 
6063 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6064 	mlxsw_sp_rif_counters_free(rif);
6065 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6066 	ops->deconfigure(rif);
6067 	if (fid)
6068 		/* Loopback RIFs are not associated with a FID. */
6069 		mlxsw_sp_fid_put(fid);
6070 	kfree(rif);
6071 	vr->rif_count--;
6072 	mlxsw_sp_vr_put(vr);
6073 }
6074 
6075 static void
6076 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6077 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6078 {
6079 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6080 
6081 	params->vid = mlxsw_sp_port_vlan->vid;
6082 	params->lag = mlxsw_sp_port->lagged;
6083 	if (params->lag)
6084 		params->lag_id = mlxsw_sp_port->lag_id;
6085 	else
6086 		params->system_port = mlxsw_sp_port->local_port;
6087 }
6088 
6089 static int
6090 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6091 			       struct net_device *l3_dev,
6092 			       struct netlink_ext_ack *extack)
6093 {
6094 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6095 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6096 	u16 vid = mlxsw_sp_port_vlan->vid;
6097 	struct mlxsw_sp_rif *rif;
6098 	struct mlxsw_sp_fid *fid;
6099 	int err;
6100 
6101 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6102 	if (!rif) {
6103 		struct mlxsw_sp_rif_params params = {
6104 			.dev = l3_dev,
6105 		};
6106 
6107 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6108 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6109 		if (IS_ERR(rif))
6110 			return PTR_ERR(rif);
6111 	}
6112 
6113 	/* FID was already created, just take a reference */
6114 	fid = rif->ops->fid_get(rif);
6115 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6116 	if (err)
6117 		goto err_fid_port_vid_map;
6118 
6119 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6120 	if (err)
6121 		goto err_port_vid_learning_set;
6122 
6123 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6124 					BR_STATE_FORWARDING);
6125 	if (err)
6126 		goto err_port_vid_stp_set;
6127 
6128 	mlxsw_sp_port_vlan->fid = fid;
6129 
6130 	return 0;
6131 
6132 err_port_vid_stp_set:
6133 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6134 err_port_vid_learning_set:
6135 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6136 err_fid_port_vid_map:
6137 	mlxsw_sp_fid_put(fid);
6138 	return err;
6139 }
6140 
6141 void
6142 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6143 {
6144 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6145 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6146 	u16 vid = mlxsw_sp_port_vlan->vid;
6147 
6148 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6149 		return;
6150 
6151 	mlxsw_sp_port_vlan->fid = NULL;
6152 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6153 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6154 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6155 	/* If router port holds the last reference on the rFID, then the
6156 	 * associated Sub-port RIF will be destroyed.
6157 	 */
6158 	mlxsw_sp_fid_put(fid);
6159 }
6160 
6161 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6162 					     struct net_device *port_dev,
6163 					     unsigned long event, u16 vid,
6164 					     struct netlink_ext_ack *extack)
6165 {
6166 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6167 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6168 
6169 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6170 	if (WARN_ON(!mlxsw_sp_port_vlan))
6171 		return -EINVAL;
6172 
6173 	switch (event) {
6174 	case NETDEV_UP:
6175 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6176 						      l3_dev, extack);
6177 	case NETDEV_DOWN:
6178 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6179 		break;
6180 	}
6181 
6182 	return 0;
6183 }
6184 
6185 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6186 					unsigned long event,
6187 					struct netlink_ext_ack *extack)
6188 {
6189 	if (netif_is_bridge_port(port_dev) ||
6190 	    netif_is_lag_port(port_dev) ||
6191 	    netif_is_ovs_port(port_dev))
6192 		return 0;
6193 
6194 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6195 						 extack);
6196 }
6197 
6198 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6199 					 struct net_device *lag_dev,
6200 					 unsigned long event, u16 vid,
6201 					 struct netlink_ext_ack *extack)
6202 {
6203 	struct net_device *port_dev;
6204 	struct list_head *iter;
6205 	int err;
6206 
6207 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6208 		if (mlxsw_sp_port_dev_check(port_dev)) {
6209 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6210 								port_dev,
6211 								event, vid,
6212 								extack);
6213 			if (err)
6214 				return err;
6215 		}
6216 	}
6217 
6218 	return 0;
6219 }
6220 
/* Handle an address event on a LAG netdevice. A LAG that is a bridge
 * port is skipped; otherwise the event is handled for its ports with
 * VID 1.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (!netif_is_bridge_port(lag_dev))
		return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
						     1, extack);

	return 0;
}
6231 
6232 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6233 					  unsigned long event,
6234 					  struct netlink_ext_ack *extack)
6235 {
6236 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6237 	struct mlxsw_sp_rif_params params = {
6238 		.dev = l3_dev,
6239 	};
6240 	struct mlxsw_sp_rif *rif;
6241 
6242 	switch (event) {
6243 	case NETDEV_UP:
6244 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6245 		if (IS_ERR(rif))
6246 			return PTR_ERR(rif);
6247 		break;
6248 	case NETDEV_DOWN:
6249 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6250 		mlxsw_sp_rif_destroy(rif);
6251 		break;
6252 	}
6253 
6254 	return 0;
6255 }
6256 
6257 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6258 					unsigned long event,
6259 					struct netlink_ext_ack *extack)
6260 {
6261 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6262 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6263 
6264 	if (netif_is_bridge_port(vlan_dev))
6265 		return 0;
6266 
6267 	if (mlxsw_sp_port_dev_check(real_dev))
6268 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6269 							 event, vid, extack);
6270 	else if (netif_is_lag_master(real_dev))
6271 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6272 						     vid, extack);
6273 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6274 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6275 
6276 	return 0;
6277 }
6278 
/* Dispatch an address event to the handler matching the netdevice kind
 * (port, LAG master, bridge master, VLAN device), checked in that order.
 * Any other kind of device is ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	return 0;
}
6294 
6295 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6296 			    unsigned long event, void *ptr)
6297 {
6298 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6299 	struct net_device *dev = ifa->ifa_dev->dev;
6300 	struct mlxsw_sp *mlxsw_sp;
6301 	struct mlxsw_sp_rif *rif;
6302 	int err = 0;
6303 
6304 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6305 	if (event == NETDEV_UP)
6306 		goto out;
6307 
6308 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6309 	if (!mlxsw_sp)
6310 		goto out;
6311 
6312 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6313 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6314 		goto out;
6315 
6316 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6317 out:
6318 	return notifier_from_errno(err);
6319 }
6320 
6321 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6322 				  unsigned long event, void *ptr)
6323 {
6324 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6325 	struct net_device *dev = ivi->ivi_dev->dev;
6326 	struct mlxsw_sp *mlxsw_sp;
6327 	struct mlxsw_sp_rif *rif;
6328 	int err = 0;
6329 
6330 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6331 	if (!mlxsw_sp)
6332 		goto out;
6333 
6334 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6335 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6336 		goto out;
6337 
6338 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6339 out:
6340 	return notifier_from_errno(err);
6341 }
6342 
/* An inet6addr event queued by mlxsw_sp_inet6addr_event() and processed
 * later, under RTNL, by the mlxsw_sp_inet6addr_event_work() work function.
 */
struct mlxsw_sp_inet6addr_event_work {
	/* Work item; the work function recovers this struct from it. */
	struct work_struct work;
	/* Device the event is for; held with dev_hold() while queued. */
	struct net_device *dev;
	/* Notifier event code as received by the notifier callback. */
	unsigned long event;
};
6348 
6349 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6350 {
6351 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6352 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6353 	struct net_device *dev = inet6addr_work->dev;
6354 	unsigned long event = inet6addr_work->event;
6355 	struct mlxsw_sp *mlxsw_sp;
6356 	struct mlxsw_sp_rif *rif;
6357 
6358 	rtnl_lock();
6359 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6360 	if (!mlxsw_sp)
6361 		goto out;
6362 
6363 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6364 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6365 		goto out;
6366 
6367 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6368 out:
6369 	rtnl_unlock();
6370 	dev_put(dev);
6371 	kfree(inet6addr_work);
6372 }
6373 
6374 /* Called with rcu_read_lock() */
6375 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6376 			     unsigned long event, void *ptr)
6377 {
6378 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6379 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6380 	struct net_device *dev = if6->idev->dev;
6381 
6382 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6383 	if (event == NETDEV_UP)
6384 		return NOTIFY_DONE;
6385 
6386 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6387 		return NOTIFY_DONE;
6388 
6389 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6390 	if (!inet6addr_work)
6391 		return NOTIFY_BAD;
6392 
6393 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6394 	inet6addr_work->dev = dev;
6395 	inet6addr_work->event = event;
6396 	dev_hold(dev);
6397 	mlxsw_core_schedule_work(&inet6addr_work->work);
6398 
6399 	return NOTIFY_DONE;
6400 }
6401 
6402 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6403 				   unsigned long event, void *ptr)
6404 {
6405 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6406 	struct net_device *dev = i6vi->i6vi_dev->dev;
6407 	struct mlxsw_sp *mlxsw_sp;
6408 	struct mlxsw_sp_rif *rif;
6409 	int err = 0;
6410 
6411 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6412 	if (!mlxsw_sp)
6413 		goto out;
6414 
6415 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6416 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6417 		goto out;
6418 
6419 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6420 out:
6421 	return notifier_from_errno(err);
6422 }
6423 
6424 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6425 			     const char *mac, int mtu)
6426 {
6427 	char ritr_pl[MLXSW_REG_RITR_LEN];
6428 	int err;
6429 
6430 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6431 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6432 	if (err)
6433 		return err;
6434 
6435 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6436 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6437 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6438 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6439 }
6440 
6441 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6442 {
6443 	struct mlxsw_sp *mlxsw_sp;
6444 	struct mlxsw_sp_rif *rif;
6445 	u16 fid_index;
6446 	int err;
6447 
6448 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6449 	if (!mlxsw_sp)
6450 		return 0;
6451 
6452 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6453 	if (!rif)
6454 		return 0;
6455 	fid_index = mlxsw_sp_fid_index(rif->fid);
6456 
6457 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6458 	if (err)
6459 		return err;
6460 
6461 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6462 				dev->mtu);
6463 	if (err)
6464 		goto err_rif_edit;
6465 
6466 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6467 	if (err)
6468 		goto err_rif_fdb_op;
6469 
6470 	if (rif->mtu != dev->mtu) {
6471 		struct mlxsw_sp_vr *vr;
6472 
6473 		/* The RIF is relevant only to its mr_table instance, as unlike
6474 		 * unicast routing, in multicast routing a RIF cannot be shared
6475 		 * between several multicast routing tables.
6476 		 */
6477 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6478 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6479 	}
6480 
6481 	ether_addr_copy(rif->addr, dev->dev_addr);
6482 	rif->mtu = dev->mtu;
6483 
6484 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6485 
6486 	return 0;
6487 
6488 err_rif_fdb_op:
6489 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6490 err_rif_edit:
6491 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6492 	return err;
6493 }
6494 
6495 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6496 				  struct net_device *l3_dev,
6497 				  struct netlink_ext_ack *extack)
6498 {
6499 	struct mlxsw_sp_rif *rif;
6500 
6501 	/* If netdev is already associated with a RIF, then we need to
6502 	 * destroy it and create a new one with the new virtual router ID.
6503 	 */
6504 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6505 	if (rif)
6506 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6507 
6508 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6509 }
6510 
6511 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6512 				    struct net_device *l3_dev)
6513 {
6514 	struct mlxsw_sp_rif *rif;
6515 
6516 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6517 	if (!rif)
6518 		return;
6519 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6520 }
6521 
6522 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6523 				 struct netdev_notifier_changeupper_info *info)
6524 {
6525 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6526 	int err = 0;
6527 
6528 	if (!mlxsw_sp)
6529 		return 0;
6530 
6531 	switch (event) {
6532 	case NETDEV_PRECHANGEUPPER:
6533 		return 0;
6534 	case NETDEV_CHANGEUPPER:
6535 		if (info->linking) {
6536 			struct netlink_ext_ack *extack;
6537 
6538 			extack = netdev_notifier_info_to_extack(&info->info);
6539 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6540 		} else {
6541 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6542 		}
6543 		break;
6544 	}
6545 
6546 	return err;
6547 }
6548 
6549 static struct mlxsw_sp_rif_subport *
6550 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6551 {
6552 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6553 }
6554 
6555 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6556 				       const struct mlxsw_sp_rif_params *params)
6557 {
6558 	struct mlxsw_sp_rif_subport *rif_subport;
6559 
6560 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6561 	rif_subport->vid = params->vid;
6562 	rif_subport->lag = params->lag;
6563 	if (params->lag)
6564 		rif_subport->lag_id = params->lag_id;
6565 	else
6566 		rif_subport->system_port = params->system_port;
6567 }
6568 
6569 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6570 {
6571 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6572 	struct mlxsw_sp_rif_subport *rif_subport;
6573 	char ritr_pl[MLXSW_REG_RITR_LEN];
6574 
6575 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6576 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6577 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6578 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6579 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6580 				  rif_subport->lag ? rif_subport->lag_id :
6581 						     rif_subport->system_port,
6582 				  rif_subport->vid);
6583 
6584 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6585 }
6586 
6587 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6588 {
6589 	int err;
6590 
6591 	err = mlxsw_sp_rif_subport_op(rif, true);
6592 	if (err)
6593 		return err;
6594 
6595 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6596 				  mlxsw_sp_fid_index(rif->fid), true);
6597 	if (err)
6598 		goto err_rif_fdb_op;
6599 
6600 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6601 	return 0;
6602 
6603 err_rif_fdb_op:
6604 	mlxsw_sp_rif_subport_op(rif, false);
6605 	return err;
6606 }
6607 
6608 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6609 {
6610 	struct mlxsw_sp_fid *fid = rif->fid;
6611 
6612 	mlxsw_sp_fid_rif_set(fid, NULL);
6613 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6614 			    mlxsw_sp_fid_index(fid), false);
6615 	mlxsw_sp_rif_subport_op(rif, false);
6616 }
6617 
6618 static struct mlxsw_sp_fid *
6619 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6620 {
6621 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6622 }
6623 
6624 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6625 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6626 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6627 	.setup			= mlxsw_sp_rif_subport_setup,
6628 	.configure		= mlxsw_sp_rif_subport_configure,
6629 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6630 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6631 };
6632 
6633 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6634 				    enum mlxsw_reg_ritr_if_type type,
6635 				    u16 vid_fid, bool enable)
6636 {
6637 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6638 	char ritr_pl[MLXSW_REG_RITR_LEN];
6639 
6640 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6641 			    rif->dev->mtu);
6642 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6643 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6644 
6645 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6646 }
6647 
6648 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6649 {
6650 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6651 }
6652 
6653 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6654 {
6655 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6656 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6657 	int err;
6658 
6659 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6660 	if (err)
6661 		return err;
6662 
6663 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6664 				     mlxsw_sp_router_port(mlxsw_sp), true);
6665 	if (err)
6666 		goto err_fid_mc_flood_set;
6667 
6668 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6669 				     mlxsw_sp_router_port(mlxsw_sp), true);
6670 	if (err)
6671 		goto err_fid_bc_flood_set;
6672 
6673 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6674 				  mlxsw_sp_fid_index(rif->fid), true);
6675 	if (err)
6676 		goto err_rif_fdb_op;
6677 
6678 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6679 	return 0;
6680 
6681 err_rif_fdb_op:
6682 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6683 			       mlxsw_sp_router_port(mlxsw_sp), false);
6684 err_fid_bc_flood_set:
6685 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6686 			       mlxsw_sp_router_port(mlxsw_sp), false);
6687 err_fid_mc_flood_set:
6688 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6689 	return err;
6690 }
6691 
6692 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6693 {
6694 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6695 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6696 	struct mlxsw_sp_fid *fid = rif->fid;
6697 
6698 	mlxsw_sp_fid_rif_set(fid, NULL);
6699 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6700 			    mlxsw_sp_fid_index(fid), false);
6701 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6702 			       mlxsw_sp_router_port(mlxsw_sp), false);
6703 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6704 			       mlxsw_sp_router_port(mlxsw_sp), false);
6705 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6706 }
6707 
6708 static struct mlxsw_sp_fid *
6709 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6710 {
6711 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6712 
6713 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6714 }
6715 
6716 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6717 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6718 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6719 	.configure		= mlxsw_sp_rif_vlan_configure,
6720 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6721 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6722 };
6723 
6724 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6725 {
6726 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6727 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6728 	int err;
6729 
6730 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6731 				       true);
6732 	if (err)
6733 		return err;
6734 
6735 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6736 				     mlxsw_sp_router_port(mlxsw_sp), true);
6737 	if (err)
6738 		goto err_fid_mc_flood_set;
6739 
6740 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6741 				     mlxsw_sp_router_port(mlxsw_sp), true);
6742 	if (err)
6743 		goto err_fid_bc_flood_set;
6744 
6745 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6746 				  mlxsw_sp_fid_index(rif->fid), true);
6747 	if (err)
6748 		goto err_rif_fdb_op;
6749 
6750 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6751 	return 0;
6752 
6753 err_rif_fdb_op:
6754 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6755 			       mlxsw_sp_router_port(mlxsw_sp), false);
6756 err_fid_bc_flood_set:
6757 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6758 			       mlxsw_sp_router_port(mlxsw_sp), false);
6759 err_fid_mc_flood_set:
6760 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6761 	return err;
6762 }
6763 
6764 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6765 {
6766 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6767 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6768 	struct mlxsw_sp_fid *fid = rif->fid;
6769 
6770 	mlxsw_sp_fid_rif_set(fid, NULL);
6771 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6772 			    mlxsw_sp_fid_index(fid), false);
6773 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6774 			       mlxsw_sp_router_port(mlxsw_sp), false);
6775 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6776 			       mlxsw_sp_router_port(mlxsw_sp), false);
6777 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6778 }
6779 
6780 static struct mlxsw_sp_fid *
6781 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6782 {
6783 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6784 }
6785 
6786 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6787 	.type			= MLXSW_SP_RIF_TYPE_FID,
6788 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6789 	.configure		= mlxsw_sp_rif_fid_configure,
6790 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6791 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6792 };
6793 
6794 static struct mlxsw_sp_rif_ipip_lb *
6795 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6796 {
6797 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6798 }
6799 
6800 static void
6801 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6802 			   const struct mlxsw_sp_rif_params *params)
6803 {
6804 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6805 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6806 
6807 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6808 				 common);
6809 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6810 	rif_lb->lb_config = params_lb->lb_config;
6811 }
6812 
6813 static int
6814 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6815 			struct mlxsw_sp_vr *ul_vr, bool enable)
6816 {
6817 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6818 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6819 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6820 	char ritr_pl[MLXSW_REG_RITR_LEN];
6821 	u32 saddr4;
6822 
6823 	switch (lb_cf.ul_protocol) {
6824 	case MLXSW_SP_L3_PROTO_IPV4:
6825 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6826 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6827 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6828 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6829 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6830 			    ul_vr->id, saddr4, lb_cf.okey);
6831 		break;
6832 
6833 	case MLXSW_SP_L3_PROTO_IPV6:
6834 		return -EAFNOSUPPORT;
6835 	}
6836 
6837 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6838 }
6839 
6840 static int
6841 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6842 {
6843 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6844 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6845 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6846 	struct mlxsw_sp_vr *ul_vr;
6847 	int err;
6848 
6849 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6850 	if (IS_ERR(ul_vr))
6851 		return PTR_ERR(ul_vr);
6852 
6853 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6854 	if (err)
6855 		goto err_loopback_op;
6856 
6857 	lb_rif->ul_vr_id = ul_vr->id;
6858 	++ul_vr->rif_count;
6859 	return 0;
6860 
6861 err_loopback_op:
6862 	mlxsw_sp_vr_put(ul_vr);
6863 	return err;
6864 }
6865 
6866 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6867 {
6868 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6869 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6870 	struct mlxsw_sp_vr *ul_vr;
6871 
6872 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6873 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6874 
6875 	--ul_vr->rif_count;
6876 	mlxsw_sp_vr_put(ul_vr);
6877 }
6878 
6879 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6880 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6881 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6882 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6883 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6884 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6885 };
6886 
6887 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6888 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6889 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6890 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6891 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6892 };
6893 
6894 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6895 {
6896 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6897 
6898 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6899 					 sizeof(struct mlxsw_sp_rif *),
6900 					 GFP_KERNEL);
6901 	if (!mlxsw_sp->router->rifs)
6902 		return -ENOMEM;
6903 
6904 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6905 
6906 	return 0;
6907 }
6908 
6909 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6910 {
6911 	int i;
6912 
6913 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6914 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6915 
6916 	kfree(mlxsw_sp->router->rifs);
6917 }
6918 
6919 static int
6920 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6921 {
6922 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6923 
6924 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6925 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6926 }
6927 
6928 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6929 {
6930 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6931 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6932 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6933 }
6934 
6935 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6936 {
6937 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6938 }
6939 
6940 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6941 {
6942 	struct mlxsw_sp_router *router;
6943 
6944 	/* Flush pending FIB notifications and then flush the device's
6945 	 * table before requesting another dump. The FIB notification
6946 	 * block is unregistered, so no need to take RTNL.
6947 	 */
6948 	mlxsw_core_flush_owq();
6949 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6950 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6951 }
6952 
6953 #ifdef CONFIG_IP_ROUTE_MULTIPATH
6954 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6955 {
6956 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
6957 }
6958 
6959 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6960 {
6961 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
6962 }
6963 
6964 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6965 {
6966 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
6967 
6968 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6969 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
6970 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
6971 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
6972 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
6973 	if (only_l3)
6974 		return;
6975 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
6976 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
6977 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
6978 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
6979 }
6980 
6981 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
6982 {
6983 	mlxsw_sp_mp_hash_header_set(recr2_pl,
6984 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
6985 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
6986 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
6987 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
6988 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
6989 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
6990 }
6991 
6992 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
6993 {
6994 	char recr2_pl[MLXSW_REG_RECR2_LEN];
6995 	u32 seed;
6996 
6997 	get_random_bytes(&seed, sizeof(seed));
6998 	mlxsw_reg_recr2_pack(recr2_pl, seed);
6999 	mlxsw_sp_mp4_hash_init(recr2_pl);
7000 	mlxsw_sp_mp6_hash_init(recr2_pl);
7001 
7002 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7003 }
7004 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to
 * configure, always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	const int success = 0;

	return success;
}
7009 #endif
7010 
7011 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7012 {
7013 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7014 	u64 max_rifs;
7015 	int err;
7016 
7017 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7018 		return -EIO;
7019 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7020 
7021 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7022 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7023 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7024 	if (err)
7025 		return err;
7026 	return 0;
7027 }
7028 
7029 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7030 {
7031 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7032 
7033 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7034 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7035 }
7036 
7037 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7038 {
7039 	struct mlxsw_sp_router *router;
7040 	int err;
7041 
7042 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7043 	if (!router)
7044 		return -ENOMEM;
7045 	mlxsw_sp->router = router;
7046 	router->mlxsw_sp = mlxsw_sp;
7047 
7048 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7049 	err = __mlxsw_sp_router_init(mlxsw_sp);
7050 	if (err)
7051 		goto err_router_init;
7052 
7053 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7054 	if (err)
7055 		goto err_rifs_init;
7056 
7057 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7058 	if (err)
7059 		goto err_ipips_init;
7060 
7061 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7062 			      &mlxsw_sp_nexthop_ht_params);
7063 	if (err)
7064 		goto err_nexthop_ht_init;
7065 
7066 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7067 			      &mlxsw_sp_nexthop_group_ht_params);
7068 	if (err)
7069 		goto err_nexthop_group_ht_init;
7070 
7071 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7072 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7073 	if (err)
7074 		goto err_lpm_init;
7075 
7076 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7077 	if (err)
7078 		goto err_mr_init;
7079 
7080 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7081 	if (err)
7082 		goto err_vrs_init;
7083 
7084 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7085 	if (err)
7086 		goto err_neigh_init;
7087 
7088 	mlxsw_sp->router->netevent_nb.notifier_call =
7089 		mlxsw_sp_router_netevent_event;
7090 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7091 	if (err)
7092 		goto err_register_netevent_notifier;
7093 
7094 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7095 	if (err)
7096 		goto err_mp_hash_init;
7097 
7098 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7099 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7100 				    mlxsw_sp_router_fib_dump_flush);
7101 	if (err)
7102 		goto err_register_fib_notifier;
7103 
7104 	return 0;
7105 
7106 err_register_fib_notifier:
7107 err_mp_hash_init:
7108 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7109 err_register_netevent_notifier:
7110 	mlxsw_sp_neigh_fini(mlxsw_sp);
7111 err_neigh_init:
7112 	mlxsw_sp_vrs_fini(mlxsw_sp);
7113 err_vrs_init:
7114 	mlxsw_sp_mr_fini(mlxsw_sp);
7115 err_mr_init:
7116 	mlxsw_sp_lpm_fini(mlxsw_sp);
7117 err_lpm_init:
7118 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7119 err_nexthop_group_ht_init:
7120 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7121 err_nexthop_ht_init:
7122 	mlxsw_sp_ipips_fini(mlxsw_sp);
7123 err_ipips_init:
7124 	mlxsw_sp_rifs_fini(mlxsw_sp);
7125 err_rifs_init:
7126 	__mlxsw_sp_router_fini(mlxsw_sp);
7127 err_router_init:
7128 	kfree(mlxsw_sp->router);
7129 	return err;
7130 }
7131 
7132 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7133 {
7134 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7135 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7136 	mlxsw_sp_neigh_fini(mlxsw_sp);
7137 	mlxsw_sp_vrs_fini(mlxsw_sp);
7138 	mlxsw_sp_mr_fini(mlxsw_sp);
7139 	mlxsw_sp_lpm_fini(mlxsw_sp);
7140 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7141 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7142 	mlxsw_sp_ipips_fini(mlxsw_sp);
7143 	mlxsw_sp_rifs_fini(mlxsw_sp);
7144 	__mlxsw_sp_router_fini(mlxsw_sp);
7145 	kfree(mlxsw_sp->router);
7146 }
7147