1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 
74 struct mlxsw_sp_fib;
75 struct mlxsw_sp_vr;
76 struct mlxsw_sp_lpm_tree;
77 struct mlxsw_sp_rif_ops;
78 
79 struct mlxsw_sp_router {
80 	struct mlxsw_sp *mlxsw_sp;
81 	struct mlxsw_sp_rif **rifs;
82 	struct mlxsw_sp_vr *vrs;
83 	struct rhashtable neigh_ht;
84 	struct rhashtable nexthop_group_ht;
85 	struct rhashtable nexthop_ht;
86 	struct list_head nexthop_list;
87 	struct {
88 		/* One tree for each protocol: IPv4 and IPv6 */
89 		struct mlxsw_sp_lpm_tree *proto_trees[2];
90 		struct mlxsw_sp_lpm_tree *trees;
91 		unsigned int tree_count;
92 	} lpm;
93 	struct {
94 		struct delayed_work dw;
95 		unsigned long interval;	/* ms */
96 	} neighs_update;
97 	struct delayed_work nexthop_probe_dw;
98 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
99 	struct list_head nexthop_neighs_list;
100 	struct list_head ipip_list;
101 	bool aborted;
102 	struct notifier_block fib_nb;
103 	struct notifier_block netevent_nb;
104 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
105 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
106 };
107 
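/* Router interface (RIF). One is created for each netdevice that L3
 * forwarding is offloaded on, and it is referenced by the next hops and
 * neighbour entries that egress through it.
 */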
108 struct mlxsw_sp_rif {
109 	struct list_head nexthop_list;
110 	struct list_head neigh_list;
111 	struct net_device *dev;
112 	struct mlxsw_sp_fid *fid;
113 	unsigned char addr[ETH_ALEN];
114 	int mtu;
115 	u16 rif_index;
116 	u16 vr_id;
117 	const struct mlxsw_sp_rif_ops *ops;
118 	struct mlxsw_sp *mlxsw_sp;
119 
120 	unsigned int counter_ingress;
121 	bool counter_ingress_valid;
122 	unsigned int counter_egress;
123 	bool counter_egress_valid;
124 };
125 
126 struct mlxsw_sp_rif_params {
127 	struct net_device *dev;
128 	union {
129 		u16 system_port;
130 		u16 lag_id;
131 	};
132 	u16 vid;
133 	bool lag;
134 };
135 
136 struct mlxsw_sp_rif_subport {
137 	struct mlxsw_sp_rif common;
138 	union {
139 		u16 system_port;
140 		u16 lag_id;
141 	};
142 	u16 vid;
143 	bool lag;
144 };
145 
146 struct mlxsw_sp_rif_ipip_lb {
147 	struct mlxsw_sp_rif common;
148 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
149 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
150 };
151 
152 struct mlxsw_sp_rif_params_ipip_lb {
153 	struct mlxsw_sp_rif_params common;
154 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
155 };
156 
157 struct mlxsw_sp_rif_ops {
158 	enum mlxsw_sp_rif_type type;
159 	size_t rif_size;
160 
161 	void (*setup)(struct mlxsw_sp_rif *rif,
162 		      const struct mlxsw_sp_rif_params *params);
163 	int (*configure)(struct mlxsw_sp_rif *rif);
164 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
165 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
166 };
167 
168 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
169 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
170 				  struct mlxsw_sp_lpm_tree *lpm_tree);
171 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
172 				     const struct mlxsw_sp_fib *fib,
173 				     u8 tree_id);
174 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
175 				       const struct mlxsw_sp_fib *fib);
176 
177 static unsigned int *
178 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
179 			   enum mlxsw_sp_rif_counter_dir dir)
180 {
181 	switch (dir) {
182 	case MLXSW_SP_RIF_COUNTER_EGRESS:
183 		return &rif->counter_egress;
184 	case MLXSW_SP_RIF_COUNTER_INGRESS:
185 		return &rif->counter_ingress;
186 	}
187 	return NULL;
188 }
189 
190 static bool
191 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
192 			       enum mlxsw_sp_rif_counter_dir dir)
193 {
194 	switch (dir) {
195 	case MLXSW_SP_RIF_COUNTER_EGRESS:
196 		return rif->counter_egress_valid;
197 	case MLXSW_SP_RIF_COUNTER_INGRESS:
198 		return rif->counter_ingress_valid;
199 	}
200 	return false;
201 }
202 
203 static void
204 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
205 			       enum mlxsw_sp_rif_counter_dir dir,
206 			       bool valid)
207 {
208 	switch (dir) {
209 	case MLXSW_SP_RIF_COUNTER_EGRESS:
210 		rif->counter_egress_valid = valid;
211 		break;
212 	case MLXSW_SP_RIF_COUNTER_INGRESS:
213 		rif->counter_ingress_valid = valid;
214 		break;
215 	}
216 }
217 
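/* Bind or unbind a counter to the RIF in the given direction. The RITR
 * register is queried first and then written back with the new counter
 * fields, so the rest of the RIF's configuration is preserved.
 */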
218 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
219 				     unsigned int counter_index, bool enable,
220 				     enum mlxsw_sp_rif_counter_dir dir)
221 {
222 	char ritr_pl[MLXSW_REG_RITR_LEN];
223 	bool is_egress = false;
224 	int err;
225 
226 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
227 		is_egress = true;
228 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
229 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
230 	if (err)
231 		return err;
232 
233 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
234 				    is_egress);
235 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
236 }
237 
238 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
239 				   struct mlxsw_sp_rif *rif,
240 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
241 {
242 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
243 	unsigned int *p_counter_index;
244 	bool valid;
245 	int err;
246 
247 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
248 	if (!valid)
249 		return -EINVAL;
250 
251 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 	if (!p_counter_index)
253 		return -EINVAL;
254 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
255 			     MLXSW_REG_RICNT_OPCODE_NOP);
256 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
257 	if (err)
258 		return err;
259 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
260 	return 0;
261 }
262 
263 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
264 				      unsigned int counter_index)
265 {
266 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
267 
268 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
269 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
270 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
271 }
272 
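/* Allocate a counter from the RIF sub-pool, clear it and enable it on the
 * RIF in the given direction.
 */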
273 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
274 			       struct mlxsw_sp_rif *rif,
275 			       enum mlxsw_sp_rif_counter_dir dir)
276 {
277 	unsigned int *p_counter_index;
278 	int err;
279 
280 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
281 	if (!p_counter_index)
282 		return -EINVAL;
283 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
284 				     p_counter_index);
285 	if (err)
286 		return err;
287 
288 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
289 	if (err)
290 		goto err_counter_clear;
291 
292 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
293 					*p_counter_index, true, dir);
294 	if (err)
295 		goto err_counter_edit;
296 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
297 	return 0;
298 
299 err_counter_edit:
300 err_counter_clear:
301 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
302 			      *p_counter_index);
303 	return err;
304 }
305 
306 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
307 			       struct mlxsw_sp_rif *rif,
308 			       enum mlxsw_sp_rif_counter_dir dir)
309 {
310 	unsigned int *p_counter_index;
311 
312 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
313 		return;
314 
315 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
316 	if (WARN_ON(!p_counter_index))
317 		return;
318 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
319 				  *p_counter_index, false, dir);
320 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
321 			      *p_counter_index);
322 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
323 }
324 
325 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
326 {
327 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328 	struct devlink *devlink;
329 
330 	devlink = priv_to_devlink(mlxsw_sp->core);
331 	if (!devlink_dpipe_table_counter_enabled(devlink,
332 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
333 		return;
334 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
335 }
336 
337 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
338 {
339 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
340 
341 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
342 }
343 
344 static struct mlxsw_sp_rif *
345 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
346 			 const struct net_device *dev);
347 
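/* One bit for each possible prefix length: /0 up to and including /128 in
 * the IPv6 case, hence the "+ 1".
 */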
348 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
349 
350 struct mlxsw_sp_prefix_usage {
351 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
352 };
353 
354 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
355 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
356 
357 static bool
358 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
359 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
360 {
361 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
362 }
363 
364 static void
365 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
366 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
367 {
368 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
369 }
370 
371 static void
372 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
373 			  unsigned char prefix_len)
374 {
375 	set_bit(prefix_len, prefix_usage->b);
376 }
377 
378 static void
379 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
380 			    unsigned char prefix_len)
381 {
382 	clear_bit(prefix_len, prefix_usage->b);
383 }
384 
385 struct mlxsw_sp_fib_key {
386 	unsigned char addr[sizeof(struct in6_addr)];
387 	unsigned char prefix_len;
388 };
389 
390 enum mlxsw_sp_fib_entry_type {
391 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
392 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
393 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
394 
395 	/* This is a special case of local delivery, where a packet should be
396 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
397 	 * because that's a type of next hop, not of FIB entry. (There can be
398 	 * several next hops in a REMOTE entry, and some of them may be
399 	 * encapsulating entries.)
400 	 */
401 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
402 };
403 
404 struct mlxsw_sp_nexthop_group;
405 
406 struct mlxsw_sp_fib_node {
407 	struct list_head entry_list;
408 	struct list_head list;
409 	struct rhash_head ht_node;
410 	struct mlxsw_sp_fib *fib;
411 	struct mlxsw_sp_fib_key key;
412 };
413 
414 struct mlxsw_sp_fib_entry_decap {
415 	struct mlxsw_sp_ipip_entry *ipip_entry;
416 	u32 tunnel_index;
417 };
418 
419 struct mlxsw_sp_fib_entry {
420 	struct list_head list;
421 	struct mlxsw_sp_fib_node *fib_node;
422 	enum mlxsw_sp_fib_entry_type type;
423 	struct list_head nexthop_group_node;
424 	struct mlxsw_sp_nexthop_group *nh_group;
425 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
426 };
427 
428 struct mlxsw_sp_fib4_entry {
429 	struct mlxsw_sp_fib_entry common;
430 	u32 tb_id;
431 	u32 prio;
432 	u8 tos;
433 	u8 type;
434 };
435 
436 struct mlxsw_sp_fib6_entry {
437 	struct mlxsw_sp_fib_entry common;
438 	struct list_head rt6_list;
439 	unsigned int nrt6;
440 };
441 
442 struct mlxsw_sp_rt6 {
443 	struct list_head list;
444 	struct rt6_info *rt;
445 };
446 
447 struct mlxsw_sp_lpm_tree {
448 	u8 id; /* tree ID */
449 	unsigned int ref_count;
450 	enum mlxsw_sp_l3proto proto;
451 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
452 	struct mlxsw_sp_prefix_usage prefix_usage;
453 };
454 
455 struct mlxsw_sp_fib {
456 	struct rhashtable ht;
457 	struct list_head node_list;
458 	struct mlxsw_sp_vr *vr;
459 	struct mlxsw_sp_lpm_tree *lpm_tree;
460 	enum mlxsw_sp_l3proto proto;
461 };
462 
463 struct mlxsw_sp_vr {
464 	u16 id; /* virtual router ID */
465 	u32 tb_id; /* kernel fib table id */
466 	unsigned int rif_count;
467 	struct mlxsw_sp_fib *fib4;
468 	struct mlxsw_sp_fib *fib6;
469 	struct mlxsw_sp_mr_table *mr4_table;
470 };
471 
472 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
473 
474 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
475 						struct mlxsw_sp_vr *vr,
476 						enum mlxsw_sp_l3proto proto)
477 {
478 	struct mlxsw_sp_lpm_tree *lpm_tree;
479 	struct mlxsw_sp_fib *fib;
480 	int err;
481 
482 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
483 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
484 	if (!fib)
485 		return ERR_PTR(-ENOMEM);
486 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
487 	if (err)
488 		goto err_rhashtable_init;
489 	INIT_LIST_HEAD(&fib->node_list);
490 	fib->proto = proto;
491 	fib->vr = vr;
492 	fib->lpm_tree = lpm_tree;
493 	mlxsw_sp_lpm_tree_hold(lpm_tree);
494 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
495 	if (err)
496 		goto err_lpm_tree_bind;
497 	return fib;
498 
499 err_lpm_tree_bind:
500 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
501 err_rhashtable_init:
502 	kfree(fib);
503 	return ERR_PTR(err);
504 }
505 
506 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
507 				 struct mlxsw_sp_fib *fib)
508 {
509 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
510 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
511 	WARN_ON(!list_empty(&fib->node_list));
512 	rhashtable_destroy(&fib->ht);
513 	kfree(fib);
514 }
515 
516 static struct mlxsw_sp_lpm_tree *
517 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
518 {
	struct mlxsw_sp_lpm_tree *lpm_tree;
520 	int i;
521 
522 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
523 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
524 		if (lpm_tree->ref_count == 0)
525 			return lpm_tree;
526 	}
527 	return NULL;
528 }
529 
530 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
531 				   struct mlxsw_sp_lpm_tree *lpm_tree)
532 {
533 	char ralta_pl[MLXSW_REG_RALTA_LEN];
534 
535 	mlxsw_reg_ralta_pack(ralta_pl, true,
536 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
537 			     lpm_tree->id);
538 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
539 }
540 
541 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
542 				   struct mlxsw_sp_lpm_tree *lpm_tree)
543 {
544 	char ralta_pl[MLXSW_REG_RALTA_LEN];
545 
546 	mlxsw_reg_ralta_pack(ralta_pl, false,
547 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
548 			     lpm_tree->id);
549 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
550 }
551 
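/* Program the tree structure: each used prefix length becomes a bin whose
 * left child is the previously used (shorter) prefix length, so the longest
 * used prefix length ends up as the root bin.
 */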
552 static int
553 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
554 				  struct mlxsw_sp_prefix_usage *prefix_usage,
555 				  struct mlxsw_sp_lpm_tree *lpm_tree)
556 {
557 	char ralst_pl[MLXSW_REG_RALST_LEN];
558 	u8 root_bin = 0;
559 	u8 prefix;
560 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
561 
562 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
563 		root_bin = prefix;
564 
565 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
566 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
567 		if (prefix == 0)
568 			continue;
569 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
570 					 MLXSW_REG_RALST_BIN_NO_CHILD);
571 		last_prefix = prefix;
572 	}
573 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
574 }
575 
576 static struct mlxsw_sp_lpm_tree *
577 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
578 			 struct mlxsw_sp_prefix_usage *prefix_usage,
579 			 enum mlxsw_sp_l3proto proto)
580 {
581 	struct mlxsw_sp_lpm_tree *lpm_tree;
582 	int err;
583 
584 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
585 	if (!lpm_tree)
586 		return ERR_PTR(-EBUSY);
587 	lpm_tree->proto = proto;
588 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
589 	if (err)
590 		return ERR_PTR(err);
591 
592 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
593 						lpm_tree);
594 	if (err)
595 		goto err_left_struct_set;
596 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
597 	       sizeof(lpm_tree->prefix_usage));
598 	memset(&lpm_tree->prefix_ref_count, 0,
599 	       sizeof(lpm_tree->prefix_ref_count));
600 	lpm_tree->ref_count = 1;
601 	return lpm_tree;
602 
603 err_left_struct_set:
604 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
605 	return ERR_PTR(err);
606 }
607 
608 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
609 				      struct mlxsw_sp_lpm_tree *lpm_tree)
610 {
611 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
612 }
613 
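/* Get a tree matching the given prefix usage and protocol: take a reference
 * on an existing tree when one matches, create a new one otherwise.
 */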
614 static struct mlxsw_sp_lpm_tree *
615 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
616 		      struct mlxsw_sp_prefix_usage *prefix_usage,
617 		      enum mlxsw_sp_l3proto proto)
618 {
619 	struct mlxsw_sp_lpm_tree *lpm_tree;
620 	int i;
621 
622 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
623 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
624 		if (lpm_tree->ref_count != 0 &&
625 		    lpm_tree->proto == proto &&
626 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
627 					     prefix_usage)) {
628 			mlxsw_sp_lpm_tree_hold(lpm_tree);
629 			return lpm_tree;
630 		}
631 	}
632 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
633 }
634 
635 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
636 {
637 	lpm_tree->ref_count++;
638 }
639 
640 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
641 				  struct mlxsw_sp_lpm_tree *lpm_tree)
642 {
643 	if (--lpm_tree->ref_count == 0)
644 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
645 }
646 
647 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
648 
649 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
650 {
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 }};
652 	struct mlxsw_sp_lpm_tree *lpm_tree;
653 	u64 max_trees;
654 	int err, i;
655 
656 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
657 		return -EIO;
658 
659 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
660 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
661 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
662 					     sizeof(struct mlxsw_sp_lpm_tree),
663 					     GFP_KERNEL);
664 	if (!mlxsw_sp->router->lpm.trees)
665 		return -ENOMEM;
666 
667 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
668 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
669 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
670 	}
671 
672 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
673 					 MLXSW_SP_L3_PROTO_IPV4);
674 	if (IS_ERR(lpm_tree)) {
675 		err = PTR_ERR(lpm_tree);
676 		goto err_ipv4_tree_get;
677 	}
678 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
679 
680 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
681 					 MLXSW_SP_L3_PROTO_IPV6);
682 	if (IS_ERR(lpm_tree)) {
683 		err = PTR_ERR(lpm_tree);
684 		goto err_ipv6_tree_get;
685 	}
686 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
687 
688 	return 0;
689 
690 err_ipv6_tree_get:
691 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
692 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
693 err_ipv4_tree_get:
694 	kfree(mlxsw_sp->router->lpm.trees);
695 	return err;
696 }
697 
698 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
699 {
700 	struct mlxsw_sp_lpm_tree *lpm_tree;
701 
702 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
703 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
704 
705 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
706 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
707 
708 	kfree(mlxsw_sp->router->lpm.trees);
709 }
710 
711 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
712 {
713 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
714 }
715 
716 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
717 {
718 	struct mlxsw_sp_vr *vr;
719 	int i;
720 
721 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
722 		vr = &mlxsw_sp->router->vrs[i];
723 		if (!mlxsw_sp_vr_is_used(vr))
724 			return vr;
725 	}
726 	return NULL;
727 }
728 
729 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
730 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
731 {
732 	char raltb_pl[MLXSW_REG_RALTB_LEN];
733 
734 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
735 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
736 			     tree_id);
737 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
738 }
739 
740 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
741 				       const struct mlxsw_sp_fib *fib)
742 {
743 	char raltb_pl[MLXSW_REG_RALTB_LEN];
744 
	/* Bind to tree 0, which is the default */
746 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
747 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
748 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
749 }
750 
751 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
752 {
753 	/* For our purpose, squash main, default and local tables into one */
754 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
755 		tb_id = RT_TABLE_MAIN;
756 	return tb_id;
757 }
758 
759 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
760 					    u32 tb_id)
761 {
762 	struct mlxsw_sp_vr *vr;
763 	int i;
764 
765 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
766 
767 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
768 		vr = &mlxsw_sp->router->vrs[i];
769 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
770 			return vr;
771 	}
772 	return NULL;
773 }
774 
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776 					    enum mlxsw_sp_l3proto proto)
777 {
778 	switch (proto) {
779 	case MLXSW_SP_L3_PROTO_IPV4:
780 		return vr->fib4;
781 	case MLXSW_SP_L3_PROTO_IPV6:
782 		return vr->fib6;
783 	}
784 	return NULL;
785 }
786 
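/* Associate a kernel FIB table with an unused virtual router, creating the
 * IPv4 and IPv6 unicast FIBs and the IPv4 multicast table that back it.
 */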
787 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
788 					      u32 tb_id,
789 					      struct netlink_ext_ack *extack)
790 {
791 	struct mlxsw_sp_mr_table *mr4_table;
792 	struct mlxsw_sp_fib *fib4;
793 	struct mlxsw_sp_fib *fib6;
794 	struct mlxsw_sp_vr *vr;
795 	int err;
796 
797 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
798 	if (!vr) {
799 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
800 		return ERR_PTR(-EBUSY);
801 	}
802 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
803 	if (IS_ERR(fib4))
804 		return ERR_CAST(fib4);
805 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
806 	if (IS_ERR(fib6)) {
807 		err = PTR_ERR(fib6);
808 		goto err_fib6_create;
809 	}
810 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
811 					     MLXSW_SP_L3_PROTO_IPV4);
812 	if (IS_ERR(mr4_table)) {
813 		err = PTR_ERR(mr4_table);
814 		goto err_mr_table_create;
815 	}
816 	vr->fib4 = fib4;
817 	vr->fib6 = fib6;
818 	vr->mr4_table = mr4_table;
819 	vr->tb_id = tb_id;
820 	return vr;
821 
822 err_mr_table_create:
823 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
824 err_fib6_create:
825 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
826 	return ERR_PTR(err);
827 }
828 
829 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
830 				struct mlxsw_sp_vr *vr)
831 {
832 	mlxsw_sp_mr_table_destroy(vr->mr4_table);
833 	vr->mr4_table = NULL;
834 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
835 	vr->fib6 = NULL;
836 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
837 	vr->fib4 = NULL;
838 }
839 
840 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
841 					   struct netlink_ext_ack *extack)
842 {
843 	struct mlxsw_sp_vr *vr;
844 
845 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
846 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
847 	if (!vr)
848 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
849 	return vr;
850 }
851 
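/* A VR has no explicit reference count; it is destroyed as soon as no RIF
 * uses it and all of its tables are empty.
 */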
852 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
853 {
854 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
855 	    list_empty(&vr->fib6->node_list) &&
856 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
857 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
858 }
859 
860 static bool
861 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
862 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
863 {
864 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
865 
866 	if (!mlxsw_sp_vr_is_used(vr))
867 		return false;
868 	if (fib->lpm_tree->id == tree_id)
869 		return true;
870 	return false;
871 }
872 
873 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
874 					struct mlxsw_sp_fib *fib,
875 					struct mlxsw_sp_lpm_tree *new_tree)
876 {
877 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
878 	int err;
879 
880 	fib->lpm_tree = new_tree;
881 	mlxsw_sp_lpm_tree_hold(new_tree);
882 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
883 	if (err)
884 		goto err_tree_bind;
885 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
886 	return 0;
887 
888 err_tree_bind:
889 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
890 	fib->lpm_tree = old_tree;
891 	return err;
892 }
893 
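/* Rebind every virtual router that is bound to the protocol's current
 * default tree to the new tree, and make the new tree the default. On
 * failure, the already rebound VRs are rolled back to the old tree.
 */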
894 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
895 					 struct mlxsw_sp_fib *fib,
896 					 struct mlxsw_sp_lpm_tree *new_tree)
897 {
898 	enum mlxsw_sp_l3proto proto = fib->proto;
899 	struct mlxsw_sp_lpm_tree *old_tree;
900 	u8 old_id, new_id = new_tree->id;
901 	struct mlxsw_sp_vr *vr;
902 	int i, err;
903 
904 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
905 	old_id = old_tree->id;
906 
907 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
908 		vr = &mlxsw_sp->router->vrs[i];
909 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
910 			continue;
911 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
912 						   mlxsw_sp_vr_fib(vr, proto),
913 						   new_tree);
914 		if (err)
915 			goto err_tree_replace;
916 	}
917 
918 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
919 	       sizeof(new_tree->prefix_ref_count));
920 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
921 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
922 
923 	return 0;
924 
err_tree_replace:
	/* Roll back the VRs that were already bound to the new tree. Without
	 * reassigning vr each iteration, the loop would keep inspecting the
	 * VR that failed.
	 */
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
933 	return err;
934 }
935 
936 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
937 {
938 	struct mlxsw_sp_vr *vr;
939 	u64 max_vrs;
940 	int i;
941 
942 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
943 		return -EIO;
944 
945 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
946 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
947 					GFP_KERNEL);
948 	if (!mlxsw_sp->router->vrs)
949 		return -ENOMEM;
950 
951 	for (i = 0; i < max_vrs; i++) {
952 		vr = &mlxsw_sp->router->vrs[i];
953 		vr->id = i;
954 	}
955 
956 	return 0;
957 }
958 
959 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
960 
961 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
962 {
963 	/* At this stage we're guaranteed not to have new incoming
964 	 * FIB notifications and the work queue is free from FIBs
965 	 * sitting on top of mlxsw netdevs. However, we can still
966 	 * have other FIBs queued. Flush the queue before flushing
967 	 * the device's tables. No need for locks, as we're the only
968 	 * writer.
969 	 */
970 	mlxsw_core_flush_owq();
971 	mlxsw_sp_router_fib_flush(mlxsw_sp);
972 	kfree(mlxsw_sp->router->vrs);
973 }
974 
975 static struct net_device *
976 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
977 {
978 	struct ip_tunnel *tun = netdev_priv(ol_dev);
979 	struct net *net = dev_net(ol_dev);
980 
981 	return __dev_get_by_index(net, tun->parms.link);
982 }
983 
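/* Return the underlay table of the tunnel: the FIB table of the netdevice
 * that the tunnel is bound to (parms.link) if there is one, otherwise that
 * of the tunnel device itself. Either way, the main table is used when the
 * device is not enslaved to an L3 master device.
 */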
984 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
985 {
986 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
987 
988 	if (d)
989 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
990 	else
991 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
992 }
993 
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
998 
999 static struct mlxsw_sp_rif_ipip_lb *
1000 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1001 				enum mlxsw_sp_ipip_type ipipt,
1002 				struct net_device *ol_dev,
1003 				struct netlink_ext_ack *extack)
1004 {
1005 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1006 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1007 	struct mlxsw_sp_rif *rif;
1008 
1009 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1010 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1011 		.common.dev = ol_dev,
1012 		.common.lag = false,
1013 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1014 	};
1015 
1016 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1017 	if (IS_ERR(rif))
1018 		return ERR_CAST(rif);
1019 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1020 }
1021 
1022 static struct mlxsw_sp_ipip_entry *
1023 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1024 			  enum mlxsw_sp_ipip_type ipipt,
1025 			  struct net_device *ol_dev)
1026 {
1027 	struct mlxsw_sp_ipip_entry *ipip_entry;
1028 	struct mlxsw_sp_ipip_entry *ret = NULL;
1029 
1030 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1031 	if (!ipip_entry)
1032 		return ERR_PTR(-ENOMEM);
1033 
1034 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1035 							    ol_dev, NULL);
1036 	if (IS_ERR(ipip_entry->ol_lb)) {
1037 		ret = ERR_CAST(ipip_entry->ol_lb);
1038 		goto err_ol_ipip_lb_create;
1039 	}
1040 
1041 	ipip_entry->ipipt = ipipt;
1042 	ipip_entry->ol_dev = ol_dev;
1043 	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
1044 
1045 	return ipip_entry;
1046 
1047 err_ol_ipip_lb_create:
1048 	kfree(ipip_entry);
1049 	return ret;
1050 }
1051 
1052 static void
1053 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1054 {
1055 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1056 	kfree(ipip_entry);
1057 }
1058 
1059 static bool
1060 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1061 				  const enum mlxsw_sp_l3proto ul_proto,
1062 				  union mlxsw_sp_l3addr saddr,
1063 				  u32 ul_tb_id,
1064 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1065 {
1066 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1067 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1068 	union mlxsw_sp_l3addr tun_saddr;
1069 
1070 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1071 		return false;
1072 
1073 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1074 	return tun_ul_tb_id == ul_tb_id &&
1075 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1076 }
1077 
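/* Reserve a KVD linear entry for the tunnel decap and cross-link the FIB
 * entry with the IPIP entry that it is the decap route of.
 */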
1078 static int
1079 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1080 			      struct mlxsw_sp_fib_entry *fib_entry,
1081 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1082 {
1083 	u32 tunnel_index;
1084 	int err;
1085 
1086 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1087 	if (err)
1088 		return err;
1089 
1090 	ipip_entry->decap_fib_entry = fib_entry;
1091 	fib_entry->decap.ipip_entry = ipip_entry;
1092 	fib_entry->decap.tunnel_index = tunnel_index;
1093 	return 0;
1094 }
1095 
1096 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1097 					  struct mlxsw_sp_fib_entry *fib_entry)
1098 {
	/* Unlink this FIB entry from the IPIP entry whose decap route it is. */
1100 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1101 	fib_entry->decap.ipip_entry = NULL;
1102 	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
1103 }
1104 
1105 static struct mlxsw_sp_fib_node *
1106 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1107 			 size_t addr_len, unsigned char prefix_len);
1108 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1109 				     struct mlxsw_sp_fib_entry *fib_entry);
1110 
1111 static void
1112 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1113 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1114 {
1115 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1116 
1117 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1118 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1119 
1120 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1121 }
1122 
1123 static void
1124 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1125 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1126 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1127 {
1128 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1129 					  ipip_entry))
1130 		return;
1131 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1132 
1133 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1134 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1135 }
1136 
1137 /* Given an IPIP entry, find the corresponding decap route. */
1138 static struct mlxsw_sp_fib_entry *
1139 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1140 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1141 {
	struct mlxsw_sp_fib_node *fib_node;
1143 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1144 	struct mlxsw_sp_fib_entry *fib_entry;
1145 	unsigned char saddr_prefix_len;
1146 	union mlxsw_sp_l3addr saddr;
1147 	struct mlxsw_sp_fib *ul_fib;
1148 	struct mlxsw_sp_vr *ul_vr;
1149 	const void *saddrp;
1150 	size_t saddr_len;
1151 	u32 ul_tb_id;
1152 	u32 saddr4;
1153 
1154 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1155 
1156 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1157 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1158 	if (!ul_vr)
1159 		return NULL;
1160 
1161 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1162 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1163 					   ipip_entry->ol_dev);
1164 
1165 	switch (ipip_ops->ul_proto) {
1166 	case MLXSW_SP_L3_PROTO_IPV4:
1167 		saddr4 = be32_to_cpu(saddr.addr4);
1168 		saddrp = &saddr4;
1169 		saddr_len = 4;
1170 		saddr_prefix_len = 32;
1171 		break;
1172 	case MLXSW_SP_L3_PROTO_IPV6:
1173 		WARN_ON(1);
1174 		return NULL;
1175 	}
1176 
1177 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1178 					    saddr_prefix_len);
1179 	if (!fib_node || list_empty(&fib_node->entry_list))
1180 		return NULL;
1181 
1182 	fib_entry = list_first_entry(&fib_node->entry_list,
1183 				     struct mlxsw_sp_fib_entry, list);
1184 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1185 		return NULL;
1186 
1187 	return fib_entry;
1188 }
1189 
1190 static struct mlxsw_sp_ipip_entry *
1191 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1192 			   enum mlxsw_sp_ipip_type ipipt,
1193 			   struct net_device *ol_dev)
1194 {
1195 	struct mlxsw_sp_ipip_entry *ipip_entry;
1196 
1197 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1198 	if (IS_ERR(ipip_entry))
1199 		return ipip_entry;
1200 
1201 	list_add_tail(&ipip_entry->ipip_list_node,
1202 		      &mlxsw_sp->router->ipip_list);
1203 
1204 	return ipip_entry;
1205 }
1206 
1207 static void
1208 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1209 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1210 {
1211 	list_del(&ipip_entry->ipip_list_node);
1212 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1213 }
1214 
1215 static bool
1216 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1217 				  const struct net_device *ul_dev,
1218 				  enum mlxsw_sp_l3proto ul_proto,
1219 				  union mlxsw_sp_l3addr ul_dip,
1220 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1221 {
1222 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1223 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1224 	struct net_device *ipip_ul_dev;
1225 
1226 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1227 		return false;
1228 
1229 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1230 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1231 						 ul_tb_id, ipip_entry) &&
1232 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1233 }
1234 
1235 /* Given decap parameters, find the corresponding IPIP entry. */
1236 static struct mlxsw_sp_ipip_entry *
1237 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1238 				  const struct net_device *ul_dev,
1239 				  enum mlxsw_sp_l3proto ul_proto,
1240 				  union mlxsw_sp_l3addr ul_dip)
1241 {
1242 	struct mlxsw_sp_ipip_entry *ipip_entry;
1243 
1244 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1245 			    ipip_list_node)
1246 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1247 						      ul_proto, ul_dip,
1248 						      ipip_entry))
1249 			return ipip_entry;
1250 
1251 	return NULL;
1252 }
1253 
1254 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1255 				      const struct net_device *dev,
1256 				      enum mlxsw_sp_ipip_type *p_type)
1257 {
1258 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1259 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1260 	enum mlxsw_sp_ipip_type ipipt;
1261 
1262 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1263 		ipip_ops = router->ipip_ops_arr[ipipt];
1264 		if (dev->type == ipip_ops->dev_type) {
1265 			if (p_type)
1266 				*p_type = ipipt;
1267 			return true;
1268 		}
1269 	}
1270 	return false;
1271 }
1272 
1273 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1274 				const struct net_device *dev)
1275 {
1276 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1277 }
1278 
1279 static struct mlxsw_sp_ipip_entry *
1280 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1281 				   const struct net_device *ol_dev)
1282 {
1283 	struct mlxsw_sp_ipip_entry *ipip_entry;
1284 
1285 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1286 			    ipip_list_node)
1287 		if (ipip_entry->ol_dev == ol_dev)
1288 			return ipip_entry;
1289 
1290 	return NULL;
1291 }
1292 
1293 static struct mlxsw_sp_ipip_entry *
1294 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1295 				   const struct net_device *ul_dev,
1296 				   struct mlxsw_sp_ipip_entry *start)
1297 {
1298 	struct mlxsw_sp_ipip_entry *ipip_entry;
1299 
1300 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1301 					ipip_list_node);
1302 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1303 				     ipip_list_node) {
1304 		struct net_device *ipip_ul_dev =
1305 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1306 
1307 		if (ipip_ul_dev == ul_dev)
1308 			return ipip_entry;
1309 	}
1310 
1311 	return NULL;
1312 }
1313 
1314 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1315 				const struct net_device *dev)
1316 {
1317 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1318 }
1319 
1320 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1321 						const struct net_device *ol_dev,
1322 						enum mlxsw_sp_ipip_type ipipt)
1323 {
1324 	const struct mlxsw_sp_ipip_ops *ops
1325 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1326 
	/* For deciding whether decap should be offloaded, we don't care about
	 * the overlay protocol, so ask whether either one is supported.
	 */
1330 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1331 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1332 }
1333 
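/* Handle NETDEV_REGISTER of a tunnel device: offload the new tunnel, unless
 * it shares its local address with a tunnel that is already offloaded, in
 * which case the existing tunnel is demoted and neither is offloaded.
 */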
1334 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1335 						struct net_device *ol_dev)
1336 {
1337 	struct mlxsw_sp_ipip_entry *ipip_entry;
1338 	enum mlxsw_sp_l3proto ul_proto;
1339 	enum mlxsw_sp_ipip_type ipipt;
1340 	union mlxsw_sp_l3addr saddr;
1341 	u32 ul_tb_id;
1342 
1343 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1344 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1345 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1346 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1347 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1348 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1349 							  saddr, ul_tb_id,
1350 							  NULL)) {
1351 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1352 								ol_dev);
1353 			if (IS_ERR(ipip_entry))
1354 				return PTR_ERR(ipip_entry);
1355 		}
1356 	}
1357 
1358 	return 0;
1359 }
1360 
1361 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1362 						   struct net_device *ol_dev)
1363 {
1364 	struct mlxsw_sp_ipip_entry *ipip_entry;
1365 
1366 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1367 	if (ipip_entry)
1368 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1369 }
1370 
1371 static void
1372 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1373 				struct mlxsw_sp_ipip_entry *ipip_entry)
1374 {
1375 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1376 
1377 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1378 	if (decap_fib_entry)
1379 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1380 						  decap_fib_entry);
1381 }
1382 
1383 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1384 						struct net_device *ol_dev)
1385 {
1386 	struct mlxsw_sp_ipip_entry *ipip_entry;
1387 
1388 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1389 	if (ipip_entry)
1390 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1391 }
1392 
1393 static void
1394 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1395 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1396 {
1397 	if (ipip_entry->decap_fib_entry)
1398 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1399 }
1400 
1401 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1402 						  struct net_device *ol_dev)
1403 {
1404 	struct mlxsw_sp_ipip_entry *ipip_entry;
1405 
1406 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1407 	if (ipip_entry)
1408 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1409 }
1410 
1411 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1412 					 struct mlxsw_sp_rif *old_rif,
1413 					 struct mlxsw_sp_rif *new_rif);
1414 static int
1415 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1416 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1417 				 bool keep_encap,
1418 				 struct netlink_ext_ack *extack)
1419 {
1420 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1421 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1422 
1423 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1424 						     ipip_entry->ipipt,
1425 						     ipip_entry->ol_dev,
1426 						     extack);
1427 	if (IS_ERR(new_lb_rif))
1428 		return PTR_ERR(new_lb_rif);
1429 	ipip_entry->ol_lb = new_lb_rif;
1430 
1431 	if (keep_encap)
1432 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1433 					     &new_lb_rif->common);
1434 
1435 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1436 
1437 	return 0;
1438 }
1439 
1440 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1441 					struct mlxsw_sp_rif *rif);
1442 
/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload of an IPIP entry.
 * @mlxsw_sp: mlxsw_sp.
 * @ipip_entry: IPIP entry whose offload should be updated.
 * @recreate_loopback: recreate the associated loopback RIF.
 * @keep_encap: update next hops that use the tunnel netdevice. Only relevant
 *              when recreate_loopback is true.
 * @update_nexthops: update next hops, keeping the current loopback RIF. Only
 *                   relevant when recreate_loopback is false.
 * @extack: extack for error reporting.
 *
 * The decap offload is always updated.
 */
1452 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1453 					struct mlxsw_sp_ipip_entry *ipip_entry,
1454 					bool recreate_loopback,
1455 					bool keep_encap,
1456 					bool update_nexthops,
1457 					struct netlink_ext_ack *extack)
1458 {
1459 	int err;
1460 
1461 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1462 	 * recreate it. That creates a window of opportunity where RALUE and
1463 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1464 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1465 	 * of RALUE, demote the decap route back.
1466 	 */
1467 	if (ipip_entry->decap_fib_entry)
1468 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1469 
1470 	if (recreate_loopback) {
1471 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1472 						       keep_encap, extack);
1473 		if (err)
1474 			return err;
1475 	} else if (update_nexthops) {
1476 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1477 					    &ipip_entry->ol_lb->common);
1478 	}
1479 
1480 	if (ipip_entry->ol_dev->flags & IFF_UP)
1481 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1482 
1483 	return 0;
1484 }
1485 
1486 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1487 						struct net_device *ol_dev,
1488 						struct netlink_ext_ack *extack)
1489 {
1490 	struct mlxsw_sp_ipip_entry *ipip_entry =
1491 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1492 	enum mlxsw_sp_l3proto ul_proto;
1493 	union mlxsw_sp_l3addr saddr;
1494 	u32 ul_tb_id;
1495 
1496 	if (!ipip_entry)
1497 		return 0;
1498 
1499 	/* For flat configuration cases, moving overlay to a different VRF might
1500 	 * cause local address conflict, and the conflicting tunnels need to be
1501 	 * demoted.
1502 	 */
1503 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1504 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1505 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1506 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1507 						 saddr, ul_tb_id,
1508 						 ipip_entry)) {
1509 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1510 		return 0;
1511 	}
1512 
1513 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1514 						   true, false, false, extack);
1515 }
1516 
1517 static int
1518 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1519 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1520 				     struct net_device *ul_dev,
1521 				     struct netlink_ext_ack *extack)
1522 {
1523 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1524 						   true, true, false, extack);
1525 }
1526 
1527 static int
1528 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1529 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1530 				    struct net_device *ul_dev)
1531 {
1532 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1533 						   false, false, true, NULL);
1534 }
1535 
1536 static int
1537 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1538 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1539 				      struct net_device *ul_dev)
1540 {
1541 	/* A down underlay device causes encapsulated packets to not be
1542 	 * forwarded, but decap still works. So refresh next hops without
1543 	 * touching anything else.
1544 	 */
1545 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1546 						   false, false, true, NULL);
1547 }
1548 
1549 static int
1550 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1551 					struct net_device *ol_dev,
1552 					struct netlink_ext_ack *extack)
1553 {
1554 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1555 	struct mlxsw_sp_ipip_entry *ipip_entry;
1556 	int err;
1557 
1558 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1559 	if (!ipip_entry)
1560 		/* A change might make a tunnel eligible for offloading, but
1561 		 * that is currently not implemented. What falls to slow path
1562 		 * stays there.
1563 		 */
1564 		return 0;
1565 
1566 	/* A change might make a tunnel not eligible for offloading. */
1567 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1568 						 ipip_entry->ipipt)) {
1569 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1570 		return 0;
1571 	}
1572 
1573 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1574 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1575 	return err;
1576 }
1577 
1578 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1579 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1580 {
1581 	struct net_device *ol_dev = ipip_entry->ol_dev;
1582 
1583 	if (ol_dev->flags & IFF_UP)
1584 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1585 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1586 }
1587 
1588 /* The configuration where several tunnels have the same local address in the
1589  * same underlay table needs special treatment in the HW. That is currently not
1590  * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in via the argument
 * `except'.
1593  */
1594 bool
1595 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1596 				     enum mlxsw_sp_l3proto ul_proto,
1597 				     union mlxsw_sp_l3addr saddr,
1598 				     u32 ul_tb_id,
1599 				     const struct mlxsw_sp_ipip_entry *except)
1600 {
1601 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1602 
1603 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1604 				 ipip_list_node) {
1605 		if (ipip_entry != except &&
1606 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1607 						      ul_tb_id, ipip_entry)) {
1608 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1609 			return true;
1610 		}
1611 	}
1612 
1613 	return false;
1614 }
1615 
1616 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1617 						     struct net_device *ul_dev)
1618 {
1619 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1620 
1621 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1622 				 ipip_list_node) {
1623 		struct net_device *ipip_ul_dev =
1624 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1625 
1626 		if (ipip_ul_dev == ul_dev)
1627 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1628 	}
1629 }
1630 
1631 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1632 				     struct net_device *ol_dev,
1633 				     unsigned long event,
1634 				     struct netdev_notifier_info *info)
1635 {
1636 	struct netdev_notifier_changeupper_info *chup;
1637 	struct netlink_ext_ack *extack;
1638 
1639 	switch (event) {
1640 	case NETDEV_REGISTER:
1641 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1642 	case NETDEV_UNREGISTER:
1643 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1644 		return 0;
1645 	case NETDEV_UP:
1646 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1647 		return 0;
1648 	case NETDEV_DOWN:
1649 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1650 		return 0;
1651 	case NETDEV_CHANGEUPPER:
1652 		chup = container_of(info, typeof(*chup), info);
1653 		extack = info->extack;
1654 		if (netif_is_l3_master(chup->upper_dev))
1655 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1656 								    ol_dev,
1657 								    extack);
1658 		return 0;
1659 	case NETDEV_CHANGE:
1660 		extack = info->extack;
1661 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1662 							       ol_dev, extack);
1663 	}
1664 	return 0;
1665 }
1666 
1667 static int
1668 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1669 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1670 				   struct net_device *ul_dev,
1671 				   unsigned long event,
1672 				   struct netdev_notifier_info *info)
1673 {
1674 	struct netdev_notifier_changeupper_info *chup;
1675 	struct netlink_ext_ack *extack;
1676 
1677 	switch (event) {
1678 	case NETDEV_CHANGEUPPER:
1679 		chup = container_of(info, typeof(*chup), info);
1680 		extack = info->extack;
1681 		if (netif_is_l3_master(chup->upper_dev))
1682 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1683 								    ipip_entry,
1684 								    ul_dev,
1685 								    extack);
1686 		break;
1687 
1688 	case NETDEV_UP:
1689 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1690 							   ul_dev);
1691 	case NETDEV_DOWN:
1692 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1693 							     ipip_entry,
1694 							     ul_dev);
1695 	}
1696 	return 0;
1697 }
1698 
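/* Dispatch an event on an underlay netdevice to all tunnels that use it as
 * their underlay. A failure for any of them demotes every tunnel with that
 * underlay device.
 */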
1699 int
1700 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1701 				 struct net_device *ul_dev,
1702 				 unsigned long event,
1703 				 struct netdev_notifier_info *info)
1704 {
1705 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1706 	int err;
1707 
1708 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1709 								ul_dev,
1710 								ipip_entry))) {
1711 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1712 							 ul_dev, event, info);
1713 		if (err) {
1714 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1715 								 ul_dev);
1716 			return err;
1717 		}
1718 	}
1719 
1720 	return 0;
1721 }
1722 
1723 struct mlxsw_sp_neigh_key {
1724 	struct neighbour *n;
1725 };
1726 
1727 struct mlxsw_sp_neigh_entry {
1728 	struct list_head rif_list_node;
1729 	struct rhash_head ht_node;
1730 	struct mlxsw_sp_neigh_key key;
1731 	u16 rif;
1732 	bool connected;
1733 	unsigned char ha[ETH_ALEN];
1734 	struct list_head nexthop_list; /* list of nexthops using
1735 					* this neigh entry
1736 					*/
1737 	struct list_head nexthop_neighs_list_node;
1738 	unsigned int counter_index;
1739 	bool counter_valid;
1740 };
1741 
1742 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1743 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1744 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1745 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1746 };
1747 
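/* Iterate over the neighbour entries of a RIF: pass NULL to get the
 * first entry, and NULL is returned past the last one. A typical walk
 * (e.g. when dumping host table entries) might look like:
 *
 *	struct mlxsw_sp_neigh_entry *neigh_entry = NULL;
 *
 *	while ((neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)))
 *		...;
 */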
1748 struct mlxsw_sp_neigh_entry *
1749 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1750 			struct mlxsw_sp_neigh_entry *neigh_entry)
1751 {
1752 	if (!neigh_entry) {
1753 		if (list_empty(&rif->neigh_list))
1754 			return NULL;
1755 		else
1756 			return list_first_entry(&rif->neigh_list,
1757 						typeof(*neigh_entry),
1758 						rif_list_node);
1759 	}
1760 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1761 		return NULL;
1762 	return list_next_entry(neigh_entry, rif_list_node);
1763 }
1764 
1765 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1766 {
1767 	return neigh_entry->key.n->tbl->family;
1768 }
1769 
1770 unsigned char *
1771 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1772 {
1773 	return neigh_entry->ha;
1774 }
1775 
1776 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1777 {
1778 	struct neighbour *n;
1779 
1780 	n = neigh_entry->key.n;
1781 	return ntohl(*((__be32 *) n->primary_key));
1782 }
1783 
1784 struct in6_addr *
1785 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1786 {
1787 	struct neighbour *n;
1788 
1789 	n = neigh_entry->key.n;
1790 	return (struct in6_addr *) &n->primary_key;
1791 }
1792 
1793 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1794 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1795 			       u64 *p_counter)
1796 {
1797 	if (!neigh_entry->counter_valid)
1798 		return -EINVAL;
1799 
1800 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1801 					 p_counter, NULL);
1802 }
1803 
1804 static struct mlxsw_sp_neigh_entry *
1805 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1806 			   u16 rif)
1807 {
1808 	struct mlxsw_sp_neigh_entry *neigh_entry;
1809 
1810 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1811 	if (!neigh_entry)
1812 		return NULL;
1813 
1814 	neigh_entry->key.n = n;
1815 	neigh_entry->rif = rif;
1816 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1817 
1818 	return neigh_entry;
1819 }
1820 
1821 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1822 {
1823 	kfree(neigh_entry);
1824 }
1825 
1826 static int
1827 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1828 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1829 {
1830 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1831 				      &neigh_entry->ht_node,
1832 				      mlxsw_sp_neigh_ht_params);
1833 }
1834 
1835 static void
1836 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1837 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1838 {
1839 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1840 			       &neigh_entry->ht_node,
1841 			       mlxsw_sp_neigh_ht_params);
1842 }
1843 
1844 static bool
1845 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1846 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1847 {
1848 	struct devlink *devlink;
1849 	const char *table_name;
1850 
1851 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1852 	case AF_INET:
1853 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1854 		break;
1855 	case AF_INET6:
1856 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1857 		break;
1858 	default:
1859 		WARN_ON(1);
1860 		return false;
1861 	}
1862 
1863 	devlink = priv_to_devlink(mlxsw_sp->core);
1864 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1865 }
1866 
1867 static void
1868 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1869 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1870 {
1871 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1872 		return;
1873 
1874 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1875 		return;
1876 
1877 	neigh_entry->counter_valid = true;
1878 }
1879 
1880 static void
1881 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1882 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1883 {
1884 	if (!neigh_entry->counter_valid)
1885 		return;
1886 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1887 				   neigh_entry->counter_index);
1888 	neigh_entry->counter_valid = false;
1889 }
1890 
1891 static struct mlxsw_sp_neigh_entry *
1892 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1893 {
1894 	struct mlxsw_sp_neigh_entry *neigh_entry;
1895 	struct mlxsw_sp_rif *rif;
1896 	int err;
1897 
1898 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1899 	if (!rif)
1900 		return ERR_PTR(-EINVAL);
1901 
1902 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1903 	if (!neigh_entry)
1904 		return ERR_PTR(-ENOMEM);
1905 
1906 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1907 	if (err)
1908 		goto err_neigh_entry_insert;
1909 
1910 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1911 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1912 
1913 	return neigh_entry;
1914 
1915 err_neigh_entry_insert:
1916 	mlxsw_sp_neigh_entry_free(neigh_entry);
1917 	return ERR_PTR(err);
1918 }
1919 
1920 static void
1921 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1922 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1923 {
1924 	list_del(&neigh_entry->rif_list_node);
1925 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1926 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1927 	mlxsw_sp_neigh_entry_free(neigh_entry);
1928 }
1929 
1930 static struct mlxsw_sp_neigh_entry *
1931 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1932 {
1933 	struct mlxsw_sp_neigh_key key;
1934 
1935 	key.n = n;
1936 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1937 				      &key, mlxsw_sp_neigh_ht_params);
1938 }
1939 
1940 static void
1941 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1942 {
1943 	unsigned long interval;
1944 
1945 #if IS_ENABLED(CONFIG_IPV6)
1946 	interval = min_t(unsigned long,
1947 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1948 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1949 #else
1950 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1951 #endif
1952 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
1953 }
1954 
1955 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1956 						   char *rauhtd_pl,
1957 						   int ent_index)
1958 {
1959 	struct net_device *dev;
1960 	struct neighbour *n;
1961 	__be32 dipn;
1962 	u32 dip;
1963 	u16 rif;
1964 
1965 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1966 
1967 	if (!mlxsw_sp->router->rifs[rif]) {
1968 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1969 		return;
1970 	}
1971 
1972 	dipn = htonl(dip);
1973 	dev = mlxsw_sp->router->rifs[rif]->dev;
1974 	n = neigh_lookup(&arp_tbl, &dipn, dev);
1975 	if (!n)
1976 		return;
1977 
1978 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
1979 	neigh_event_send(n, NULL);
1980 	neigh_release(n);
1981 }
1982 
1983 #if IS_ENABLED(CONFIG_IPV6)
1984 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
1985 						   char *rauhtd_pl,
1986 						   int rec_index)
1987 {
1988 	struct net_device *dev;
1989 	struct neighbour *n;
1990 	struct in6_addr dip;
1991 	u16 rif;
1992 
1993 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
1994 					 (char *) &dip);
1995 
1996 	if (!mlxsw_sp->router->rifs[rif]) {
1997 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1998 		return;
1999 	}
2000 
2001 	dev = mlxsw_sp->router->rifs[rif]->dev;
2002 	n = neigh_lookup(&nd_tbl, &dip, dev);
2003 	if (!n)
2004 		return;
2005 
2006 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2007 	neigh_event_send(n, NULL);
2008 	neigh_release(n);
2009 }
2010 #else
2011 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2012 						   char *rauhtd_pl,
2013 						   int rec_index)
2014 {
2015 }
2016 #endif
2017 
2018 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2019 						   char *rauhtd_pl,
2020 						   int rec_index)
2021 {
2022 	u8 num_entries;
2023 	int i;
2024 
2025 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2026 								rec_index);
2027 	/* Hardware starts counting at 0, so add 1. */
2028 	num_entries++;
2029 
2030 	/* Each record consists of several neighbour entries. */
2031 	for (i = 0; i < num_entries; i++) {
2032 		int ent_index;
2033 
2034 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2035 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2036 						       ent_index);
	}
}
2040 
2041 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2042 						   char *rauhtd_pl,
2043 						   int rec_index)
2044 {
2045 	/* One record contains one entry. */
2046 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2047 					       rec_index);
2048 }
2049 
2050 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2051 					      char *rauhtd_pl, int rec_index)
2052 {
2053 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2054 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2055 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2056 						       rec_index);
2057 		break;
2058 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2059 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2060 						       rec_index);
2061 		break;
2062 	}
2063 }
2064 
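/* A response that did not use all available records is necessarily the
 * last one, so another query is only needed when the response is full:
 * either its last record is IPv6 (such records are always complete), or
 * it is an IPv4 record carrying the maximum number of entries.
 */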
2065 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2066 {
2067 	u8 num_rec, last_rec_index, num_entries;
2068 
2069 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2070 	last_rec_index = num_rec - 1;
2071 
2072 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2073 		return false;
2074 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2075 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2076 		return true;
2077 
2078 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2079 								last_rec_index);
2080 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2081 		return true;
2082 	return false;
2083 }
2084 
2085 static int
2086 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2087 				       char *rauhtd_pl,
2088 				       enum mlxsw_reg_rauhtd_type type)
2089 {
2090 	int i, num_rec;
2091 	int err;
2092 
2093 	/* Make sure the neighbour's netdev isn't removed in the
2094 	 * process.
2095 	 */
2096 	rtnl_lock();
2097 	do {
2098 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2099 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2100 				      rauhtd_pl);
2101 		if (err) {
2102 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2103 			break;
2104 		}
2105 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2106 		for (i = 0; i < num_rec; i++)
2107 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2108 							  i);
2109 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2110 	rtnl_unlock();
2111 
2112 	return err;
2113 }
2114 
2115 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2116 {
2117 	enum mlxsw_reg_rauhtd_type type;
2118 	char *rauhtd_pl;
2119 	int err;
2120 
2121 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2122 	if (!rauhtd_pl)
2123 		return -ENOMEM;
2124 
2125 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2126 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2127 	if (err)
2128 		goto out;
2129 
2130 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2131 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2132 out:
2133 	kfree(rauhtd_pl);
2134 	return err;
2135 }
2136 
2137 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2138 {
2139 	struct mlxsw_sp_neigh_entry *neigh_entry;
2140 
	/* Take the RTNL mutex here to prevent the lists from changing. */
2142 	rtnl_lock();
2143 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2144 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think this
		 * neigh is active regardless of traffic.
		 */
2148 		neigh_event_send(neigh_entry->key.n, NULL);
2149 	rtnl_unlock();
2150 }
2151 
2152 static void
2153 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2154 {
2155 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2156 
2157 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2158 			       msecs_to_jiffies(interval));
2159 }
2160 
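/* Periodic work: dump the device's neighbour activity, refresh the
 * corresponding kernel neighbours, keep nexthop neighbours alive and
 * re-arm the work for the next polling interval.
 */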
2161 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2162 {
2163 	struct mlxsw_sp_router *router;
2164 	int err;
2165 
2166 	router = container_of(work, struct mlxsw_sp_router,
2167 			      neighs_update.dw.work);
2168 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2169 	if (err)
2170 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2171 
2172 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2173 
2174 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2175 }
2176 
2177 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2178 {
2179 	struct mlxsw_sp_neigh_entry *neigh_entry;
2180 	struct mlxsw_sp_router *router;
2181 
2182 	router = container_of(work, struct mlxsw_sp_router,
2183 			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those that are unresolved
	 * and send ARP requests to them. This solves the chicken-and-egg
	 * problem where a nexthop is not offloaded until its neighbour is
	 * resolved, but the neighbour is never resolved as long as the
	 * traffic flows in HW through a different nexthop.
	 *
	 * Take the RTNL mutex here to prevent the lists from changing.
	 */
2192 	rtnl_lock();
2193 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2194 			    nexthop_neighs_list_node)
2195 		if (!neigh_entry->connected)
2196 			neigh_event_send(neigh_entry->key.n, NULL);
2197 	rtnl_unlock();
2198 
2199 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2200 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2201 }
2202 
2203 static void
2204 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2205 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2206 			      bool removing);
2207 
2208 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2209 {
2210 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2211 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2212 }
2213 
2214 static void
2215 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2216 				struct mlxsw_sp_neigh_entry *neigh_entry,
2217 				enum mlxsw_reg_rauht_op op)
2218 {
2219 	struct neighbour *n = neigh_entry->key.n;
2220 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2221 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2222 
2223 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2224 			      dip);
2225 	if (neigh_entry->counter_valid)
2226 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2227 					     neigh_entry->counter_index);
2228 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2229 }
2230 
2231 static void
2232 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2233 				struct mlxsw_sp_neigh_entry *neigh_entry,
2234 				enum mlxsw_reg_rauht_op op)
2235 {
2236 	struct neighbour *n = neigh_entry->key.n;
2237 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2238 	const char *dip = n->primary_key;
2239 
2240 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2241 			      dip);
2242 	if (neigh_entry->counter_valid)
2243 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2244 					     neigh_entry->counter_index);
2245 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2246 }
2247 
2248 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2249 {
2250 	struct neighbour *n = neigh_entry->key.n;
2251 
2252 	/* Packets with a link-local destination address are trapped
2253 	 * after LPM lookup and never reach the neighbour table, so
2254 	 * there is no need to program such neighbours to the device.
2255 	 */
2256 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2257 	    IPV6_ADDR_LINKLOCAL)
2258 		return true;
2259 	return false;
2260 }
2261 
2262 static void
2263 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2264 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2265 			    bool adding)
2266 {
2267 	if (!adding && !neigh_entry->connected)
2268 		return;
2269 	neigh_entry->connected = adding;
2270 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2271 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2272 						mlxsw_sp_rauht_op(adding));
2273 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2274 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2275 			return;
2276 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2277 						mlxsw_sp_rauht_op(adding));
2278 	} else {
2279 		WARN_ON_ONCE(1);
2280 	}
2281 }
2282 
2283 void
2284 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2285 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2286 				    bool adding)
2287 {
2288 	if (adding)
2289 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2290 	else
2291 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2292 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2293 }
2294 
2295 struct mlxsw_sp_netevent_work {
2296 	struct work_struct work;
2297 	struct mlxsw_sp *mlxsw_sp;
2298 	struct neighbour *n;
2299 };
2300 
2301 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2302 {
2303 	struct mlxsw_sp_netevent_work *net_work =
2304 		container_of(work, struct mlxsw_sp_netevent_work, work);
2305 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2306 	struct mlxsw_sp_neigh_entry *neigh_entry;
2307 	struct neighbour *n = net_work->n;
2308 	unsigned char ha[ETH_ALEN];
2309 	bool entry_connected;
2310 	u8 nud_state, dead;
2311 
2312 	/* If these parameters are changed after we release the lock,
2313 	 * then we are guaranteed to receive another event letting us
2314 	 * know about it.
2315 	 */
2316 	read_lock_bh(&n->lock);
2317 	memcpy(ha, n->ha, ETH_ALEN);
2318 	nud_state = n->nud_state;
2319 	dead = n->dead;
2320 	read_unlock_bh(&n->lock);
2321 
2322 	rtnl_lock();
2323 	entry_connected = nud_state & NUD_VALID && !dead;
2324 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2325 	if (!entry_connected && !neigh_entry)
2326 		goto out;
2327 	if (!neigh_entry) {
2328 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2329 		if (IS_ERR(neigh_entry))
2330 			goto out;
2331 	}
2332 
2333 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2334 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2335 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2336 
2337 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2338 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2339 
2340 out:
2341 	rtnl_unlock();
2342 	neigh_release(n);
2343 	kfree(net_work);
2344 }
2345 
2346 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2347 
2348 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2349 {
2350 	struct mlxsw_sp_netevent_work *net_work =
2351 		container_of(work, struct mlxsw_sp_netevent_work, work);
2352 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2353 
2354 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2355 	kfree(net_work);
2356 }
2357 
2358 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2359 					  unsigned long event, void *ptr)
2360 {
2361 	struct mlxsw_sp_netevent_work *net_work;
2362 	struct mlxsw_sp_port *mlxsw_sp_port;
2363 	struct mlxsw_sp_router *router;
2364 	struct mlxsw_sp *mlxsw_sp;
2365 	unsigned long interval;
2366 	struct neigh_parms *p;
2367 	struct neighbour *n;
2368 	struct net *net;
2369 
2370 	switch (event) {
2371 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2372 		p = ptr;
2373 
2374 		/* We don't care about changes in the default table. */
2375 		if (!p->dev || (p->tbl->family != AF_INET &&
2376 				p->tbl->family != AF_INET6))
2377 			return NOTIFY_DONE;
2378 
2379 		/* We are in atomic context and can't take RTNL mutex,
2380 		 * so use RCU variant to walk the device chain.
2381 		 */
2382 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2383 		if (!mlxsw_sp_port)
2384 			return NOTIFY_DONE;
2385 
2386 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2387 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2388 		mlxsw_sp->router->neighs_update.interval = interval;
2389 
2390 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2391 		break;
2392 	case NETEVENT_NEIGH_UPDATE:
2393 		n = ptr;
2394 
2395 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2396 			return NOTIFY_DONE;
2397 
2398 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2399 		if (!mlxsw_sp_port)
2400 			return NOTIFY_DONE;
2401 
2402 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2403 		if (!net_work) {
2404 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2405 			return NOTIFY_BAD;
2406 		}
2407 
2408 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2409 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2410 		net_work->n = n;
2411 
		/* Take a reference to ensure the neighbour isn't
		 * destroyed until we drop the reference in the
		 * scheduled work.
		 */
2416 		neigh_clone(n);
2417 		mlxsw_core_schedule_work(&net_work->work);
2418 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2419 		break;
2420 	case NETEVENT_MULTIPATH_HASH_UPDATE:
2421 		net = ptr;
2422 
2423 		if (!net_eq(net, &init_net))
2424 			return NOTIFY_DONE;
2425 
2426 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2427 		if (!net_work)
2428 			return NOTIFY_BAD;
2429 
2430 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2431 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2432 		net_work->mlxsw_sp = router->mlxsw_sp;
2433 		mlxsw_core_schedule_work(&net_work->work);
2434 		break;
2435 	}
2436 
2437 	return NOTIFY_DONE;
2438 }
2439 
2440 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2441 {
2442 	int err;
2443 
2444 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2445 			      &mlxsw_sp_neigh_ht_params);
2446 	if (err)
2447 		return err;
2448 
2449 	/* Initialize the polling interval according to the default
2450 	 * table.
2451 	 */
2452 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2453 
	/* Create the delayed works for neighbour activity updates and
	 * unresolved nexthop probes.
	 */
2455 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2456 			  mlxsw_sp_router_neighs_update_work);
2457 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2458 			  mlxsw_sp_router_probe_unresolved_nexthops);
2459 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2460 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2461 	return 0;
2462 }
2463 
2464 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2465 {
2466 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2467 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2468 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2469 }
2470 
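/* Flush all neighbour entries installed on a RIF that is going away,
 * removing them from both the device and the driver's tables.
 */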
2471 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2472 					 struct mlxsw_sp_rif *rif)
2473 {
2474 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2475 
2476 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2477 				 rif_list_node) {
2478 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2479 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2480 	}
2481 }
2482 
2483 enum mlxsw_sp_nexthop_type {
2484 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2485 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2486 };
2487 
2488 struct mlxsw_sp_nexthop_key {
2489 	struct fib_nh *fib_nh;
2490 };
2491 
2492 struct mlxsw_sp_nexthop {
2493 	struct list_head neigh_list_node; /* member of neigh entry list */
2494 	struct list_head rif_list_node;
2495 	struct list_head router_list_node;
2496 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2497 						* this belongs to
2498 						*/
2499 	struct rhash_head ht_node;
2500 	struct mlxsw_sp_nexthop_key key;
2501 	unsigned char gw_addr[sizeof(struct in6_addr)];
2502 	int ifindex;
2503 	int nh_weight;
2504 	int norm_nh_weight;
2505 	int num_adj_entries;
2506 	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put into the KVD linear area of
			      * this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into the
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that the MAC of this neigh should
		      * be updated in HW
		      */
2516 	enum mlxsw_sp_nexthop_type type;
2517 	union {
2518 		struct mlxsw_sp_neigh_entry *neigh_entry;
2519 		struct mlxsw_sp_ipip_entry *ipip_entry;
2520 	};
2521 	unsigned int counter_index;
2522 	bool counter_valid;
2523 };
2524 
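/* The nh_rif alias defined below is only meaningful where the RIF of
 * the first nexthop can stand for the whole group, e.g. for routes
 * without a gateway, whose nexthops all share the same RIF.
 */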
2525 struct mlxsw_sp_nexthop_group {
2526 	void *priv;
2527 	struct rhash_head ht_node;
2528 	struct list_head fib_list; /* list of fib entries that use this group */
2529 	struct neigh_table *neigh_tbl;
2530 	u8 adj_index_valid:1,
2531 	   gateway:1; /* routes using the group use a gateway */
2532 	u32 adj_index;
2533 	u16 ecmp_size;
2534 	u16 count;
2535 	int sum_norm_weight;
2536 	struct mlxsw_sp_nexthop nexthops[0];
2537 #define nh_rif	nexthops[0].rif
2538 };
2539 
2540 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2541 				    struct mlxsw_sp_nexthop *nh)
2542 {
2543 	struct devlink *devlink;
2544 
2545 	devlink = priv_to_devlink(mlxsw_sp->core);
2546 	if (!devlink_dpipe_table_counter_enabled(devlink,
2547 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2548 		return;
2549 
2550 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2551 		return;
2552 
2553 	nh->counter_valid = true;
2554 }
2555 
2556 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2557 				   struct mlxsw_sp_nexthop *nh)
2558 {
2559 	if (!nh->counter_valid)
2560 		return;
2561 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2562 	nh->counter_valid = false;
2563 }
2564 
2565 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2566 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2567 {
2568 	if (!nh->counter_valid)
2569 		return -EINVAL;
2570 
2571 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2572 					 p_counter, NULL);
2573 }
2574 
2575 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2576 					       struct mlxsw_sp_nexthop *nh)
2577 {
2578 	if (!nh) {
2579 		if (list_empty(&router->nexthop_list))
2580 			return NULL;
2581 		else
2582 			return list_first_entry(&router->nexthop_list,
2583 						typeof(*nh), router_list_node);
2584 	}
2585 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2586 		return NULL;
2587 	return list_next_entry(nh, router_list_node);
2588 }
2589 
2590 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2591 {
2592 	return nh->offloaded;
2593 }
2594 
2595 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2596 {
2597 	if (!nh->offloaded)
2598 		return NULL;
2599 	return nh->neigh_entry->ha;
2600 }
2601 
2602 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2603 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2604 {
2605 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2606 	u32 adj_hash_index = 0;
2607 	int i;
2608 
2609 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2610 		return -EINVAL;
2611 
2612 	*p_adj_index = nh_grp->adj_index;
2613 	*p_adj_size = nh_grp->ecmp_size;
2614 
2615 	for (i = 0; i < nh_grp->count; i++) {
2616 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2617 
2618 		if (nh_iter == nh)
2619 			break;
2620 		if (nh_iter->offloaded)
2621 			adj_hash_index += nh_iter->num_adj_entries;
2622 	}
2623 
2624 	*p_adj_hash_index = adj_hash_index;
2625 	return 0;
2626 }
2627 
2628 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2629 {
2630 	return nh->rif;
2631 }
2632 
2633 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2634 {
2635 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2636 	int i;
2637 
2638 	for (i = 0; i < nh_grp->count; i++) {
2639 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2640 
2641 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2642 			return true;
2643 	}
2644 	return false;
2645 }
2646 
2647 static struct fib_info *
2648 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2649 {
2650 	return nh_grp->priv;
2651 }
2652 
2653 struct mlxsw_sp_nexthop_group_cmp_arg {
2654 	enum mlxsw_sp_l3proto proto;
2655 	union {
2656 		struct fib_info *fi;
2657 		struct mlxsw_sp_fib6_entry *fib6_entry;
2658 	};
2659 };
2660 
2661 static bool
2662 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2663 				    const struct in6_addr *gw, int ifindex,
2664 				    int weight)
2665 {
2666 	int i;
2667 
2668 	for (i = 0; i < nh_grp->count; i++) {
2669 		const struct mlxsw_sp_nexthop *nh;
2670 
2671 		nh = &nh_grp->nexthops[i];
2672 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2673 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2674 			return true;
2675 	}
2676 
2677 	return false;
2678 }
2679 
2680 static bool
2681 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2682 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2683 {
2684 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2685 
2686 	if (nh_grp->count != fib6_entry->nrt6)
2687 		return false;
2688 
2689 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2690 		struct in6_addr *gw;
2691 		int ifindex, weight;
2692 
2693 		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2694 		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
2695 		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2696 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2697 							 weight))
2698 			return false;
2699 	}
2700 
2701 	return true;
2702 }
2703 
2704 static int
2705 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2706 {
2707 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2708 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2709 
2710 	switch (cmp_arg->proto) {
2711 	case MLXSW_SP_L3_PROTO_IPV4:
2712 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2713 	case MLXSW_SP_L3_PROTO_IPV6:
2714 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2715 						    cmp_arg->fib6_entry);
2716 	default:
2717 		WARN_ON(1);
2718 		return 1;
2719 	}
2720 }
2721 
2722 static int
2723 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2724 {
2725 	return nh_grp->neigh_tbl->family;
2726 }
2727 
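/* Nexthop groups are keyed differently per protocol: IPv4 groups are
 * identified by their fib_info, IPv6 groups by their set of nexthops.
 * The object hash below must therefore agree with the key hash
 * (mlxsw_sp_nexthop_group_hash()) computed over the lookup argument.
 */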
2728 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2729 {
2730 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2731 	const struct mlxsw_sp_nexthop *nh;
2732 	struct fib_info *fi;
2733 	unsigned int val;
2734 	int i;
2735 
2736 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2737 	case AF_INET:
2738 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2739 		return jhash(&fi, sizeof(fi), seed);
2740 	case AF_INET6:
2741 		val = nh_grp->count;
2742 		for (i = 0; i < nh_grp->count; i++) {
2743 			nh = &nh_grp->nexthops[i];
2744 			val ^= nh->ifindex;
2745 		}
2746 		return jhash(&val, sizeof(val), seed);
2747 	default:
2748 		WARN_ON(1);
2749 		return 0;
2750 	}
2751 }
2752 
2753 static u32
2754 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2755 {
2756 	unsigned int val = fib6_entry->nrt6;
2757 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2758 	struct net_device *dev;
2759 
2760 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2761 		dev = mlxsw_sp_rt6->rt->dst.dev;
2762 		val ^= dev->ifindex;
2763 	}
2764 
2765 	return jhash(&val, sizeof(val), seed);
2766 }
2767 
2768 static u32
2769 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2770 {
2771 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2772 
2773 	switch (cmp_arg->proto) {
2774 	case MLXSW_SP_L3_PROTO_IPV4:
2775 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2776 	case MLXSW_SP_L3_PROTO_IPV6:
2777 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2778 	default:
2779 		WARN_ON(1);
2780 		return 0;
2781 	}
2782 }
2783 
2784 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2785 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2786 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2787 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2788 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2789 };
2790 
2791 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2792 					 struct mlxsw_sp_nexthop_group *nh_grp)
2793 {
2794 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2795 	    !nh_grp->gateway)
2796 		return 0;
2797 
2798 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2799 				      &nh_grp->ht_node,
2800 				      mlxsw_sp_nexthop_group_ht_params);
2801 }
2802 
2803 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2804 					  struct mlxsw_sp_nexthop_group *nh_grp)
2805 {
2806 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2807 	    !nh_grp->gateway)
2808 		return;
2809 
2810 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2811 			       &nh_grp->ht_node,
2812 			       mlxsw_sp_nexthop_group_ht_params);
2813 }
2814 
2815 static struct mlxsw_sp_nexthop_group *
2816 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2817 			       struct fib_info *fi)
2818 {
2819 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2820 
2821 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2822 	cmp_arg.fi = fi;
2823 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2824 				      &cmp_arg,
2825 				      mlxsw_sp_nexthop_group_ht_params);
2826 }
2827 
2828 static struct mlxsw_sp_nexthop_group *
2829 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2830 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2831 {
2832 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2833 
2834 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2835 	cmp_arg.fib6_entry = fib6_entry;
2836 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2837 				      &cmp_arg,
2838 				      mlxsw_sp_nexthop_group_ht_params);
2839 }
2840 
2841 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2842 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2843 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2844 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2845 };
2846 
2847 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2848 				   struct mlxsw_sp_nexthop *nh)
2849 {
2850 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2851 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2852 }
2853 
2854 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2855 				    struct mlxsw_sp_nexthop *nh)
2856 {
2857 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2858 			       mlxsw_sp_nexthop_ht_params);
2859 }
2860 
2861 static struct mlxsw_sp_nexthop *
2862 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2863 			struct mlxsw_sp_nexthop_key key)
2864 {
2865 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2866 				      mlxsw_sp_nexthop_ht_params);
2867 }
2868 
2869 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2870 					     const struct mlxsw_sp_fib *fib,
2871 					     u32 adj_index, u16 ecmp_size,
2872 					     u32 new_adj_index,
2873 					     u16 new_ecmp_size)
2874 {
2875 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2876 
2877 	mlxsw_reg_raleu_pack(raleu_pl,
2878 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2879 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2880 			     new_ecmp_size);
2881 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2882 }
2883 
2884 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2885 					  struct mlxsw_sp_nexthop_group *nh_grp,
2886 					  u32 old_adj_index, u16 old_ecmp_size)
2887 {
2888 	struct mlxsw_sp_fib_entry *fib_entry;
2889 	struct mlxsw_sp_fib *fib = NULL;
2890 	int err;
2891 
2892 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2893 		if (fib == fib_entry->fib_node->fib)
2894 			continue;
2895 		fib = fib_entry->fib_node->fib;
2896 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2897 							old_adj_index,
2898 							old_ecmp_size,
2899 							nh_grp->adj_index,
2900 							nh_grp->ecmp_size);
2901 		if (err)
2902 			return err;
2903 	}
2904 	return 0;
2905 }
2906 
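/* Write a single Ethernet adjacency entry: point the given adjacency
 * index at the nexthop's egress RIF and destination MAC, optionally
 * binding a flow counter to the entry.
 */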
2907 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2908 				     struct mlxsw_sp_nexthop *nh)
2909 {
2910 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2911 	char ratr_pl[MLXSW_REG_RATR_LEN];
2912 
2913 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2914 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2915 			    adj_index, neigh_entry->rif);
2916 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2917 	if (nh->counter_valid)
2918 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2919 	else
2920 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2921 
2922 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2923 }
2924 
2925 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2926 			    struct mlxsw_sp_nexthop *nh)
2927 {
2928 	int i;
2929 
2930 	for (i = 0; i < nh->num_adj_entries; i++) {
2931 		int err;
2932 
2933 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2934 		if (err)
2935 			return err;
2936 	}
2937 
2938 	return 0;
2939 }
2940 
2941 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2942 					  u32 adj_index,
2943 					  struct mlxsw_sp_nexthop *nh)
2944 {
2945 	const struct mlxsw_sp_ipip_ops *ipip_ops;
2946 
2947 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2948 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2949 }
2950 
2951 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2952 					u32 adj_index,
2953 					struct mlxsw_sp_nexthop *nh)
2954 {
2955 	int i;
2956 
2957 	for (i = 0; i < nh->num_adj_entries; i++) {
2958 		int err;
2959 
2960 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2961 						     nh);
2962 		if (err)
2963 			return err;
2964 	}
2965 
2966 	return 0;
2967 }
2968 
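/* Walk the group's nexthops in order, advancing the adjacency index by
 * each nexthop's number of entries, and rewrite the entries of nexthops
 * that are marked for update, or of all of them when the group was
 * reallocated.
 */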
2969 static int
2970 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2971 			      struct mlxsw_sp_nexthop_group *nh_grp,
2972 			      bool reallocate)
2973 {
2974 	u32 adj_index = nh_grp->adj_index; /* base */
2975 	struct mlxsw_sp_nexthop *nh;
2976 	int i;
2977 	int err;
2978 
2979 	for (i = 0; i < nh_grp->count; i++) {
2980 		nh = &nh_grp->nexthops[i];
2981 
2982 		if (!nh->should_offload) {
2983 			nh->offloaded = 0;
2984 			continue;
2985 		}
2986 
2987 		if (nh->update || reallocate) {
2988 			switch (nh->type) {
2989 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2990 				err = mlxsw_sp_nexthop_update
2991 					    (mlxsw_sp, adj_index, nh);
2992 				break;
2993 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2994 				err = mlxsw_sp_nexthop_ipip_update
2995 					    (mlxsw_sp, adj_index, nh);
2996 				break;
2997 			}
2998 			if (err)
2999 				return err;
3000 			nh->update = 0;
3001 			nh->offloaded = 1;
3002 		}
3003 		adj_index += nh->num_adj_entries;
3004 	}
3005 	return 0;
3006 }
3007 
3008 static bool
3009 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3010 				 const struct mlxsw_sp_fib_entry *fib_entry);
3011 
3012 static int
3013 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3014 				    struct mlxsw_sp_nexthop_group *nh_grp)
3015 {
3016 	struct mlxsw_sp_fib_entry *fib_entry;
3017 	int err;
3018 
3019 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3020 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3021 						      fib_entry))
3022 			continue;
3023 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3024 		if (err)
3025 			return err;
3026 	}
3027 	return 0;
3028 }
3029 
3030 static void
3031 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3032 				   enum mlxsw_reg_ralue_op op, int err);
3033 
3034 static void
3035 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3036 {
3037 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3038 	struct mlxsw_sp_fib_entry *fib_entry;
3039 
3040 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3041 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3042 						      fib_entry))
3043 			continue;
3044 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3045 	}
3046 }
3047 
3048 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3049 {
3050 	/* Valid sizes for an adjacency group are:
3051 	 * 1-64, 512, 1024, 2048 and 4096.
3052 	 */
3053 	if (*p_adj_grp_size <= 64)
3054 		return;
3055 	else if (*p_adj_grp_size <= 512)
3056 		*p_adj_grp_size = 512;
3057 	else if (*p_adj_grp_size <= 1024)
3058 		*p_adj_grp_size = 1024;
3059 	else if (*p_adj_grp_size <= 2048)
3060 		*p_adj_grp_size = 2048;
3061 	else
3062 		*p_adj_grp_size = 4096;
3063 }
3064 
3065 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3066 					     unsigned int alloc_size)
3067 {
3068 	if (alloc_size >= 4096)
3069 		*p_adj_grp_size = 4096;
3070 	else if (alloc_size >= 2048)
3071 		*p_adj_grp_size = 2048;
3072 	else if (alloc_size >= 1024)
3073 		*p_adj_grp_size = 1024;
3074 	else if (alloc_size >= 512)
3075 		*p_adj_grp_size = 512;
3076 }
3077 
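/* For example, a request for 70 entries is first rounded up to 512, the
 * next group size supported by the device; if the KVD linear allocation
 * backing it turns out larger, the group size is then raised to the
 * largest supported size that still fits in the allocation.
 */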
3078 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3079 				     u16 *p_adj_grp_size)
3080 {
3081 	unsigned int alloc_size;
3082 	int err;
3083 
3084 	/* Round up the requested group size to the next size supported
3085 	 * by the device and make sure the request can be satisfied.
3086 	 */
3087 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3088 	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3089 					     &alloc_size);
3090 	if (err)
3091 		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as many of them as
	 * possible.
	 */
3096 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3097 
3098 	return 0;
3099 }
3100 
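/* Normalize the nexthop weights by their greatest common divisor. For
 * example, two offloadable nexthops with weights 20 and 30 normalize to
 * 2 and 3, for a sum of normalized weights of 5. Nexthops that should
 * not be offloaded are skipped and contribute nothing.
 */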
3101 static void
3102 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3103 {
3104 	int i, g = 0, sum_norm_weight = 0;
3105 	struct mlxsw_sp_nexthop *nh;
3106 
3107 	for (i = 0; i < nh_grp->count; i++) {
3108 		nh = &nh_grp->nexthops[i];
3109 
3110 		if (!nh->should_offload)
3111 			continue;
3112 		if (g > 0)
3113 			g = gcd(nh->nh_weight, g);
3114 		else
3115 			g = nh->nh_weight;
3116 	}
3117 
3118 	for (i = 0; i < nh_grp->count; i++) {
3119 		nh = &nh_grp->nexthops[i];
3120 
3121 		if (!nh->should_offload)
3122 			continue;
3123 		nh->norm_nh_weight = nh->nh_weight / g;
3124 		sum_norm_weight += nh->norm_nh_weight;
3125 	}
3126 
3127 	nh_grp->sum_norm_weight = sum_norm_weight;
3128 }
3129 
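/* Distribute the adjacency entries among the nexthops in proportion to
 * their normalized weights. Continuing the example above, with an ECMP
 * group size of 512 the first nexthop receives
 * DIV_ROUND_CLOSEST(512 * 2, 5) = 205 entries and the second the
 * remaining 307.
 */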
3130 static void
3131 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3132 {
3133 	int total = nh_grp->sum_norm_weight;
3134 	u16 ecmp_size = nh_grp->ecmp_size;
3135 	int i, weight = 0, lower_bound = 0;
3136 
3137 	for (i = 0; i < nh_grp->count; i++) {
3138 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3139 		int upper_bound;
3140 
3141 		if (!nh->should_offload)
3142 			continue;
3143 		weight += nh->norm_nh_weight;
3144 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3145 		nh->num_adj_entries = upper_bound - lower_bound;
3146 		lower_bound = upper_bound;
3147 	}
3148 }
3149 
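/* Refresh a group after the offload state of its nexthops changed:
 * normalize the weights, allocate an adjacency group of a supported
 * size, write the nexthop entries into it and switch the using FIB
 * entries (or the old adjacency range) over to it. On any failure, fall
 * back to trapping the traffic of the affected routes to the CPU.
 */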
3150 static void
3151 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3152 			       struct mlxsw_sp_nexthop_group *nh_grp)
3153 {
3154 	u16 ecmp_size, old_ecmp_size;
3155 	struct mlxsw_sp_nexthop *nh;
3156 	bool offload_change = false;
3157 	u32 adj_index;
3158 	bool old_adj_index_valid;
3159 	u32 old_adj_index;
3160 	int i;
3161 	int err;
3162 
3163 	if (!nh_grp->gateway) {
3164 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3165 		return;
3166 	}
3167 
3168 	for (i = 0; i < nh_grp->count; i++) {
3169 		nh = &nh_grp->nexthops[i];
3170 
3171 		if (nh->should_offload != nh->offloaded) {
3172 			offload_change = true;
3173 			if (nh->should_offload)
3174 				nh->update = 1;
3175 		}
3176 	}
3177 	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate.
		 * Just update the MACs on the existing adjacency entries.
		 */
3181 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3182 		if (err) {
3183 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3184 			goto set_trap;
3185 		}
3186 		return;
3187 	}
3188 	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected, so just set the
		 * trap and let everything flow through the kernel.
		 */
		goto set_trap;
3194 
3195 	ecmp_size = nh_grp->sum_norm_weight;
3196 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3197 	if (err)
3198 		/* No valid allocation size available. */
3199 		goto set_trap;
3200 
3201 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3202 	if (err) {
		/* We ran out of KVD linear space, so just set the trap
		 * and let everything flow through the kernel.
		 */
3206 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3207 		goto set_trap;
3208 	}
3209 	old_adj_index_valid = nh_grp->adj_index_valid;
3210 	old_adj_index = nh_grp->adj_index;
3211 	old_ecmp_size = nh_grp->ecmp_size;
3212 	nh_grp->adj_index_valid = 1;
3213 	nh_grp->adj_index = adj_index;
3214 	nh_grp->ecmp_size = ecmp_size;
3215 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3216 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3217 	if (err) {
3218 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3219 		goto set_trap;
3220 	}
3221 
3222 	if (!old_adj_index_valid) {
		/* The trap was set for the fib entries, so we have to
		 * call fib entry update to unset it and use the
		 * adjacency index instead.
		 */
3226 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3227 		if (err) {
3228 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3229 			goto set_trap;
3230 		}
3231 		return;
3232 	}
3233 
3234 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3235 					     old_adj_index, old_ecmp_size);
3236 	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3237 	if (err) {
3238 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3239 		goto set_trap;
3240 	}
3241 
3242 	/* Offload state within the group changed, so update the flags. */
3243 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3244 
3245 	return;
3246 
3247 set_trap:
3248 	old_adj_index_valid = nh_grp->adj_index_valid;
3249 	nh_grp->adj_index_valid = 0;
3250 	for (i = 0; i < nh_grp->count; i++) {
3251 		nh = &nh_grp->nexthops[i];
3252 		nh->offloaded = 0;
3253 	}
3254 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3255 	if (err)
3256 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3257 	if (old_adj_index_valid)
3258 		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3259 }
3260 
3261 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3262 					    bool removing)
3263 {
3264 	if (!removing)
3265 		nh->should_offload = 1;
3266 	else
3267 		nh->should_offload = 0;
3268 	nh->update = 1;
3269 }
3270 
3271 static void
3272 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3273 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3274 			      bool removing)
3275 {
3276 	struct mlxsw_sp_nexthop *nh;
3277 
3278 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3279 			    neigh_list_node) {
3280 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3281 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3282 	}
3283 }
3284 
3285 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3286 				      struct mlxsw_sp_rif *rif)
3287 {
3288 	if (nh->rif)
3289 		return;
3290 
3291 	nh->rif = rif;
3292 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3293 }
3294 
3295 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3296 {
3297 	if (!nh->rif)
3298 		return;
3299 
3300 	list_del(&nh->rif_list_node);
3301 	nh->rif = NULL;
3302 }
3303 
3304 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3305 				       struct mlxsw_sp_nexthop *nh)
3306 {
3307 	struct mlxsw_sp_neigh_entry *neigh_entry;
3308 	struct neighbour *n;
3309 	u8 nud_state, dead;
3310 	int err;
3311 
3312 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3313 		return 0;
3314 
	/* Take a reference on the neighbour, ensuring that it is not
	 * destroyed before the nexthop entry is done with it. The
	 * reference is taken either by neigh_lookup() or, in case n is
	 * not found, by neigh_create().
	 */
3320 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3321 	if (!n) {
3322 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3323 				 nh->rif->dev);
3324 		if (IS_ERR(n))
3325 			return PTR_ERR(n);
3326 		neigh_event_send(n, NULL);
3327 	}
3328 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3329 	if (!neigh_entry) {
3330 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3331 		if (IS_ERR(neigh_entry)) {
			err = PTR_ERR(neigh_entry);
3333 			goto err_neigh_entry_create;
3334 		}
3335 	}
3336 
	/* If this is the first nexthop connected to this neigh, add the
	 * neigh entry to nexthop_neighs_list.
	 */
3340 	if (list_empty(&neigh_entry->nexthop_list))
3341 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3342 			      &mlxsw_sp->router->nexthop_neighs_list);
3343 
3344 	nh->neigh_entry = neigh_entry;
3345 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3346 	read_lock_bh(&n->lock);
3347 	nud_state = n->nud_state;
3348 	dead = n->dead;
3349 	read_unlock_bh(&n->lock);
3350 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3351 
3352 	return 0;
3353 
3354 err_neigh_entry_create:
3355 	neigh_release(n);
3356 	return err;
3357 }
3358 
3359 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3360 					struct mlxsw_sp_nexthop *nh)
3361 {
3362 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3363 	struct neighbour *n;
3364 
3365 	if (!neigh_entry)
3366 		return;
3367 	n = neigh_entry->key.n;
3368 
3369 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3370 	list_del(&nh->neigh_list_node);
3371 	nh->neigh_entry = NULL;
3372 
	/* If this was the last nexthop connected to this neigh, remove
	 * the neigh entry from nexthop_neighs_list.
	 */
3376 	if (list_empty(&neigh_entry->nexthop_list))
3377 		list_del(&neigh_entry->nexthop_neighs_list_node);
3378 
3379 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3380 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3381 
3382 	neigh_release(n);
3383 }
3384 
3385 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3386 {
3387 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3388 
3389 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3390 }
3391 
3392 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3393 				       struct mlxsw_sp_nexthop *nh,
3394 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3395 {
3396 	bool removing;
3397 
3398 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3399 		return;
3400 
3401 	nh->ipip_entry = ipip_entry;
3402 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3403 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3404 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3405 }
3406 
3407 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3408 				       struct mlxsw_sp_nexthop *nh)
3409 {
3410 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3411 
3412 	if (!ipip_entry)
3413 		return;
3414 
3415 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3416 	nh->ipip_entry = NULL;
3417 }
3418 
3419 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3420 					const struct fib_nh *fib_nh,
3421 					enum mlxsw_sp_ipip_type *p_ipipt)
3422 {
3423 	struct net_device *dev = fib_nh->nh_dev;
3424 
3425 	return dev &&
3426 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3427 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3428 }
3429 
3430 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3431 				       struct mlxsw_sp_nexthop *nh)
3432 {
3433 	switch (nh->type) {
3434 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3435 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3436 		mlxsw_sp_nexthop_rif_fini(nh);
3437 		break;
3438 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3439 		mlxsw_sp_nexthop_rif_fini(nh);
3440 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3441 		break;
3442 	}
3443 }
3444 
3445 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3446 				       struct mlxsw_sp_nexthop *nh,
3447 				       struct fib_nh *fib_nh)
3448 {
3449 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3450 	struct net_device *dev = fib_nh->nh_dev;
3451 	struct mlxsw_sp_ipip_entry *ipip_entry;
3452 	struct mlxsw_sp_rif *rif;
3453 	int err;
3454 
3455 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3456 	if (ipip_entry) {
3457 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3458 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3459 					  MLXSW_SP_L3_PROTO_IPV4)) {
3460 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3461 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3462 			return 0;
3463 		}
3464 	}
3465 
3466 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3467 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3468 	if (!rif)
3469 		return 0;
3470 
3471 	mlxsw_sp_nexthop_rif_init(nh, rif);
3472 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3473 	if (err)
3474 		goto err_neigh_init;
3475 
3476 	return 0;
3477 
3478 err_neigh_init:
3479 	mlxsw_sp_nexthop_rif_fini(nh);
3480 	return err;
3481 }
3482 
3483 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3484 					struct mlxsw_sp_nexthop *nh)
3485 {
3486 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3487 }
3488 
3489 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3490 				  struct mlxsw_sp_nexthop_group *nh_grp,
3491 				  struct mlxsw_sp_nexthop *nh,
3492 				  struct fib_nh *fib_nh)
3493 {
3494 	struct net_device *dev = fib_nh->nh_dev;
3495 	struct in_device *in_dev;
3496 	int err;
3497 
3498 	nh->nh_grp = nh_grp;
3499 	nh->key.fib_nh = fib_nh;
3500 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3501 	nh->nh_weight = fib_nh->nh_weight;
3502 #else
3503 	nh->nh_weight = 1;
3504 #endif
3505 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3506 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3507 	if (err)
3508 		return err;
3509 
3510 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3511 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3512 
3513 	if (!dev)
3514 		return 0;
3515 
3516 	in_dev = __in_dev_get_rtnl(dev);
3517 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3518 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3519 		return 0;
3520 
3521 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3522 	if (err)
3523 		goto err_nexthop_neigh_init;
3524 
3525 	return 0;
3526 
3527 err_nexthop_neigh_init:
3528 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3529 	return err;
3530 }
3531 
3532 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3533 				   struct mlxsw_sp_nexthop *nh)
3534 {
3535 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3536 	list_del(&nh->router_list_node);
3537 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3538 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3539 }
3540 
3541 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3542 				    unsigned long event, struct fib_nh *fib_nh)
3543 {
3544 	struct mlxsw_sp_nexthop_key key;
3545 	struct mlxsw_sp_nexthop *nh;
3546 
3547 	if (mlxsw_sp->router->aborted)
3548 		return;
3549 
3550 	key.fib_nh = fib_nh;
3551 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3552 	if (WARN_ON_ONCE(!nh))
3553 		return;
3554 
3555 	switch (event) {
3556 	case FIB_EVENT_NH_ADD:
3557 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3558 		break;
3559 	case FIB_EVENT_NH_DEL:
3560 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3561 		break;
3562 	}
3563 
3564 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3565 }
3566 
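/* Re-evaluate all nexthops using this RIF. Ethernet nexthops stay
 * valid, while IP-in-IP nexthops become invalid when the underlay
 * device of the tunnel is down.
 */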
3567 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3568 					struct mlxsw_sp_rif *rif)
3569 {
3570 	struct mlxsw_sp_nexthop *nh;
3571 	bool removing;
3572 
3573 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3574 		switch (nh->type) {
3575 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3576 			removing = false;
3577 			break;
3578 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3579 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3580 			break;
3581 		default:
3582 			WARN_ON(1);
3583 			continue;
3584 		}
3585 
3586 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3587 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3588 	}
3589 }
3590 
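/* Move all nexthops from the old RIF to the new one and re-evaluate
 * them against the new RIF.
 */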
3591 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3592 					 struct mlxsw_sp_rif *old_rif,
3593 					 struct mlxsw_sp_rif *new_rif)
3594 {
3595 	struct mlxsw_sp_nexthop *nh;
3596 
3597 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3598 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3599 		nh->rif = new_rif;
3600 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3601 }
3602 
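/* The RIF is going away: release the type-specific state of every
 * nexthop that used it and refresh the affected nexthop groups.
 */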
3603 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3604 					   struct mlxsw_sp_rif *rif)
3605 {
3606 	struct mlxsw_sp_nexthop *nh, *tmp;
3607 
3608 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3609 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3610 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3611 	}
3612 }
3613 
3614 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3615 				   const struct fib_info *fi)
3616 {
3617 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3618 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3619 }
3620 
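/* Create a nexthop group for an IPv4 route. The nexthops are embedded
 * in the group, one per fib_nh, and a reference is held on the
 * fib_info for the lifetime of the group.
 */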
3621 static struct mlxsw_sp_nexthop_group *
3622 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3623 {
3624 	struct mlxsw_sp_nexthop_group *nh_grp;
3625 	struct mlxsw_sp_nexthop *nh;
3626 	struct fib_nh *fib_nh;
3627 	size_t alloc_size;
3628 	int i;
3629 	int err;
3630 
3631 	alloc_size = sizeof(*nh_grp) +
3632 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3633 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3634 	if (!nh_grp)
3635 		return ERR_PTR(-ENOMEM);
3636 	nh_grp->priv = fi;
3637 	INIT_LIST_HEAD(&nh_grp->fib_list);
3638 	nh_grp->neigh_tbl = &arp_tbl;
3639 
3640 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3641 	nh_grp->count = fi->fib_nhs;
3642 	fib_info_hold(fi);
3643 	for (i = 0; i < nh_grp->count; i++) {
3644 		nh = &nh_grp->nexthops[i];
3645 		fib_nh = &fi->fib_nh[i];
3646 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3647 		if (err)
3648 			goto err_nexthop4_init;
3649 	}
3650 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3651 	if (err)
3652 		goto err_nexthop_group_insert;
3653 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3654 	return nh_grp;
3655 
3656 err_nexthop_group_insert:
3657 err_nexthop4_init:
3658 	for (i--; i >= 0; i--) {
3659 		nh = &nh_grp->nexthops[i];
3660 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3661 	}
3662 	fib_info_put(fi);
3663 	kfree(nh_grp);
3664 	return ERR_PTR(err);
3665 }
3666 
3667 static void
3668 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3669 				struct mlxsw_sp_nexthop_group *nh_grp)
3670 {
3671 	struct mlxsw_sp_nexthop *nh;
3672 	int i;
3673 
3674 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3675 	for (i = 0; i < nh_grp->count; i++) {
3676 		nh = &nh_grp->nexthops[i];
3677 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3678 	}
3679 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3680 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3681 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3682 	kfree(nh_grp);
3683 }
3684 
3685 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3686 				       struct mlxsw_sp_fib_entry *fib_entry,
3687 				       struct fib_info *fi)
3688 {
3689 	struct mlxsw_sp_nexthop_group *nh_grp;
3690 
3691 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3692 	if (!nh_grp) {
3693 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3694 		if (IS_ERR(nh_grp))
3695 			return PTR_ERR(nh_grp);
3696 	}
3697 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3698 	fib_entry->nh_group = nh_grp;
3699 	return 0;
3700 }
3701 
3702 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3703 					struct mlxsw_sp_fib_entry *fib_entry)
3704 {
3705 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3706 
3707 	list_del(&fib_entry->nexthop_group_node);
3708 	if (!list_empty(&nh_grp->fib_list))
3709 		return;
3710 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3711 }
3712 
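/* IPv4 routes with a non-zero TOS are never offloaded. */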
3713 static bool
3714 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3715 {
3716 	struct mlxsw_sp_fib4_entry *fib4_entry;
3717 
3718 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3719 				  common);
3720 	return !fib4_entry->tos;
3721 }
3722 
3723 static bool
3724 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3725 {
3726 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3727 
3728 	switch (fib_entry->fib_node->fib->proto) {
3729 	case MLXSW_SP_L3_PROTO_IPV4:
3730 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3731 			return false;
3732 		break;
3733 	case MLXSW_SP_L3_PROTO_IPV6:
3734 		break;
3735 	}
3736 
3737 	switch (fib_entry->type) {
3738 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3739 		return !!nh_group->adj_index_valid;
3740 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3741 		return !!nh_group->nh_rif;
3742 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3743 		return true;
3744 	default:
3745 		return false;
3746 	}
3747 }
3748 
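/* Find the nexthop in the group that corresponds to the given IPv6
 * route, by matching on the route's device and gateway address.
 */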
3749 static struct mlxsw_sp_nexthop *
3750 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3751 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3752 {
3753 	int i;
3754 
3755 	for (i = 0; i < nh_grp->count; i++) {
3756 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3757 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3758 
3759 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3760 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3761 				    &rt->rt6i_gateway))
3762 			return nh;
3764 	}
3765 
3766 	return NULL;
3767 }
3768 
3769 static void
3770 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3771 {
3772 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3773 	int i;
3774 
3775 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3776 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3777 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3778 		return;
3779 	}
3780 
3781 	for (i = 0; i < nh_grp->count; i++) {
3782 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3783 
3784 		if (nh->offloaded)
3785 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3786 		else
3787 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3788 	}
3789 }
3790 
3791 static void
3792 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3793 {
3794 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3795 	int i;
3796 
3797 	if (!list_is_singular(&nh_grp->fib_list))
3798 		return;
3799 
3800 	for (i = 0; i < nh_grp->count; i++) {
3801 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3802 
3803 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3804 	}
3805 }
3806 
3807 static void
3808 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3809 {
3810 	struct mlxsw_sp_fib6_entry *fib6_entry;
3811 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3812 
3813 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3814 				  common);
3815 
3816 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3817 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3818 				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3819 		return;
3820 	}
3821 
3822 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3823 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3824 		struct mlxsw_sp_nexthop *nh;
3825 
3826 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3827 		if (nh && nh->offloaded)
3828 			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3829 		else
3830 			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3831 	}
3832 }
3833 
3834 static void
3835 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3836 {
3837 	struct mlxsw_sp_fib6_entry *fib6_entry;
3838 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3839 
3840 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3841 				  common);
3842 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3843 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3844 
3845 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3846 	}
3847 }
3848 
3849 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3850 {
3851 	switch (fib_entry->fib_node->fib->proto) {
3852 	case MLXSW_SP_L3_PROTO_IPV4:
3853 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3854 		break;
3855 	case MLXSW_SP_L3_PROTO_IPV6:
3856 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3857 		break;
3858 	}
3859 }
3860 
3861 static void
3862 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3863 {
3864 	switch (fib_entry->fib_node->fib->proto) {
3865 	case MLXSW_SP_L3_PROTO_IPV4:
3866 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3867 		break;
3868 	case MLXSW_SP_L3_PROTO_IPV6:
3869 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3870 		break;
3871 	}
3872 }
3873 
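/* Reflect the result of a route write or deletion in the kernel's
 * FIB by setting or clearing the RTNH_F_OFFLOAD flags.
 */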
3874 static void
3875 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3876 				   enum mlxsw_reg_ralue_op op, int err)
3877 {
3878 	switch (op) {
3879 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3880 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3881 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3882 		if (err)
3883 			return;
3884 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3885 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3886 		else
3887 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3888 		return;
3889 	default:
3890 		return;
3891 	}
3892 }
3893 
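/* Pack the common part of the RALUE register: protocol, operation,
 * virtual router ID and destination prefix.
 */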
3894 static void
3895 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3896 			      const struct mlxsw_sp_fib_entry *fib_entry,
3897 			      enum mlxsw_reg_ralue_op op)
3898 {
3899 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3900 	enum mlxsw_reg_ralxx_protocol proto;
3901 	u32 *p_dip;
3902 
3903 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3904 
3905 	switch (fib->proto) {
3906 	case MLXSW_SP_L3_PROTO_IPV4:
3907 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3908 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3909 				      fib_entry->fib_node->key.prefix_len,
3910 				      *p_dip);
3911 		break;
3912 	case MLXSW_SP_L3_PROTO_IPV6:
3913 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3914 				      fib_entry->fib_node->key.prefix_len,
3915 				      fib_entry->fib_node->key.addr);
3916 		break;
3917 	}
3918 }
3919 
3920 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3921 					struct mlxsw_sp_fib_entry *fib_entry,
3922 					enum mlxsw_reg_ralue_op op)
3923 {
3924 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3925 	enum mlxsw_reg_ralue_trap_action trap_action;
3926 	u16 trap_id = 0;
3927 	u32 adjacency_index = 0;
3928 	u16 ecmp_size = 0;
3929 
	/* If the nexthop group's adjacency index is valid, use it with
	 * the provided ECMP size. Otherwise, set up a trap and pass the
	 * traffic to the kernel.
	 */
3934 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3935 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3936 		adjacency_index = fib_entry->nh_group->adj_index;
3937 		ecmp_size = fib_entry->nh_group->ecmp_size;
3938 	} else {
3939 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3940 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3941 	}
3942 
3943 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3944 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3945 					adjacency_index, ecmp_size);
3946 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3947 }
3948 
3949 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3950 				       struct mlxsw_sp_fib_entry *fib_entry,
3951 				       enum mlxsw_reg_ralue_op op)
3952 {
3953 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3954 	enum mlxsw_reg_ralue_trap_action trap_action;
3955 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3956 	u16 trap_id = 0;
3957 	u16 rif_index = 0;
3958 
3959 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3960 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3961 		rif_index = rif->rif_index;
3962 	} else {
3963 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3964 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3965 	}
3966 
3967 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3968 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3969 				       rif_index);
3970 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3971 }
3972 
3973 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3974 				      struct mlxsw_sp_fib_entry *fib_entry,
3975 				      enum mlxsw_reg_ralue_op op)
3976 {
3977 	char ralue_pl[MLXSW_REG_RALUE_LEN];
3978 
3979 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3980 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3981 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3982 }
3983 
3984 static int
3985 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3986 				 struct mlxsw_sp_fib_entry *fib_entry,
3987 				 enum mlxsw_reg_ralue_op op)
3988 {
3989 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3990 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3991 
3992 	if (WARN_ON(!ipip_entry))
3993 		return -EINVAL;
3994 
3995 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3996 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3997 				      fib_entry->decap.tunnel_index);
3998 }
3999 
4000 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4001 				   struct mlxsw_sp_fib_entry *fib_entry,
4002 				   enum mlxsw_reg_ralue_op op)
4003 {
4004 	switch (fib_entry->type) {
4005 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4006 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4007 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4008 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4009 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4010 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4011 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4012 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4013 							fib_entry, op);
4014 	}
4015 	return -EINVAL;
4016 }
4017 
4018 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4019 				 struct mlxsw_sp_fib_entry *fib_entry,
4020 				 enum mlxsw_reg_ralue_op op)
4021 {
4022 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4023 
4024 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4025 
4026 	return err;
4027 }
4028 
4029 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4030 				     struct mlxsw_sp_fib_entry *fib_entry)
4031 {
4032 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4033 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4034 }
4035 
4036 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4037 				  struct mlxsw_sp_fib_entry *fib_entry)
4038 {
4039 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4040 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4041 }
4042 
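/* Classify an IPv4 route: local routes matching an IP-in-IP tunnel
 * are used for decapsulation, local and broadcast routes are trapped,
 * unreachable / blackhole / prohibit routes use action type local and
 * unicast routes are either remote (gateway) or local (directly
 * connected).
 */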
4043 static int
4044 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4045 			     const struct fib_entry_notifier_info *fen_info,
4046 			     struct mlxsw_sp_fib_entry *fib_entry)
4047 {
4048 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4049 	struct net_device *dev = fen_info->fi->fib_dev;
4050 	struct mlxsw_sp_ipip_entry *ipip_entry;
4051 	struct fib_info *fi = fen_info->fi;
4052 
4053 	switch (fen_info->type) {
4054 	case RTN_LOCAL:
4055 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4056 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4057 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4058 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4059 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4060 							     fib_entry,
4061 							     ipip_entry);
4062 		}
4063 		/* fall through */
4064 	case RTN_BROADCAST:
4065 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4066 		return 0;
4067 	case RTN_UNREACHABLE: /* fall through */
4068 	case RTN_BLACKHOLE: /* fall through */
4069 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can be trapped with a lower priority than packets
		 * directed at the host, so use action type local instead
		 * of trap.
		 */
4074 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4075 		return 0;
4076 	case RTN_UNICAST:
4077 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4078 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4079 		else
4080 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4081 		return 0;
4082 	default:
4083 		return -EINVAL;
4084 	}
4085 }
4086 
4087 static struct mlxsw_sp_fib4_entry *
4088 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4089 			   struct mlxsw_sp_fib_node *fib_node,
4090 			   const struct fib_entry_notifier_info *fen_info)
4091 {
4092 	struct mlxsw_sp_fib4_entry *fib4_entry;
4093 	struct mlxsw_sp_fib_entry *fib_entry;
4094 	int err;
4095 
4096 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4097 	if (!fib4_entry)
4098 		return ERR_PTR(-ENOMEM);
4099 	fib_entry = &fib4_entry->common;
4100 
4101 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4102 	if (err)
4103 		goto err_fib4_entry_type_set;
4104 
4105 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4106 	if (err)
4107 		goto err_nexthop4_group_get;
4108 
4109 	fib4_entry->prio = fen_info->fi->fib_priority;
4110 	fib4_entry->tb_id = fen_info->tb_id;
4111 	fib4_entry->type = fen_info->type;
4112 	fib4_entry->tos = fen_info->tos;
4113 
4114 	fib_entry->fib_node = fib_node;
4115 
4116 	return fib4_entry;
4117 
4118 err_nexthop4_group_get:
4119 err_fib4_entry_type_set:
4120 	kfree(fib4_entry);
4121 	return ERR_PTR(err);
4122 }
4123 
4124 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4125 					struct mlxsw_sp_fib4_entry *fib4_entry)
4126 {
4127 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4128 	kfree(fib4_entry);
4129 }
4130 
4131 static struct mlxsw_sp_fib4_entry *
4132 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4133 			   const struct fib_entry_notifier_info *fen_info)
4134 {
4135 	struct mlxsw_sp_fib4_entry *fib4_entry;
4136 	struct mlxsw_sp_fib_node *fib_node;
4137 	struct mlxsw_sp_fib *fib;
4138 	struct mlxsw_sp_vr *vr;
4139 
4140 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4141 	if (!vr)
4142 		return NULL;
4143 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4144 
4145 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4146 					    sizeof(fen_info->dst),
4147 					    fen_info->dst_len);
4148 	if (!fib_node)
4149 		return NULL;
4150 
4151 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4152 		if (fib4_entry->tb_id == fen_info->tb_id &&
4153 		    fib4_entry->tos == fen_info->tos &&
4154 		    fib4_entry->type == fen_info->type &&
4155 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4156 		    fen_info->fi) {
4157 			return fib4_entry;
4158 		}
4159 	}
4160 
4161 	return NULL;
4162 }
4163 
4164 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4165 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4166 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4167 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4168 	.automatic_shrinking = true,
4169 };
4170 
4171 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4172 				    struct mlxsw_sp_fib_node *fib_node)
4173 {
4174 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4175 				      mlxsw_sp_fib_ht_params);
4176 }
4177 
4178 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4179 				     struct mlxsw_sp_fib_node *fib_node)
4180 {
4181 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4182 			       mlxsw_sp_fib_ht_params);
4183 }
4184 
4185 static struct mlxsw_sp_fib_node *
4186 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4187 			 size_t addr_len, unsigned char prefix_len)
4188 {
4189 	struct mlxsw_sp_fib_key key;
4190 
4191 	memset(&key, 0, sizeof(key));
4192 	memcpy(key.addr, addr, addr_len);
4193 	key.prefix_len = prefix_len;
4194 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4195 }
4196 
4197 static struct mlxsw_sp_fib_node *
4198 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4199 			 size_t addr_len, unsigned char prefix_len)
4200 {
4201 	struct mlxsw_sp_fib_node *fib_node;
4202 
4203 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4204 	if (!fib_node)
4205 		return NULL;
4206 
4207 	INIT_LIST_HEAD(&fib_node->entry_list);
4208 	list_add(&fib_node->list, &fib->node_list);
4209 	memcpy(fib_node->key.addr, addr, addr_len);
4210 	fib_node->key.prefix_len = prefix_len;
4211 
4212 	return fib_node;
4213 }
4214 
4215 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4216 {
4217 	list_del(&fib_node->list);
4218 	WARN_ON(!list_empty(&fib_node->entry_list));
4219 	kfree(fib_node);
4220 }
4221 
4222 static bool
4223 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4224 				 const struct mlxsw_sp_fib_entry *fib_entry)
4225 {
4226 	return list_first_entry(&fib_node->entry_list,
4227 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4228 }
4229 
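/* Account for the node's prefix length in the LPM tree bound to its
 * FIB. If the tree does not already include this prefix length, get
 * one that does and rebind the virtual routers to it.
 */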
4230 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4231 				      struct mlxsw_sp_fib_node *fib_node)
4232 {
4233 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4234 	struct mlxsw_sp_fib *fib = fib_node->fib;
4235 	struct mlxsw_sp_lpm_tree *lpm_tree;
4236 	int err;
4237 
4238 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4239 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4240 		goto out;
4241 
4242 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4243 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4244 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4245 					 fib->proto);
4246 	if (IS_ERR(lpm_tree))
4247 		return PTR_ERR(lpm_tree);
4248 
4249 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4250 	if (err)
4251 		goto err_lpm_tree_replace;
4252 
4253 out:
4254 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4255 	return 0;
4256 
4257 err_lpm_tree_replace:
4258 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4259 	return err;
4260 }
4261 
4262 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4263 					 struct mlxsw_sp_fib_node *fib_node)
4264 {
4265 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4266 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4267 	struct mlxsw_sp_fib *fib = fib_node->fib;
4268 	int err;
4269 
4270 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4271 		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the now-unused prefix length. If this fails, keep using
	 * the old tree.
	 */
4275 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4276 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4277 				    fib_node->key.prefix_len);
4278 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4279 					 fib->proto);
4280 	if (IS_ERR(lpm_tree))
4281 		return;
4282 
4283 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4284 	if (err)
4285 		goto err_lpm_tree_replace;
4286 
4287 	return;
4288 
4289 err_lpm_tree_replace:
4290 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4291 }
4292 
4293 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4294 				  struct mlxsw_sp_fib_node *fib_node,
4295 				  struct mlxsw_sp_fib *fib)
4296 {
4297 	int err;
4298 
4299 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4300 	if (err)
4301 		return err;
4302 	fib_node->fib = fib;
4303 
4304 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4305 	if (err)
4306 		goto err_fib_lpm_tree_link;
4307 
4308 	return 0;
4309 
4310 err_fib_lpm_tree_link:
4311 	fib_node->fib = NULL;
4312 	mlxsw_sp_fib_node_remove(fib, fib_node);
4313 	return err;
4314 }
4315 
4316 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4317 				   struct mlxsw_sp_fib_node *fib_node)
4318 {
4319 	struct mlxsw_sp_fib *fib = fib_node->fib;
4320 
4321 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4322 	fib_node->fib = NULL;
4323 	mlxsw_sp_fib_node_remove(fib, fib_node);
4324 }
4325 
4326 static struct mlxsw_sp_fib_node *
4327 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4328 		      size_t addr_len, unsigned char prefix_len,
4329 		      enum mlxsw_sp_l3proto proto)
4330 {
4331 	struct mlxsw_sp_fib_node *fib_node;
4332 	struct mlxsw_sp_fib *fib;
4333 	struct mlxsw_sp_vr *vr;
4334 	int err;
4335 
4336 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4337 	if (IS_ERR(vr))
4338 		return ERR_CAST(vr);
4339 	fib = mlxsw_sp_vr_fib(vr, proto);
4340 
4341 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4342 	if (fib_node)
4343 		return fib_node;
4344 
4345 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4346 	if (!fib_node) {
4347 		err = -ENOMEM;
4348 		goto err_fib_node_create;
4349 	}
4350 
4351 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4352 	if (err)
4353 		goto err_fib_node_init;
4354 
4355 	return fib_node;
4356 
4357 err_fib_node_init:
4358 	mlxsw_sp_fib_node_destroy(fib_node);
4359 err_fib_node_create:
4360 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4361 	return ERR_PTR(err);
4362 }
4363 
4364 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4365 				  struct mlxsw_sp_fib_node *fib_node)
4366 {
4367 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4368 
4369 	if (!list_empty(&fib_node->entry_list))
4370 		return;
4371 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4372 	mlxsw_sp_fib_node_destroy(fib_node);
4373 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4374 }
4375 
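/* Return the entry before which the new entry should be inserted.
 * Entries are sorted by descending table ID and TOS, then by
 * ascending priority.
 */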
4376 static struct mlxsw_sp_fib4_entry *
4377 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4378 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4379 {
4380 	struct mlxsw_sp_fib4_entry *fib4_entry;
4381 
4382 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4383 		if (fib4_entry->tb_id > new4_entry->tb_id)
4384 			continue;
4385 		if (fib4_entry->tb_id != new4_entry->tb_id)
4386 			break;
4387 		if (fib4_entry->tos > new4_entry->tos)
4388 			continue;
4389 		if (fib4_entry->prio >= new4_entry->prio ||
4390 		    fib4_entry->tos < new4_entry->tos)
4391 			return fib4_entry;
4392 	}
4393 
4394 	return NULL;
4395 }
4396 
4397 static int
4398 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4399 			       struct mlxsw_sp_fib4_entry *new4_entry)
4400 {
4401 	struct mlxsw_sp_fib_node *fib_node;
4402 
4403 	if (WARN_ON(!fib4_entry))
4404 		return -EINVAL;
4405 
4406 	fib_node = fib4_entry->common.fib_node;
4407 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4408 				 common.list) {
4409 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4410 		    fib4_entry->tos != new4_entry->tos ||
4411 		    fib4_entry->prio != new4_entry->prio)
4412 			break;
4413 	}
4414 
4415 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4416 	return 0;
4417 }
4418 
4419 static int
4420 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4421 			       bool replace, bool append)
4422 {
4423 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4424 	struct mlxsw_sp_fib4_entry *fib4_entry;
4425 
4426 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4427 
4428 	if (append)
4429 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4430 	if (replace && WARN_ON(!fib4_entry))
4431 		return -EINVAL;
4432 
	/* Insert the new entry before the replaced one, so that the
	 * latter can be removed later.
	 */
4436 	if (fib4_entry) {
4437 		list_add_tail(&new4_entry->common.list,
4438 			      &fib4_entry->common.list);
4439 	} else {
4440 		struct mlxsw_sp_fib4_entry *last;
4441 
4442 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4443 			if (new4_entry->tb_id > last->tb_id)
4444 				break;
4445 			fib4_entry = last;
4446 		}
4447 
4448 		if (fib4_entry)
4449 			list_add(&new4_entry->common.list,
4450 				 &fib4_entry->common.list);
4451 		else
4452 			list_add(&new4_entry->common.list,
4453 				 &fib_node->entry_list);
4454 	}
4455 
4456 	return 0;
4457 }
4458 
4459 static void
4460 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4461 {
4462 	list_del(&fib4_entry->common.list);
4463 }
4464 
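/* Write the entry to the device only if it is the first entry in its
 * node; entries deeper in the list merely shadow the offloaded one.
 */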
4465 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4466 				       struct mlxsw_sp_fib_entry *fib_entry)
4467 {
4468 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4469 
4470 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4471 		return 0;
4472 
4473 	/* To prevent packet loss, overwrite the previously offloaded
4474 	 * entry.
4475 	 */
4476 	if (!list_is_singular(&fib_node->entry_list)) {
4477 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4478 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4479 
4480 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4481 	}
4482 
4483 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4484 }
4485 
4486 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4487 					struct mlxsw_sp_fib_entry *fib_entry)
4488 {
4489 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4490 
4491 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4492 		return;
4493 
4494 	/* Promote the next entry by overwriting the deleted entry */
4495 	if (!list_is_singular(&fib_node->entry_list)) {
4496 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4497 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4498 
4499 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4500 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4501 		return;
4502 	}
4503 
4504 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4505 }
4506 
4507 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4508 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4509 					 bool replace, bool append)
4510 {
4511 	int err;
4512 
4513 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4514 	if (err)
4515 		return err;
4516 
4517 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4518 	if (err)
4519 		goto err_fib_node_entry_add;
4520 
4521 	return 0;
4522 
4523 err_fib_node_entry_add:
4524 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4525 	return err;
4526 }
4527 
4528 static void
4529 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4530 				struct mlxsw_sp_fib4_entry *fib4_entry)
4531 {
4532 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4533 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4534 
4535 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4536 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4537 }
4538 
4539 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4540 					struct mlxsw_sp_fib4_entry *fib4_entry,
4541 					bool replace)
4542 {
4543 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4544 	struct mlxsw_sp_fib4_entry *replaced;
4545 
4546 	if (!replace)
4547 		return;
4548 
	/* We inserted the new entry before the replaced one */
4550 	replaced = list_next_entry(fib4_entry, common.list);
4551 
4552 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4553 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4554 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4555 }
4556 
4557 static int
4558 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4559 			 const struct fib_entry_notifier_info *fen_info,
4560 			 bool replace, bool append)
4561 {
4562 	struct mlxsw_sp_fib4_entry *fib4_entry;
4563 	struct mlxsw_sp_fib_node *fib_node;
4564 	int err;
4565 
4566 	if (mlxsw_sp->router->aborted)
4567 		return 0;
4568 
4569 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4570 					 &fen_info->dst, sizeof(fen_info->dst),
4571 					 fen_info->dst_len,
4572 					 MLXSW_SP_L3_PROTO_IPV4);
4573 	if (IS_ERR(fib_node)) {
4574 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4575 		return PTR_ERR(fib_node);
4576 	}
4577 
4578 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4579 	if (IS_ERR(fib4_entry)) {
4580 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4581 		err = PTR_ERR(fib4_entry);
4582 		goto err_fib4_entry_create;
4583 	}
4584 
4585 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4586 					    append);
4587 	if (err) {
4588 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4589 		goto err_fib4_node_entry_link;
4590 	}
4591 
4592 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4593 
4594 	return 0;
4595 
4596 err_fib4_node_entry_link:
4597 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4598 err_fib4_entry_create:
4599 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4600 	return err;
4601 }
4602 
4603 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4604 				     struct fib_entry_notifier_info *fen_info)
4605 {
4606 	struct mlxsw_sp_fib4_entry *fib4_entry;
4607 	struct mlxsw_sp_fib_node *fib_node;
4608 
4609 	if (mlxsw_sp->router->aborted)
4610 		return;
4611 
4612 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4613 	if (WARN_ON(!fib4_entry))
4614 		return;
4615 	fib_node = fib4_entry->common.fib_node;
4616 
4617 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4618 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4619 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4620 }
4621 
4622 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4623 {
	/* Packets with a link-local destination IP arriving at the
	 * router are trapped to the CPU, so there is no need to program
	 * specific routes for them.
	 */
4628 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4629 		return true;
4630 
4631 	/* Multicast routes aren't supported, so ignore them. Neighbour
4632 	 * Discovery packets are specifically trapped.
4633 	 */
4634 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4635 		return true;
4636 
4637 	/* Cloned routes are irrelevant in the forwarding path. */
4638 	if (rt->rt6i_flags & RTF_CACHE)
4639 		return true;
4640 
4641 	return false;
4642 }
4643 
4644 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
4645 {
4646 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4647 
4648 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4649 	if (!mlxsw_sp_rt6)
4650 		return ERR_PTR(-ENOMEM);
4651 
	/* In case of route replacement, the replaced route is deleted
	 * without notification. Take a reference to prevent accessing
	 * freed memory.
	 */
4656 	mlxsw_sp_rt6->rt = rt;
4657 	rt6_hold(rt);
4658 
4659 	return mlxsw_sp_rt6;
4660 }
4661 
4662 #if IS_ENABLED(CONFIG_IPV6)
4663 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4664 {
4665 	rt6_release(rt);
4666 }
4667 #else
4668 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4669 {
4670 }
4671 #endif
4672 
4673 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4674 {
4675 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4676 	kfree(mlxsw_sp_rt6);
4677 }
4678 
4679 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
4680 {
4681 	/* RTF_CACHE routes are ignored */
4682 	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4683 }
4684 
4685 static struct rt6_info *
4686 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4687 {
4688 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4689 				list)->rt;
4690 }
4691 
4692 static struct mlxsw_sp_fib6_entry *
4693 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4694 				 const struct rt6_info *nrt, bool replace)
4695 {
4696 	struct mlxsw_sp_fib6_entry *fib6_entry;
4697 
4698 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4699 		return NULL;
4700 
4701 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4702 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4703 
4704 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4705 		 * virtual router.
4706 		 */
4707 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
4708 			continue;
4709 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
4710 			break;
4711 		if (rt->rt6i_metric < nrt->rt6i_metric)
4712 			continue;
4713 		if (rt->rt6i_metric == nrt->rt6i_metric &&
4714 		    mlxsw_sp_fib6_rt_can_mp(rt))
4715 			return fib6_entry;
4716 		if (rt->rt6i_metric > nrt->rt6i_metric)
4717 			break;
4718 	}
4719 
4720 	return NULL;
4721 }
4722 
4723 static struct mlxsw_sp_rt6 *
4724 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4725 			    const struct rt6_info *rt)
4726 {
4727 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4728 
4729 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4730 		if (mlxsw_sp_rt6->rt == rt)
4731 			return mlxsw_sp_rt6;
4732 	}
4733 
4734 	return NULL;
4735 }
4736 
4737 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4738 					const struct rt6_info *rt,
4739 					enum mlxsw_sp_ipip_type *ret)
4740 {
4741 	return rt->dst.dev &&
4742 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
4743 }
4744 
4745 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4746 				       struct mlxsw_sp_nexthop_group *nh_grp,
4747 				       struct mlxsw_sp_nexthop *nh,
4748 				       const struct rt6_info *rt)
4749 {
4750 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4751 	struct mlxsw_sp_ipip_entry *ipip_entry;
4752 	struct net_device *dev = rt->dst.dev;
4753 	struct mlxsw_sp_rif *rif;
4754 	int err;
4755 
4756 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4757 	if (ipip_entry) {
4758 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4759 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4760 					  MLXSW_SP_L3_PROTO_IPV6)) {
4761 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4762 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4763 			return 0;
4764 		}
4765 	}
4766 
4767 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4768 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4769 	if (!rif)
4770 		return 0;
4771 	mlxsw_sp_nexthop_rif_init(nh, rif);
4772 
4773 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4774 	if (err)
4775 		goto err_nexthop_neigh_init;
4776 
4777 	return 0;
4778 
4779 err_nexthop_neigh_init:
4780 	mlxsw_sp_nexthop_rif_fini(nh);
4781 	return err;
4782 }
4783 
4784 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4785 					struct mlxsw_sp_nexthop *nh)
4786 {
4787 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4788 }
4789 
4790 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4791 				  struct mlxsw_sp_nexthop_group *nh_grp,
4792 				  struct mlxsw_sp_nexthop *nh,
4793 				  const struct rt6_info *rt)
4794 {
4795 	struct net_device *dev = rt->dst.dev;
4796 
4797 	nh->nh_grp = nh_grp;
4798 	nh->nh_weight = rt->rt6i_nh_weight;
4799 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
4800 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4801 
4802 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4803 
4804 	if (!dev)
4805 		return 0;
4806 	nh->ifindex = dev->ifindex;
4807 
4808 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4809 }
4810 
4811 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4812 				   struct mlxsw_sp_nexthop *nh)
4813 {
4814 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4815 	list_del(&nh->router_list_node);
4816 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4817 }
4818 
4819 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4820 				    const struct rt6_info *rt)
4821 {
4822 	return rt->rt6i_flags & RTF_GATEWAY ||
4823 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4824 }
4825 
4826 static struct mlxsw_sp_nexthop_group *
4827 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4828 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4829 {
4830 	struct mlxsw_sp_nexthop_group *nh_grp;
4831 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4832 	struct mlxsw_sp_nexthop *nh;
4833 	size_t alloc_size;
	int i;
4835 	int err;
4836 
4837 	alloc_size = sizeof(*nh_grp) +
4838 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4839 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4840 	if (!nh_grp)
4841 		return ERR_PTR(-ENOMEM);
4842 	INIT_LIST_HEAD(&nh_grp->fib_list);
4843 #if IS_ENABLED(CONFIG_IPV6)
4844 	nh_grp->neigh_tbl = &nd_tbl;
4845 #endif
4846 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4847 					struct mlxsw_sp_rt6, list);
4848 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4849 	nh_grp->count = fib6_entry->nrt6;
4850 	for (i = 0; i < nh_grp->count; i++) {
4851 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
4852 
4853 		nh = &nh_grp->nexthops[i];
4854 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4855 		if (err)
4856 			goto err_nexthop6_init;
4857 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4858 	}
4859 
4860 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4861 	if (err)
4862 		goto err_nexthop_group_insert;
4863 
4864 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4865 	return nh_grp;
4866 
4867 err_nexthop_group_insert:
4868 err_nexthop6_init:
4869 	for (i--; i >= 0; i--) {
4870 		nh = &nh_grp->nexthops[i];
4871 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4872 	}
4873 	kfree(nh_grp);
4874 	return ERR_PTR(err);
4875 }
4876 
4877 static void
4878 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4879 				struct mlxsw_sp_nexthop_group *nh_grp)
4880 {
4881 	struct mlxsw_sp_nexthop *nh;
4882 	int i = nh_grp->count;
4883 
4884 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4885 	for (i--; i >= 0; i--) {
4886 		nh = &nh_grp->nexthops[i];
4887 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4888 	}
4889 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4890 	WARN_ON(nh_grp->adj_index_valid);
4891 	kfree(nh_grp);
4892 }
4893 
4894 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4895 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4896 {
4897 	struct mlxsw_sp_nexthop_group *nh_grp;
4898 
4899 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4900 	if (!nh_grp) {
4901 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4902 		if (IS_ERR(nh_grp))
4903 			return PTR_ERR(nh_grp);
4904 	}
4905 
4906 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4907 		      &nh_grp->fib_list);
4908 	fib6_entry->common.nh_group = nh_grp;
4909 
4910 	return 0;
4911 }
4912 
4913 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4914 					struct mlxsw_sp_fib_entry *fib_entry)
4915 {
4916 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4917 
4918 	list_del(&fib_entry->nexthop_group_node);
4919 	if (!list_empty(&nh_grp->fib_list))
4920 		return;
4921 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4922 }
4923 
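/* Rebind the entry to a nexthop group matching its current set of
 * routes: drop it from the old group, get (or create) a new one and
 * update the device. The old group is destroyed if it became unused.
 */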
4924 static int
4925 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
4926 			       struct mlxsw_sp_fib6_entry *fib6_entry)
4927 {
4928 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
4929 	int err;
4930 
4931 	fib6_entry->common.nh_group = NULL;
4932 	list_del(&fib6_entry->common.nexthop_group_node);
4933 
4934 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
4935 	if (err)
4936 		goto err_nexthop6_group_get;
4937 
	/* If this entry is offloaded, then the adjacency index currently
	 * associated with it in the device's table is that of the old
	 * group. Start using the new one instead.
	 */
4942 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
4943 	if (err)
4944 		goto err_fib_node_entry_add;
4945 
4946 	if (list_empty(&old_nh_grp->fib_list))
4947 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
4948 
4949 	return 0;
4950 
4951 err_fib_node_entry_add:
4952 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
4953 err_nexthop6_group_get:
4954 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4955 		      &old_nh_grp->fib_list);
4956 	fib6_entry->common.nh_group = old_nh_grp;
4957 	return err;
4958 }
4959 
4960 static int
4961 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
4962 				struct mlxsw_sp_fib6_entry *fib6_entry,
4963 				struct rt6_info *rt)
4964 {
4965 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4966 	int err;
4967 
4968 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
4969 	if (IS_ERR(mlxsw_sp_rt6))
4970 		return PTR_ERR(mlxsw_sp_rt6);
4971 
4972 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
4973 	fib6_entry->nrt6++;
4974 
4975 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
4976 	if (err)
4977 		goto err_nexthop6_group_update;
4978 
4979 	return 0;
4980 
4981 err_nexthop6_group_update:
4982 	fib6_entry->nrt6--;
4983 	list_del(&mlxsw_sp_rt6->list);
4984 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
4985 	return err;
4986 }
4987 
4988 static void
4989 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
4990 				struct mlxsw_sp_fib6_entry *fib6_entry,
4991 				struct rt6_info *rt)
4992 {
4993 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4994 
4995 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
4996 	if (WARN_ON(!mlxsw_sp_rt6))
4997 		return;
4998 
4999 	fib6_entry->nrt6--;
5000 	list_del(&mlxsw_sp_rt6->list);
5001 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5002 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5003 }
5004 
5005 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5006 					 struct mlxsw_sp_fib_entry *fib_entry,
5007 					 const struct rt6_info *rt)
5008 {
5009 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5010 	 * stack. We can rely on their destination device not having a
5011 	 * RIF (it's the loopback device) and can thus use action type
5012 	 * local, which will cause them to be trapped with a lower
5013 	 * priority than packets that need to be locally received.
5014 	 */
5015 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
5016 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5017 	else if (rt->rt6i_flags & RTF_REJECT)
5018 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5019 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5020 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5021 	else
5022 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5023 }
5024 
5025 static void
5026 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5027 {
5028 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5029 
5030 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5031 				 list) {
5032 		fib6_entry->nrt6--;
5033 		list_del(&mlxsw_sp_rt6->list);
5034 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5035 	}
5036 }
5037 
5038 static struct mlxsw_sp_fib6_entry *
5039 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5040 			   struct mlxsw_sp_fib_node *fib_node,
5041 			   struct rt6_info *rt)
5042 {
5043 	struct mlxsw_sp_fib6_entry *fib6_entry;
5044 	struct mlxsw_sp_fib_entry *fib_entry;
5045 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5046 	int err;
5047 
5048 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5049 	if (!fib6_entry)
5050 		return ERR_PTR(-ENOMEM);
5051 	fib_entry = &fib6_entry->common;
5052 
5053 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5054 	if (IS_ERR(mlxsw_sp_rt6)) {
5055 		err = PTR_ERR(mlxsw_sp_rt6);
5056 		goto err_rt6_create;
5057 	}
5058 
5059 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5060 
5061 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5062 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5063 	fib6_entry->nrt6 = 1;
5064 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5065 	if (err)
5066 		goto err_nexthop6_group_get;
5067 
5068 	fib_entry->fib_node = fib_node;
5069 
5070 	return fib6_entry;
5071 
5072 err_nexthop6_group_get:
5073 	list_del(&mlxsw_sp_rt6->list);
5074 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5075 err_rt6_create:
5076 	kfree(fib6_entry);
5077 	return ERR_PTR(err);
5078 }
5079 
5080 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5081 					struct mlxsw_sp_fib6_entry *fib6_entry)
5082 {
5083 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5084 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5085 	WARN_ON(fib6_entry->nrt6);
5086 	kfree(fib6_entry);
5087 }
5088 
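/* Return the entry before which the new IPv6 entry should be
 * inserted. When replacing, prefer an entry with the same metric
 * whose multipath capability matches that of the new route.
 */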
5089 static struct mlxsw_sp_fib6_entry *
5090 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5091 			      const struct rt6_info *nrt, bool replace)
5092 {
5093 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5094 
5095 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5096 		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5097 
5098 		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
5099 			continue;
5100 		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
5101 			break;
5102 		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
5103 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5104 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5105 				return fib6_entry;
5106 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5107 				fallback = fallback ?: fib6_entry;
5108 		}
5109 		if (rt->rt6i_metric > nrt->rt6i_metric)
5110 			return fallback ?: fib6_entry;
5111 	}
5112 
5113 	return fallback;
5114 }
5115 
5116 static int
5117 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5118 			       bool replace)
5119 {
5120 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5121 	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5122 	struct mlxsw_sp_fib6_entry *fib6_entry;
5123 
5124 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5125 
5126 	if (replace && WARN_ON(!fib6_entry))
5127 		return -EINVAL;
5128 
5129 	if (fib6_entry) {
5130 		list_add_tail(&new6_entry->common.list,
5131 			      &fib6_entry->common.list);
5132 	} else {
5133 		struct mlxsw_sp_fib6_entry *last;
5134 
5135 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5136 			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5137 
5138 			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
5139 				break;
5140 			fib6_entry = last;
5141 		}
5142 
5143 		if (fib6_entry)
5144 			list_add(&new6_entry->common.list,
5145 				 &fib6_entry->common.list);
5146 		else
5147 			list_add(&new6_entry->common.list,
5148 				 &fib_node->entry_list);
5149 	}
5150 
5151 	return 0;
5152 }
5153 
5154 static void
5155 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5156 {
5157 	list_del(&fib6_entry->common.list);
5158 }
5159 
5160 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5161 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5162 					 bool replace)
5163 {
5164 	int err;
5165 
5166 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5167 	if (err)
5168 		return err;
5169 
5170 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5171 	if (err)
5172 		goto err_fib_node_entry_add;
5173 
5174 	return 0;
5175 
5176 err_fib_node_entry_add:
5177 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5178 	return err;
5179 }
5180 
5181 static void
5182 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5183 				struct mlxsw_sp_fib6_entry *fib6_entry)
5184 {
5185 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5186 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5187 }
5188 
5189 static struct mlxsw_sp_fib6_entry *
5190 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5191 			   const struct rt6_info *rt)
5192 {
5193 	struct mlxsw_sp_fib6_entry *fib6_entry;
5194 	struct mlxsw_sp_fib_node *fib_node;
5195 	struct mlxsw_sp_fib *fib;
5196 	struct mlxsw_sp_vr *vr;
5197 
5198 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
5199 	if (!vr)
5200 		return NULL;
5201 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5202 
5203 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
5204 					    sizeof(rt->rt6i_dst.addr),
5205 					    rt->rt6i_dst.plen);
5206 	if (!fib_node)
5207 		return NULL;
5208 
5209 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5210 		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5211 
5212 		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
5213 		    rt->rt6i_metric == iter_rt->rt6i_metric &&
5214 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5215 			return fib6_entry;
5216 	}
5217 
5218 	return NULL;
5219 }
5220 
5221 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5222 					struct mlxsw_sp_fib6_entry *fib6_entry,
5223 					bool replace)
5224 {
5225 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5226 	struct mlxsw_sp_fib6_entry *replaced;
5227 
5228 	if (!replace)
5229 		return;
5230 
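	/* The new entry was inserted before the replaced one */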
5231 	replaced = list_next_entry(fib6_entry, common.list);
5232 
5233 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5234 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5235 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5236 }
5237 
5238 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5239 				    struct rt6_info *rt, bool replace)
5240 {
5241 	struct mlxsw_sp_fib6_entry *fib6_entry;
5242 	struct mlxsw_sp_fib_node *fib_node;
5243 	int err;
5244 
5245 	if (mlxsw_sp->router->aborted)
5246 		return 0;
5247 
5248 	if (rt->rt6i_src.plen)
5249 		return -EINVAL;
5250 
5251 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5252 		return 0;
5253 
5254 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
5255 					 &rt->rt6i_dst.addr,
5256 					 sizeof(rt->rt6i_dst.addr),
5257 					 rt->rt6i_dst.plen,
5258 					 MLXSW_SP_L3_PROTO_IPV6);
5259 	if (IS_ERR(fib_node))
5260 		return PTR_ERR(fib_node);
5261 
	/* Before creating a new entry, try to append the route to an
	 * existing multipath entry.
	 */
5265 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5266 	if (fib6_entry) {
5267 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5268 		if (err)
5269 			goto err_fib6_entry_nexthop_add;
5270 		return 0;
5271 	}
5272 
5273 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5274 	if (IS_ERR(fib6_entry)) {
5275 		err = PTR_ERR(fib6_entry);
5276 		goto err_fib6_entry_create;
5277 	}
5278 
5279 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5280 	if (err)
5281 		goto err_fib6_node_entry_link;
5282 
5283 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5284 
5285 	return 0;
5286 
5287 err_fib6_node_entry_link:
5288 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5289 err_fib6_entry_create:
5290 err_fib6_entry_nexthop_add:
5291 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5292 	return err;
5293 }
5294 
5295 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5296 				     struct rt6_info *rt)
5297 {
5298 	struct mlxsw_sp_fib6_entry *fib6_entry;
5299 	struct mlxsw_sp_fib_node *fib_node;
5300 
5301 	if (mlxsw_sp->router->aborted)
5302 		return;
5303 
5304 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5305 		return;
5306 
5307 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5308 	if (WARN_ON(!fib6_entry))
5309 		return;
5310 
	/* If the route is part of a multipath entry, but is not the last
	 * one to be removed, only shrink its nexthop group.
	 */
5314 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5315 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5316 		return;
5317 	}
5318 
5319 	fib_node = fib6_entry->common.fib_node;
5320 
5321 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5322 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5323 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5324 }
5325 
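/* In abort mode, bind each virtual router to a minimal LPM tree and
 * install a default route that traps all packets to the CPU, so that
 * forwarding is handled by the kernel instead.
 */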
5326 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5327 					    enum mlxsw_reg_ralxx_protocol proto,
5328 					    u8 tree_id)
5329 {
5330 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5331 	char ralst_pl[MLXSW_REG_RALST_LEN];
5332 	int i, err;
5333 
5334 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5335 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5336 	if (err)
5337 		return err;
5338 
5339 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5340 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5341 	if (err)
5342 		return err;
5343 
5344 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5345 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5346 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5347 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5348 
5349 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5350 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5351 				      raltb_pl);
5352 		if (err)
5353 			return err;
5354 
5355 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5356 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5357 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5358 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5359 				      ralue_pl);
5360 		if (err)
5361 			return err;
5362 	}
5363 
5364 	return 0;
5365 }
5366 
5367 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5368 				     struct mfc_entry_notifier_info *men_info,
5369 				     bool replace)
5370 {
5371 	struct mlxsw_sp_vr *vr;
5372 
5373 	if (mlxsw_sp->router->aborted)
5374 		return 0;
5375 
5376 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5377 	if (IS_ERR(vr))
5378 		return PTR_ERR(vr);
5379 
5380 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5381 }
5382 
5383 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5384 				      struct mfc_entry_notifier_info *men_info)
5385 {
5386 	struct mlxsw_sp_vr *vr;
5387 
5388 	if (mlxsw_sp->router->aborted)
5389 		return;
5390 
5391 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5392 	if (WARN_ON(!vr))
5393 		return;
5394 
5395 	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
5396 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5397 }
5398 
5399 static int
5400 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5401 			      struct vif_entry_notifier_info *ven_info)
5402 {
5403 	struct mlxsw_sp_rif *rif;
5404 	struct mlxsw_sp_vr *vr;
5405 
5406 	if (mlxsw_sp->router->aborted)
5407 		return 0;
5408 
5409 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5410 	if (IS_ERR(vr))
5411 		return PTR_ERR(vr);
5412 
5413 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5414 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5415 				   ven_info->vif_index,
5416 				   ven_info->vif_flags, rif);
5417 }
5418 
5419 static void
5420 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5421 			      struct vif_entry_notifier_info *ven_info)
5422 {
5423 	struct mlxsw_sp_vr *vr;
5424 
5425 	if (mlxsw_sp->router->aborted)
5426 		return;
5427 
5428 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5429 	if (WARN_ON(!vr))
5430 		return;
5431 
5432 	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
5433 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5434 }
5435 
5436 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5437 {
5438 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5439 	int err;
5440 
5441 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5442 					       MLXSW_SP_LPM_TREE_MIN);
5443 	if (err)
5444 		return err;
5445 
	/* The multicast router code does not need an abort trap as, by
	 * default, packets that do not match any route are trapped to the
	 * CPU.
	 */
5449 
5450 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5451 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5452 						MLXSW_SP_LPM_TREE_MIN + 1);
5453 }
5454 
5455 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5456 				     struct mlxsw_sp_fib_node *fib_node)
5457 {
5458 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5459 
5460 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5461 				 common.list) {
5462 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5463 
5464 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5465 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5466 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when the entry list is empty and the node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
5471 		if (do_break)
5472 			break;
5473 	}
5474 }
5475 
5476 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5477 				     struct mlxsw_sp_fib_node *fib_node)
5478 {
5479 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5480 
5481 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5482 				 common.list) {
5483 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5484 
5485 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5486 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5487 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5488 		if (do_break)
5489 			break;
5490 	}
5491 }
5492 
5493 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5494 				    struct mlxsw_sp_fib_node *fib_node)
5495 {
5496 	switch (fib_node->fib->proto) {
5497 	case MLXSW_SP_L3_PROTO_IPV4:
5498 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5499 		break;
5500 	case MLXSW_SP_L3_PROTO_IPV6:
5501 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5502 		break;
5503 	}
5504 }
5505 
5506 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5507 				  struct mlxsw_sp_vr *vr,
5508 				  enum mlxsw_sp_l3proto proto)
5509 {
5510 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5511 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5512 
5513 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5514 		bool do_break = &tmp->list == &fib->node_list;
5515 
5516 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
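		/* Flushing may have freed the node; break before the next
		 * iteration dereferences freed memory.
		 */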
5517 		if (do_break)
5518 			break;
5519 	}
5520 }
5521 
5522 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5523 {
5524 	int i;
5525 
5526 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5527 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5528 
5529 		if (!mlxsw_sp_vr_is_used(vr))
5530 			continue;
5531 
5532 		mlxsw_sp_mr_table_flush(vr->mr4_table);
5533 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5534 
		/* If the virtual router was used only for IPv4, then
		 * flushing it released its last reference and it is no
		 * longer in use.
		 */
5538 		if (!mlxsw_sp_vr_is_used(vr))
5539 			continue;
5540 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5541 	}
5542 }
5543 
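/* Invoked when offloading a route or rule fails. Flush all offloaded routes
 * from the device and install traps so that subsequent lookups are punted to
 * the CPU; routing then continues in software only.
 */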
5544 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5545 {
5546 	int err;
5547 
5548 	if (mlxsw_sp->router->aborted)
5549 		return;
5550 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5551 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5552 	mlxsw_sp->router->aborted = true;
5553 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5554 	if (err)
5555 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5556 }
5557 
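/* FIB notifications are delivered in an atomic context, so the notifier
 * copies the relevant information into a work item and defers the actual
 * processing to process context, where RTNL can be taken.
 */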
5558 struct mlxsw_sp_fib_event_work {
5559 	struct work_struct work;
5560 	union {
5561 		struct fib6_entry_notifier_info fen6_info;
5562 		struct fib_entry_notifier_info fen_info;
5563 		struct fib_rule_notifier_info fr_info;
5564 		struct fib_nh_notifier_info fnh_info;
5565 		struct mfc_entry_notifier_info men_info;
5566 		struct vif_entry_notifier_info ven_info;
5567 	};
5568 	struct mlxsw_sp *mlxsw_sp;
5569 	unsigned long event;
5570 };
5571 
5572 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5573 {
5574 	struct mlxsw_sp_fib_event_work *fib_work =
5575 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5576 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5577 	bool replace, append;
5578 	int err;
5579 
5580 	/* Protect internal structures from changes */
5581 	rtnl_lock();
5582 	switch (fib_work->event) {
5583 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5584 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5585 	case FIB_EVENT_ENTRY_ADD:
5586 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5587 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5588 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5589 					       replace, append);
5590 		if (err)
5591 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5592 		fib_info_put(fib_work->fen_info.fi);
5593 		break;
5594 	case FIB_EVENT_ENTRY_DEL:
5595 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5596 		fib_info_put(fib_work->fen_info.fi);
5597 		break;
5598 	case FIB_EVENT_RULE_ADD:
		/* A rule was added that we do not support, so abort FIB
		 * offload.
		 */
5602 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5603 		break;
5604 	case FIB_EVENT_NH_ADD: /* fall through */
5605 	case FIB_EVENT_NH_DEL:
5606 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5607 					fib_work->fnh_info.fib_nh);
5608 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5609 		break;
5610 	}
5611 	rtnl_unlock();
5612 	kfree(fib_work);
5613 }
5614 
5615 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5616 {
5617 	struct mlxsw_sp_fib_event_work *fib_work =
5618 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5619 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5620 	bool replace;
5621 	int err;
5622 
5623 	rtnl_lock();
5624 	switch (fib_work->event) {
5625 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5626 	case FIB_EVENT_ENTRY_ADD:
5627 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5628 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5629 					       fib_work->fen6_info.rt, replace);
5630 		if (err)
5631 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5632 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5633 		break;
5634 	case FIB_EVENT_ENTRY_DEL:
5635 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5636 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5637 		break;
		/* A rule was added that we do not support, so abort FIB
		 * offload.
		 */
5642 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5643 		break;
5644 	}
5645 	rtnl_unlock();
5646 	kfree(fib_work);
5647 }
5648 
5649 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5650 {
5651 	struct mlxsw_sp_fib_event_work *fib_work =
5652 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5653 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5654 	bool replace;
5655 	int err;
5656 
5657 	rtnl_lock();
5658 	switch (fib_work->event) {
5659 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5660 	case FIB_EVENT_ENTRY_ADD:
5661 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5662 
5663 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5664 						replace);
5665 		if (err)
5666 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5667 		ipmr_cache_put(fib_work->men_info.mfc);
5668 		break;
5669 	case FIB_EVENT_ENTRY_DEL:
5670 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5671 		ipmr_cache_put(fib_work->men_info.mfc);
5672 		break;
5673 	case FIB_EVENT_VIF_ADD:
5674 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5675 						    &fib_work->ven_info);
5676 		if (err)
5677 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5678 		dev_put(fib_work->ven_info.dev);
5679 		break;
5680 	case FIB_EVENT_VIF_DEL:
5681 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5682 					      &fib_work->ven_info);
5683 		dev_put(fib_work->ven_info.dev);
5684 		break;
		/* A rule was added that we do not support, so abort FIB
		 * offload.
		 */
5689 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5690 		break;
5691 	}
5692 	rtnl_unlock();
5693 	kfree(fib_work);
5694 }
5695 
5696 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5697 				       struct fib_notifier_info *info)
5698 {
5699 	struct fib_entry_notifier_info *fen_info;
5700 	struct fib_nh_notifier_info *fnh_info;
5701 
5702 	switch (fib_work->event) {
5703 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5704 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5705 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5706 	case FIB_EVENT_ENTRY_DEL:
5707 		fen_info = container_of(info, struct fib_entry_notifier_info,
5708 					info);
5709 		fib_work->fen_info = *fen_info;
		/* Take a reference on the fib_info to prevent it from being
		 * freed while the work is queued. It is released once the
		 * work item has run.
		 */
5713 		fib_info_hold(fib_work->fen_info.fi);
5714 		break;
5715 	case FIB_EVENT_NH_ADD: /* fall through */
5716 	case FIB_EVENT_NH_DEL:
5717 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5718 					info);
5719 		fib_work->fnh_info = *fnh_info;
5720 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5721 		break;
5722 	}
5723 }
5724 
5725 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5726 				       struct fib_notifier_info *info)
5727 {
5728 	struct fib6_entry_notifier_info *fen6_info;
5729 
5730 	switch (fib_work->event) {
5731 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5732 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5733 	case FIB_EVENT_ENTRY_DEL:
5734 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5735 					 info);
5736 		fib_work->fen6_info = *fen6_info;
5737 		rt6_hold(fib_work->fen6_info.rt);
5738 		break;
5739 	}
5740 }
5741 
5742 static void
5743 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5744 			    struct fib_notifier_info *info)
5745 {
5746 	switch (fib_work->event) {
5747 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5748 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5749 	case FIB_EVENT_ENTRY_DEL:
5750 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5751 		ipmr_cache_hold(fib_work->men_info.mfc);
5752 		break;
5753 	case FIB_EVENT_VIF_ADD: /* fall through */
5754 	case FIB_EVENT_VIF_DEL:
5755 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5756 		dev_hold(fib_work->ven_info.dev);
5757 		break;
5758 	}
5759 }
5760 
5761 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5762 					  struct fib_notifier_info *info,
5763 					  struct mlxsw_sp *mlxsw_sp)
5764 {
5765 	struct netlink_ext_ack *extack = info->extack;
5766 	struct fib_rule_notifier_info *fr_info;
5767 	struct fib_rule *rule;
5768 	int err = 0;
5769 
	/* Rule deletion requires no action at the moment. */
5771 	if (event == FIB_EVENT_RULE_DEL)
5772 		return 0;
5773 
5774 	if (mlxsw_sp->router->aborted)
5775 		return 0;
5776 
5777 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5778 	rule = fr_info->rule;
5779 
5780 	switch (info->family) {
5781 	case AF_INET:
5782 		if (!fib4_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
5784 		break;
5785 	case AF_INET6:
5786 		if (!fib6_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
5788 		break;
5789 	case RTNL_FAMILY_IPMR:
5790 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
5792 		break;
5793 	}
5794 
5795 	if (err < 0)
5796 		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
5797 
5798 	return err;
5799 }
5800 
5801 /* Called with rcu_read_lock() */
5802 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5803 				     unsigned long event, void *ptr)
5804 {
5805 	struct mlxsw_sp_fib_event_work *fib_work;
5806 	struct fib_notifier_info *info = ptr;
5807 	struct mlxsw_sp_router *router;
5808 	int err;
5809 
5810 	if (!net_eq(info->net, &init_net) ||
5811 	    (info->family != AF_INET && info->family != AF_INET6 &&
5812 	     info->family != RTNL_FAMILY_IPMR))
5813 		return NOTIFY_DONE;
5814 
5815 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5816 
5817 	switch (event) {
5818 	case FIB_EVENT_RULE_ADD: /* fall through */
5819 	case FIB_EVENT_RULE_DEL:
5820 		err = mlxsw_sp_router_fib_rule_event(event, info,
5821 						     router->mlxsw_sp);
5822 		if (!err)
5823 			return NOTIFY_DONE;
5824 	}
5825 
5826 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5827 	if (WARN_ON(!fib_work))
5828 		return NOTIFY_BAD;
5829 
5830 	fib_work->mlxsw_sp = router->mlxsw_sp;
5831 	fib_work->event = event;
5832 
5833 	switch (info->family) {
5834 	case AF_INET:
5835 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5836 		mlxsw_sp_router_fib4_event(fib_work, info);
5837 		break;
5838 	case AF_INET6:
5839 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5840 		mlxsw_sp_router_fib6_event(fib_work, info);
5841 		break;
5842 	case RTNL_FAMILY_IPMR:
5843 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5844 		mlxsw_sp_router_fibmr_event(fib_work, info);
5845 		break;
5846 	}
5847 
5848 	mlxsw_core_schedule_work(&fib_work->work);
5849 
5850 	return NOTIFY_DONE;
5851 }
5852 
5853 static struct mlxsw_sp_rif *
5854 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5855 			 const struct net_device *dev)
5856 {
5857 	int i;
5858 
5859 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5860 		if (mlxsw_sp->router->rifs[i] &&
5861 		    mlxsw_sp->router->rifs[i]->dev == dev)
5862 			return mlxsw_sp->router->rifs[i];
5863 
5864 	return NULL;
5865 }
5866 
5867 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5868 {
5869 	char ritr_pl[MLXSW_REG_RITR_LEN];
5870 	int err;
5871 
5872 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5873 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5874 	if (WARN_ON_ONCE(err))
5875 		return err;
5876 
5877 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
5878 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5879 }
5880 
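/* The netdevice behind this RIF is going away. Disable the RIF in HW and
 * remove the nexthops and neighbour entries that depend on it.
 */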
5881 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
5882 					  struct mlxsw_sp_rif *rif)
5883 {
5884 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
5885 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
5886 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
5887 }
5888 
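/* Determine whether an address event should result in RIF configuration: a
 * RIF is needed once the first address is added, and can be removed only
 * after the last IPv4 and IPv6 addresses are gone, unless the netdevice is
 * an L3 slave, in which case the VRF code manages the RIF.
 */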
5889 static bool
5890 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5891 			   unsigned long event)
5892 {
5893 	struct inet6_dev *inet6_dev;
5894 	bool addr_list_empty = true;
5895 	struct in_device *idev;
5896 
5897 	switch (event) {
5898 	case NETDEV_UP:
		return !rif;
5900 	case NETDEV_DOWN:
5901 		idev = __in_dev_get_rtnl(dev);
5902 		if (idev && idev->ifa_list)
5903 			addr_list_empty = false;
5904 
5905 		inet6_dev = __in6_dev_get(dev);
5906 		if (addr_list_empty && inet6_dev &&
5907 		    !list_empty(&inet6_dev->addr_list))
5908 			addr_list_empty = false;
5909 
5910 		if (rif && addr_list_empty &&
5911 		    !netif_is_l3_slave(rif->dev))
5912 			return true;
5913 		/* It is possible we already removed the RIF ourselves
5914 		 * if it was assigned to a netdev that is now a bridge
5915 		 * or LAG slave.
5916 		 */
5917 		return false;
5918 	}
5919 
5920 	return false;
5921 }
5922 
5923 static enum mlxsw_sp_rif_type
5924 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5925 		      const struct net_device *dev)
5926 {
5927 	enum mlxsw_sp_fid_type type;
5928 
5929 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5930 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
5931 
5932 	/* Otherwise RIF type is derived from the type of the underlying FID. */
5933 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5934 		type = MLXSW_SP_FID_TYPE_8021Q;
5935 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5936 		type = MLXSW_SP_FID_TYPE_8021Q;
5937 	else if (netif_is_bridge_master(dev))
5938 		type = MLXSW_SP_FID_TYPE_8021D;
5939 	else
5940 		type = MLXSW_SP_FID_TYPE_RFID;
5941 
5942 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5943 }
5944 
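/* Allocate the lowest free RIF index. */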
5945 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5946 {
5947 	int i;
5948 
5949 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5950 		if (!mlxsw_sp->router->rifs[i]) {
5951 			*p_rif_index = i;
5952 			return 0;
5953 		}
5954 	}
5955 
5956 	return -ENOBUFS;
5957 }
5958 
5959 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5960 					       u16 vr_id,
5961 					       struct net_device *l3_dev)
5962 {
5963 	struct mlxsw_sp_rif *rif;
5964 
5965 	rif = kzalloc(rif_size, GFP_KERNEL);
5966 	if (!rif)
5967 		return NULL;
5968 
5969 	INIT_LIST_HEAD(&rif->nexthop_list);
5970 	INIT_LIST_HEAD(&rif->neigh_list);
5971 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5972 	rif->mtu = l3_dev->mtu;
5973 	rif->vr_id = vr_id;
5974 	rif->dev = l3_dev;
5975 	rif->rif_index = rif_index;
5976 
5977 	return rif;
5978 }
5979 
5980 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5981 					   u16 rif_index)
5982 {
5983 	return mlxsw_sp->router->rifs[rif_index];
5984 }
5985 
5986 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5987 {
5988 	return rif->rif_index;
5989 }
5990 
5991 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5992 {
5993 	return lb_rif->common.rif_index;
5994 }
5995 
5996 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5997 {
5998 	return lb_rif->ul_vr_id;
5999 }
6000 
6001 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6002 {
6003 	return rif->dev->ifindex;
6004 }
6005 
6006 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6007 {
6008 	return rif->dev;
6009 }
6010 
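/* Create a RIF for a netdevice. The RIF type, and with it the ops used to
 * configure the device, is derived from the kind of netdevice, and the RIF
 * is bound to the virtual router of the netdevice's FIB table.
 */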
6011 static struct mlxsw_sp_rif *
6012 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6013 		    const struct mlxsw_sp_rif_params *params,
6014 		    struct netlink_ext_ack *extack)
6015 {
6016 	u32 tb_id = l3mdev_fib_table(params->dev);
6017 	const struct mlxsw_sp_rif_ops *ops;
6018 	struct mlxsw_sp_fid *fid = NULL;
6019 	enum mlxsw_sp_rif_type type;
6020 	struct mlxsw_sp_rif *rif;
6021 	struct mlxsw_sp_vr *vr;
6022 	u16 rif_index;
6023 	int err;
6024 
6025 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6026 	ops = mlxsw_sp->router->rif_ops_arr[type];
6027 
6028 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6029 	if (IS_ERR(vr))
6030 		return ERR_CAST(vr);
6031 	vr->rif_count++;
6032 
6033 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6034 	if (err) {
6035 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
6036 		goto err_rif_index_alloc;
6037 	}
6038 
6039 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6040 	if (!rif) {
6041 		err = -ENOMEM;
6042 		goto err_rif_alloc;
6043 	}
6044 	rif->mlxsw_sp = mlxsw_sp;
6045 	rif->ops = ops;
6046 
6047 	if (ops->fid_get) {
6048 		fid = ops->fid_get(rif);
6049 		if (IS_ERR(fid)) {
6050 			err = PTR_ERR(fid);
6051 			goto err_fid_get;
6052 		}
6053 		rif->fid = fid;
6054 	}
6055 
6056 	if (ops->setup)
6057 		ops->setup(rif, params);
6058 
6059 	err = ops->configure(rif);
6060 	if (err)
6061 		goto err_configure;
6062 
6063 	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
6064 	if (err)
6065 		goto err_mr_rif_add;
6066 
6067 	mlxsw_sp_rif_counters_alloc(rif);
6068 	mlxsw_sp->router->rifs[rif_index] = rif;
6069 
6070 	return rif;
6071 
6072 err_mr_rif_add:
6073 	ops->deconfigure(rif);
6074 err_configure:
6075 	if (fid)
6076 		mlxsw_sp_fid_put(fid);
6077 err_fid_get:
6078 	kfree(rif);
6079 err_rif_alloc:
6080 err_rif_index_alloc:
6081 	vr->rif_count--;
6082 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6083 	return ERR_PTR(err);
6084 }
6085 
6086 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6087 {
6088 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6089 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6090 	struct mlxsw_sp_fid *fid = rif->fid;
6091 	struct mlxsw_sp_vr *vr;
6092 
6093 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6094 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6095 
6096 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6097 	mlxsw_sp_rif_counters_free(rif);
6098 	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6099 	ops->deconfigure(rif);
	/* Loopback RIFs are not associated with a FID. */
	if (fid)
		mlxsw_sp_fid_put(fid);
6103 	kfree(rif);
6104 	vr->rif_count--;
6105 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6106 }
6107 
6108 static void
6109 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6110 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6111 {
6112 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6113 
6114 	params->vid = mlxsw_sp_port_vlan->vid;
6115 	params->lag = mlxsw_sp_port->lagged;
6116 	if (params->lag)
6117 		params->lag_id = mlxsw_sp_port->lag_id;
6118 	else
6119 		params->system_port = mlxsw_sp_port->local_port;
6120 }
6121 
6122 static int
6123 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6124 			       struct net_device *l3_dev,
6125 			       struct netlink_ext_ack *extack)
6126 {
6127 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6128 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6129 	u16 vid = mlxsw_sp_port_vlan->vid;
6130 	struct mlxsw_sp_rif *rif;
6131 	struct mlxsw_sp_fid *fid;
6132 	int err;
6133 
6134 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6135 	if (!rif) {
6136 		struct mlxsw_sp_rif_params params = {
6137 			.dev = l3_dev,
6138 		};
6139 
6140 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6141 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6142 		if (IS_ERR(rif))
6143 			return PTR_ERR(rif);
6144 	}
6145 
	/* The FID was already created during RIF creation; just take another
	 * reference.
	 */
6147 	fid = rif->ops->fid_get(rif);
6148 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6149 	if (err)
6150 		goto err_fid_port_vid_map;
6151 
6152 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6153 	if (err)
6154 		goto err_port_vid_learning_set;
6155 
6156 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6157 					BR_STATE_FORWARDING);
6158 	if (err)
6159 		goto err_port_vid_stp_set;
6160 
6161 	mlxsw_sp_port_vlan->fid = fid;
6162 
6163 	return 0;
6164 
6165 err_port_vid_stp_set:
6166 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6167 err_port_vid_learning_set:
6168 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6169 err_fid_port_vid_map:
6170 	mlxsw_sp_fid_put(fid);
6171 	return err;
6172 }
6173 
6174 void
6175 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6176 {
6177 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6178 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6179 	u16 vid = mlxsw_sp_port_vlan->vid;
6180 
6181 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6182 		return;
6183 
6184 	mlxsw_sp_port_vlan->fid = NULL;
6185 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6186 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6187 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If the router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
6191 	mlxsw_sp_fid_put(fid);
6192 }
6193 
6194 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6195 					     struct net_device *port_dev,
6196 					     unsigned long event, u16 vid,
6197 					     struct netlink_ext_ack *extack)
6198 {
6199 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6200 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6201 
6202 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6203 	if (WARN_ON(!mlxsw_sp_port_vlan))
6204 		return -EINVAL;
6205 
6206 	switch (event) {
6207 	case NETDEV_UP:
6208 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6209 						      l3_dev, extack);
6210 	case NETDEV_DOWN:
6211 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6212 		break;
6213 	}
6214 
6215 	return 0;
6216 }
6217 
6218 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6219 					unsigned long event,
6220 					struct netlink_ext_ack *extack)
6221 {
6222 	if (netif_is_bridge_port(port_dev) ||
6223 	    netif_is_lag_port(port_dev) ||
6224 	    netif_is_ovs_port(port_dev))
6225 		return 0;
6226 
6227 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6228 						 extack);
6229 }
6230 
6231 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6232 					 struct net_device *lag_dev,
6233 					 unsigned long event, u16 vid,
6234 					 struct netlink_ext_ack *extack)
6235 {
6236 	struct net_device *port_dev;
6237 	struct list_head *iter;
6238 	int err;
6239 
6240 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6241 		if (mlxsw_sp_port_dev_check(port_dev)) {
6242 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6243 								port_dev,
6244 								event, vid,
6245 								extack);
6246 			if (err)
6247 				return err;
6248 		}
6249 	}
6250 
6251 	return 0;
6252 }
6253 
6254 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6255 				       unsigned long event,
6256 				       struct netlink_ext_ack *extack)
6257 {
6258 	if (netif_is_bridge_port(lag_dev))
6259 		return 0;
6260 
6261 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6262 					     extack);
6263 }
6264 
6265 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6266 					  unsigned long event,
6267 					  struct netlink_ext_ack *extack)
6268 {
6269 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6270 	struct mlxsw_sp_rif_params params = {
6271 		.dev = l3_dev,
6272 	};
6273 	struct mlxsw_sp_rif *rif;
6274 
6275 	switch (event) {
6276 	case NETDEV_UP:
6277 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6278 		if (IS_ERR(rif))
6279 			return PTR_ERR(rif);
6280 		break;
6281 	case NETDEV_DOWN:
6282 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6283 		mlxsw_sp_rif_destroy(rif);
6284 		break;
6285 	}
6286 
6287 	return 0;
6288 }
6289 
6290 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6291 					unsigned long event,
6292 					struct netlink_ext_ack *extack)
6293 {
6294 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6295 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6296 
6297 	if (netif_is_bridge_port(vlan_dev))
6298 		return 0;
6299 
6300 	if (mlxsw_sp_port_dev_check(real_dev))
6301 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6302 							 event, vid, extack);
6303 	else if (netif_is_lag_master(real_dev))
6304 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6305 						     vid, extack);
6306 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6307 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6308 
6309 	return 0;
6310 }
6311 
6312 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6313 				     unsigned long event,
6314 				     struct netlink_ext_ack *extack)
6315 {
6316 	if (mlxsw_sp_port_dev_check(dev))
6317 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6318 	else if (netif_is_lag_master(dev))
6319 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6320 	else if (netif_is_bridge_master(dev))
6321 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6322 	else if (is_vlan_dev(dev))
6323 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6324 	else
6325 		return 0;
6326 }
6327 
6328 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6329 			    unsigned long event, void *ptr)
6330 {
6331 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6332 	struct net_device *dev = ifa->ifa_dev->dev;
6333 	struct mlxsw_sp *mlxsw_sp;
6334 	struct mlxsw_sp_rif *rif;
6335 	int err = 0;
6336 
6337 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6338 	if (event == NETDEV_UP)
6339 		goto out;
6340 
6341 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6342 	if (!mlxsw_sp)
6343 		goto out;
6344 
6345 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6346 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6347 		goto out;
6348 
6349 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6350 out:
6351 	return notifier_from_errno(err);
6352 }
6353 
6354 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6355 				  unsigned long event, void *ptr)
6356 {
6357 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6358 	struct net_device *dev = ivi->ivi_dev->dev;
6359 	struct mlxsw_sp *mlxsw_sp;
6360 	struct mlxsw_sp_rif *rif;
6361 	int err = 0;
6362 
6363 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6364 	if (!mlxsw_sp)
6365 		goto out;
6366 
6367 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6368 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6369 		goto out;
6370 
6371 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6372 out:
6373 	return notifier_from_errno(err);
6374 }
6375 
6376 struct mlxsw_sp_inet6addr_event_work {
6377 	struct work_struct work;
6378 	struct net_device *dev;
6379 	unsigned long event;
6380 };
6381 
6382 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6383 {
6384 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6385 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6386 	struct net_device *dev = inet6addr_work->dev;
6387 	unsigned long event = inet6addr_work->event;
6388 	struct mlxsw_sp *mlxsw_sp;
6389 	struct mlxsw_sp_rif *rif;
6390 
6391 	rtnl_lock();
6392 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6393 	if (!mlxsw_sp)
6394 		goto out;
6395 
6396 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6397 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6398 		goto out;
6399 
6400 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6401 out:
6402 	rtnl_unlock();
6403 	dev_put(dev);
6404 	kfree(inet6addr_work);
6405 }
6406 
6407 /* Called with rcu_read_lock() */
6408 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6409 			     unsigned long event, void *ptr)
6410 {
6411 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6412 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6413 	struct net_device *dev = if6->idev->dev;
6414 
6415 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6416 	if (event == NETDEV_UP)
6417 		return NOTIFY_DONE;
6418 
6419 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6420 		return NOTIFY_DONE;
6421 
6422 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6423 	if (!inet6addr_work)
6424 		return NOTIFY_BAD;
6425 
6426 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6427 	inet6addr_work->dev = dev;
6428 	inet6addr_work->event = event;
6429 	dev_hold(dev);
6430 	mlxsw_core_schedule_work(&inet6addr_work->work);
6431 
6432 	return NOTIFY_DONE;
6433 }
6434 
6435 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6436 				   unsigned long event, void *ptr)
6437 {
6438 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6439 	struct net_device *dev = i6vi->i6vi_dev->dev;
6440 	struct mlxsw_sp *mlxsw_sp;
6441 	struct mlxsw_sp_rif *rif;
6442 	int err = 0;
6443 
6444 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6445 	if (!mlxsw_sp)
6446 		goto out;
6447 
6448 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6449 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6450 		goto out;
6451 
6452 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6453 out:
6454 	return notifier_from_errno(err);
6455 }
6456 
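/* Update the MAC address and MTU of an existing RIF by querying its current
 * RITR configuration and re-writing it with the new values.
 */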
6457 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6458 			     const char *mac, int mtu)
6459 {
6460 	char ritr_pl[MLXSW_REG_RITR_LEN];
6461 	int err;
6462 
6463 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6464 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6465 	if (err)
6466 		return err;
6467 
6468 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6469 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6470 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6471 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6472 }
6473 
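/* Handle a change of MAC address or MTU on a netdevice with a RIF: move the
 * RIF's FDB entry to the new address, edit the RIF in place and roll back to
 * the old configuration on error.
 */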
6474 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6475 {
6476 	struct mlxsw_sp *mlxsw_sp;
6477 	struct mlxsw_sp_rif *rif;
6478 	u16 fid_index;
6479 	int err;
6480 
6481 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6482 	if (!mlxsw_sp)
6483 		return 0;
6484 
6485 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6486 	if (!rif)
6487 		return 0;
6488 	fid_index = mlxsw_sp_fid_index(rif->fid);
6489 
6490 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6491 	if (err)
6492 		return err;
6493 
6494 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6495 				dev->mtu);
6496 	if (err)
6497 		goto err_rif_edit;
6498 
6499 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6500 	if (err)
6501 		goto err_rif_fdb_op;
6502 
6503 	if (rif->mtu != dev->mtu) {
6504 		struct mlxsw_sp_vr *vr;
6505 
6506 		/* The RIF is relevant only to its mr_table instance, as unlike
6507 		 * unicast routing, in multicast routing a RIF cannot be shared
6508 		 * between several multicast routing tables.
6509 		 */
6510 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6511 		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
6512 	}
6513 
6514 	ether_addr_copy(rif->addr, dev->dev_addr);
6515 	rif->mtu = dev->mtu;
6516 
6517 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6518 
6519 	return 0;
6520 
6521 err_rif_fdb_op:
6522 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6523 err_rif_edit:
6524 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6525 	return err;
6526 }
6527 
6528 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6529 				  struct net_device *l3_dev,
6530 				  struct netlink_ext_ack *extack)
6531 {
6532 	struct mlxsw_sp_rif *rif;
6533 
	/* If the netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
6537 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6538 	if (rif)
6539 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6540 
6541 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6542 }
6543 
6544 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6545 				    struct net_device *l3_dev)
6546 {
6547 	struct mlxsw_sp_rif *rif;
6548 
6549 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6550 	if (!rif)
6551 		return;
6552 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6553 }
6554 
6555 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6556 				 struct netdev_notifier_changeupper_info *info)
6557 {
6558 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6559 	int err = 0;
6560 
6561 	if (!mlxsw_sp)
6562 		return 0;
6563 
6564 	switch (event) {
6565 	case NETDEV_PRECHANGEUPPER:
6566 		return 0;
6567 	case NETDEV_CHANGEUPPER:
6568 		if (info->linking) {
6569 			struct netlink_ext_ack *extack;
6570 
6571 			extack = netdev_notifier_info_to_extack(&info->info);
6572 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6573 		} else {
6574 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6575 		}
6576 		break;
6577 	}
6578 
6579 	return err;
6580 }
6581 
6582 static struct mlxsw_sp_rif_subport *
6583 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6584 {
6585 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6586 }
6587 
6588 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6589 				       const struct mlxsw_sp_rif_params *params)
6590 {
6591 	struct mlxsw_sp_rif_subport *rif_subport;
6592 
6593 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6594 	rif_subport->vid = params->vid;
6595 	rif_subport->lag = params->lag;
6596 	if (params->lag)
6597 		rif_subport->lag_id = params->lag_id;
6598 	else
6599 		rif_subport->system_port = params->system_port;
6600 }
6601 
6602 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6603 {
6604 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6605 	struct mlxsw_sp_rif_subport *rif_subport;
6606 	char ritr_pl[MLXSW_REG_RITR_LEN];
6607 
6608 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6609 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6610 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6611 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6612 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6613 				  rif_subport->lag ? rif_subport->lag_id :
6614 						     rif_subport->system_port,
6615 				  rif_subport->vid);
6616 
6617 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6618 }
6619 
6620 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6621 {
6622 	int err;
6623 
6624 	err = mlxsw_sp_rif_subport_op(rif, true);
6625 	if (err)
6626 		return err;
6627 
6628 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6629 				  mlxsw_sp_fid_index(rif->fid), true);
6630 	if (err)
6631 		goto err_rif_fdb_op;
6632 
6633 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6634 	return 0;
6635 
6636 err_rif_fdb_op:
6637 	mlxsw_sp_rif_subport_op(rif, false);
6638 	return err;
6639 }
6640 
6641 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6642 {
6643 	struct mlxsw_sp_fid *fid = rif->fid;
6644 
6645 	mlxsw_sp_fid_rif_set(fid, NULL);
6646 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6647 			    mlxsw_sp_fid_index(fid), false);
6648 	mlxsw_sp_rif_subport_op(rif, false);
6649 }
6650 
6651 static struct mlxsw_sp_fid *
6652 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6653 {
6654 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6655 }
6656 
6657 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6658 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
6659 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
6660 	.setup			= mlxsw_sp_rif_subport_setup,
6661 	.configure		= mlxsw_sp_rif_subport_configure,
6662 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
6663 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
6664 };
6665 
6666 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6667 				    enum mlxsw_reg_ritr_if_type type,
6668 				    u16 vid_fid, bool enable)
6669 {
6670 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6671 	char ritr_pl[MLXSW_REG_RITR_LEN];
6672 
6673 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6674 			    rif->dev->mtu);
6675 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6676 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6677 
6678 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6679 }
6680 
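/* The router port is a virtual port one above the highest front panel port.
 * It is used as the flooding destination when a FID needs to flood traffic
 * towards the router.
 */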
6681 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6682 {
6683 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6684 }
6685 
6686 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6687 {
6688 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6689 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6690 	int err;
6691 
6692 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6693 	if (err)
6694 		return err;
6695 
6696 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6697 				     mlxsw_sp_router_port(mlxsw_sp), true);
6698 	if (err)
6699 		goto err_fid_mc_flood_set;
6700 
6701 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6702 				     mlxsw_sp_router_port(mlxsw_sp), true);
6703 	if (err)
6704 		goto err_fid_bc_flood_set;
6705 
6706 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6707 				  mlxsw_sp_fid_index(rif->fid), true);
6708 	if (err)
6709 		goto err_rif_fdb_op;
6710 
6711 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6712 	return 0;
6713 
6714 err_rif_fdb_op:
6715 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6716 			       mlxsw_sp_router_port(mlxsw_sp), false);
6717 err_fid_bc_flood_set:
6718 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6719 			       mlxsw_sp_router_port(mlxsw_sp), false);
6720 err_fid_mc_flood_set:
6721 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6722 	return err;
6723 }
6724 
6725 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6726 {
6727 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6728 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6729 	struct mlxsw_sp_fid *fid = rif->fid;
6730 
6731 	mlxsw_sp_fid_rif_set(fid, NULL);
6732 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6733 			    mlxsw_sp_fid_index(fid), false);
6734 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6735 			       mlxsw_sp_router_port(mlxsw_sp), false);
6736 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6737 			       mlxsw_sp_router_port(mlxsw_sp), false);
6738 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6739 }
6740 
6741 static struct mlxsw_sp_fid *
6742 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6743 {
6744 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6745 
6746 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6747 }
6748 
6749 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
6750 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
6751 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6752 	.configure		= mlxsw_sp_rif_vlan_configure,
6753 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
6754 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
6755 };
6756 
6757 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6758 {
6759 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6760 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6761 	int err;
6762 
6763 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6764 				       true);
6765 	if (err)
6766 		return err;
6767 
6768 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6769 				     mlxsw_sp_router_port(mlxsw_sp), true);
6770 	if (err)
6771 		goto err_fid_mc_flood_set;
6772 
6773 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6774 				     mlxsw_sp_router_port(mlxsw_sp), true);
6775 	if (err)
6776 		goto err_fid_bc_flood_set;
6777 
6778 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6779 				  mlxsw_sp_fid_index(rif->fid), true);
6780 	if (err)
6781 		goto err_rif_fdb_op;
6782 
6783 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6784 	return 0;
6785 
6786 err_rif_fdb_op:
6787 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6788 			       mlxsw_sp_router_port(mlxsw_sp), false);
6789 err_fid_bc_flood_set:
6790 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6791 			       mlxsw_sp_router_port(mlxsw_sp), false);
6792 err_fid_mc_flood_set:
6793 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6794 	return err;
6795 }
6796 
6797 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6798 {
6799 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6800 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6801 	struct mlxsw_sp_fid *fid = rif->fid;
6802 
6803 	mlxsw_sp_fid_rif_set(fid, NULL);
6804 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6805 			    mlxsw_sp_fid_index(fid), false);
6806 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6807 			       mlxsw_sp_router_port(mlxsw_sp), false);
6808 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6809 			       mlxsw_sp_router_port(mlxsw_sp), false);
6810 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6811 }
6812 
6813 static struct mlxsw_sp_fid *
6814 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6815 {
6816 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
6817 }
6818 
6819 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
6820 	.type			= MLXSW_SP_RIF_TYPE_FID,
6821 	.rif_size		= sizeof(struct mlxsw_sp_rif),
6822 	.configure		= mlxsw_sp_rif_fid_configure,
6823 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
6824 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
6825 };
6826 
6827 static struct mlxsw_sp_rif_ipip_lb *
6828 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6829 {
6830 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6831 }
6832 
6833 static void
6834 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6835 			   const struct mlxsw_sp_rif_params *params)
6836 {
6837 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6838 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6839 
6840 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6841 				 common);
6842 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6843 	rif_lb->lb_config = params_lb->lb_config;
6844 }
6845 
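/* Program the loopback RIF that backs an IP-in-IP tunnel netdevice. The
 * underlay lookup for tunneled packets is done in the virtual router ul_vr.
 */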
6846 static int
6847 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6848 			struct mlxsw_sp_vr *ul_vr, bool enable)
6849 {
6850 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6851 	struct mlxsw_sp_rif *rif = &lb_rif->common;
6852 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6853 	char ritr_pl[MLXSW_REG_RITR_LEN];
6854 	u32 saddr4;
6855 
6856 	switch (lb_cf.ul_protocol) {
6857 	case MLXSW_SP_L3_PROTO_IPV4:
6858 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6859 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6860 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
6861 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6862 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6863 			    ul_vr->id, saddr4, lb_cf.okey);
6864 		break;
6865 
6866 	case MLXSW_SP_L3_PROTO_IPV6:
6867 		return -EAFNOSUPPORT;
6868 	}
6869 
6870 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6871 }
6872 
6873 static int
6874 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6875 {
6876 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6877 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6878 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6879 	struct mlxsw_sp_vr *ul_vr;
6880 	int err;
6881 
6882 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6883 	if (IS_ERR(ul_vr))
6884 		return PTR_ERR(ul_vr);
6885 
6886 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6887 	if (err)
6888 		goto err_loopback_op;
6889 
6890 	lb_rif->ul_vr_id = ul_vr->id;
	ul_vr->rif_count++;
6892 	return 0;
6893 
6894 err_loopback_op:
6895 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
6896 	return err;
6897 }
6898 
6899 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6900 {
6901 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6902 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6903 	struct mlxsw_sp_vr *ul_vr;
6904 
6905 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6906 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6907 
	ul_vr->rif_count--;
6909 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
6910 }
6911 
6912 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
6913 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
6914 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
6915 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
6916 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
6917 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
6918 };
6919 
6920 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
6921 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
6922 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
6923 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
6924 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
6925 };
6926 
6927 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6928 {
6929 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6930 
6931 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6932 					 sizeof(struct mlxsw_sp_rif *),
6933 					 GFP_KERNEL);
6934 	if (!mlxsw_sp->router->rifs)
6935 		return -ENOMEM;
6936 
6937 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6938 
6939 	return 0;
6940 }
6941 
6942 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6943 {
6944 	int i;
6945 
6946 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6947 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6948 
6949 	kfree(mlxsw_sp->router->rifs);
6950 }
6951 
6952 static int
6953 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6954 {
6955 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6956 
6957 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6958 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6959 }
6960 
6961 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6962 {
6963 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6964 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6965 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6966 }
6967 
6968 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6969 {
6970 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
6971 }
6972 
6973 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
6974 {
6975 	struct mlxsw_sp_router *router;
6976 
6977 	/* Flush pending FIB notifications and then flush the device's
6978 	 * table before requesting another dump. The FIB notification
6979 	 * block is unregistered, so no need to take RTNL.
6980 	 */
6981 	mlxsw_core_flush_owq();
6982 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6983 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
6984 }
6985 
6986 #ifdef CONFIG_IP_ROUTE_MULTIPATH
6987 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6988 {
6989 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
6990 }
6991 
6992 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6993 {
6994 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
6995 }
6996 
6997 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6998 {
6999 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7000 
7001 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7002 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7003 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7004 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7005 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7006 	if (only_l3)
7007 		return;
7008 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7009 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7010 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7011 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7012 }
7013 
7014 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7015 {
7016 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7017 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7018 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7019 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7020 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7021 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7022 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7023 }
7024 
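/* Seed the ECMP hash with random data and enable hashing on the IPv4/IPv6
 * header fields; for IPv4, the L4 ports are included only when the kernel's
 * multipath hash policy is L4-aware.
 */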
7025 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7026 {
7027 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7028 	u32 seed;
7029 
7030 	get_random_bytes(&seed, sizeof(seed));
7031 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7032 	mlxsw_sp_mp4_hash_init(recr2_pl);
7033 	mlxsw_sp_mp6_hash_init(recr2_pl);
7034 
7035 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7036 }
7037 #else
7038 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7039 {
7040 	return 0;
7041 }
7042 #endif
7043 
7044 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7045 {
7046 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7047 	unsigned int i;
7048 
7049 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7050 
	/* HW determines the switch priority based on the DSCP bits, while
	 * the kernel still does so based on the full ToS field. Since the
	 * bit layouts differ, translate each DSCP value to the priority the
	 * kernel would derive from the corresponding ToS, skipping the two
	 * least-significant ECN bits.
	 */
7056 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7057 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7058 
7059 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7060 }
7061 
7062 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7063 {
7064 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7065 	u64 max_rifs;
7067 
7068 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7069 		return -EIO;
7070 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7071 
7072 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7073 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7074 	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7079 }
7080 
7081 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7082 {
7083 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7084 
7085 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7086 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7087 }
7088 
7089 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7090 {
7091 	struct mlxsw_sp_router *router;
7092 	int err;
7093 
	router = kzalloc(sizeof(*router), GFP_KERNEL);
7095 	if (!router)
7096 		return -ENOMEM;
7097 	mlxsw_sp->router = router;
7098 	router->mlxsw_sp = mlxsw_sp;
7099 
7100 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7101 	err = __mlxsw_sp_router_init(mlxsw_sp);
7102 	if (err)
7103 		goto err_router_init;
7104 
7105 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7106 	if (err)
7107 		goto err_rifs_init;
7108 
7109 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7110 	if (err)
7111 		goto err_ipips_init;
7112 
7113 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7114 			      &mlxsw_sp_nexthop_ht_params);
7115 	if (err)
7116 		goto err_nexthop_ht_init;
7117 
7118 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7119 			      &mlxsw_sp_nexthop_group_ht_params);
7120 	if (err)
7121 		goto err_nexthop_group_ht_init;
7122 
7123 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7124 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7125 	if (err)
7126 		goto err_lpm_init;
7127 
7128 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7129 	if (err)
7130 		goto err_mr_init;
7131 
7132 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7133 	if (err)
7134 		goto err_vrs_init;
7135 
7136 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7137 	if (err)
7138 		goto err_neigh_init;
7139 
7140 	mlxsw_sp->router->netevent_nb.notifier_call =
7141 		mlxsw_sp_router_netevent_event;
7142 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7143 	if (err)
7144 		goto err_register_netevent_notifier;
7145 
7146 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7147 	if (err)
7148 		goto err_mp_hash_init;
7149 
7150 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7151 	if (err)
7152 		goto err_dscp_init;
7153 
7154 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7155 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7156 				    mlxsw_sp_router_fib_dump_flush);
7157 	if (err)
7158 		goto err_register_fib_notifier;
7159 
7160 	return 0;
7161 
7162 err_register_fib_notifier:
7163 err_dscp_init:
7164 err_mp_hash_init:
7165 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7166 err_register_netevent_notifier:
7167 	mlxsw_sp_neigh_fini(mlxsw_sp);
7168 err_neigh_init:
7169 	mlxsw_sp_vrs_fini(mlxsw_sp);
7170 err_vrs_init:
7171 	mlxsw_sp_mr_fini(mlxsw_sp);
7172 err_mr_init:
7173 	mlxsw_sp_lpm_fini(mlxsw_sp);
7174 err_lpm_init:
7175 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7176 err_nexthop_group_ht_init:
7177 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7178 err_nexthop_ht_init:
7179 	mlxsw_sp_ipips_fini(mlxsw_sp);
7180 err_ipips_init:
7181 	mlxsw_sp_rifs_fini(mlxsw_sp);
7182 err_rifs_init:
7183 	__mlxsw_sp_router_fini(mlxsw_sp);
7184 err_router_init:
7185 	kfree(mlxsw_sp->router);
7186 	return err;
7187 }
7188 
7189 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7190 {
7191 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7192 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7193 	mlxsw_sp_neigh_fini(mlxsw_sp);
7194 	mlxsw_sp_vrs_fini(mlxsw_sp);
7195 	mlxsw_sp_mr_fini(mlxsw_sp);
7196 	mlxsw_sp_lpm_fini(mlxsw_sp);
7197 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7198 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7199 	mlxsw_sp_ipips_fini(mlxsw_sp);
7200 	mlxsw_sp_rifs_fini(mlxsw_sp);
7201 	__mlxsw_sp_router_fini(mlxsw_sp);
7202 	kfree(mlxsw_sp->router);
7203 }
7204