xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision 9977a8c3497a8f7f7f951994f298a8e4d961234f)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <net/netevent.h>
52 #include <net/neighbour.h>
53 #include <net/arp.h>
54 #include <net/ip_fib.h>
55 #include <net/ip6_fib.h>
56 #include <net/fib_rules.h>
57 #include <net/ip_tunnels.h>
58 #include <net/l3mdev.h>
59 #include <net/addrconf.h>
60 #include <net/ndisc.h>
61 #include <net/ipv6.h>
62 #include <net/fib_notifier.h>
63 
64 #include "spectrum.h"
65 #include "core.h"
66 #include "reg.h"
67 #include "spectrum_cnt.h"
68 #include "spectrum_dpipe.h"
69 #include "spectrum_ipip.h"
70 #include "spectrum_mr.h"
71 #include "spectrum_mr_tcam.h"
72 #include "spectrum_router.h"
73 
74 struct mlxsw_sp_fib;
75 struct mlxsw_sp_vr;
76 struct mlxsw_sp_lpm_tree;
77 struct mlxsw_sp_rif_ops;
78 
/* Per-ASIC router state; one instance per mlxsw_sp. */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;	/* array indexed by RIF index */
	struct mlxsw_sp_vr *vrs;	/* array of virtual routers */
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;	/* all usable trees */
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;	/* NOTE(review): presumably set on offload failure --
			 * confirm against the FIB abort path.
			 */
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};

/* Router interface (RIF): device-side representation of an L3 netdev. */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	/* The counter indices below are only meaningful while the matching
	 * *_valid flag is set (see mlxsw_sp_rif_counter_alloc/free).
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

/* Parameters for creating a RIF on a port or LAG netdevice. */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;	/* valid when !lag */
		u16 lag_id;		/* valid when lag */
	};
	u16 vid;
	bool lag;
};

/* RIF backed by a {port or LAG, VID} sub-port. */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* Loopback RIF anchoring an IP-in-IP tunnel. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

/* Per-RIF-type operations.  rif_size is the size of the type-specific
 * structure embedding struct mlxsw_sp_rif as its first member.
 */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
167 
168 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
169 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
170 				  struct mlxsw_sp_lpm_tree *lpm_tree);
171 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
172 				     const struct mlxsw_sp_fib *fib,
173 				     u8 tree_id);
174 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
175 				       const struct mlxsw_sp_fib *fib);
176 
177 static unsigned int *
178 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
179 			   enum mlxsw_sp_rif_counter_dir dir)
180 {
181 	switch (dir) {
182 	case MLXSW_SP_RIF_COUNTER_EGRESS:
183 		return &rif->counter_egress;
184 	case MLXSW_SP_RIF_COUNTER_INGRESS:
185 		return &rif->counter_ingress;
186 	}
187 	return NULL;
188 }
189 
190 static bool
191 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
192 			       enum mlxsw_sp_rif_counter_dir dir)
193 {
194 	switch (dir) {
195 	case MLXSW_SP_RIF_COUNTER_EGRESS:
196 		return rif->counter_egress_valid;
197 	case MLXSW_SP_RIF_COUNTER_INGRESS:
198 		return rif->counter_ingress_valid;
199 	}
200 	return false;
201 }
202 
203 static void
204 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
205 			       enum mlxsw_sp_rif_counter_dir dir,
206 			       bool valid)
207 {
208 	switch (dir) {
209 	case MLXSW_SP_RIF_COUNTER_EGRESS:
210 		rif->counter_egress_valid = valid;
211 		break;
212 	case MLXSW_SP_RIF_COUNTER_INGRESS:
213 		rif->counter_ingress_valid = valid;
214 		break;
215 	}
216 }
217 
218 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
219 				     unsigned int counter_index, bool enable,
220 				     enum mlxsw_sp_rif_counter_dir dir)
221 {
222 	char ritr_pl[MLXSW_REG_RITR_LEN];
223 	bool is_egress = false;
224 	int err;
225 
226 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
227 		is_egress = true;
228 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
229 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
230 	if (err)
231 		return err;
232 
233 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
234 				    is_egress);
235 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
236 }
237 
238 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
239 				   struct mlxsw_sp_rif *rif,
240 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
241 {
242 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
243 	unsigned int *p_counter_index;
244 	bool valid;
245 	int err;
246 
247 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
248 	if (!valid)
249 		return -EINVAL;
250 
251 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 	if (!p_counter_index)
253 		return -EINVAL;
254 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
255 			     MLXSW_REG_RICNT_OPCODE_NOP);
256 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
257 	if (err)
258 		return err;
259 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
260 	return 0;
261 }
262 
263 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
264 				      unsigned int counter_index)
265 {
266 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
267 
268 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
269 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
270 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
271 }
272 
273 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
274 			       struct mlxsw_sp_rif *rif,
275 			       enum mlxsw_sp_rif_counter_dir dir)
276 {
277 	unsigned int *p_counter_index;
278 	int err;
279 
280 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
281 	if (!p_counter_index)
282 		return -EINVAL;
283 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
284 				     p_counter_index);
285 	if (err)
286 		return err;
287 
288 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
289 	if (err)
290 		goto err_counter_clear;
291 
292 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
293 					*p_counter_index, true, dir);
294 	if (err)
295 		goto err_counter_edit;
296 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
297 	return 0;
298 
299 err_counter_edit:
300 err_counter_clear:
301 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
302 			      *p_counter_index);
303 	return err;
304 }
305 
/* Unbind and release the @dir flow counter of @rif, if one is bound.
 * Safe to call when no counter is valid (it is then a no-op).
 */
void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	/* Detach the counter from the RIF before returning it to the pool. */
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
324 
325 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
326 {
327 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
328 	struct devlink *devlink;
329 
330 	devlink = priv_to_devlink(mlxsw_sp->core);
331 	if (!devlink_dpipe_table_counter_enabled(devlink,
332 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
333 		return;
334 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
335 }
336 
337 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
338 {
339 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
340 
341 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
342 }
343 
344 static struct mlxsw_sp_rif *
345 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
346 			 const struct net_device *dev);
347 
/* Number of distinct prefix lengths: 0..128 (IPv6 sized, also covers IPv4). */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* One bit per prefix length in use. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate over every prefix length set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
356 
357 static bool
358 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
359 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
360 {
361 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
362 }
363 
364 static void
365 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
366 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
367 {
368 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
369 }
370 
371 static void
372 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
373 			  unsigned char prefix_len)
374 {
375 	set_bit(prefix_len, prefix_usage->b);
376 }
377 
378 static void
379 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
380 			    unsigned char prefix_len)
381 {
382 	clear_bit(prefix_len, prefix_usage->b);
383 }
384 
/* LPM key; the address buffer is IPv6-sized and also holds IPv4. */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};

struct mlxsw_sp_nexthop_group;

/* One prefix in a FIB, hashed by key; groups the entries for that prefix. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

/* Decap state of an IPIP_DECAP entry. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

/* Protocol-agnostic part of a route. */
struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

/* IPv4-specific route attributes. */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

/* IPv6 route; aggregates one or more kernel rt6_info entries. */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;	/* number of entries on rt6_list */
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

/* One hardware LPM tree and its per-prefix-length usage accounting. */
struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

/* One routing table (per protocol) inside a virtual router. */
struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

/* Virtual router: maps a kernel FIB table to hardware tables. */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};
471 
472 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
473 
/* Allocate a FIB for @proto in @vr, take a reference on the current
 * default LPM tree for that protocol and bind the VR to it in hardware.
 * Returns the new FIB or an ERR_PTR.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	/* Point the VR at the chosen tree in hardware. */
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
505 
/* Tear down a FIB in reverse order of mlxsw_sp_fib_create().  The FIB
 * must be empty (node_list) by the time it is destroyed.
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
515 
516 static struct mlxsw_sp_lpm_tree *
517 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
518 {
519 	static struct mlxsw_sp_lpm_tree *lpm_tree;
520 	int i;
521 
522 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
523 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
524 		if (lpm_tree->ref_count == 0)
525 			return lpm_tree;
526 	}
527 	return NULL;
528 }
529 
530 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
531 				   struct mlxsw_sp_lpm_tree *lpm_tree)
532 {
533 	char ralta_pl[MLXSW_REG_RALTA_LEN];
534 
535 	mlxsw_reg_ralta_pack(ralta_pl, true,
536 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
537 			     lpm_tree->id);
538 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
539 }
540 
541 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
542 				   struct mlxsw_sp_lpm_tree *lpm_tree)
543 {
544 	char ralta_pl[MLXSW_REG_RALTA_LEN];
545 
546 	mlxsw_reg_ralta_pack(ralta_pl, false,
547 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
548 			     lpm_tree->id);
549 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
550 }
551 
/* Program the tree's bin structure (RALST): the highest used prefix
 * length becomes the root bin and the remaining used lengths (except 0)
 * are chained one after another.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* The iteration is in ascending order, so root_bin ends up as the
	 * highest set prefix length.
	 */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
575 
/* Claim an unused tree slot, allocate it in hardware and program its
 * structure from @prefix_usage.  Returns a tree with ref_count 1, or an
 * ERR_PTR (-EBUSY when no free slot exists).
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	/* Fresh tree: no prefixes referenced yet. */
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
607 
/* Release the hardware tree; the slot itself is recycled by virtue of
 * its ref_count having dropped to zero.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
613 
614 static struct mlxsw_sp_lpm_tree *
615 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
616 		      struct mlxsw_sp_prefix_usage *prefix_usage,
617 		      enum mlxsw_sp_l3proto proto)
618 {
619 	struct mlxsw_sp_lpm_tree *lpm_tree;
620 	int i;
621 
622 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
623 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
624 		if (lpm_tree->ref_count != 0 &&
625 		    lpm_tree->proto == proto &&
626 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
627 					     prefix_usage)) {
628 			mlxsw_sp_lpm_tree_hold(lpm_tree);
629 			return lpm_tree;
630 		}
631 	}
632 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
633 }
634 
635 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
636 {
637 	lpm_tree->ref_count++;
638 }
639 
640 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
641 				  struct mlxsw_sp_lpm_tree *lpm_tree)
642 {
643 	if (--lpm_tree->ref_count == 0)
644 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
645 }
646 
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

/* Discover the number of LPM trees, allocate the tree array, and create
 * one initially-empty default tree per protocol.  Returns 0 or a
 * negative errno.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	/* Tree IDs start above the reserved tree 0. */
	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
697 
698 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
699 {
700 	struct mlxsw_sp_lpm_tree *lpm_tree;
701 
702 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
703 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
704 
705 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
706 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
707 
708 	kfree(mlxsw_sp->router->lpm.trees);
709 }
710 
711 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
712 {
713 	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
714 }
715 
716 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
717 {
718 	struct mlxsw_sp_vr *vr;
719 	int i;
720 
721 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
722 		vr = &mlxsw_sp->router->vrs[i];
723 		if (!mlxsw_sp_vr_is_used(vr))
724 			return vr;
725 	}
726 	return NULL;
727 }
728 
729 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
730 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
731 {
732 	char raltb_pl[MLXSW_REG_RALTB_LEN];
733 
734 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
735 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
736 			     tree_id);
737 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
738 }
739 
740 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
741 				       const struct mlxsw_sp_fib *fib)
742 {
743 	char raltb_pl[MLXSW_REG_RALTB_LEN];
744 
745 	/* Bind to tree 0 which is default */
746 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
747 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
748 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
749 }
750 
751 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
752 {
753 	/* For our purpose, squash main, default and local tables into one */
754 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
755 		tb_id = RT_TABLE_MAIN;
756 	return tb_id;
757 }
758 
759 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
760 					    u32 tb_id)
761 {
762 	struct mlxsw_sp_vr *vr;
763 	int i;
764 
765 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
766 
767 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
768 		vr = &mlxsw_sp->router->vrs[i];
769 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
770 			return vr;
771 	}
772 	return NULL;
773 }
774 
775 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
776 					    enum mlxsw_sp_l3proto proto)
777 {
778 	switch (proto) {
779 	case MLXSW_SP_L3_PROTO_IPV4:
780 		return vr->fib4;
781 	case MLXSW_SP_L3_PROTO_IPV6:
782 		return vr->fib6;
783 	}
784 	return NULL;
785 }
786 
787 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
788 					      u32 tb_id,
789 					      struct netlink_ext_ack *extack)
790 {
791 	struct mlxsw_sp_vr *vr;
792 	int err;
793 
794 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
795 	if (!vr) {
796 		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
797 		return ERR_PTR(-EBUSY);
798 	}
799 	vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
800 	if (IS_ERR(vr->fib4))
801 		return ERR_CAST(vr->fib4);
802 	vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
803 	if (IS_ERR(vr->fib6)) {
804 		err = PTR_ERR(vr->fib6);
805 		goto err_fib6_create;
806 	}
807 	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
808 						 MLXSW_SP_L3_PROTO_IPV4);
809 	if (IS_ERR(vr->mr4_table)) {
810 		err = PTR_ERR(vr->mr4_table);
811 		goto err_mr_table_create;
812 	}
813 	vr->tb_id = tb_id;
814 	return vr;
815 
816 err_mr_table_create:
817 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
818 	vr->fib6 = NULL;
819 err_fib6_create:
820 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
821 	vr->fib4 = NULL;
822 	return ERR_PTR(err);
823 }
824 
/* Tear down a VR's tables in reverse order of mlxsw_sp_vr_create().
 * NULLing the pointers marks the VR slot as unused again (see
 * mlxsw_sp_vr_is_used()).
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
835 
836 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
837 					   struct netlink_ext_ack *extack)
838 {
839 	struct mlxsw_sp_vr *vr;
840 
841 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
842 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
843 	if (!vr)
844 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
845 	return vr;
846 }
847 
848 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
849 {
850 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
851 	    list_empty(&vr->fib6->node_list) &&
852 	    mlxsw_sp_mr_table_empty(vr->mr4_table))
853 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
854 }
855 
856 static bool
857 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
858 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
859 {
860 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
861 
862 	if (!mlxsw_sp_vr_is_used(vr))
863 		return false;
864 	if (fib->lpm_tree->id == tree_id)
865 		return true;
866 	return false;
867 }
868 
/* Switch @fib over to @new_tree, transferring the reference.  On bind
 * failure the old tree is restored, leaving the FIB unchanged.
 */
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}
889 
890 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
891 					 struct mlxsw_sp_fib *fib,
892 					 struct mlxsw_sp_lpm_tree *new_tree)
893 {
894 	enum mlxsw_sp_l3proto proto = fib->proto;
895 	struct mlxsw_sp_lpm_tree *old_tree;
896 	u8 old_id, new_id = new_tree->id;
897 	struct mlxsw_sp_vr *vr;
898 	int i, err;
899 
900 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
901 	old_id = old_tree->id;
902 
903 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
904 		vr = &mlxsw_sp->router->vrs[i];
905 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
906 			continue;
907 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
908 						   mlxsw_sp_vr_fib(vr, proto),
909 						   new_tree);
910 		if (err)
911 			goto err_tree_replace;
912 	}
913 
914 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
915 	       sizeof(new_tree->prefix_ref_count));
916 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
917 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
918 
919 	return 0;
920 
921 err_tree_replace:
922 	for (i--; i >= 0; i--) {
923 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
924 			continue;
925 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
926 					     mlxsw_sp_vr_fib(vr, proto),
927 					     old_tree);
928 	}
929 	return err;
930 }
931 
932 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
933 {
934 	struct mlxsw_sp_vr *vr;
935 	u64 max_vrs;
936 	int i;
937 
938 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
939 		return -EIO;
940 
941 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
942 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
943 					GFP_KERNEL);
944 	if (!mlxsw_sp->router->vrs)
945 		return -ENOMEM;
946 
947 	for (i = 0; i < max_vrs; i++) {
948 		vr = &mlxsw_sp->router->vrs[i];
949 		vr->id = i;
950 	}
951 
952 	return 0;
953 }
954 
955 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
956 
/* Flush pending FIB work and device tables, then free all VRs. */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}
970 
971 static struct net_device *
972 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
973 {
974 	struct ip_tunnel *tun = netdev_priv(ol_dev);
975 	struct net *net = dev_net(ol_dev);
976 
977 	return __dev_get_by_index(net, tun->parms.link);
978 }
979 
980 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
981 {
982 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
983 
984 	if (d)
985 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
986 	else
987 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
988 }
989 
990 static struct mlxsw_sp_rif *
991 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
992 		    const struct mlxsw_sp_rif_params *params,
993 		    struct netlink_ext_ack *extack);
994 
/* Create the loopback RIF anchoring an IP-in-IP tunnel of type @ipipt
 * whose overlay device is @ol_dev.  Returns the loopback RIF or an
 * ERR_PTR.
 */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
1017 
1018 static struct mlxsw_sp_ipip_entry *
1019 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1020 			  enum mlxsw_sp_ipip_type ipipt,
1021 			  struct net_device *ol_dev)
1022 {
1023 	struct mlxsw_sp_ipip_entry *ipip_entry;
1024 	struct mlxsw_sp_ipip_entry *ret = NULL;
1025 
1026 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1027 	if (!ipip_entry)
1028 		return ERR_PTR(-ENOMEM);
1029 
1030 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1031 							    ol_dev, NULL);
1032 	if (IS_ERR(ipip_entry->ol_lb)) {
1033 		ret = ERR_CAST(ipip_entry->ol_lb);
1034 		goto err_ol_ipip_lb_create;
1035 	}
1036 
1037 	ipip_entry->ipipt = ipipt;
1038 	ipip_entry->ol_dev = ol_dev;
1039 	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
1040 
1041 	return ipip_entry;
1042 
1043 err_ol_ipip_lb_create:
1044 	kfree(ipip_entry);
1045 	return ret;
1046 }
1047 
/* Free an IPIP entry: tear down the loopback RIF it owns before freeing
 * the entry itself. Inverse of mlxsw_sp_ipip_entry_alloc().
 */
static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}
1054 
1055 static bool
1056 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1057 				  const enum mlxsw_sp_l3proto ul_proto,
1058 				  union mlxsw_sp_l3addr saddr,
1059 				  u32 ul_tb_id,
1060 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1061 {
1062 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1063 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1064 	union mlxsw_sp_l3addr tun_saddr;
1065 
1066 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1067 		return false;
1068 
1069 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1070 	return tun_ul_tb_id == ul_tb_id &&
1071 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1072 }
1073 
1074 static int
1075 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1076 			      struct mlxsw_sp_fib_entry *fib_entry,
1077 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1078 {
1079 	u32 tunnel_index;
1080 	int err;
1081 
1082 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
1083 	if (err)
1084 		return err;
1085 
1086 	ipip_entry->decap_fib_entry = fib_entry;
1087 	fib_entry->decap.ipip_entry = ipip_entry;
1088 	fib_entry->decap.tunnel_index = tunnel_index;
1089 	return 0;
1090 }
1091 
/* Undo mlxsw_sp_fib_entry_decap_init(): sever the FIB-entry/IPIP-entry
 * cross-links and release the KVDL entry allocated for the tunnel.
 */
static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}
1100 
1101 static struct mlxsw_sp_fib_node *
1102 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1103 			 size_t addr_len, unsigned char prefix_len);
1104 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1105 				     struct mlxsw_sp_fib_entry *fib_entry);
1106 
1107 static void
1108 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1109 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1110 {
1111 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1112 
1113 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1114 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1115 
1116 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1117 }
1118 
/* Turn @decap_fib_entry into the decap route of @ipip_entry. If binding
 * fails, the entry is left untouched; if reprogramming the updated entry
 * to the device fails, roll back to a trapping route.
 */
static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1132 
1133 /* Given an IPIP entry, find the corresponding decap route. */
1134 static struct mlxsw_sp_fib_entry *
1135 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1136 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1137 {
1138 	static struct mlxsw_sp_fib_node *fib_node;
1139 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1140 	struct mlxsw_sp_fib_entry *fib_entry;
1141 	unsigned char saddr_prefix_len;
1142 	union mlxsw_sp_l3addr saddr;
1143 	struct mlxsw_sp_fib *ul_fib;
1144 	struct mlxsw_sp_vr *ul_vr;
1145 	const void *saddrp;
1146 	size_t saddr_len;
1147 	u32 ul_tb_id;
1148 	u32 saddr4;
1149 
1150 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1151 
1152 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1153 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1154 	if (!ul_vr)
1155 		return NULL;
1156 
1157 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1158 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1159 					   ipip_entry->ol_dev);
1160 
1161 	switch (ipip_ops->ul_proto) {
1162 	case MLXSW_SP_L3_PROTO_IPV4:
1163 		saddr4 = be32_to_cpu(saddr.addr4);
1164 		saddrp = &saddr4;
1165 		saddr_len = 4;
1166 		saddr_prefix_len = 32;
1167 		break;
1168 	case MLXSW_SP_L3_PROTO_IPV6:
1169 		WARN_ON(1);
1170 		return NULL;
1171 	}
1172 
1173 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1174 					    saddr_prefix_len);
1175 	if (!fib_node || list_empty(&fib_node->entry_list))
1176 		return NULL;
1177 
1178 	fib_entry = list_first_entry(&fib_node->entry_list,
1179 				     struct mlxsw_sp_fib_entry, list);
1180 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1181 		return NULL;
1182 
1183 	return fib_entry;
1184 }
1185 
1186 static struct mlxsw_sp_ipip_entry *
1187 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1188 			   enum mlxsw_sp_ipip_type ipipt,
1189 			   struct net_device *ol_dev)
1190 {
1191 	struct mlxsw_sp_ipip_entry *ipip_entry;
1192 
1193 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1194 	if (IS_ERR(ipip_entry))
1195 		return ipip_entry;
1196 
1197 	list_add_tail(&ipip_entry->ipip_list_node,
1198 		      &mlxsw_sp->router->ipip_list);
1199 
1200 	return ipip_entry;
1201 }
1202 
/* Unlink an IPIP entry from the router's IPIP list and free it together
 * with its loopback RIF. Inverse of mlxsw_sp_ipip_entry_create().
 */
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
1210 
1211 static bool
1212 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1213 				  const struct net_device *ul_dev,
1214 				  enum mlxsw_sp_l3proto ul_proto,
1215 				  union mlxsw_sp_l3addr ul_dip,
1216 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1217 {
1218 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1219 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1220 	struct net_device *ipip_ul_dev;
1221 
1222 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1223 		return false;
1224 
1225 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1226 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1227 						 ul_tb_id, ipip_entry) &&
1228 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1229 }
1230 
1231 /* Given decap parameters, find the corresponding IPIP entry. */
1232 static struct mlxsw_sp_ipip_entry *
1233 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1234 				  const struct net_device *ul_dev,
1235 				  enum mlxsw_sp_l3proto ul_proto,
1236 				  union mlxsw_sp_l3addr ul_dip)
1237 {
1238 	struct mlxsw_sp_ipip_entry *ipip_entry;
1239 
1240 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1241 			    ipip_list_node)
1242 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1243 						      ul_proto, ul_dip,
1244 						      ipip_entry))
1245 			return ipip_entry;
1246 
1247 	return NULL;
1248 }
1249 
1250 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1251 				      const struct net_device *dev,
1252 				      enum mlxsw_sp_ipip_type *p_type)
1253 {
1254 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1255 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1256 	enum mlxsw_sp_ipip_type ipipt;
1257 
1258 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1259 		ipip_ops = router->ipip_ops_arr[ipipt];
1260 		if (dev->type == ipip_ops->dev_type) {
1261 			if (p_type)
1262 				*p_type = ipipt;
1263 			return true;
1264 		}
1265 	}
1266 	return false;
1267 }
1268 
/* Return true if @dev is of a tunnel device type the driver recognizes as
 * an IPIP overlay device.
 */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1274 
1275 static struct mlxsw_sp_ipip_entry *
1276 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1277 				   const struct net_device *ol_dev)
1278 {
1279 	struct mlxsw_sp_ipip_entry *ipip_entry;
1280 
1281 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1282 			    ipip_list_node)
1283 		if (ipip_entry->ol_dev == ol_dev)
1284 			return ipip_entry;
1285 
1286 	return NULL;
1287 }
1288 
/* Find the next IPIP entry after @start whose underlay device is @ul_dev.
 * Pass @start == NULL to begin at the head of the list; pass a previous
 * result to continue the walk. Returns NULL when no further entry matches.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() turns a NULL @start into a cursor positioned
	 * just before the first element.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1309 
1310 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1311 				const struct net_device *dev)
1312 {
1313 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1314 }
1315 
1316 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1317 						const struct net_device *ol_dev,
1318 						enum mlxsw_sp_ipip_type ipipt)
1319 {
1320 	const struct mlxsw_sp_ipip_ops *ops
1321 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1322 
1323 	/* For deciding whether decap should be offloaded, we don't care about
1324 	 * overlay protocol, so ask whether either one is supported.
1325 	 */
1326 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1327 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1328 }
1329 
/* Handle NETDEV_REGISTER of an IPIP overlay device: offload the tunnel if
 * possible. If another offloaded tunnel already uses the same local
 * address in the same underlay table, that tunnel is demoted and this one
 * is left unoffloaded, since same-saddr sharing is unsupported.
 *
 * NOTE(review): the return value of mlxsw_sp_netdev_ipip_type() is
 * ignored, so @ipipt would be used uninitialized for a non-IPIP device —
 * presumably the caller only invokes this for recognized IPIP netdevs;
 * confirm against mlxsw_sp_netdevice_ipip_ol_event()'s callers.
 */
static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		/* Only offload when no conflicting tunnel had to be demoted. */
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}
1356 
/* Handle NETDEV_UNREGISTER of an IPIP overlay device: destroy its IPIP
 * entry if it was offloaded.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1366 
/* The overlay device came up: if a trapping route for the tunnel's local
 * address exists, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}
1378 
/* Handle NETDEV_UP of an IPIP overlay device for an offloaded tunnel. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1388 
/* The overlay device went down: demote the decap route, if any, back to a
 * trapping route.
 */
static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1396 
/* Handle NETDEV_DOWN of an IPIP overlay device for an offloaded tunnel. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1406 
1407 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1408 					 struct mlxsw_sp_rif *old_rif,
1409 					 struct mlxsw_sp_rif *new_rif);
/* Replace @ipip_entry's loopback RIF with a freshly-created one. When
 * @keep_encap is set, next hops referencing the old RIF are migrated to
 * the new one. The old RIF is destroyed only after the entry points at
 * the new RIF and any migration is done, so nothing references it when it
 * goes away. Returns 0 or the RIF-creation error (old RIF kept on error).
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1435 
1436 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1437 					struct mlxsw_sp_rif *rif);
1438 
1439 /**
1440  * Update the offload related to an IPIP entry. This always updates decap, and
1441  * in addition to that it also:
1442  * @recreate_loopback: recreates the associated loopback RIF
1443  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1444  *              relevant when recreate_loopback is true.
1445  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1446  *                   is only relevant when recreate_loopback is false.
1447  */
1448 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1449 					struct mlxsw_sp_ipip_entry *ipip_entry,
1450 					bool recreate_loopback,
1451 					bool keep_encap,
1452 					bool update_nexthops,
1453 					struct netlink_ext_ack *extack)
1454 {
1455 	int err;
1456 
1457 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1458 	 * recreate it. That creates a window of opportunity where RALUE and
1459 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1460 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1461 	 * of RALUE, demote the decap route back.
1462 	 */
1463 	if (ipip_entry->decap_fib_entry)
1464 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1465 
1466 	if (recreate_loopback) {
1467 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1468 						       keep_encap, extack);
1469 		if (err)
1470 			return err;
1471 	} else if (update_nexthops) {
1472 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1473 					    &ipip_entry->ol_lb->common);
1474 	}
1475 
1476 	if (ipip_entry->ol_dev->flags & IFF_UP)
1477 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1478 
1479 	return 0;
1480 }
1481 
/* The overlay device was moved into or out of a VRF: recreate the
 * loopback RIF in the new L3 domain, unless the move created a local
 * address conflict, in which case this tunnel is demoted instead.
 */
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}
1512 
/* The underlay device moved between VRFs: recreate the loopback RIF and
 * migrate the encap next hops to it.
 */
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}
1522 
/* The underlay device came up: refresh next hops, keeping the current
 * loopback RIF.
 */
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1531 
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1544 
1545 static int
1546 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1547 					struct net_device *ol_dev,
1548 					struct netlink_ext_ack *extack)
1549 {
1550 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1551 	struct mlxsw_sp_ipip_entry *ipip_entry;
1552 	int err;
1553 
1554 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1555 	if (!ipip_entry)
1556 		/* A change might make a tunnel eligible for offloading, but
1557 		 * that is currently not implemented. What falls to slow path
1558 		 * stays there.
1559 		 */
1560 		return 0;
1561 
1562 	/* A change might make a tunnel not eligible for offloading. */
1563 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1564 						 ipip_entry->ipipt)) {
1565 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1566 		return 0;
1567 	}
1568 
1569 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1570 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1571 	return err;
1572 }
1573 
/* Stop offloading a tunnel entirely: demote its decap route back to a
 * trap (as done when the overlay goes down) and destroy the entry.
 */
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1583 
/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in in the argument
 * `except'. Returns true if a tunnel was demoted, false otherwise.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	/* _safe iteration: demoting an entry removes it from this list. */
	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}
1611 
/* Demote every offloaded tunnel whose underlay device is @ul_dev. */
static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	/* _safe iteration: demoting an entry removes it from this list. */
	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}
1626 
/* Dispatch a netdevice notifier event for an IPIP overlay device @ol_dev
 * to the matching handler. Unhandled events are ignored (return 0).
 */
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* Only a move into/out of an L3 master (VRF) is relevant. */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}
1662 
/* Dispatch a netdevice notifier event for underlay device @ul_dev to the
 * matching handler, on behalf of one tunnel @ipip_entry that uses it.
 * Unhandled events are ignored (return 0).
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* Only a move into/out of an L3 master (VRF) is relevant. */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}
1694 
/* Deliver a notifier event on underlay device @ul_dev to every offloaded
 * tunnel that uses it. On the first failure, all tunnels over @ul_dev are
 * demoted and the error is returned.
 */
int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	/* find_by_ul_dev() continues the walk from the previous result. */
	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}
1718 
/* Hash key for neighbour entries: the kernel neighbour object itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1722 
/* Driver-side representation of one neighbour (host) entry. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of rif->neigh_list */
	struct rhash_head ht_node;	/* node in router->neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* RIF index of the neighbour's netdev */
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;	/* flow counter; valid iff counter_valid */
	bool counter_valid;
};
1737 
/* rhashtable keyed by the whole mlxsw_sp_neigh_key (the neighbour ptr). */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1743 
1744 struct mlxsw_sp_neigh_entry *
1745 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1746 			struct mlxsw_sp_neigh_entry *neigh_entry)
1747 {
1748 	if (!neigh_entry) {
1749 		if (list_empty(&rif->neigh_list))
1750 			return NULL;
1751 		else
1752 			return list_first_entry(&rif->neigh_list,
1753 						typeof(*neigh_entry),
1754 						rif_list_node);
1755 	}
1756 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1757 		return NULL;
1758 	return list_next_entry(neigh_entry, rif_list_node);
1759 }
1760 
/* Return the address family of the neighbour's table (AF_INET/AF_INET6). */
int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}
1765 
/* Accessor for the entry's cached hardware (MAC) address. */
unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}
1771 
1772 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1773 {
1774 	struct neighbour *n;
1775 
1776 	n = neigh_entry->key.n;
1777 	return ntohl(*((__be32 *) n->primary_key));
1778 }
1779 
1780 struct in6_addr *
1781 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1782 {
1783 	struct neighbour *n;
1784 
1785 	n = neigh_entry->key.n;
1786 	return (struct in6_addr *) &n->primary_key;
1787 }
1788 
/* Read the flow counter bound to @neigh_entry into @p_counter. Returns
 * -EINVAL if no counter was allocated for this entry.
 */
int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
1799 
1800 static struct mlxsw_sp_neigh_entry *
1801 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1802 			   u16 rif)
1803 {
1804 	struct mlxsw_sp_neigh_entry *neigh_entry;
1805 
1806 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1807 	if (!neigh_entry)
1808 		return NULL;
1809 
1810 	neigh_entry->key.n = n;
1811 	neigh_entry->rif = rif;
1812 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1813 
1814 	return neigh_entry;
1815 }
1816 
/* Free a neighbour entry; counterpart of mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1821 
/* Insert the entry into the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
1830 
/* Remove the entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
1839 
1840 static bool
1841 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1842 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1843 {
1844 	struct devlink *devlink;
1845 	const char *table_name;
1846 
1847 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1848 	case AF_INET:
1849 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1850 		break;
1851 	case AF_INET6:
1852 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1853 		break;
1854 	default:
1855 		WARN_ON(1);
1856 		return false;
1857 	}
1858 
1859 	devlink = priv_to_devlink(mlxsw_sp->core);
1860 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1861 }
1862 
/* Best-effort allocation of a flow counter for @neigh_entry: silently
 * skipped when counters are disabled or the allocation fails.
 */
static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}
1875 
1876 static void
1877 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1878 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1879 {
1880 	if (!neigh_entry->counter_valid)
1881 		return;
1882 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1883 				   neigh_entry->counter_index);
1884 	neigh_entry->counter_valid = false;
1885 }
1886 
/* Create a neighbour entry for kernel neighbour @n: allocate it, insert
 * it into the hash table, allocate its (optional) counter and link it to
 * the RIF of @n's netdevice. Returns the entry, -EINVAL if the device has
 * no RIF, -ENOMEM on allocation failure, or the hash-insert error.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
1915 
/* Tear down a neighbour entry in the reverse order of _create(): unlink
 * from the RIF, free the counter, remove from the hash table, free.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
1925 
1926 static struct mlxsw_sp_neigh_entry *
1927 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1928 {
1929 	struct mlxsw_sp_neigh_key key;
1930 
1931 	key.n = n;
1932 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1933 				      &key, mlxsw_sp_neigh_ht_params);
1934 }
1935 
/* Initialize the neighbour-update polling interval (in msecs) from the
 * kernel's DELAY_PROBE_TIME: the smaller of the ARP and ND values when
 * IPv6 is enabled, the ARP value otherwise.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
1950 
/* Process one IPv4 entry from a RAUHTD dump: find the corresponding
 * kernel neighbour on the RIF's netdevice and send it an event so the
 * kernel refreshes it. Entries with an unknown RIF are logged and skipped.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* neigh_lookup() keys on network byte order; the register gives the
	 * address in host byte order.
	 */
	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
1978 
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 analogue of mlxsw_sp_router_neigh_ent_ipv4_process(): look up the
 * ND neighbour for the dumped entry and send it an event so the kernel
 * refreshes it. Entries with an unknown RIF are logged and skipped.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
/* Stub when IPv6 is compiled out. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
2013 
2014 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2015 						   char *rauhtd_pl,
2016 						   int rec_index)
2017 {
2018 	u8 num_entries;
2019 	int i;
2020 
2021 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2022 								rec_index);
2023 	/* Hardware starts counting at 0, so add 1. */
2024 	num_entries++;
2025 
2026 	/* Each record consists of several neighbour entries. */
2027 	for (i = 0; i < num_entries; i++) {
2028 		int ent_index;
2029 
2030 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2031 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2032 						       ent_index);
2033 	}
2034 
2035 }
2036 
/* An IPv6 RAUHTD record holds exactly one neighbour entry. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2045 
2046 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2047 					      char *rauhtd_pl, int rec_index)
2048 {
2049 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2050 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2051 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2052 						       rec_index);
2053 		break;
2054 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2055 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2056 						       rec_index);
2057 		break;
2058 	}
2059 }
2060 
2061 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2062 {
2063 	u8 num_rec, last_rec_index, num_entries;
2064 
2065 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2066 	last_rec_index = num_rec - 1;
2067 
2068 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2069 		return false;
2070 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2071 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2072 		return true;
2073 
2074 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2075 								last_rec_index);
2076 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2077 		return true;
2078 	return false;
2079 }
2080 
/* Dump active neighbour entries of @type from the device via the RAUHTD
 * register and feed each record to the kernel's neighbour code. The
 * query repeats while the response buffer comes back full, so the whole
 * activity table is eventually drained.
 *
 * Returns 0 on success or the error of the failed register query.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
2110 
2111 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2112 {
2113 	enum mlxsw_reg_rauhtd_type type;
2114 	char *rauhtd_pl;
2115 	int err;
2116 
2117 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2118 	if (!rauhtd_pl)
2119 		return -ENOMEM;
2120 
2121 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2122 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2123 	if (err)
2124 		goto out;
2125 
2126 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2127 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2128 out:
2129 	kfree(rauhtd_pl);
2130 	return err;
2131 }
2132 
/* Keep neighbours used by nexthops alive in the kernel: routed traffic
 * may flow entirely in HW, so the kernel would otherwise consider them
 * idle and let them expire.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
2147 
2148 static void
2149 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2150 {
2151 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2152 
2153 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2154 			       msecs_to_jiffies(interval));
2155 }
2156 
2157 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2158 {
2159 	struct mlxsw_sp_router *router;
2160 	int err;
2161 
2162 	router = container_of(work, struct mlxsw_sp_router,
2163 			      neighs_update.dw.work);
2164 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2165 	if (err)
2166 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2167 
2168 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2169 
2170 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2171 }
2172 
/* Delayed work: periodically probe unresolved nexthop neighbours. */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	/* Re-arm with a fixed probing interval. */
	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2198 
2199 static void
2200 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2201 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2202 			      bool removing);
2203 
2204 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2205 {
2206 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2207 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2208 }
2209 
/* Program (add or delete) an IPv4 neighbour entry in the device's
 * unicast host table via the RAUHT register.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	/* Bind the activity counter, if one was allocated for this
	 * entry.
	 */
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2226 
/* Program (add or delete) an IPv6 neighbour entry in the device's
 * unicast host table via the RAUHT register.
 */
static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	/* Bind the activity counter, if one was allocated for this
	 * entry.
	 */
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2243 
2244 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2245 {
2246 	struct neighbour *n = neigh_entry->key.n;
2247 
2248 	/* Packets with a link-local destination address are trapped
2249 	 * after LPM lookup and never reach the neighbour table, so
2250 	 * there is no need to program such neighbours to the device.
2251 	 */
2252 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2253 	    IPV6_ADDR_LINKLOCAL)
2254 		return true;
2255 	return false;
2256 }
2257 
2258 static void
2259 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2260 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2261 			    bool adding)
2262 {
2263 	if (!adding && !neigh_entry->connected)
2264 		return;
2265 	neigh_entry->connected = adding;
2266 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2267 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2268 						mlxsw_sp_rauht_op(adding));
2269 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2270 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2271 			return;
2272 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2273 						mlxsw_sp_rauht_op(adding));
2274 	} else {
2275 		WARN_ON_ONCE(1);
2276 	}
2277 }
2278 
/* Enable or disable the activity counter of a neighbour entry, then
 * re-write the entry to the device so the new counter binding takes
 * effect.
 * NOTE(review): 'true' is passed unconditionally — presumably the entry
 * remains programmed across a counter toggle; confirm against callers.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2290 
/* Context for a netevent that is handled in process context. */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n; /* held reference; released by the work */
};
2296 
/* Process-context handler for NETEVENT_NEIGH_UPDATE: sync the kernel
 * neighbour's state (MAC, validity) into the driver's neigh entry and
 * the device, creating or destroying the entry as needed.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	/* Nothing to do for an untracked neighbour that is not valid. */
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	/* Drop entries that became invalid and are no longer referenced
	 * by any nexthop.
	 */
	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	/* Drop the reference taken by the netevent handler. */
	neigh_release(n);
	kfree(net_work);
}
2341 
2342 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2343 
2344 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2345 {
2346 	struct mlxsw_sp_netevent_work *net_work =
2347 		container_of(work, struct mlxsw_sp_netevent_work, work);
2348 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2349 
2350 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2351 	kfree(net_work);
2352 }
2353 
/* Netevent notifier. Runs in atomic context, so work is either trivial
 * (interval update) or deferred to process context via a work item.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
					  unsigned long event, void *ptr)
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp_router *router;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;
	struct net *net;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		/* Mirror the kernel's probe delay as our polling
		 * interval (kept in milliseconds).
		 */
		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		net_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_MULTIPATH_HASH_UPDATE:
		net = ptr;

		/* Only the initial network namespace is offloaded. */
		if (!net_eq(net, &init_net))
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work)
			return NOTIFY_BAD;

		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
		net_work->mlxsw_sp = router->mlxsw_sp;
		mlxsw_core_schedule_work(&net_work->work);
		break;
	}

	return NOTIFY_DONE;
}
2435 
/* Set up neighbour offload state: the neigh hash table, the polling
 * interval and the two periodic works (activity dump and unresolved
 * nexthop probing). Both works are kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}
2459 
/* Tear down neighbour offload state: stop both periodic works (the
 * _sync variants guarantee they are not running afterwards) and destroy
 * the neighbour hash table.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
2466 
/* A router interface is going away: unprogram every neighbour entry
 * that uses it and destroy the entries. The _safe iterator is required
 * because destroy unlinks entries from rif->neigh_list.
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
2478 
/* Kind of object a nexthop resolves through. */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* regular Ethernet neighbour */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* IP-in-IP tunnel entry */
};
2483 
/* Hash-table key: the kernel FIB nexthop this object mirrors. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2487 
/* Driver representation of a single nexthop; always a member of exactly
 * one nexthop group (see nh_grp back-pointer).
 */
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node; /* member of the router's global
					    * nexthop list
					    */
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)]; /* sized for IPv6;
							 * also holds IPv4
							 */
	int ifindex;
	int nh_weight;
	int norm_nh_weight; /* weight after group normalization; see the
			     * group's sum_norm_weight
			     */
	int num_adj_entries; /* adjacency entries occupied by this nexthop */
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry; /* TYPE_ETH */
		struct mlxsw_sp_ipip_entry *ipip_entry;   /* TYPE_IPIP */
	};
	unsigned int counter_index;
	bool counter_valid; /* counter_index holds a live flow counter */
};
2520 
/* A group of nexthops sharing one block of adjacency entries. */
struct mlxsw_sp_nexthop_group {
	void *priv; /* protocol-specific; IPv4 groups store struct fib_info * */
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl; /* family of this table identifies the
					* group as IPv4 or IPv6
					*/
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index; /* base index of the group's adjacency block */
	u16 ecmp_size; /* number of adjacency entries in the block */
	u16 count; /* number of members in nexthops[] */
	int sum_norm_weight;
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif	nexthops[0].rif
};
2535 
2536 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2537 				    struct mlxsw_sp_nexthop *nh)
2538 {
2539 	struct devlink *devlink;
2540 
2541 	devlink = priv_to_devlink(mlxsw_sp->core);
2542 	if (!devlink_dpipe_table_counter_enabled(devlink,
2543 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2544 		return;
2545 
2546 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2547 		return;
2548 
2549 	nh->counter_valid = true;
2550 }
2551 
2552 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2553 				   struct mlxsw_sp_nexthop *nh)
2554 {
2555 	if (!nh->counter_valid)
2556 		return;
2557 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2558 	nh->counter_valid = false;
2559 }
2560 
/* Read the flow counter of @nh into @p_counter. Returns -EINVAL when no
 * counter is bound to this nexthop.
 */
int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
{
	if (!nh->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
					 p_counter, NULL);
}
2570 
2571 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2572 					       struct mlxsw_sp_nexthop *nh)
2573 {
2574 	if (!nh) {
2575 		if (list_empty(&router->nexthop_list))
2576 			return NULL;
2577 		else
2578 			return list_first_entry(&router->nexthop_list,
2579 						typeof(*nh), router_list_node);
2580 	}
2581 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2582 		return NULL;
2583 	return list_next_entry(nh, router_list_node);
2584 }
2585 
/* Report whether this nexthop is currently written to the device. */
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
{
	return nh->offloaded;
}
2590 
2591 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2592 {
2593 	if (!nh->offloaded)
2594 		return NULL;
2595 	return nh->neigh_entry->ha;
2596 }
2597 
/* Retrieve the adjacency location of @nh within its group: the group's
 * base adjacency index and ECMP size, plus the hash index at which this
 * nexthop's entries start. Returns -EINVAL when the nexthop or its
 * group is not currently offloaded.
 */
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
			     u32 *p_adj_size, u32 *p_adj_hash_index)
{
	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
	u32 adj_hash_index = 0;
	int i;

	if (!nh->offloaded || !nh_grp->adj_index_valid)
		return -EINVAL;

	*p_adj_index = nh_grp->adj_index;
	*p_adj_size = nh_grp->ecmp_size;

	/* The hash index is the sum of the adjacency entries of all
	 * offloaded nexthops preceding this one in the group.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];

		if (nh_iter == nh)
			break;
		if (nh_iter->offloaded)
			adj_hash_index += nh_iter->num_adj_entries;
	}

	*p_adj_hash_index = adj_hash_index;
	return 0;
}
2623 
/* Return the router interface this nexthop egresses through. */
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
2628 
2629 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2630 {
2631 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2632 	int i;
2633 
2634 	for (i = 0; i < nh_grp->count; i++) {
2635 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2636 
2637 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2638 			return true;
2639 	}
2640 	return false;
2641 }
2642 
/* IPv4 groups keep the kernel's fib_info in priv. */
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}
2648 
/* Lookup key for the nexthop-group hash table: identifies a group by
 * the protocol-specific route object that would use it.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi; /* MLXSW_SP_L3_PROTO_IPV4 */
		struct mlxsw_sp_fib6_entry *fib6_entry; /* ..._IPV6 */
	};
};
2656 
2657 static bool
2658 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2659 				    const struct in6_addr *gw, int ifindex,
2660 				    int weight)
2661 {
2662 	int i;
2663 
2664 	for (i = 0; i < nh_grp->count; i++) {
2665 		const struct mlxsw_sp_nexthop *nh;
2666 
2667 		nh = &nh_grp->nexthops[i];
2668 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2669 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2670 			return true;
2671 	}
2672 
2673 	return false;
2674 }
2675 
/* Return true when the group's nexthops exactly match the rt6 list of
 * the IPv6 FIB entry: same count, and every rt6 has a matching member
 * (gateway, interface, weight).
 */
static bool
mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
			    const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	if (nh_grp->count != fib6_entry->nrt6)
		return false;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct in6_addr *gw;
		int ifindex, weight;

		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
							 weight))
			return false;
	}

	return true;
}
2699 
/* rhashtable compare callback: return 0 when the lookup key matches the
 * stored group, non-zero otherwise (rhashtable convention).
 */
static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* IPv4 groups match on the fib_info pointer itself. */
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}
2717 
/* Distinguish IPv4/IPv6 groups by the family of their neighbour table. */
static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
2723 
/* rhashtable object hash: must produce the same value that
 * mlxsw_sp_nexthop_group_hash() produces for the matching lookup key.
 */
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		/* IPv4 groups hash on the fib_info pointer value. */
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		/* IPv6 groups hash on the member count XORed with the
		 * member ifindexes; mirrors
		 * mlxsw_sp_nexthop6_group_hash().
		 */
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2748 
/* Hash an IPv6 FIB entry's rt6 list the same way
 * mlxsw_sp_nexthop_group_hash_obj() hashes the corresponding group:
 * nexthop count XORed with each egress ifindex.
 */
static u32
mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
{
	unsigned int val = fib6_entry->nrt6;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct net_device *dev;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		dev = mlxsw_sp_rt6->rt->dst.dev;
		val ^= dev->ifindex;
	}

	return jhash(&val, sizeof(val), seed);
}
2763 
/* rhashtable key hash for nexthop-group lookups; must agree with
 * mlxsw_sp_nexthop_group_hash_obj() for matching objects.
 */
static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2779 
/* Groups are keyed by protocol-specific route data, so custom hash and
 * compare callbacks are used instead of a flat memcmp key.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2786 
/* Add a group to the hash table used for sharing groups between routes.
 * IPv6 groups without a gateway are kept out of the table — NOTE(review):
 * presumably such groups cannot be shared; confirm. Must stay symmetric
 * with mlxsw_sp_nexthop_group_remove().
 */
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return 0;

	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &nh_grp->ht_node,
				      mlxsw_sp_nexthop_group_ht_params);
}
2798 
/* Remove a group from the sharing hash table; skips the same IPv6
 * gateway-less groups that mlxsw_sp_nexthop_group_insert() never added.
 */
static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return;

	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
			       &nh_grp->ht_node,
			       mlxsw_sp_nexthop_group_ht_params);
}
2810 
2811 static struct mlxsw_sp_nexthop_group *
2812 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2813 			       struct fib_info *fi)
2814 {
2815 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2816 
2817 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2818 	cmp_arg.fi = fi;
2819 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2820 				      &cmp_arg,
2821 				      mlxsw_sp_nexthop_group_ht_params);
2822 }
2823 
2824 static struct mlxsw_sp_nexthop_group *
2825 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2826 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2827 {
2828 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2829 
2830 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2831 	cmp_arg.fib6_entry = fib6_entry;
2832 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2833 				      &cmp_arg,
2834 				      mlxsw_sp_nexthop_group_ht_params);
2835 }
2836 
/* Individual nexthops use a flat memcmp key: the kernel fib_nh pointer. */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2842 
/* Track a nexthop in the fib_nh-keyed hash table. */
static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}
2849 
/* Stop tracking a nexthop in the fib_nh-keyed hash table. */
static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}
2856 
/* Find the driver nexthop tracking the given kernel fib_nh, if any. */
static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
2864 
/* Issue a RALEU write that re-points all routes of one virtual router
 * from the old adjacency index/size to the new one in a single shot.
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
2879 
/* Re-point all routes using @nh_grp from the old adjacency block to the
 * group's current one, one virtual router at a time.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		/* One RALEU write covers a whole FIB, so skip entries
		 * of the FIB already handled.
		 * NOTE(review): this only de-duplicates consecutive
		 * list entries — presumably fib_list is grouped by FIB;
		 * confirm.
		 */
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}
2902 
/* Write one Ethernet adjacency entry (RATR register) for @nh at
 * @adj_index, using the MAC and RIF of its resolved neighbour.
 */
static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				     struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	/* Bind the nexthop's flow counter, or explicitly unbind any
	 * previous one.
	 */
	if (nh->counter_valid)
		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
	else
		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
2920 
2921 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2922 			    struct mlxsw_sp_nexthop *nh)
2923 {
2924 	int i;
2925 
2926 	for (i = 0; i < nh->num_adj_entries; i++) {
2927 		int err;
2928 
2929 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2930 		if (err)
2931 			return err;
2932 	}
2933 
2934 	return 0;
2935 }
2936 
/* Update one IP-in-IP adjacency entry through the tunnel-type-specific
 * callback registered for the nexthop's ipip entry.
 */
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					  u32 adj_index,
					  struct mlxsw_sp_nexthop *nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}
2946 
2947 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2948 					u32 adj_index,
2949 					struct mlxsw_sp_nexthop *nh)
2950 {
2951 	int i;
2952 
2953 	for (i = 0; i < nh->num_adj_entries; i++) {
2954 		int err;
2955 
2956 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2957 						     nh);
2958 		if (err)
2959 			return err;
2960 	}
2961 
2962 	return 0;
2963 }
2964 
2965 static int
2966 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2967 			      struct mlxsw_sp_nexthop_group *nh_grp,
2968 			      bool reallocate)
2969 {
2970 	u32 adj_index = nh_grp->adj_index; /* base */
2971 	struct mlxsw_sp_nexthop *nh;
2972 	int i;
2973 	int err;
2974 
2975 	for (i = 0; i < nh_grp->count; i++) {
2976 		nh = &nh_grp->nexthops[i];
2977 
2978 		if (!nh->should_offload) {
2979 			nh->offloaded = 0;
2980 			continue;
2981 		}
2982 
2983 		if (nh->update || reallocate) {
2984 			switch (nh->type) {
2985 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
2986 				err = mlxsw_sp_nexthop_update
2987 					    (mlxsw_sp, adj_index, nh);
2988 				break;
2989 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2990 				err = mlxsw_sp_nexthop_ipip_update
2991 					    (mlxsw_sp, adj_index, nh);
2992 				break;
2993 			}
2994 			if (err)
2995 				return err;
2996 			nh->update = 0;
2997 			nh->offloaded = 1;
2998 		}
2999 		adj_index += nh->num_adj_entries;
3000 	}
3001 	return 0;
3002 }
3003 
3004 static bool
3005 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3006 				 const struct mlxsw_sp_fib_entry *fib_entry);
3007 
/* Re-write to the device all FIB entries using @nh_grp so that they pick
 * up the group's current adjacency index (or trap, if it is invalid).
 */
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		/* Only the first entry of a FIB node is programmed to the
		 * device; the rest have nothing to update.
		 */
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}
3025 
3026 static void
3027 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3028 				   enum mlxsw_reg_ralue_op op, int err);
3029 
/* Refresh the offload indication of all FIB entries using @nh_grp
 * without touching the device. Called after the offload state of some
 * nexthop within the group changed.
 */
static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		/* Entries other than the first of a FIB node are not
		 * programmed to the device and need no refresh.
		 */
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		/* err == 0: pretend the last write succeeded so the
		 * indication reflects the current should-offload state.
		 */
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
3043 
3044 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3045 {
3046 	/* Valid sizes for an adjacency group are:
3047 	 * 1-64, 512, 1024, 2048 and 4096.
3048 	 */
3049 	if (*p_adj_grp_size <= 64)
3050 		return;
3051 	else if (*p_adj_grp_size <= 512)
3052 		*p_adj_grp_size = 512;
3053 	else if (*p_adj_grp_size <= 1024)
3054 		*p_adj_grp_size = 1024;
3055 	else if (*p_adj_grp_size <= 2048)
3056 		*p_adj_grp_size = 2048;
3057 	else
3058 		*p_adj_grp_size = 4096;
3059 }
3060 
3061 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3062 					     unsigned int alloc_size)
3063 {
3064 	if (alloc_size >= 4096)
3065 		*p_adj_grp_size = 4096;
3066 	else if (alloc_size >= 2048)
3067 		*p_adj_grp_size = 2048;
3068 	else if (alloc_size >= 1024)
3069 		*p_adj_grp_size = 1024;
3070 	else if (alloc_size >= 512)
3071 		*p_adj_grp_size = 512;
3072 }
3073 
/* Adjust *p_adj_grp_size to a group size that the device supports and
 * that the KVD linear allocator can satisfy. Returns 0 on success or the
 * error of the allocation size query.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
					     &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
3096 
/* Normalize the weights of the offloadable nexthops in @nh_grp by their
 * greatest common divisor and record the sum of the normalized weights.
 * A sum of zero means no nexthop in the group can be offloaded.
 */
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int i, g = 0, sum_norm_weight = 0;
	struct mlxsw_sp_nexthop *nh;

	/* First pass: compute the GCD of all offloadable weights. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		if (g > 0)
			g = gcd(nh->nh_weight, g);
		else
			g = nh->nh_weight;
	}

	/* Second pass: divide each weight by the GCD and accumulate. Only
	 * reached with g != 0 when at least one nexthop is offloadable.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum_norm_weight += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum_norm_weight;
}
3125 
/* Distribute the group's ECMP adjacency entries among its offloadable
 * nexthops in proportion to their normalized weights. Rounding is done
 * against running totals, so the per-nexthop counts always add up to
 * exactly ecmp_size.
 */
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		/* Each nexthop gets the slice between the rounded
		 * cumulative bound of the previous nexthops and its own.
		 */
		weight += nh->norm_nh_weight;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
3145 
/* Re-evaluate the offload state of @nh_grp after one of its nexthops
 * changed: size and allocate a new adjacency group for the currently
 * offloadable nexthops, program it, and repoint (or mass-update) the FIB
 * entries using the group. On any failure the group's traffic is trapped
 * to the kernel instead.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	/* Gateway-less groups have no adjacency entries; just re-write
	 * their FIB entries.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect nexthops whose desired offload state diverged from the
	 * programmed one; those joining the group must be rewritten.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Switch the group to the new adjacency block, remembering the old
	 * one so it can be freed after the FIB entries are migrated.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Atomically repoint all routes from the old adjacency block to the
	 * new one, then release the old block.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fallback: invalidate the adjacency index, mark all nexthops as
	 * not offloaded and trap the group's traffic to the kernel.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
3256 
3257 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3258 					    bool removing)
3259 {
3260 	if (!removing)
3261 		nh->should_offload = 1;
3262 	else
3263 		nh->should_offload = 0;
3264 	nh->update = 1;
3265 }
3266 
/* A neighbour entry changed state; propagate the new state to every
 * nexthop using it and refresh each affected nexthop group.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3280 
3281 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3282 				      struct mlxsw_sp_rif *rif)
3283 {
3284 	if (nh->rif)
3285 		return;
3286 
3287 	nh->rif = rif;
3288 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3289 }
3290 
3291 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3292 {
3293 	if (!nh->rif)
3294 		return;
3295 
3296 	list_del(&nh->rif_list_node);
3297 	nh->rif = NULL;
3298 }
3299 
3300 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3301 				       struct mlxsw_sp_nexthop *nh)
3302 {
3303 	struct mlxsw_sp_neigh_entry *neigh_entry;
3304 	struct neighbour *n;
3305 	u8 nud_state, dead;
3306 	int err;
3307 
3308 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3309 		return 0;
3310 
3311 	/* Take a reference of neigh here ensuring that neigh would
3312 	 * not be destructed before the nexthop entry is finished.
3313 	 * The reference is taken either in neigh_lookup() or
3314 	 * in neigh_create() in case n is not found.
3315 	 */
3316 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3317 	if (!n) {
3318 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3319 				 nh->rif->dev);
3320 		if (IS_ERR(n))
3321 			return PTR_ERR(n);
3322 		neigh_event_send(n, NULL);
3323 	}
3324 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3325 	if (!neigh_entry) {
3326 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3327 		if (IS_ERR(neigh_entry)) {
3328 			err = -EINVAL;
3329 			goto err_neigh_entry_create;
3330 		}
3331 	}
3332 
3333 	/* If that is the first nexthop connected to that neigh, add to
3334 	 * nexthop_neighs_list
3335 	 */
3336 	if (list_empty(&neigh_entry->nexthop_list))
3337 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3338 			      &mlxsw_sp->router->nexthop_neighs_list);
3339 
3340 	nh->neigh_entry = neigh_entry;
3341 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3342 	read_lock_bh(&n->lock);
3343 	nud_state = n->nud_state;
3344 	dead = n->dead;
3345 	read_unlock_bh(&n->lock);
3346 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3347 
3348 	return 0;
3349 
3350 err_neigh_entry_create:
3351 	neigh_release(n);
3352 	return err;
3353 }
3354 
/* Disconnect @nh from its neighbour entry, undoing
 * mlxsw_sp_nexthop_neigh_init(): mark the nexthop for de-offload, unlink
 * it, destroy the neigh entry if it is unused and disconnected, and drop
 * the neighbour reference taken at init time.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	/* Balances the reference taken in mlxsw_sp_nexthop_neigh_init(). */
	neigh_release(n);
}
3380 
3381 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3382 {
3383 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3384 
3385 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3386 }
3387 
/* Attach @nh to an IP-in-IP tunnel entry: the nexthop egresses through
 * the tunnel's loopback RIF and is offloadable only while the tunnel's
 * underlay device is up. No-op for non-gateway groups or if already
 * attached.
 */
static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	bool removing;

	if (!nh->nh_grp->gateway || nh->ipip_entry)
		return;

	nh->ipip_entry = ipip_entry;
	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
	__mlxsw_sp_nexthop_neigh_update(nh, removing);
	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
}
3402 
/* Detach @nh from its IP-in-IP tunnel entry and mark it for de-offload.
 * The RIF link is torn down separately by mlxsw_sp_nexthop_rif_fini().
 */
static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;

	if (!ipip_entry)
		return;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	nh->ipip_entry = NULL;
}
3414 
3415 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3416 					const struct fib_nh *fib_nh,
3417 					enum mlxsw_sp_ipip_type *p_ipipt)
3418 {
3419 	struct net_device *dev = fib_nh->nh_dev;
3420 
3421 	return dev &&
3422 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3423 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3424 }
3425 
/* Tear down the type-specific state of @nh. Note the teardown order
 * differs per type: Ethernet nexthops release the neighbour before the
 * RIF link, while IP-in-IP nexthops drop the RIF link first.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	switch (nh->type) {
	case MLXSW_SP_NEXTHOP_TYPE_ETH:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
		break;
	}
}
3440 
/* Resolve the type of an IPv4 nexthop and initialize its type-specific
 * state. IP-in-IP offload is preferred when the egress device is an
 * offloadable tunnel; otherwise the nexthop is treated as Ethernet.
 * A missing RIF is not an error - the nexthop simply cannot be
 * offloaded yet.
 */
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
3478 
/* IPv4 wrapper around the protocol-agnostic type-specific teardown. */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3484 
3485 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3486 				  struct mlxsw_sp_nexthop_group *nh_grp,
3487 				  struct mlxsw_sp_nexthop *nh,
3488 				  struct fib_nh *fib_nh)
3489 {
3490 	struct net_device *dev = fib_nh->nh_dev;
3491 	struct in_device *in_dev;
3492 	int err;
3493 
3494 	nh->nh_grp = nh_grp;
3495 	nh->key.fib_nh = fib_nh;
3496 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3497 	nh->nh_weight = fib_nh->nh_weight;
3498 #else
3499 	nh->nh_weight = 1;
3500 #endif
3501 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3502 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3503 	if (err)
3504 		return err;
3505 
3506 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3507 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3508 
3509 	if (!dev)
3510 		return 0;
3511 
3512 	in_dev = __in_dev_get_rtnl(dev);
3513 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3514 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3515 		return 0;
3516 
3517 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3518 	if (err)
3519 		goto err_nexthop_neigh_init;
3520 
3521 	return 0;
3522 
3523 err_nexthop_neigh_init:
3524 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3525 	return err;
3526 }
3527 
/* Undo mlxsw_sp_nexthop4_init() in reverse order: type-specific state,
 * router list membership, flow counter and hashtable entry.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3536 
/* Handle FIB_EVENT_NH_{ADD,DEL} notifications for a single IPv4 nexthop
 * and refresh its group so the device reflects the change. Ignored when
 * the router is in aborted state.
 */
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;

	if (mlxsw_sp->router->aborted)
		return;

	/* The nexthop must have been inserted by a prior route addition. */
	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
3562 
/* The state of @rif changed; re-evaluate the offload eligibility of
 * every nexthop egressing it and refresh the affected groups. IP-in-IP
 * nexthops additionally depend on the underlay device being up.
 */
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh;
	bool removing;

	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
		switch (nh->type) {
		case MLXSW_SP_NEXTHOP_TYPE_ETH:
			removing = false;
			break;
		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
			break;
		default:
			WARN_ON(1);
			continue;
		}

		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3586 
/* Move all nexthops from @old_rif to @new_rif and re-evaluate their
 * offload state against the new RIF.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
3598 
/* @rif is going away; tear down the type-specific state of every nexthop
 * egressing it and refresh the affected groups. Safe iteration is needed
 * because the teardown unlinks each nexthop from the RIF's list.
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3609 
3610 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3611 				   const struct fib_info *fi)
3612 {
3613 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3614 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3615 }
3616 
/* Create a nexthop group for @fi, initialize one nexthop per fib_nh,
 * insert the group into the router's hashtable and program it to the
 * device. A reference on @fi is held for the group's lifetime.
 * Returns the group or an ERR_PTR.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	/* The nexthops are stored in a flexible array at the group's tail. */
	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3662 
/* Tear down and free an IPv4 nexthop group: remove it from the router's
 * hashtable, finalize all nexthops, release the adjacency entries via a
 * final refresh and drop the fib_info reference.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	/* With no offloadable nexthops left, this frees the group's
	 * adjacency index.
	 */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3680 
/* Attach @fib_entry to the nexthop group of @fi, creating the group if
 * one does not exist yet. Groups are shared between all FIB entries
 * using the same fib_info.
 */
static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}
3697 
3698 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3699 					struct mlxsw_sp_fib_entry *fib_entry)
3700 {
3701 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3702 
3703 	list_del(&fib_entry->nexthop_group_node);
3704 	if (!list_empty(&nh_grp->fib_list))
3705 		return;
3706 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3707 }
3708 
3709 static bool
3710 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3711 {
3712 	struct mlxsw_sp_fib4_entry *fib4_entry;
3713 
3714 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3715 				  common);
3716 	return !fib4_entry->tos;
3717 }
3718 
/* Decide whether @fib_entry can currently be offloaded: it must pass the
 * per-protocol check and, depending on its type, have a valid adjacency
 * index (remote) or a RIF (local).
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return true;
	default:
		return false;
	}
}
3744 
3745 static struct mlxsw_sp_nexthop *
3746 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3747 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3748 {
3749 	int i;
3750 
3751 	for (i = 0; i < nh_grp->count; i++) {
3752 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3753 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3754 
3755 		if (nh->rif && nh->rif->dev == rt->dst.dev &&
3756 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3757 				    &rt->rt6i_gateway))
3758 			return nh;
3759 		continue;
3760 	}
3761 
3762 	return NULL;
3763 }
3764 
/* Reflect the offload state of @fib_entry into the kernel's IPv4 FIB by
 * setting or clearing RTNH_F_OFFLOAD on its nexthops.
 */
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	/* Local and decap entries are offloaded as a whole; mark only the
	 * first nexthop (nh_grp->nexthops points at element 0).
	 */
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3786 
3787 static void
3788 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3789 {
3790 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3791 	int i;
3792 
3793 	for (i = 0; i < nh_grp->count; i++) {
3794 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3795 
3796 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3797 	}
3798 }
3799 
/* Reflect the offload state of @fib_entry into the kernel's IPv6 FIB by
 * setting or clearing RTNH_F_OFFLOAD on the routes of the entry.
 */
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	/* Local entries are offloaded as a whole; mark the first route. */
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	/* Mark each route according to the offload state of the matching
	 * nexthop in the entry's group.
	 */
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3826 
3827 static void
3828 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3829 {
3830 	struct mlxsw_sp_fib6_entry *fib6_entry;
3831 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3832 
3833 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3834 				  common);
3835 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3836 		struct rt6_info *rt = mlxsw_sp_rt6->rt;
3837 
3838 		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3839 	}
3840 }
3841 
/* Dispatch the offload-indication update to the per-protocol handler. */
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_set(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_set(fib_entry);
		break;
	}
}
3853 
/* Dispatch the offload-indication clearing to the per-protocol handler. */
static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
		break;
	}
}
3866 
/* Update the kernel-visible offload indication of @fib_entry after a
 * RALUE operation with result @err: clear it on delete, and on a
 * successful write set or clear it according to whether the entry should
 * currently be offloaded.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
3886 
/* Pack the common (key) part of a RALUE register for @fib_entry:
 * protocol, operation, virtual router and destination prefix.
 */
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* IPv4 destinations are passed as a host-order u32. */
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
3912 
/* Program a remote (gateway) FIB entry: point it at the nexthop group's
 * adjacency block, or set a trap when the entry cannot be offloaded.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3941 
/* Program a local (directly connected) FIB entry: forward to the
 * associated RIF, or set a trap when the entry cannot be offloaded.
 */
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3965 
/* Program a FIB entry that punts matching packets to the CPU (ip2me). */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
3976 
/* Program an IP-in-IP decap FIB entry by dispatching to the tunnel-type
 * specific handler with the entry's pre-allocated tunnel index.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	/* A decap entry must have been bound to a tunnel beforehand. */
	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}
3992 
/* Dispatch a RALUE operation to the handler matching the entry's type.
 * Returns -EINVAL for unknown types.
 */
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
							fib_entry, op);
	}
	return -EINVAL;
}
4010 
4011 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4012 				 struct mlxsw_sp_fib_entry *fib_entry,
4013 				 enum mlxsw_reg_ralue_op op)
4014 {
4015 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4016 
4017 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4018 
4019 	return err;
4020 }
4021 
4022 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4023 				     struct mlxsw_sp_fib_entry *fib_entry)
4024 {
4025 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4026 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4027 }
4028 
4029 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4030 				  struct mlxsw_sp_fib_entry *fib_entry)
4031 {
4032 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4033 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4034 }
4035 
/* Classify the device action for an IPv4 route based on the kernel
 * route type reported in the FIB notification. Returns 0 on success
 * or -EINVAL for unhandled route types.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		/* A local route whose address is the decap address of an
		 * IP-in-IP tunnel with an UP overlay device is used for
		 * tunnel decapsulation instead of a plain trap.
		 */
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		/* Local (non-decap) and broadcast routes trap packets to
		 * the CPU.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes are forwarded in hardware; non-gateway
		 * unicast routes use action type local.
		 */
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4079 
/* Allocate an IPv4 FIB entry for the given node, classify its action
 * type and bind it to a nexthop group. Returns the new entry or an
 * ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	/* Cache the route parameters used later to order and look up
	 * entries within the node.
	 */
	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}
4116 
/* Release the entry's nexthop group reference and free it. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
4123 
4124 static struct mlxsw_sp_fib4_entry *
4125 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4126 			   const struct fib_entry_notifier_info *fen_info)
4127 {
4128 	struct mlxsw_sp_fib4_entry *fib4_entry;
4129 	struct mlxsw_sp_fib_node *fib_node;
4130 	struct mlxsw_sp_fib *fib;
4131 	struct mlxsw_sp_vr *vr;
4132 
4133 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4134 	if (!vr)
4135 		return NULL;
4136 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4137 
4138 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4139 					    sizeof(fen_info->dst),
4140 					    fen_info->dst_len);
4141 	if (!fib_node)
4142 		return NULL;
4143 
4144 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4145 		if (fib4_entry->tb_id == fen_info->tb_id &&
4146 		    fib4_entry->tos == fen_info->tos &&
4147 		    fib4_entry->type == fen_info->type &&
4148 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4149 		    fen_info->fi) {
4150 			return fib4_entry;
4151 		}
4152 	}
4153 
4154 	return NULL;
4155 }
4156 
4157 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4158 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4159 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4160 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4161 	.automatic_shrinking = true,
4162 };
4163 
/* Insert the node into the FIB's hash table; fails if a node with the
 * same key already exists.
 */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
4170 
/* Remove the node from the FIB's hash table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
4177 
/* Look up a FIB node by prefix address and length. The key is hashed
 * byte-wise, so it is zeroed first to make sure any padding and the
 * bytes beyond addr_len are deterministic.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
4189 
4190 static struct mlxsw_sp_fib_node *
4191 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4192 			 size_t addr_len, unsigned char prefix_len)
4193 {
4194 	struct mlxsw_sp_fib_node *fib_node;
4195 
4196 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4197 	if (!fib_node)
4198 		return NULL;
4199 
4200 	INIT_LIST_HEAD(&fib_node->entry_list);
4201 	list_add(&fib_node->list, &fib->node_list);
4202 	memcpy(fib_node->key.addr, addr, addr_len);
4203 	fib_node->key.prefix_len = prefix_len;
4204 
4205 	return fib_node;
4206 }
4207 
4208 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4209 {
4210 	list_del(&fib_node->list);
4211 	WARN_ON(!list_empty(&fib_node->entry_list));
4212 	kfree(fib_node);
4213 }
4214 
4215 static bool
4216 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4217 				 const struct mlxsw_sp_fib_entry *fib_entry)
4218 {
4219 	return list_first_entry(&fib_node->entry_list,
4220 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4221 }
4222 
/* Account for the node's prefix length in the LPM tree used by its
 * protocol. If the prefix length is not yet covered, get a tree whose
 * prefix usage also includes it and migrate the virtual routers to it.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	/* Prefix length already in use - only bump its reference count. */
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	/* On the fast path this increments the original tree's count;
	 * otherwise the newly obtained tree's.
	 */
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4254 
/* Drop the node's reference on its prefix length. When the length
 * becomes unused, opportunistically shrink the LPM tree by migrating
 * to one that no longer covers it.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	/* Release the reference taken by mlxsw_sp_lpm_tree_get() above. */
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4285 
4286 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4287 				  struct mlxsw_sp_fib_node *fib_node,
4288 				  struct mlxsw_sp_fib *fib)
4289 {
4290 	int err;
4291 
4292 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4293 	if (err)
4294 		return err;
4295 	fib_node->fib = fib;
4296 
4297 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4298 	if (err)
4299 		goto err_fib_lpm_tree_link;
4300 
4301 	return 0;
4302 
4303 err_fib_lpm_tree_link:
4304 	fib_node->fib = NULL;
4305 	mlxsw_sp_fib_node_remove(fib, fib_node);
4306 	return err;
4307 }
4308 
/* Reverse of mlxsw_sp_fib_node_init(): unlink the node's prefix
 * length from the LPM tree and remove it from the hash table.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
4318 
/* Return the FIB node for the given prefix, creating it (and taking a
 * virtual router reference) if it does not exist yet. Released via
 * mlxsw_sp_fib_node_put(). Returns an ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	/* An existing node keeps the VR reference taken above. */
	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
4356 
/* Release a node obtained via mlxsw_sp_fib_node_get(). The node is
 * only torn down once it holds no more entries; the VR pointer is
 * saved up front because fib_node is freed before the VR is put.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
4368 
/* Find the entry before which new4_entry should be inserted to keep
 * the node's list ordered (by descending table ID, then descending
 * TOS, then ascending priority). Returns NULL when the new entry
 * belongs at a position with no such successor.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		/* Same table: return the first entry that does not rank
		 * strictly better than the new one.
		 */
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4389 
/* Append new4_entry after the run of entries that share fib4_entry's
 * table ID, TOS and priority. fib4_entry is the first entry of that
 * run, as found by mlxsw_sp_fib4_node_entry_find().
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	/* Walk past the matching run. If the loop exhausts the list,
	 * fib4_entry ends up as the head-embedded cursor, and the
	 * list_add_tail() below appends at the end of the list.
	 */
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4411 
/* Insert new4_entry into the node's list at its ordered position.
 * With 'append' the entry goes after its equivalents; with 'replace'
 * it must land directly before the entry it replaces.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No successor found: insert after the last entry whose
		 * table ID is still >= the new entry's, preserving the
		 * descending table ID order.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4451 
/* Unlink the entry from its node's ordered list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4457 
/* Program the entry to the device if it is the node's first (best)
 * entry; otherwise it stays software-only and nothing is written.
 */
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* Only clear the old entry's offload indication; the
		 * hardware entry itself is overwritten below.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
4478 
/* Remove the entry from the device. If it was the node's first entry
 * and others remain, the next entry takes over its hardware slot.
 */
static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	/* Entries that were never programmed need no device update. */
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		/* The deleted entry is gone from hardware - clear its
		 * offload indication without issuing a device delete.
		 */
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
4499 
4500 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4501 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4502 					 bool replace, bool append)
4503 {
4504 	int err;
4505 
4506 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4507 	if (err)
4508 		return err;
4509 
4510 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4511 	if (err)
4512 		goto err_fib_node_entry_add;
4513 
4514 	return 0;
4515 
4516 err_fib_node_entry_add:
4517 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4518 	return err;
4519 }
4520 
/* Reverse of mlxsw_sp_fib4_node_entry_link(): remove the entry from
 * the device (if programmed) and from the node's list, releasing the
 * decap tunnel index for decap entries.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
4531 
/* After a replace-style insertion, tear down the entry that was
 * superseded. Relies on mlxsw_sp_fib4_node_list_insert() having
 * placed the new entry directly before the replaced one.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	/* Drop the node reference that was held by the replaced entry. */
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4549 
/* Handle an IPv4 route addition notification: get (or create) the
 * prefix node, create the entry, link it into the node and, for
 * replace, retire the superseded entry. No-op once the router has
 * aborted FIB offload.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
4595 
/* Handle an IPv4 route deletion notification: find the matching
 * offloaded entry, unlink and destroy it, and release the prefix
 * node. No-op once the router has aborted FIB offload.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4614 
4615 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
4616 {
4617 	/* Packets with link-local destination IP arriving to the router
4618 	 * are trapped to the CPU, so no need to program specific routes
4619 	 * for them.
4620 	 */
4621 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
4622 		return true;
4623 
4624 	/* Multicast routes aren't supported, so ignore them. Neighbour
4625 	 * Discovery packets are specifically trapped.
4626 	 */
4627 	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
4628 		return true;
4629 
4630 	/* Cloned routes are irrelevant in the forwarding path. */
4631 	if (rt->rt6i_flags & RTF_CACHE)
4632 		return true;
4633 
4634 	return false;
4635 }
4636 
/* Wrap an rt6_info in a driver-private container, holding a reference
 * on the route. Returns an ERR_PTR() on allocation failure.
 */
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	rt6_hold(rt);

	return mlxsw_sp_rt6;
}
4654 
4655 #if IS_ENABLED(CONFIG_IPV6)
4656 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4657 {
4658 	rt6_release(rt);
4659 }
4660 #else
4661 static void mlxsw_sp_rt6_release(struct rt6_info *rt)
4662 {
4663 }
4664 #endif
4665 
/* Release the wrapped route's reference and free the container. */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
4671 
/* An IPv6 route can be part of a multipath entry only if it is a
 * gateway route that was not learned through router advertisements.
 */
static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}
4677 
/* Return the first kernel route backing this entry; used as the
 * representative when comparing entries.
 */
static struct rt6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}
4684 
/* Find an existing multipath-capable entry that nrt can join: same
 * table, same metric, and itself multipath-capable. Returns NULL when
 * nrt cannot be multipathed, when a replace is requested, or when no
 * such entry exists.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (rt->rt6i_metric < nrt->rt6i_metric)
			continue;
		if (rt->rt6i_metric == nrt->rt6i_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		/* Entries are ordered by metric; past this point no
		 * match is possible.
		 */
		if (rt->rt6i_metric > nrt->rt6i_metric)
			break;
	}

	return NULL;
}
4715 
4716 static struct mlxsw_sp_rt6 *
4717 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4718 			    const struct rt6_info *rt)
4719 {
4720 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4721 
4722 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4723 		if (mlxsw_sp_rt6->rt == rt)
4724 			return mlxsw_sp_rt6;
4725 	}
4726 
4727 	return NULL;
4728 }
4729 
/* Report whether the route's nexthop device is a known IP-in-IP
 * tunnel type; if so and ret is non-NULL, the type is stored there.
 */
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct rt6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->dst.dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
}
4737 
/* Resolve the nexthop's type from the route's device: an offloadable
 * IP-in-IP tunnel, or a regular Ethernet nexthop bound to a RIF with
 * neighbour tracking. A device without a RIF leaves the nexthop
 * initialized but unresolved.
 */
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp,
				       struct mlxsw_sp_nexthop *nh,
				       const struct rt6_info *rt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct net_device *dev = rt->dst.dev;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV6)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
4776 
/* Undo mlxsw_sp_nexthop6_type_init() via the common type teardown. */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4782 
/* Initialize one nexthop of an IPv6 nexthop group from its kernel
 * route: weight, gateway address, counter, and - when the route has a
 * device - the nexthop type.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->rt6i_nh_weight;
	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	/* Device-less routes leave the nexthop type unresolved. */
	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}
4803 
/* Reverse of mlxsw_sp_nexthop6_init(). */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
4811 
4812 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4813 				    const struct rt6_info *rt)
4814 {
4815 	return rt->rt6i_flags & RTF_GATEWAY ||
4816 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4817 }
4818 
/* Create a nexthop group covering all of the entry's routes, insert
 * it into the group hash table and program it to the device. Returns
 * an ERR_PTR() on failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* The nexthop array is allocated inline after the group. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	/* Initialize one nexthop per route, walking the entry's route
	 * list in step with the array index.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
4869 
/* Reverse of mlxsw_sp_nexthop6_group_create(): remove the group from
 * the hash table, tear down its nexthops and release its adjacency
 * entries before freeing it.
 */
static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i = nh_grp->count;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	/* The refresh releases the group's adjacency entries now that
	 * it has no nexthops; verify nothing is left behind.
	 */
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON(nh_grp->adj_index_valid);
	kfree(nh_grp);
}
4886 
4887 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4888 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4889 {
4890 	struct mlxsw_sp_nexthop_group *nh_grp;
4891 
4892 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4893 	if (!nh_grp) {
4894 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4895 		if (IS_ERR(nh_grp))
4896 			return PTR_ERR(nh_grp);
4897 	}
4898 
4899 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4900 		      &nh_grp->fib_list);
4901 	fib6_entry->common.nh_group = nh_grp;
4902 
4903 	return 0;
4904 }
4905 
4906 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4907 					struct mlxsw_sp_fib_entry *fib_entry)
4908 {
4909 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4910 
4911 	list_del(&fib_entry->nexthop_group_node);
4912 	if (!list_empty(&nh_grp->fib_list))
4913 		return;
4914 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
4915 }
4916 
/* Rebind the entry to a nexthop group matching its current route set
 * and reprogram it. On failure the entry is restored to its old
 * group; on success the old group is destroyed if it became unused.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	/* Detach from the old group but keep it alive until the switch
	 * to the new group has succeeded.
	 */
	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}
4952 
/* Add a route to a multipath entry and switch the entry to a nexthop
 * group reflecting the enlarged route set; rolled back on failure.
 */
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	/* The route must be on the list before the group update, which
	 * derives the new group from the entry's route set.
	 */
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}
4980 
/* Remove a route from a multipath entry and switch the entry to a
 * nexthop group reflecting the reduced route set.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	/* Take the route off the list before the group update, which
	 * derives the new group from the entry's remaining routes.
	 */
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
4997 
/* Classify the device action for an IPv6 route from its flags. The
 * RTF_LOCAL/RTF_ANYCAST check must precede the RTF_REJECT check so
 * host-directed routes keep their higher-priority trap.
 */
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct rt6_info *rt)
{
	/* Packets hitting RTF_REJECT routes need to be discarded by the
	 * stack. We can rely on their destination device not having a
	 * RIF (it's the loopback device) and can thus use action type
	 * local, which will cause them to be trapped with a lower
	 * priority than packets that need to be locally received.
	 */
	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->rt6i_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}
5017 
5018 static void
5019 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5020 {
5021 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5022 
5023 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5024 				 list) {
5025 		fib6_entry->nrt6--;
5026 		list_del(&mlxsw_sp_rt6->list);
5027 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5028 	}
5029 }
5030 
/* Allocate a FIB entry for kernel route @rt under @fib_node, wrapping
 * the route and attaching a nexthop group. Returns the new entry or an
 * ERR_PTR; on failure everything allocated here is unwound.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	/* Entry type (trap/local/remote) is derived from the first route. */
	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	/* Must come after the route list is populated, as the group is
	 * built from it.
	 */
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}
5072 
/* Tear down a FIB entry: release its nexthop group, free all wrapped
 * routes and finally the entry itself. nrt6 must be zero by then.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
5081 
/* Find the insertion point for @nrt in the node's entry list, which is
 * kept ordered by descending table ID and then by ascending metric
 * within a table. When @replace is set and an entry with the same
 * metric exists, prefer the one with matching multipath capability;
 * otherwise remember the first multipath-capable candidate as a
 * fallback. Returns the entry @nrt should be inserted before, or NULL.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* Skip entries from higher-numbered tables; stop once we
		 * move past our table's section of the list.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5108 
/* Insert @new6_entry at its sorted position in the owning node's entry
 * list (ordered by descending table ID, ascending metric). In replace
 * mode the entry being replaced must already exist; the new entry is
 * placed right before it.
 */
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		/* list_add_tail() on an entry's list node inserts the new
		 * entry directly before it.
		 */
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		/* No insertion point found: append after the last entry
		 * whose table ID is >= ours, keeping table order intact.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
5146 
/* Unlink the entry from its node's sorted entry list. */
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}
5152 
5153 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5154 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5155 					 bool replace)
5156 {
5157 	int err;
5158 
5159 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5160 	if (err)
5161 		return err;
5162 
5163 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5164 	if (err)
5165 		goto err_fib_node_entry_add;
5166 
5167 	return 0;
5168 
5169 err_fib_node_entry_add:
5170 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5171 	return err;
5172 }
5173 
/* Reverse of mlxsw_sp_fib6_node_entry_link(): remove the entry from the
 * device first, then from the node's list.
 */
static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
5181 
/* Look up the offloaded FIB entry containing kernel route @rt: resolve
 * the virtual router from the route's table ID, find the FIB node by
 * prefix, then scan its entries for one that holds @rt. Returns NULL if
 * any step fails to match.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
					    sizeof(rt->rt6i_dst.addr),
					    rt->rt6i_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* Match on table, metric and actual route membership. */
		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
		    rt->rt6i_metric == iter_rt->rt6i_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}
5213 
/* After a replace-mode insertion, destroy the entry that was replaced.
 * The new entry was inserted directly before it, so the replaced entry
 * is simply the next one in the node's list.
 */
static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	/* Drop the fib node reference held on behalf of the old entry. */
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5230 
/* Offload kernel IPv6 route @rt. Either appends it to an existing
 * multipath entry or creates and links a new FIB entry, optionally
 * replacing a previous one. Returns 0 on success or when offload is
 * skipped (abort mode, ignored route), negative errno otherwise.
 */
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct rt6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	/* Source-specific routing is not supported by the device. */
	if (rt->rt6i_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
					 &rt->rt6i_dst.addr,
					 sizeof(rt->rt6i_dst.addr),
					 rt->rt6i_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	/* In replace mode, dispose of the entry we just displaced. */
	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
5287 
/* Stop offloading kernel IPv6 route @rt: either shrink the multipath
 * entry it belongs to, or tear down the whole entry when it is the last
 * route in it.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5318 
/* Program an "abort" LPM tree for @proto: allocate tree @tree_id with a
 * single root (/0) node, bind every virtual router to it and install a
 * default route with an IP2ME (trap to CPU) action, so all traffic of
 * this protocol is handled by the kernel instead of the device.
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	/* Allocate the LPM tree. */
	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	/* Set its structure: a lone root node (0xff = no children). */
	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		/* Bind the virtual router to the abort tree. */
		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* Install a /0 (default) entry trapping to the CPU. */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5359 
5360 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5361 				     struct mfc_entry_notifier_info *men_info,
5362 				     bool replace)
5363 {
5364 	struct mlxsw_sp_vr *vr;
5365 
5366 	if (mlxsw_sp->router->aborted)
5367 		return 0;
5368 
5369 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5370 	if (IS_ERR(vr))
5371 		return PTR_ERR(vr);
5372 
5373 	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
5374 }
5375 
/* Stop offloading an IPv4 multicast route and drop the virtual router
 * reference taken by mlxsw_sp_router_fibmr_add().
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5391 
5392 static int
5393 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5394 			      struct vif_entry_notifier_info *ven_info)
5395 {
5396 	struct mlxsw_sp_rif *rif;
5397 	struct mlxsw_sp_vr *vr;
5398 
5399 	if (mlxsw_sp->router->aborted)
5400 		return 0;
5401 
5402 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5403 	if (IS_ERR(vr))
5404 		return PTR_ERR(vr);
5405 
5406 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5407 	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
5408 				   ven_info->vif_index,
5409 				   ven_info->vif_flags, rif);
5410 }
5411 
/* Unregister a multicast VIF and drop the virtual router reference
 * taken by mlxsw_sp_router_fibmr_vif_add().
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5428 
5429 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5430 {
5431 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5432 	int err;
5433 
5434 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5435 					       MLXSW_SP_LPM_TREE_MIN);
5436 	if (err)
5437 		return err;
5438 
5439 	/* The multicast router code does not need an abort trap as by default,
5440 	 * packets that don't match any routes are trapped to the CPU.
5441 	 */
5442 
5443 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5444 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5445 						MLXSW_SP_LPM_TREE_MIN + 1);
5446 }
5447 
/* Destroy every IPv4 entry on @fib_node. Each destroyed entry drops a
 * reference on the node, which may free it mid-loop - hence the
 * do_break dance below.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
5468 
/* IPv6 counterpart of mlxsw_sp_fib4_node_flush(): destroy every entry
 * on @fib_node, dropping a node reference per entry.
 */
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break once the list is drained; the final put may have
		 * freed the node, so further iteration would touch freed
		 * memory.
		 */
		if (do_break)
			break;
	}
}
5485 
/* Dispatch a node flush to the protocol-specific helper. */
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
		break;
	}
}
5498 
/* Flush all FIB nodes of one protocol within a virtual router. Flushing
 * the last node may free the FIB's node list itself, so the same
 * do_break pattern as in the node flush helpers is used.
 */
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5514 
/* Flush every used virtual router: multicast table first, then IPv4 and
 * IPv6 unicast FIBs. Used on abort to stop offloading everything.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;

		mlxsw_sp_mr_table_flush(vr->mr4_table);
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}
5536 
/* Enter abort mode: stop offloading FIB entries, flush everything that
 * was offloaded and trap all routed traffic to the CPU. Idempotent -
 * subsequent calls are no-ops once the aborted flag is set.
 */
static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	/* Flush before setting the flag, since the del/flush paths bail
	 * out early once aborted is true.
	 */
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
5550 
/* Deferred FIB notification: the notifier runs in atomic context, so
 * the event payload is copied here and processed from a workqueue under
 * RTNL.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	/* Which member is valid depends on the address family and event
	 * that queued the work.
	 */
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
5564 
/* Process a deferred IPv4 FIB event under RTNL, releasing the fib_info
 * reference taken when the work was queued. Any offload failure flips
 * the router into abort mode.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5607 
/* Process a deferred IPv6 FIB event under RTNL, releasing the rt6
 * reference taken when the work was queued. Any offload failure flips
 * the router into abort mode.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5641 
/* Process a deferred multicast route/VIF event under RTNL, releasing
 * the mfc cache or netdev reference taken when the work was queued. Any
 * offload failure flips the router into abort mode.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5688 
/* Copy an IPv4 notification payload into the work item and take the
 * references needed to keep it alive until the work runs.
 */
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}
5717 
/* Copy an IPv6 notification payload into the work item and hold the
 * route so it stays valid until the work runs.
 */
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		/* Released in mlxsw_sp_router_fib6_event_work(). */
		rt6_hold(fib_work->fen6_info.rt);
		break;
	}
}
5734 
/* Copy a multicast route/VIF notification payload into the work item
 * and take the mfc cache or netdev reference needed to keep it alive
 * until the work runs.
 */
static void
mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
			    struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
		/* Released in mlxsw_sp_router_fibmr_event_work(). */
		ipmr_cache_hold(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD: /* fall through */
	case FIB_EVENT_VIF_DEL:
		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
		/* Released in mlxsw_sp_router_fibmr_event_work(). */
		dev_hold(fib_work->ven_info.dev);
		break;
	}
}
5753 
5754 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5755 					  struct fib_notifier_info *info,
5756 					  struct mlxsw_sp *mlxsw_sp)
5757 {
5758 	struct netlink_ext_ack *extack = info->extack;
5759 	struct fib_rule_notifier_info *fr_info;
5760 	struct fib_rule *rule;
5761 	int err = 0;
5762 
5763 	/* nothing to do at the moment */
5764 	if (event == FIB_EVENT_RULE_DEL)
5765 		return 0;
5766 
5767 	if (mlxsw_sp->router->aborted)
5768 		return 0;
5769 
5770 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5771 	rule = fr_info->rule;
5772 
5773 	switch (info->family) {
5774 	case AF_INET:
5775 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5776 			err = -1;
5777 		break;
5778 	case AF_INET6:
5779 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5780 			err = -1;
5781 		break;
5782 	case RTNL_FAMILY_IPMR:
5783 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5784 			err = -1;
5785 		break;
5786 	}
5787 
5788 	if (err < 0)
5789 		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
5790 
5791 	return err;
5792 }
5793 
/* Called with rcu_read_lock() */
/* Top-level FIB notifier callback. Filters to the init namespace and
 * supported families, vets rule events synchronously, and defers
 * everything else to a per-family work item (GFP_ATOMIC - we are in
 * atomic notifier context).
 */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		/* Supported rules need no work; unsupported ones fall
		 * through and queue work that triggers a FIB abort.
		 */
		if (!err)
			return NOTIFY_DONE;
	}

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
5845 
5846 static struct mlxsw_sp_rif *
5847 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5848 			 const struct net_device *dev)
5849 {
5850 	int i;
5851 
5852 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5853 		if (mlxsw_sp->router->rifs[i] &&
5854 		    mlxsw_sp->router->rifs[i]->dev == dev)
5855 			return mlxsw_sp->router->rifs[i];
5856 
5857 	return NULL;
5858 }
5859 
/* Disable a router interface in hardware: read back its current RITR
 * record, clear the enable bit and write it out again.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
5873 
/* Synchronize state after a RIF went away: disable it in hardware
 * first, then flush the nexthops and neighbours that referenced it.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
5881 
/* Decide whether an inetaddr event should (re)configure the RIF for
 * @dev: create one on NETDEV_UP when none exists; destroy it on
 * NETDEV_DOWN only once the device has neither IPv4 nor IPv6 addresses
 * left and is not an l3mdev slave.
 */
static bool
mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
			   unsigned long event)
{
	struct inet6_dev *inet6_dev;
	bool addr_list_empty = true;
	struct in_device *idev;

	switch (event) {
	case NETDEV_UP:
		return rif == NULL;
	case NETDEV_DOWN:
		idev = __in_dev_get_rtnl(dev);
		if (idev && idev->ifa_list)
			addr_list_empty = false;

		inet6_dev = __in6_dev_get(dev);
		if (addr_list_empty && inet6_dev &&
		    !list_empty(&inet6_dev->addr_list))
			addr_list_empty = false;

		if (rif && addr_list_empty &&
		    !netif_is_l3_slave(rif->dev))
			return true;
		/* It is possible we already removed the RIF ourselves
		 * if it was assigned to a netdev that is now a bridge
		 * or LAG slave.
		 */
		return false;
	}

	return false;
}
5915 
/* Map a netdev to the RIF type that should back it. IP-in-IP tunnels
 * get a loopback RIF directly; everything else is derived from the
 * underlying FID type. Branch order matters: a VLAN device on top of a
 * bridge must be classified before the bare bridge checks.
 */
static enum mlxsw_sp_rif_type
mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
		      const struct net_device *dev)
{
	enum mlxsw_sp_fid_type type;

	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
		return MLXSW_SP_RIF_TYPE_IPIP_LB;

	/* Otherwise RIF type is derived from the type of the underlying FID. */
	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
		type = MLXSW_SP_FID_TYPE_8021Q;
	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
		type = MLXSW_SP_FID_TYPE_8021Q;
	else if (netif_is_bridge_master(dev))
		type = MLXSW_SP_FID_TYPE_8021D;
	else
		type = MLXSW_SP_FID_TYPE_RFID;

	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
}
5937 
5938 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5939 {
5940 	int i;
5941 
5942 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5943 		if (!mlxsw_sp->router->rifs[i]) {
5944 			*p_rif_index = i;
5945 			return 0;
5946 		}
5947 	}
5948 
5949 	return -ENOBUFS;
5950 }
5951 
5952 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5953 					       u16 vr_id,
5954 					       struct net_device *l3_dev)
5955 {
5956 	struct mlxsw_sp_rif *rif;
5957 
5958 	rif = kzalloc(rif_size, GFP_KERNEL);
5959 	if (!rif)
5960 		return NULL;
5961 
5962 	INIT_LIST_HEAD(&rif->nexthop_list);
5963 	INIT_LIST_HEAD(&rif->neigh_list);
5964 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
5965 	rif->mtu = l3_dev->mtu;
5966 	rif->vr_id = vr_id;
5967 	rif->dev = l3_dev;
5968 	rif->rif_index = rif_index;
5969 
5970 	return rif;
5971 }
5972 
/* Return the RIF at table slot @rif_index, or NULL when unused. */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
5978 
/* Return the hardware index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
5983 
/* Return the hardware index of a loopback RIF's common part. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
5988 
/* Return the underlay virtual router ID of a loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
5993 
/* Return the ifindex of the netdev backing @rif. */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
5998 
/* Return the netdev backing @rif. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
6003 
/* Create and configure a router interface for the netdev described by
 * @params: resolve the RIF type and ops, take a virtual router
 * reference, allocate an index and the RIF structure, attach a FID if
 * the type requires one, and program the device. On any failure the
 * steps already taken are unwound in reverse order and an ERR_PTR is
 * returned.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	/* Devices without an l3mdev table land in the main table. */
	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* Not all RIF types are backed by a FID (e.g. loopback). */
	if (ops->fid_get) {
		fid = ops->fid_get(rif);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	/* Type-specific initialization before programming the device. */
	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
	if (err)
		goto err_mr_rif_add;

	mlxsw_sp_rif_counters_alloc(rif);
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6078 
/* Destroy @rif, mirroring mlxsw_sp_rif_create() in reverse: sync away users
 * (nexthops, neighbours), unlink the RIF from the table, release counters,
 * the multicast router binding, the device configuration and the FID, and
 * finally drop the virtual router reference.
 */
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_rif_counters_free(rif);
	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
	ops->deconfigure(rif);
	if (fid)
		/* Loopback RIFs are not associated with a FID. */
		mlxsw_sp_fid_put(fid);
	kfree(rif);
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6100 
6101 static void
6102 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6103 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6104 {
6105 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6106 
6107 	params->vid = mlxsw_sp_port_vlan->vid;
6108 	params->lag = mlxsw_sp_port->lagged;
6109 	if (params->lag)
6110 		params->lag_id = mlxsw_sp_port->lag_id;
6111 	else
6112 		params->system_port = mlxsw_sp_port->local_port;
6113 }
6114 
6115 static int
6116 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6117 			       struct net_device *l3_dev,
6118 			       struct netlink_ext_ack *extack)
6119 {
6120 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6121 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6122 	u16 vid = mlxsw_sp_port_vlan->vid;
6123 	struct mlxsw_sp_rif *rif;
6124 	struct mlxsw_sp_fid *fid;
6125 	int err;
6126 
6127 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6128 	if (!rif) {
6129 		struct mlxsw_sp_rif_params params = {
6130 			.dev = l3_dev,
6131 		};
6132 
6133 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6134 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6135 		if (IS_ERR(rif))
6136 			return PTR_ERR(rif);
6137 	}
6138 
6139 	/* FID was already created, just take a reference */
6140 	fid = rif->ops->fid_get(rif);
6141 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6142 	if (err)
6143 		goto err_fid_port_vid_map;
6144 
6145 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6146 	if (err)
6147 		goto err_port_vid_learning_set;
6148 
6149 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6150 					BR_STATE_FORWARDING);
6151 	if (err)
6152 		goto err_port_vid_stp_set;
6153 
6154 	mlxsw_sp_port_vlan->fid = fid;
6155 
6156 	return 0;
6157 
6158 err_port_vid_stp_set:
6159 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6160 err_port_vid_learning_set:
6161 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6162 err_fid_port_vid_map:
6163 	mlxsw_sp_fid_put(fid);
6164 	return err;
6165 }
6166 
/* Undo mlxsw_sp_port_vlan_router_join(): restore STP blocking and learning
 * on the {port, VID}, unmap the VID from the rFID and drop the FID
 * reference taken on join.
 */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	/* Only router FIDs (rFIDs) are expected here */
	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}
6186 
6187 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6188 					     struct net_device *port_dev,
6189 					     unsigned long event, u16 vid,
6190 					     struct netlink_ext_ack *extack)
6191 {
6192 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6193 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6194 
6195 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6196 	if (WARN_ON(!mlxsw_sp_port_vlan))
6197 		return -EINVAL;
6198 
6199 	switch (event) {
6200 	case NETDEV_UP:
6201 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6202 						      l3_dev, extack);
6203 	case NETDEV_DOWN:
6204 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6205 		break;
6206 	}
6207 
6208 	return 0;
6209 }
6210 
/* Handle an address event on a physical port netdev. Enslaved ports
 * (bridge, LAG, OVS) are configured through their master instead.
 */
static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
					unsigned long event,
					struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(port_dev))
		return 0;
	if (netif_is_lag_port(port_dev))
		return 0;
	if (netif_is_ovs_port(port_dev))
		return 0;

	/* A bare port is treated as its own L3 device on the PVID (1) */
	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
						 extack);
}
6223 
6224 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6225 					 struct net_device *lag_dev,
6226 					 unsigned long event, u16 vid,
6227 					 struct netlink_ext_ack *extack)
6228 {
6229 	struct net_device *port_dev;
6230 	struct list_head *iter;
6231 	int err;
6232 
6233 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6234 		if (mlxsw_sp_port_dev_check(port_dev)) {
6235 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6236 								port_dev,
6237 								event, vid,
6238 								extack);
6239 			if (err)
6240 				return err;
6241 		}
6242 	}
6243 
6244 	return 0;
6245 }
6246 
/* Handle an address event on a LAG netdev. A bridged LAG is configured
 * through its bridge master, so nothing to do for it here.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	/* The LAG itself acts as the L3 device on the PVID (1) */
	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
					     extack);
}
6257 
6258 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6259 					  unsigned long event,
6260 					  struct netlink_ext_ack *extack)
6261 {
6262 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6263 	struct mlxsw_sp_rif_params params = {
6264 		.dev = l3_dev,
6265 	};
6266 	struct mlxsw_sp_rif *rif;
6267 
6268 	switch (event) {
6269 	case NETDEV_UP:
6270 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6271 		if (IS_ERR(rif))
6272 			return PTR_ERR(rif);
6273 		break;
6274 	case NETDEV_DOWN:
6275 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6276 		mlxsw_sp_rif_destroy(rif);
6277 		break;
6278 	}
6279 
6280 	return 0;
6281 }
6282 
/* Handle an address event on a VLAN upper by dispatching to the handler
 * matching its real device: physical port, LAG, or VLAN-aware bridge.
 * Bridged VLAN devices are configured through their bridge master instead.
 */
static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
					unsigned long event,
					struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
	u16 vid = vlan_dev_vlan_id(vlan_dev);

	if (netif_is_bridge_port(vlan_dev))
		return 0;

	if (mlxsw_sp_port_dev_check(real_dev))
		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
							 event, vid, extack);
	else if (netif_is_lag_master(real_dev))
		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
						     vid, extack);
	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);

	return 0;
}
6304 
/* Dispatch an address event to the handler matching @dev's netdev kind.
 * Unrecognized kinds are silently ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	return 0;
}
6320 
/* IPv4 address notifier callback. Only NETDEV_DOWN is handled here; no
 * extack is available in this notifier, so NETDEV_UP goes through
 * mlxsw_sp_inetaddr_valid_event() instead.
 */
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
	if (event == NETDEV_UP)
		goto out;

	/* Ignore netdevs that are not backed by this driver */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	return notifier_from_errno(err);
}
6346 
/* IPv4 address validator callback: handles NETDEV_UP, where the validator
 * info supplies an extack for reporting configuration errors to user space.
 */
int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
	struct net_device *dev = ivi->ivi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* Ignore netdevs that are not backed by this driver */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
out:
	return notifier_from_errno(err);
}
6368 
/* Deferred work context for IPv6 address events, which are delivered in
 * atomic context and must be processed under RTNL instead.
 */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct net_device *dev;	/* held via dev_hold() until the work runs */
	unsigned long event;
};
6374 
/* Process a deferred IPv6 address event under RTNL, then release the device
 * reference taken by mlxsw_sp_inet6addr_event() and free the work item.
 */
static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	rtnl_lock();
	/* Ignore netdevs that are not backed by this driver */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	rtnl_unlock();
	dev_put(dev);
	kfree(inet6addr_work);
}
6399 
/* Called with rcu_read_lock() */
/* IPv6 address notifier callback. Runs in atomic context, so the actual
 * processing is deferred to a work item; a reference on @dev is taken here
 * and dropped by the work handler.
 */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
			     unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;

	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
	if (event == NETDEV_UP)
		return NOTIFY_DONE;

	/* Ignore netdevs without an mlxsw port lower */
	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
		return NOTIFY_DONE;

	/* GFP_ATOMIC: we may not sleep in this context */
	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	dev_hold(dev);
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}
6427 
/* IPv6 address validator callback: handles NETDEV_UP, where the validator
 * info supplies an extack for reporting configuration errors to user space.
 */
int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
	struct net_device *dev = i6vi->i6vi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* Ignore netdevs that are not backed by this driver */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
out:
	return notifier_from_errno(err);
}
6449 
/* Update the MAC address and MTU of RIF @rif_index by reading back its
 * current RITR register contents, modifying only those fields, and writing
 * the register again.
 */
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6466 
/* Handle a MAC address and/or MTU change on a netdev that backs a RIF:
 * remove the old FDB entry, rewrite the RIF, install the new FDB entry and
 * keep the multicast router's MTU view in sync. Each step is rolled back on
 * failure of a later one.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	/* Nothing to do if the netdev has no RIF */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* Retire the FDB entry for the old MAC before editing the RIF */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
	}

	/* Cache the new values only after everything succeeded */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6520 
/* Move @l3_dev into the VRF's virtual router by recreating its RIF, which
 * re-resolves the FIB table via l3mdev on creation.
 */
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev,
				  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);

	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
}
6536 
6537 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6538 				    struct net_device *l3_dev)
6539 {
6540 	struct mlxsw_sp_rif *rif;
6541 
6542 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6543 	if (!rif)
6544 		return;
6545 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6546 }
6547 
/* CHANGEUPPER handler for a netdev being enslaved to or released from a
 * VRF master. Returns 0 or a negative errno from the join path.
 */
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
				 struct netdev_notifier_changeupper_info *info)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	int err = 0;

	/* Ignore netdevs that are not backed by this driver */
	if (!mlxsw_sp)
		return 0;

	switch (event) {
	case NETDEV_PRECHANGEUPPER:
		return 0;
	case NETDEV_CHANGEUPPER:
		if (info->linking) {
			struct netlink_ext_ack *extack;

			extack = netdev_notifier_info_to_extack(&info->info);
			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
		} else {
			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
		}
		break;
	}

	return err;
}
6574 
/* Convert a generic RIF to its enclosing sub-port RIF container. */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
6580 
6581 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6582 				       const struct mlxsw_sp_rif_params *params)
6583 {
6584 	struct mlxsw_sp_rif_subport *rif_subport;
6585 
6586 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6587 	rif_subport->vid = params->vid;
6588 	rif_subport->lag = params->lag;
6589 	if (params->lag)
6590 		rif_subport->lag_id = params->lag_id;
6591 	else
6592 		rif_subport->system_port = params->system_port;
6593 }
6594 
/* Write the RITR register for a sub-port RIF, enabling or disabling it in
 * hardware according to @enable.
 */
static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	/* Identify the sub-port by either {LAG, VID} or {system port, VID} */
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6612 
/* Program a sub-port RIF: enable it in hardware, install the FDB entry for
 * the RIF's MAC in its FID, and back-link the FID to the RIF. Rolls back
 * the RITR write if the FDB operation fails.
 */
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_rif_subport_op(rif, true);
	if (err)
		return err;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_subport_op(rif, false);
	return err;
}
6633 
/* Undo mlxsw_sp_rif_subport_configure() in reverse order: unlink the FID,
 * remove the FDB entry and disable the RIF in hardware.
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_subport_op(rif, false);
}
6643 
/* Sub-port RIFs use a router FID (rFID) keyed by the RIF index. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
6649 
/* Operations for sub-port RIFs: router interfaces on top of a single
 * {port, VID} or {LAG, VID} pair.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6658 
/* Write the RITR register for a VLAN or FID RIF identified by @vid_fid,
 * enabling or disabling it according to @enable.
 */
static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6673 
/* The router port is addressed as one past the device's maximum port. */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
6678 
/* Program a VLAN RIF: enable it in hardware, flood MC and BC traffic in the
 * FID to the router port, install the FDB entry for the RIF's MAC, and
 * back-link the FID. Each step is rolled back if a later one fails.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
6717 
/* Undo mlxsw_sp_rif_vlan_configure() in reverse order: unlink the FID,
 * remove the FDB entry, stop flooding to the router port and disable the
 * RIF in hardware.
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
6733 
6734 static struct mlxsw_sp_fid *
6735 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6736 {
6737 	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6738 
6739 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
6740 }
6741 
/* Operations for VLAN RIFs (router interfaces on a VLAN-aware bridge). */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
6749 
/* Program a FID RIF: enable it in hardware, flood MC and BC traffic in the
 * FID to the router port, install the FDB entry for the RIF's MAC, and
 * back-link the FID. Each step is rolled back if a later one fails.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}
6789 
/* Undo mlxsw_sp_rif_fid_configure() in reverse order: unlink the FID,
 * remove the FDB entry, stop flooding to the router port and disable the
 * RIF in hardware.
 */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
6805 
/* FID RIFs use an 802.1D FID keyed by the netdev's ifindex. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}
6811 
/* Operations for FID RIFs (router interfaces on a VLAN-unaware bridge). */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
6819 
/* Convert a generic RIF to its enclosing IP-in-IP loopback container. */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
6825 
6826 static void
6827 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6828 			   const struct mlxsw_sp_rif_params *params)
6829 {
6830 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6831 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
6832 
6833 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6834 				 common);
6835 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6836 	rif_lb->lb_config = params_lb->lb_config;
6837 }
6838 
/* Write the RITR register for an IP-in-IP loopback RIF, enabling or
 * disabling it according to @enable. Only IPv4 underlays are supported;
 * IPv6 returns -EAFNOSUPPORT.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6865 
/* Program an IP-in-IP loopback RIF: resolve and take a reference on the
 * underlay virtual router, then enable the loopback in hardware. The VR
 * reference is dropped again if the hardware write fails.
 */
static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	/* Remember the underlay VR so deconfigure can find it again */
	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}
6891 
/* Undo mlxsw_sp_rif_ipip_lb_configure(): disable the loopback in hardware
 * and drop the reference on the underlay virtual router.
 */
static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
6904 
/* Operations for IP-in-IP loopback RIFs. Note: no fid_get callback, since
 * loopback RIFs are not associated with a FID.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
6912 
/* Per-type RIF operations, indexed by enum mlxsw_sp_rif_type. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
6919 
6920 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6921 {
6922 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6923 
6924 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
6925 					 sizeof(struct mlxsw_sp_rif *),
6926 					 GFP_KERNEL);
6927 	if (!mlxsw_sp->router->rifs)
6928 		return -ENOMEM;
6929 
6930 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6931 
6932 	return 0;
6933 }
6934 
6935 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6936 {
6937 	int i;
6938 
6939 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6940 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6941 
6942 	kfree(mlxsw_sp->router->rifs);
6943 }
6944 
/* Write the TIGCR (tunneling IPinIP general configuration) register. */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
6953 
/* Initialize IP-in-IP support: hook up the tunnel type operations, prepare
 * the tunnel list and configure the device's global tunneling register.
 */
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}
6960 
/* All tunnel entries should be gone by teardown time; warn otherwise. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
6965 
/* Callback invoked before the FIB notifier chain replays a full dump. */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
6978 
6979 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable @header in the RECR2 outer-header enable bitmap. */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
6984 
/* Enable @field in the RECR2 outer-header hash-fields bitmap. */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
6989 
/* Configure the IPv4 multipath hash inputs in RECR2 to match the kernel's
 * fib_multipath_hash_policy sysctl: L3 (src/dst IP) only when the policy is
 * 0, plus L4 ports and protocol otherwise.
 */
static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}
7006 
/* Program the IPv6 multipath hash inputs into the RECR2 payload.
 * Unlike the IPv4 case, there is no policy conditional here: hashing
 * always covers the addresses, the flow label and the next header.
 */
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
}
7017 
7018 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7019 {
7020 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7021 	u32 seed;
7022 
7023 	get_random_bytes(&seed, sizeof(seed));
7024 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7025 	mlxsw_sp_mp4_hash_init(recr2_pl);
7026 	mlxsw_sp_mp6_hash_init(recr2_pl);
7027 
7028 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7029 }
7030 #else
/* Without CONFIG_IP_ROUTE_MULTIPATH there is no ECMP hash to configure. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7035 #endif
7036 
/* Populate the DSCP-to-switch-priority mapping (RDPM register) so that
 * the hardware's priority decision matches what the kernel would derive
 * from the ToS field. Returns the status of the register write.
 */
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
7054 
7055 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7056 {
7057 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7058 	u64 max_rifs;
7059 	int err;
7060 
7061 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7062 		return -EIO;
7063 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7064 
7065 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7066 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7067 	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7068 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7069 	if (err)
7070 		return err;
7071 	return 0;
7072 }
7073 
/* Disable the router by clearing the RGCR enable flags. The write status
 * is intentionally ignored on this teardown path.
 */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
7081 
/* Initialize the entire routing subsystem: allocate the router context
 * and bring up its sub-modules — global router config, RIFs, IP-in-IP,
 * nexthop and nexthop-group tables, LPM trees, multicast routing,
 * virtual routers, neighbour handling, multipath hashing and DSCP
 * mapping — before finally registering the netevent and FIB notifiers.
 *
 * On any failure, everything initialized so far is torn down in reverse
 * order via the cascading error labels below.
 *
 * Returns 0 on success or a negative errno.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	/* Register the FIB notifier last, once all routing state is ready
	 * to accept route events. The dump-flush callback lets the FIB
	 * notifier core flush our tables before restarting a dump.
	 */
	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

	/* Error path: each label undoes everything initialized before the
	 * step that failed, in reverse order of initialization.
	 */
err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	kfree(mlxsw_sp->router);
	return err;
}
7181 
/* Tear down the routing subsystem in the exact reverse order of
 * mlxsw_sp_router_init(). Notifiers are unregistered first so no new
 * events arrive while the sub-modules are being dismantled.
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}
7197