xref: /openbmc/linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c (revision a0ae2562c6c4b2721d9fddba63b7286c13517d9f)
1 /*
2  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3  * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7  * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the names of the copyright holders nor the names of its
18  *    contributors may be used to endorse or promote products derived from
19  *    this software without specific prior written permission.
20  *
21  * Alternatively, this software may be distributed under the terms of the
22  * GNU General Public License ("GPL") version 2 as published by the Free
23  * Software Foundation.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include <linux/kernel.h>
39 #include <linux/types.h>
40 #include <linux/rhashtable.h>
41 #include <linux/bitops.h>
42 #include <linux/in6.h>
43 #include <linux/notifier.h>
44 #include <linux/inetdevice.h>
45 #include <linux/netdevice.h>
46 #include <linux/if_bridge.h>
47 #include <linux/socket.h>
48 #include <linux/route.h>
49 #include <linux/gcd.h>
50 #include <linux/random.h>
51 #include <linux/if_macvlan.h>
52 #include <net/netevent.h>
53 #include <net/neighbour.h>
54 #include <net/arp.h>
55 #include <net/ip_fib.h>
56 #include <net/ip6_fib.h>
57 #include <net/fib_rules.h>
58 #include <net/ip_tunnels.h>
59 #include <net/l3mdev.h>
60 #include <net/addrconf.h>
61 #include <net/ndisc.h>
62 #include <net/ipv6.h>
63 #include <net/fib_notifier.h>
64 #include <net/switchdev.h>
65 
66 #include "spectrum.h"
67 #include "core.h"
68 #include "reg.h"
69 #include "spectrum_cnt.h"
70 #include "spectrum_dpipe.h"
71 #include "spectrum_ipip.h"
72 #include "spectrum_mr.h"
73 #include "spectrum_mr_tcam.h"
74 #include "spectrum_router.h"
75 #include "spectrum_span.h"
76 
/* Forward declarations of types that are referenced by pointer (in the
 * structures and prototypes below) before their full definitions appear.
 */
77 struct mlxsw_sp_fib;
78 struct mlxsw_sp_vr;
79 struct mlxsw_sp_lpm_tree;
80 struct mlxsw_sp_rif_ops;
81 
/* Router state of one mlxsw_sp instance. The code in this file reaches it
 * through mlxsw_sp->router.
 */
82 struct mlxsw_sp_router {
83 	struct mlxsw_sp *mlxsw_sp;
84 	struct mlxsw_sp_rif **rifs;
85 	struct mlxsw_sp_vr *vrs; /* array allocated by mlxsw_sp_vrs_init() */
86 	struct rhashtable neigh_ht;
87 	struct rhashtable nexthop_group_ht;
88 	struct rhashtable nexthop_ht;
89 	struct list_head nexthop_list;
90 	struct {
91 		/* One tree for each protocol: IPv4 and IPv6 */
92 		struct mlxsw_sp_lpm_tree *proto_trees[2];
93 		struct mlxsw_sp_lpm_tree *trees; /* tree_count entries, allocated by mlxsw_sp_lpm_init() */
94 		unsigned int tree_count; /* MAX_LPM_TREES resource minus MLXSW_SP_LPM_TREE_MIN */
95 	} lpm;
96 	struct {
97 		struct delayed_work dw;
98 		unsigned long interval;	/* ms */
99 	} neighs_update;
100 	struct delayed_work nexthop_probe_dw;
101 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
102 	struct list_head nexthop_neighs_list;
103 	struct list_head ipip_list;
104 	bool aborted;
105 	struct notifier_block fib_nb;
106 	struct notifier_block netevent_nb;
107 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
108 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr; /* indexed by enum mlxsw_sp_ipip_type */
109 };
110 
/* Common part of a router interface (RIF). Type-specific RIF structures in
 * this file embed it as their 'common' member.
 */
111 struct mlxsw_sp_rif {
112 	struct list_head nexthop_list;
113 	struct list_head neigh_list;
114 	struct net_device *dev;
115 	struct mlxsw_sp_fid *fid;
116 	unsigned char addr[ETH_ALEN];
117 	int mtu;
118 	u16 rif_index;
119 	u16 vr_id;
120 	const struct mlxsw_sp_rif_ops *ops;
121 	struct mlxsw_sp *mlxsw_sp;
122 
/* Counter indexes taken from MLXSW_SP_COUNTER_SUB_POOL_RIF by
 * mlxsw_sp_rif_counter_alloc(). An index is only meaningful while the
 * matching *_valid flag is true.
 */
123 	unsigned int counter_ingress;
124 	bool counter_ingress_valid;
125 	unsigned int counter_egress;
126 	bool counter_egress_valid;
127 };
128 
/* Parameters handed to mlxsw_sp_rif_create(). */
129 struct mlxsw_sp_rif_params {
130 	struct net_device *dev;
/* NOTE(review): 'lag' presumably selects which union member is meaningful
 * (lag_id when true, system_port otherwise) - confirm against the users.
 */
131 	union {
132 		u16 system_port;
133 		u16 lag_id;
134 	};
135 	u16 vid;
136 	bool lag;
137 };
138 
/* Sub-port RIF: the common RIF part followed by the same port/LAG, VID and
 * 'lag' fields that struct mlxsw_sp_rif_params carries.
 */
139 struct mlxsw_sp_rif_subport {
140 	struct mlxsw_sp_rif common;
141 	union {
142 		u16 system_port;
143 		u16 lag_id;
144 	};
145 	u16 vid;
146 	bool lag;
147 };
148 
/* IP-in-IP loopback RIF. mlxsw_sp_ipip_ol_ipip_lb_create() obtains it from
 * the embedded 'common' member via container_of().
 */
149 struct mlxsw_sp_rif_ipip_lb {
150 	struct mlxsw_sp_rif common;
151 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
152 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
153 };
154 
/* RIF creation parameters for a loopback RIF: the common parameters plus the
 * loopback configuration. Its 'common' member is what gets passed to
 * mlxsw_sp_rif_create().
 */
155 struct mlxsw_sp_rif_params_ipip_lb {
156 	struct mlxsw_sp_rif_params common;
157 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
158 };
159 
/* Per-RIF-type operations table; each RIF points at one through
 * struct mlxsw_sp_rif's 'ops' member.
 */
160 struct mlxsw_sp_rif_ops {
161 	enum mlxsw_sp_rif_type type;
162 	size_t rif_size; /* NOTE(review): presumably the size of the type-specific RIF struct - confirm */
163 
164 	void (*setup)(struct mlxsw_sp_rif *rif,
165 		      const struct mlxsw_sp_rif_params *params);
166 	int (*configure)(struct mlxsw_sp_rif *rif);
167 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
168 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
169 					 struct netlink_ext_ack *extack);
170 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
171 };
172 
/* Prototypes for helpers that are defined further down in this file but are
 * already needed by mlxsw_sp_fib_create() and mlxsw_sp_fib_destroy().
 */
173 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
174 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
175 				  struct mlxsw_sp_lpm_tree *lpm_tree);
176 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
177 				     const struct mlxsw_sp_fib *fib,
178 				     u8 tree_id);
179 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
180 				       const struct mlxsw_sp_fib *fib);
181 
182 static unsigned int *
183 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
184 			   enum mlxsw_sp_rif_counter_dir dir)
185 {
186 	switch (dir) {
187 	case MLXSW_SP_RIF_COUNTER_EGRESS:
188 		return &rif->counter_egress;
189 	case MLXSW_SP_RIF_COUNTER_INGRESS:
190 		return &rif->counter_ingress;
191 	}
192 	return NULL;
193 }
194 
195 static bool
196 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
197 			       enum mlxsw_sp_rif_counter_dir dir)
198 {
199 	switch (dir) {
200 	case MLXSW_SP_RIF_COUNTER_EGRESS:
201 		return rif->counter_egress_valid;
202 	case MLXSW_SP_RIF_COUNTER_INGRESS:
203 		return rif->counter_ingress_valid;
204 	}
205 	return false;
206 }
207 
208 static void
209 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
210 			       enum mlxsw_sp_rif_counter_dir dir,
211 			       bool valid)
212 {
213 	switch (dir) {
214 	case MLXSW_SP_RIF_COUNTER_EGRESS:
215 		rif->counter_egress_valid = valid;
216 		break;
217 	case MLXSW_SP_RIF_COUNTER_INGRESS:
218 		rif->counter_ingress_valid = valid;
219 		break;
220 	}
221 }
222 
223 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
224 				     unsigned int counter_index, bool enable,
225 				     enum mlxsw_sp_rif_counter_dir dir)
226 {
227 	char ritr_pl[MLXSW_REG_RITR_LEN];
228 	bool is_egress = false;
229 	int err;
230 
231 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
232 		is_egress = true;
233 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
234 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
235 	if (err)
236 		return err;
237 
238 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
239 				    is_egress);
240 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
241 }
242 
243 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
244 				   struct mlxsw_sp_rif *rif,
245 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
246 {
247 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
248 	unsigned int *p_counter_index;
249 	bool valid;
250 	int err;
251 
252 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
253 	if (!valid)
254 		return -EINVAL;
255 
256 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
257 	if (!p_counter_index)
258 		return -EINVAL;
259 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
260 			     MLXSW_REG_RICNT_OPCODE_NOP);
261 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
262 	if (err)
263 		return err;
264 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
265 	return 0;
266 }
267 
268 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
269 				      unsigned int counter_index)
270 {
271 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
272 
273 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
274 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
275 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
276 }
277 
278 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
279 			       struct mlxsw_sp_rif *rif,
280 			       enum mlxsw_sp_rif_counter_dir dir)
281 {
282 	unsigned int *p_counter_index;
283 	int err;
284 
285 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
286 	if (!p_counter_index)
287 		return -EINVAL;
288 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
289 				     p_counter_index);
290 	if (err)
291 		return err;
292 
293 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
294 	if (err)
295 		goto err_counter_clear;
296 
297 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
298 					*p_counter_index, true, dir);
299 	if (err)
300 		goto err_counter_edit;
301 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
302 	return 0;
303 
304 err_counter_edit:
305 err_counter_clear:
306 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
307 			      *p_counter_index);
308 	return err;
309 }
310 
311 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
312 			       struct mlxsw_sp_rif *rif,
313 			       enum mlxsw_sp_rif_counter_dir dir)
314 {
315 	unsigned int *p_counter_index;
316 
317 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
318 		return;
319 
320 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
321 	if (WARN_ON(!p_counter_index))
322 		return;
323 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
324 				  *p_counter_index, false, dir);
325 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
326 			      *p_counter_index);
327 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
328 }
329 
330 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
331 {
332 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
333 	struct devlink *devlink;
334 
335 	devlink = priv_to_devlink(mlxsw_sp->core);
336 	if (!devlink_dpipe_table_counter_enabled(devlink,
337 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
338 		return;
339 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
340 }
341 
342 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
343 {
344 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
345 
346 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
347 }
348 
/* Number of possible prefix lengths for an address as wide as
 * struct in6_addr: every length from 0 up to the address width in bits,
 * hence the "+ 1".
 */
349 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
350 
/* Bitmap with one bit per prefix length; bit N is manipulated through
 * mlxsw_sp_prefix_usage_set()/mlxsw_sp_prefix_usage_clear() with
 * prefix_len == N.
 */
351 struct mlxsw_sp_prefix_usage {
352 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
353 };
354 
/* Iterate 'prefix' over every prefix length whose bit is set. */
355 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
356 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
357 
358 static bool
359 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
360 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
361 {
362 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
363 }
364 
365 static void
366 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
367 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
368 {
369 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
370 }
371 
372 static void
373 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
374 			  unsigned char prefix_len)
375 {
376 	set_bit(prefix_len, prefix_usage->b);
377 }
378 
379 static void
380 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
381 			    unsigned char prefix_len)
382 {
383 	clear_bit(prefix_len, prefix_usage->b);
384 }
385 
/* Key of a FIB node: an address buffer sized for struct in6_addr plus the
 * prefix length.
 */
386 struct mlxsw_sp_fib_key {
387 	unsigned char addr[sizeof(struct in6_addr)];
388 	unsigned char prefix_len;
389 };
390 
/* Kind of a FIB entry; stored in the 'type' member of
 * struct mlxsw_sp_fib_entry.
 */
391 enum mlxsw_sp_fib_entry_type {
392 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
393 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
394 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
395 
396 	/* This is a special case of local delivery, where a packet should be
397 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
398 	 * because that's a type of next hop, not of FIB entry. (There can be
399 	 * several next hops in a REMOTE entry, and some of them may be
400 	 * encapsulating entries.)
401 	 */
402 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
403 };
404 
/* Only used by pointer in this part of the file. */
405 struct mlxsw_sp_nexthop_group;
406 
/* One node of a FIB, identified by 'key' and pointing back at its FIB.
 * NOTE(review): 'ht_node' and 'list' presumably link the node into the owning
 * FIB's 'ht' and 'node_list', and 'entry_list' presumably holds the node's
 * struct mlxsw_sp_fib_entry items - confirm against the users.
 */
407 struct mlxsw_sp_fib_node {
408 	struct list_head entry_list;
409 	struct list_head list;
410 	struct rhash_head ht_node;
411 	struct mlxsw_sp_fib *fib;
412 	struct mlxsw_sp_fib_key key;
413 };
414 
/* Decapsulation data embedded in struct mlxsw_sp_fib_entry as 'decap'. */
415 struct mlxsw_sp_fib_entry_decap {
416 	struct mlxsw_sp_ipip_entry *ipip_entry;
417 	u32 tunnel_index;
418 };
419 
/* Protocol-independent part of a FIB entry; the IPv4 and IPv6 entry
 * structures embed it as their 'common' member.
 */
420 struct mlxsw_sp_fib_entry {
421 	struct list_head list;
422 	struct mlxsw_sp_fib_node *fib_node;
423 	enum mlxsw_sp_fib_entry_type type;
424 	struct list_head nexthop_group_node;
425 	struct mlxsw_sp_nexthop_group *nh_group;
426 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
427 };
428 
/* IPv4 FIB entry: the common entry plus IPv4-specific route attributes. */
429 struct mlxsw_sp_fib4_entry {
430 	struct mlxsw_sp_fib_entry common;
431 	u32 tb_id;
432 	u32 prio;
433 	u8 tos;
434 	u8 type;
435 };
436 
/* IPv6 FIB entry: the common entry plus a list head and a counter.
 * NOTE(review): 'rt6_list' presumably holds struct mlxsw_sp_rt6 items and
 * 'nrt6' their count - confirm against the users.
 */
437 struct mlxsw_sp_fib6_entry {
438 	struct mlxsw_sp_fib_entry common;
439 	struct list_head rt6_list;
440 	unsigned int nrt6;
441 };
442 
/* List element wrapping a pointer to a kernel struct fib6_info. */
443 struct mlxsw_sp_rt6 {
444 	struct list_head list;
445 	struct fib6_info *rt;
446 };
447 
/* One LPM tree slot. Slots live in the router's lpm.trees array; a slot with
 * ref_count == 0 is treated as unused by mlxsw_sp_lpm_tree_find_unused().
 */
448 struct mlxsw_sp_lpm_tree {
449 	u8 id; /* tree ID */
450 	unsigned int ref_count;
451 	enum mlxsw_sp_l3proto proto;
452 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
453 	struct mlxsw_sp_prefix_usage prefix_usage;
454 };
455 
/* Per-protocol FIB of a virtual router, created by mlxsw_sp_fib_create().
 * Holds a reference on 'lpm_tree' for as long as it points at it.
 */
456 struct mlxsw_sp_fib {
457 	struct rhashtable ht;
458 	struct list_head node_list;
459 	struct mlxsw_sp_vr *vr;
460 	struct mlxsw_sp_lpm_tree *lpm_tree;
461 	enum mlxsw_sp_l3proto proto;
462 };
463 
/* Virtual router slot in the router's 'vrs' array. A slot counts as used
 * when any of fib4, fib6 or the mr_table pointers is non-NULL (see
 * mlxsw_sp_vr_is_used()).
 */
464 struct mlxsw_sp_vr {
465 	u16 id; /* virtual router ID */
466 	u32 tb_id; /* kernel fib table id */
467 	unsigned int rif_count;
468 	struct mlxsw_sp_fib *fib4;
469 	struct mlxsw_sp_fib *fib6;
470 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
471 };
472 
473 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
474 
475 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
476 						struct mlxsw_sp_vr *vr,
477 						enum mlxsw_sp_l3proto proto)
478 {
479 	struct mlxsw_sp_lpm_tree *lpm_tree;
480 	struct mlxsw_sp_fib *fib;
481 	int err;
482 
483 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
484 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
485 	if (!fib)
486 		return ERR_PTR(-ENOMEM);
487 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
488 	if (err)
489 		goto err_rhashtable_init;
490 	INIT_LIST_HEAD(&fib->node_list);
491 	fib->proto = proto;
492 	fib->vr = vr;
493 	fib->lpm_tree = lpm_tree;
494 	mlxsw_sp_lpm_tree_hold(lpm_tree);
495 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
496 	if (err)
497 		goto err_lpm_tree_bind;
498 	return fib;
499 
500 err_lpm_tree_bind:
501 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
502 err_rhashtable_init:
503 	kfree(fib);
504 	return ERR_PTR(err);
505 }
506 
507 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
508 				 struct mlxsw_sp_fib *fib)
509 {
510 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
511 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
512 	WARN_ON(!list_empty(&fib->node_list));
513 	rhashtable_destroy(&fib->ht);
514 	kfree(fib);
515 }
516 
517 static struct mlxsw_sp_lpm_tree *
518 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
519 {
520 	static struct mlxsw_sp_lpm_tree *lpm_tree;
521 	int i;
522 
523 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
524 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
525 		if (lpm_tree->ref_count == 0)
526 			return lpm_tree;
527 	}
528 	return NULL;
529 }
530 
531 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
532 				   struct mlxsw_sp_lpm_tree *lpm_tree)
533 {
534 	char ralta_pl[MLXSW_REG_RALTA_LEN];
535 
536 	mlxsw_reg_ralta_pack(ralta_pl, true,
537 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
538 			     lpm_tree->id);
539 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
540 }
541 
542 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
543 				   struct mlxsw_sp_lpm_tree *lpm_tree)
544 {
545 	char ralta_pl[MLXSW_REG_RALTA_LEN];
546 
547 	mlxsw_reg_ralta_pack(ralta_pl, false,
548 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
549 			     lpm_tree->id);
550 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
551 }
552 
553 static int
554 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
555 				  struct mlxsw_sp_prefix_usage *prefix_usage,
556 				  struct mlxsw_sp_lpm_tree *lpm_tree)
557 {
558 	char ralst_pl[MLXSW_REG_RALST_LEN];
559 	u8 root_bin = 0;
560 	u8 prefix;
561 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
562 
563 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
564 		root_bin = prefix;
565 
566 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
567 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
568 		if (prefix == 0)
569 			continue;
570 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
571 					 MLXSW_REG_RALST_BIN_NO_CHILD);
572 		last_prefix = prefix;
573 	}
574 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
575 }
576 
577 static struct mlxsw_sp_lpm_tree *
578 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
579 			 struct mlxsw_sp_prefix_usage *prefix_usage,
580 			 enum mlxsw_sp_l3proto proto)
581 {
582 	struct mlxsw_sp_lpm_tree *lpm_tree;
583 	int err;
584 
585 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
586 	if (!lpm_tree)
587 		return ERR_PTR(-EBUSY);
588 	lpm_tree->proto = proto;
589 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
590 	if (err)
591 		return ERR_PTR(err);
592 
593 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
594 						lpm_tree);
595 	if (err)
596 		goto err_left_struct_set;
597 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
598 	       sizeof(lpm_tree->prefix_usage));
599 	memset(&lpm_tree->prefix_ref_count, 0,
600 	       sizeof(lpm_tree->prefix_ref_count));
601 	lpm_tree->ref_count = 1;
602 	return lpm_tree;
603 
604 err_left_struct_set:
605 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
606 	return ERR_PTR(err);
607 }
608 
/* Counterpart of mlxsw_sp_lpm_tree_create(): releases the tree in the
 * device. Called by mlxsw_sp_lpm_tree_put() when the last reference goes.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
614 
615 static struct mlxsw_sp_lpm_tree *
616 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
617 		      struct mlxsw_sp_prefix_usage *prefix_usage,
618 		      enum mlxsw_sp_l3proto proto)
619 {
620 	struct mlxsw_sp_lpm_tree *lpm_tree;
621 	int i;
622 
623 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
624 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
625 		if (lpm_tree->ref_count != 0 &&
626 		    lpm_tree->proto == proto &&
627 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
628 					     prefix_usage)) {
629 			mlxsw_sp_lpm_tree_hold(lpm_tree);
630 			return lpm_tree;
631 		}
632 	}
633 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
634 }
635 
636 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
637 {
638 	lpm_tree->ref_count++;
639 }
640 
641 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
642 				  struct mlxsw_sp_lpm_tree *lpm_tree)
643 {
644 	if (--lpm_tree->ref_count == 0)
645 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
646 }
647 
648 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
649 
650 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
651 {
652 	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
653 	struct mlxsw_sp_lpm_tree *lpm_tree;
654 	u64 max_trees;
655 	int err, i;
656 
657 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
658 		return -EIO;
659 
660 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
661 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
662 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
663 					     sizeof(struct mlxsw_sp_lpm_tree),
664 					     GFP_KERNEL);
665 	if (!mlxsw_sp->router->lpm.trees)
666 		return -ENOMEM;
667 
668 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
669 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
670 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
671 	}
672 
673 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
674 					 MLXSW_SP_L3_PROTO_IPV4);
675 	if (IS_ERR(lpm_tree)) {
676 		err = PTR_ERR(lpm_tree);
677 		goto err_ipv4_tree_get;
678 	}
679 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
680 
681 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
682 					 MLXSW_SP_L3_PROTO_IPV6);
683 	if (IS_ERR(lpm_tree)) {
684 		err = PTR_ERR(lpm_tree);
685 		goto err_ipv6_tree_get;
686 	}
687 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
688 
689 	return 0;
690 
691 err_ipv6_tree_get:
692 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
693 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
694 err_ipv4_tree_get:
695 	kfree(mlxsw_sp->router->lpm.trees);
696 	return err;
697 }
698 
699 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
700 {
701 	struct mlxsw_sp_lpm_tree *lpm_tree;
702 
703 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
704 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
705 
706 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
707 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
708 
709 	kfree(mlxsw_sp->router->lpm.trees);
710 }
711 
712 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
713 {
714 	return !!vr->fib4 || !!vr->fib6 ||
715 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
716 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
717 }
718 
719 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
720 {
721 	struct mlxsw_sp_vr *vr;
722 	int i;
723 
724 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
725 		vr = &mlxsw_sp->router->vrs[i];
726 		if (!mlxsw_sp_vr_is_used(vr))
727 			return vr;
728 	}
729 	return NULL;
730 }
731 
732 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
733 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
734 {
735 	char raltb_pl[MLXSW_REG_RALTB_LEN];
736 
737 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
738 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
739 			     tree_id);
740 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
741 }
742 
743 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
744 				       const struct mlxsw_sp_fib *fib)
745 {
746 	char raltb_pl[MLXSW_REG_RALTB_LEN];
747 
748 	/* Bind to tree 0 which is default */
749 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
750 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
751 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
752 }
753 
754 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
755 {
756 	/* For our purpose, squash main, default and local tables into one */
757 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
758 		tb_id = RT_TABLE_MAIN;
759 	return tb_id;
760 }
761 
762 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
763 					    u32 tb_id)
764 {
765 	struct mlxsw_sp_vr *vr;
766 	int i;
767 
768 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
769 
770 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
771 		vr = &mlxsw_sp->router->vrs[i];
772 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
773 			return vr;
774 	}
775 	return NULL;
776 }
777 
778 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
779 					    enum mlxsw_sp_l3proto proto)
780 {
781 	switch (proto) {
782 	case MLXSW_SP_L3_PROTO_IPV4:
783 		return vr->fib4;
784 	case MLXSW_SP_L3_PROTO_IPV6:
785 		return vr->fib6;
786 	}
787 	return NULL;
788 }
789 
790 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
791 					      u32 tb_id,
792 					      struct netlink_ext_ack *extack)
793 {
794 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
795 	struct mlxsw_sp_fib *fib4;
796 	struct mlxsw_sp_fib *fib6;
797 	struct mlxsw_sp_vr *vr;
798 	int err;
799 
800 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
801 	if (!vr) {
802 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
803 		return ERR_PTR(-EBUSY);
804 	}
805 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
806 	if (IS_ERR(fib4))
807 		return ERR_CAST(fib4);
808 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
809 	if (IS_ERR(fib6)) {
810 		err = PTR_ERR(fib6);
811 		goto err_fib6_create;
812 	}
813 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
814 					     MLXSW_SP_L3_PROTO_IPV4);
815 	if (IS_ERR(mr4_table)) {
816 		err = PTR_ERR(mr4_table);
817 		goto err_mr4_table_create;
818 	}
819 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
820 					     MLXSW_SP_L3_PROTO_IPV6);
821 	if (IS_ERR(mr6_table)) {
822 		err = PTR_ERR(mr6_table);
823 		goto err_mr6_table_create;
824 	}
825 
826 	vr->fib4 = fib4;
827 	vr->fib6 = fib6;
828 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
829 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
830 	vr->tb_id = tb_id;
831 	return vr;
832 
833 err_mr6_table_create:
834 	mlxsw_sp_mr_table_destroy(mr4_table);
835 err_mr4_table_create:
836 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
837 err_fib6_create:
838 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
839 	return ERR_PTR(err);
840 }
841 
842 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
843 				struct mlxsw_sp_vr *vr)
844 {
845 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
846 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
847 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
848 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
849 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
850 	vr->fib6 = NULL;
851 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
852 	vr->fib4 = NULL;
853 }
854 
855 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
856 					   struct netlink_ext_ack *extack)
857 {
858 	struct mlxsw_sp_vr *vr;
859 
860 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
861 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
862 	if (!vr)
863 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
864 	return vr;
865 }
866 
867 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
868 {
869 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
870 	    list_empty(&vr->fib6->node_list) &&
871 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
872 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
873 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
874 }
875 
876 static bool
877 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
878 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
879 {
880 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
881 
882 	if (!mlxsw_sp_vr_is_used(vr))
883 		return false;
884 	if (fib->lpm_tree->id == tree_id)
885 		return true;
886 	return false;
887 }
888 
889 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
890 					struct mlxsw_sp_fib *fib,
891 					struct mlxsw_sp_lpm_tree *new_tree)
892 {
893 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
894 	int err;
895 
896 	fib->lpm_tree = new_tree;
897 	mlxsw_sp_lpm_tree_hold(new_tree);
898 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
899 	if (err)
900 		goto err_tree_bind;
901 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
902 	return 0;
903 
904 err_tree_bind:
905 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
906 	fib->lpm_tree = old_tree;
907 	return err;
908 }
909 
910 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
911 					 struct mlxsw_sp_fib *fib,
912 					 struct mlxsw_sp_lpm_tree *new_tree)
913 {
914 	enum mlxsw_sp_l3proto proto = fib->proto;
915 	struct mlxsw_sp_lpm_tree *old_tree;
916 	u8 old_id, new_id = new_tree->id;
917 	struct mlxsw_sp_vr *vr;
918 	int i, err;
919 
920 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
921 	old_id = old_tree->id;
922 
923 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
924 		vr = &mlxsw_sp->router->vrs[i];
925 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
926 			continue;
927 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
928 						   mlxsw_sp_vr_fib(vr, proto),
929 						   new_tree);
930 		if (err)
931 			goto err_tree_replace;
932 	}
933 
934 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
935 	       sizeof(new_tree->prefix_ref_count));
936 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
937 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
938 
939 	return 0;
940 
941 err_tree_replace:
942 	for (i--; i >= 0; i--) {
943 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
944 			continue;
945 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
946 					     mlxsw_sp_vr_fib(vr, proto),
947 					     old_tree);
948 	}
949 	return err;
950 }
951 
952 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
953 {
954 	struct mlxsw_sp_vr *vr;
955 	u64 max_vrs;
956 	int i;
957 
958 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
959 		return -EIO;
960 
961 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
962 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
963 					GFP_KERNEL);
964 	if (!mlxsw_sp->router->vrs)
965 		return -ENOMEM;
966 
967 	for (i = 0; i < max_vrs; i++) {
968 		vr = &mlxsw_sp->router->vrs[i];
969 		vr->id = i;
970 	}
971 
972 	return 0;
973 }
974 
975 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
976 
977 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
978 {
979 	/* At this stage we're guaranteed not to have new incoming
980 	 * FIB notifications and the work queue is free from FIBs
981 	 * sitting on top of mlxsw netdevs. However, we can still
982 	 * have other FIBs queued. Flush the queue before flushing
983 	 * the device's tables. No need for locks, as we're the only
984 	 * writer.
985 	 */
986 	mlxsw_core_flush_owq();
987 	mlxsw_sp_router_fib_flush(mlxsw_sp);
988 	kfree(mlxsw_sp->router->vrs);
989 }
990 
991 static struct net_device *
992 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
993 {
994 	struct ip_tunnel *tun = netdev_priv(ol_dev);
995 	struct net *net = dev_net(ol_dev);
996 
997 	return __dev_get_by_index(net, tun->parms.link);
998 }
999 
1000 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
1001 {
1002 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1003 
1004 	if (d)
1005 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
1006 	else
1007 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
1008 }
1009 
1010 static struct mlxsw_sp_rif *
1011 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1012 		    const struct mlxsw_sp_rif_params *params,
1013 		    struct netlink_ext_ack *extack);
1014 
1015 static struct mlxsw_sp_rif_ipip_lb *
1016 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1017 				enum mlxsw_sp_ipip_type ipipt,
1018 				struct net_device *ol_dev,
1019 				struct netlink_ext_ack *extack)
1020 {
1021 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1022 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1023 	struct mlxsw_sp_rif *rif;
1024 
1025 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1026 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1027 		.common.dev = ol_dev,
1028 		.common.lag = false,
1029 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1030 	};
1031 
1032 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1033 	if (IS_ERR(rif))
1034 		return ERR_CAST(rif);
1035 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1036 }
1037 
1038 static struct mlxsw_sp_ipip_entry *
1039 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1040 			  enum mlxsw_sp_ipip_type ipipt,
1041 			  struct net_device *ol_dev)
1042 {
1043 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1044 	struct mlxsw_sp_ipip_entry *ipip_entry;
1045 	struct mlxsw_sp_ipip_entry *ret = NULL;
1046 
1047 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1048 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1049 	if (!ipip_entry)
1050 		return ERR_PTR(-ENOMEM);
1051 
1052 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1053 							    ol_dev, NULL);
1054 	if (IS_ERR(ipip_entry->ol_lb)) {
1055 		ret = ERR_CAST(ipip_entry->ol_lb);
1056 		goto err_ol_ipip_lb_create;
1057 	}
1058 
1059 	ipip_entry->ipipt = ipipt;
1060 	ipip_entry->ol_dev = ol_dev;
1061 
1062 	switch (ipip_ops->ul_proto) {
1063 	case MLXSW_SP_L3_PROTO_IPV4:
1064 		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1065 		break;
1066 	case MLXSW_SP_L3_PROTO_IPV6:
1067 		WARN_ON(1);
1068 		break;
1069 	}
1070 
1071 	return ipip_entry;
1072 
1073 err_ol_ipip_lb_create:
1074 	kfree(ipip_entry);
1075 	return ret;
1076 }
1077 
1078 static void
1079 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1080 {
1081 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1082 	kfree(ipip_entry);
1083 }
1084 
1085 static bool
1086 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1087 				  const enum mlxsw_sp_l3proto ul_proto,
1088 				  union mlxsw_sp_l3addr saddr,
1089 				  u32 ul_tb_id,
1090 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1091 {
1092 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1093 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1094 	union mlxsw_sp_l3addr tun_saddr;
1095 
1096 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1097 		return false;
1098 
1099 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1100 	return tun_ul_tb_id == ul_tb_id &&
1101 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1102 }
1103 
1104 static int
1105 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1106 			      struct mlxsw_sp_fib_entry *fib_entry,
1107 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1108 {
1109 	u32 tunnel_index;
1110 	int err;
1111 
1112 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1113 				  1, &tunnel_index);
1114 	if (err)
1115 		return err;
1116 
1117 	ipip_entry->decap_fib_entry = fib_entry;
1118 	fib_entry->decap.ipip_entry = ipip_entry;
1119 	fib_entry->decap.tunnel_index = tunnel_index;
1120 	return 0;
1121 }
1122 
1123 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1124 					  struct mlxsw_sp_fib_entry *fib_entry)
1125 {
1126 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1127 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1128 	fib_entry->decap.ipip_entry = NULL;
1129 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1130 			   1, fib_entry->decap.tunnel_index);
1131 }
1132 
1133 static struct mlxsw_sp_fib_node *
1134 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1135 			 size_t addr_len, unsigned char prefix_len);
1136 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1137 				     struct mlxsw_sp_fib_entry *fib_entry);
1138 
1139 static void
1140 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1141 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1142 {
1143 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1144 
1145 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1146 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1147 
1148 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1149 }
1150 
1151 static void
1152 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1153 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1154 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1155 {
1156 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1157 					  ipip_entry))
1158 		return;
1159 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1160 
1161 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1162 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1163 }
1164 
1165 /* Given an IPIP entry, find the corresponding decap route. */
1166 static struct mlxsw_sp_fib_entry *
1167 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1168 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1169 {
1170 	static struct mlxsw_sp_fib_node *fib_node;
1171 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1172 	struct mlxsw_sp_fib_entry *fib_entry;
1173 	unsigned char saddr_prefix_len;
1174 	union mlxsw_sp_l3addr saddr;
1175 	struct mlxsw_sp_fib *ul_fib;
1176 	struct mlxsw_sp_vr *ul_vr;
1177 	const void *saddrp;
1178 	size_t saddr_len;
1179 	u32 ul_tb_id;
1180 	u32 saddr4;
1181 
1182 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1183 
1184 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1185 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1186 	if (!ul_vr)
1187 		return NULL;
1188 
1189 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1190 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1191 					   ipip_entry->ol_dev);
1192 
1193 	switch (ipip_ops->ul_proto) {
1194 	case MLXSW_SP_L3_PROTO_IPV4:
1195 		saddr4 = be32_to_cpu(saddr.addr4);
1196 		saddrp = &saddr4;
1197 		saddr_len = 4;
1198 		saddr_prefix_len = 32;
1199 		break;
1200 	case MLXSW_SP_L3_PROTO_IPV6:
1201 		WARN_ON(1);
1202 		return NULL;
1203 	}
1204 
1205 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1206 					    saddr_prefix_len);
1207 	if (!fib_node || list_empty(&fib_node->entry_list))
1208 		return NULL;
1209 
1210 	fib_entry = list_first_entry(&fib_node->entry_list,
1211 				     struct mlxsw_sp_fib_entry, list);
1212 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1213 		return NULL;
1214 
1215 	return fib_entry;
1216 }
1217 
1218 static struct mlxsw_sp_ipip_entry *
1219 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1220 			   enum mlxsw_sp_ipip_type ipipt,
1221 			   struct net_device *ol_dev)
1222 {
1223 	struct mlxsw_sp_ipip_entry *ipip_entry;
1224 
1225 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1226 	if (IS_ERR(ipip_entry))
1227 		return ipip_entry;
1228 
1229 	list_add_tail(&ipip_entry->ipip_list_node,
1230 		      &mlxsw_sp->router->ipip_list);
1231 
1232 	return ipip_entry;
1233 }
1234 
1235 static void
1236 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1237 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1238 {
1239 	list_del(&ipip_entry->ipip_list_node);
1240 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1241 }
1242 
1243 static bool
1244 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1245 				  const struct net_device *ul_dev,
1246 				  enum mlxsw_sp_l3proto ul_proto,
1247 				  union mlxsw_sp_l3addr ul_dip,
1248 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1249 {
1250 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1251 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1252 	struct net_device *ipip_ul_dev;
1253 
1254 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1255 		return false;
1256 
1257 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1258 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1259 						 ul_tb_id, ipip_entry) &&
1260 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1261 }
1262 
1263 /* Given decap parameters, find the corresponding IPIP entry. */
1264 static struct mlxsw_sp_ipip_entry *
1265 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1266 				  const struct net_device *ul_dev,
1267 				  enum mlxsw_sp_l3proto ul_proto,
1268 				  union mlxsw_sp_l3addr ul_dip)
1269 {
1270 	struct mlxsw_sp_ipip_entry *ipip_entry;
1271 
1272 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1273 			    ipip_list_node)
1274 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1275 						      ul_proto, ul_dip,
1276 						      ipip_entry))
1277 			return ipip_entry;
1278 
1279 	return NULL;
1280 }
1281 
1282 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1283 				      const struct net_device *dev,
1284 				      enum mlxsw_sp_ipip_type *p_type)
1285 {
1286 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1287 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1288 	enum mlxsw_sp_ipip_type ipipt;
1289 
1290 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1291 		ipip_ops = router->ipip_ops_arr[ipipt];
1292 		if (dev->type == ipip_ops->dev_type) {
1293 			if (p_type)
1294 				*p_type = ipipt;
1295 			return true;
1296 		}
1297 	}
1298 	return false;
1299 }
1300 
1301 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1302 				const struct net_device *dev)
1303 {
1304 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1305 }
1306 
1307 static struct mlxsw_sp_ipip_entry *
1308 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1309 				   const struct net_device *ol_dev)
1310 {
1311 	struct mlxsw_sp_ipip_entry *ipip_entry;
1312 
1313 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1314 			    ipip_list_node)
1315 		if (ipip_entry->ol_dev == ol_dev)
1316 			return ipip_entry;
1317 
1318 	return NULL;
1319 }
1320 
1321 static struct mlxsw_sp_ipip_entry *
1322 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1323 				   const struct net_device *ul_dev,
1324 				   struct mlxsw_sp_ipip_entry *start)
1325 {
1326 	struct mlxsw_sp_ipip_entry *ipip_entry;
1327 
1328 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1329 					ipip_list_node);
1330 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1331 				     ipip_list_node) {
1332 		struct net_device *ipip_ul_dev =
1333 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1334 
1335 		if (ipip_ul_dev == ul_dev)
1336 			return ipip_entry;
1337 	}
1338 
1339 	return NULL;
1340 }
1341 
1342 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1343 				const struct net_device *dev)
1344 {
1345 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1346 }
1347 
1348 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1349 						const struct net_device *ol_dev,
1350 						enum mlxsw_sp_ipip_type ipipt)
1351 {
1352 	const struct mlxsw_sp_ipip_ops *ops
1353 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1354 
1355 	/* For deciding whether decap should be offloaded, we don't care about
1356 	 * overlay protocol, so ask whether either one is supported.
1357 	 */
1358 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1359 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1360 }
1361 
1362 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1363 						struct net_device *ol_dev)
1364 {
1365 	struct mlxsw_sp_ipip_entry *ipip_entry;
1366 	enum mlxsw_sp_l3proto ul_proto;
1367 	enum mlxsw_sp_ipip_type ipipt;
1368 	union mlxsw_sp_l3addr saddr;
1369 	u32 ul_tb_id;
1370 
1371 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1372 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1373 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1374 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1375 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1376 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1377 							  saddr, ul_tb_id,
1378 							  NULL)) {
1379 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1380 								ol_dev);
1381 			if (IS_ERR(ipip_entry))
1382 				return PTR_ERR(ipip_entry);
1383 		}
1384 	}
1385 
1386 	return 0;
1387 }
1388 
/* Handle unregistration of an overlay netdevice: destroy its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1398 
/* The overlay device of @ipip_entry is up: when a suitable trap route for
 * the tunnel's local address exists, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_fib_entry)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
					  decap_fib_entry);
}
1410 
1411 static int
1412 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1413 			struct mlxsw_sp_vr *ul_vr, bool enable)
1414 {
1415 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1416 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1417 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1418 	char ritr_pl[MLXSW_REG_RITR_LEN];
1419 	u32 saddr4;
1420 
1421 	switch (lb_cf.ul_protocol) {
1422 	case MLXSW_SP_L3_PROTO_IPV4:
1423 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1424 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1425 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1426 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1427 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1428 			    ul_vr->id, saddr4, lb_cf.okey);
1429 		break;
1430 
1431 	case MLXSW_SP_L3_PROTO_IPV6:
1432 		return -EAFNOSUPPORT;
1433 	}
1434 
1435 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1436 }
1437 
1438 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1439 						 struct net_device *ol_dev)
1440 {
1441 	struct mlxsw_sp_ipip_entry *ipip_entry;
1442 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1443 	struct mlxsw_sp_vr *ul_vr;
1444 	int err = 0;
1445 
1446 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1447 	if (ipip_entry) {
1448 		lb_rif = ipip_entry->ol_lb;
1449 		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1450 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1451 		if (err)
1452 			goto out;
1453 		lb_rif->common.mtu = ol_dev->mtu;
1454 	}
1455 
1456 out:
1457 	return err;
1458 }
1459 
/* Handle an overlay netdevice going up. Devices without an IPIP entry are
 * ignored.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1469 
1470 static void
1471 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1472 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1473 {
1474 	if (ipip_entry->decap_fib_entry)
1475 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1476 }
1477 
/* Handle an overlay netdevice going down. Devices without an IPIP entry are
 * ignored.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1487 
1488 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1489 					 struct mlxsw_sp_rif *old_rif,
1490 					 struct mlxsw_sp_rif *new_rif);
1491 static int
1492 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1493 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1494 				 bool keep_encap,
1495 				 struct netlink_ext_ack *extack)
1496 {
1497 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1498 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1499 
1500 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1501 						     ipip_entry->ipipt,
1502 						     ipip_entry->ol_dev,
1503 						     extack);
1504 	if (IS_ERR(new_lb_rif))
1505 		return PTR_ERR(new_lb_rif);
1506 	ipip_entry->ol_lb = new_lb_rif;
1507 
1508 	if (keep_encap)
1509 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1510 					     &new_lb_rif->common);
1511 
1512 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1513 
1514 	return 0;
1515 }
1516 
1517 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1518 					struct mlxsw_sp_rif *rif);
1519 
1520 /**
1521  * Update the offload related to an IPIP entry. This always updates decap, and
1522  * in addition to that it also:
1523  * @recreate_loopback: recreates the associated loopback RIF
1524  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1525  *              relevant when recreate_loopback is true.
1526  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1527  *                   is only relevant when recreate_loopback is false.
1528  */
1529 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1530 					struct mlxsw_sp_ipip_entry *ipip_entry,
1531 					bool recreate_loopback,
1532 					bool keep_encap,
1533 					bool update_nexthops,
1534 					struct netlink_ext_ack *extack)
1535 {
1536 	int err;
1537 
1538 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1539 	 * recreate it. That creates a window of opportunity where RALUE and
1540 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1541 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1542 	 * of RALUE, demote the decap route back.
1543 	 */
1544 	if (ipip_entry->decap_fib_entry)
1545 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1546 
1547 	if (recreate_loopback) {
1548 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1549 						       keep_encap, extack);
1550 		if (err)
1551 			return err;
1552 	} else if (update_nexthops) {
1553 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1554 					    &ipip_entry->ol_lb->common);
1555 	}
1556 
1557 	if (ipip_entry->ol_dev->flags & IFF_UP)
1558 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1559 
1560 	return 0;
1561 }
1562 
1563 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1564 						struct net_device *ol_dev,
1565 						struct netlink_ext_ack *extack)
1566 {
1567 	struct mlxsw_sp_ipip_entry *ipip_entry =
1568 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1569 	enum mlxsw_sp_l3proto ul_proto;
1570 	union mlxsw_sp_l3addr saddr;
1571 	u32 ul_tb_id;
1572 
1573 	if (!ipip_entry)
1574 		return 0;
1575 
1576 	/* For flat configuration cases, moving overlay to a different VRF might
1577 	 * cause local address conflict, and the conflicting tunnels need to be
1578 	 * demoted.
1579 	 */
1580 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1581 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1582 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1583 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1584 						 saddr, ul_tb_id,
1585 						 ipip_entry)) {
1586 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1587 		return 0;
1588 	}
1589 
1590 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1591 						   true, false, false, extack);
1592 }
1593 
1594 static int
1595 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1596 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1597 				     struct net_device *ul_dev,
1598 				     struct netlink_ext_ack *extack)
1599 {
1600 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1601 						   true, true, false, extack);
1602 }
1603 
1604 static int
1605 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1606 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1607 				    struct net_device *ul_dev)
1608 {
1609 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1610 						   false, false, true, NULL);
1611 }
1612 
1613 static int
1614 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1615 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1616 				      struct net_device *ul_dev)
1617 {
1618 	/* A down underlay device causes encapsulated packets to not be
1619 	 * forwarded, but decap still works. So refresh next hops without
1620 	 * touching anything else.
1621 	 */
1622 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1623 						   false, false, true, NULL);
1624 }
1625 
1626 static int
1627 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1628 					struct net_device *ol_dev,
1629 					struct netlink_ext_ack *extack)
1630 {
1631 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1632 	struct mlxsw_sp_ipip_entry *ipip_entry;
1633 	int err;
1634 
1635 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1636 	if (!ipip_entry)
1637 		/* A change might make a tunnel eligible for offloading, but
1638 		 * that is currently not implemented. What falls to slow path
1639 		 * stays there.
1640 		 */
1641 		return 0;
1642 
1643 	/* A change might make a tunnel not eligible for offloading. */
1644 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1645 						 ipip_entry->ipipt)) {
1646 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1647 		return 0;
1648 	}
1649 
1650 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1651 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1652 	return err;
1653 }
1654 
1655 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1656 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1657 {
1658 	struct net_device *ol_dev = ipip_entry->ol_dev;
1659 
1660 	if (ol_dev->flags & IFF_UP)
1661 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1662 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1663 }
1664 
1665 /* The configuration where several tunnels have the same local address in the
1666  * same underlay table needs special treatment in the HW. That is currently not
1667  * implemented in the driver. This function finds and demotes the first tunnel
1668  * with a given source address, except the one passed in in the argument
1669  * `except'.
1670  */
1671 bool
1672 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1673 				     enum mlxsw_sp_l3proto ul_proto,
1674 				     union mlxsw_sp_l3addr saddr,
1675 				     u32 ul_tb_id,
1676 				     const struct mlxsw_sp_ipip_entry *except)
1677 {
1678 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1679 
1680 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1681 				 ipip_list_node) {
1682 		if (ipip_entry != except &&
1683 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1684 						      ul_tb_id, ipip_entry)) {
1685 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1686 			return true;
1687 		}
1688 	}
1689 
1690 	return false;
1691 }
1692 
1693 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1694 						     struct net_device *ul_dev)
1695 {
1696 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1697 
1698 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1699 				 ipip_list_node) {
1700 		struct net_device *ipip_ul_dev =
1701 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1702 
1703 		if (ipip_ul_dev == ul_dev)
1704 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1705 	}
1706 }
1707 
1708 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1709 				     struct net_device *ol_dev,
1710 				     unsigned long event,
1711 				     struct netdev_notifier_info *info)
1712 {
1713 	struct netdev_notifier_changeupper_info *chup;
1714 	struct netlink_ext_ack *extack;
1715 
1716 	switch (event) {
1717 	case NETDEV_REGISTER:
1718 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1719 	case NETDEV_UNREGISTER:
1720 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1721 		return 0;
1722 	case NETDEV_UP:
1723 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1724 		return 0;
1725 	case NETDEV_DOWN:
1726 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1727 		return 0;
1728 	case NETDEV_CHANGEUPPER:
1729 		chup = container_of(info, typeof(*chup), info);
1730 		extack = info->extack;
1731 		if (netif_is_l3_master(chup->upper_dev))
1732 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1733 								    ol_dev,
1734 								    extack);
1735 		return 0;
1736 	case NETDEV_CHANGE:
1737 		extack = info->extack;
1738 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1739 							       ol_dev, extack);
1740 	case NETDEV_CHANGEMTU:
1741 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1742 	}
1743 	return 0;
1744 }
1745 
1746 static int
1747 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1748 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1749 				   struct net_device *ul_dev,
1750 				   unsigned long event,
1751 				   struct netdev_notifier_info *info)
1752 {
1753 	struct netdev_notifier_changeupper_info *chup;
1754 	struct netlink_ext_ack *extack;
1755 
1756 	switch (event) {
1757 	case NETDEV_CHANGEUPPER:
1758 		chup = container_of(info, typeof(*chup), info);
1759 		extack = info->extack;
1760 		if (netif_is_l3_master(chup->upper_dev))
1761 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1762 								    ipip_entry,
1763 								    ul_dev,
1764 								    extack);
1765 		break;
1766 
1767 	case NETDEV_UP:
1768 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1769 							   ul_dev);
1770 	case NETDEV_DOWN:
1771 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1772 							     ipip_entry,
1773 							     ul_dev);
1774 	}
1775 	return 0;
1776 }
1777 
1778 int
1779 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1780 				 struct net_device *ul_dev,
1781 				 unsigned long event,
1782 				 struct netdev_notifier_info *info)
1783 {
1784 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1785 	int err;
1786 
1787 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1788 								ul_dev,
1789 								ipip_entry))) {
1790 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1791 							 ul_dev, event, info);
1792 		if (err) {
1793 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1794 								 ul_dev);
1795 			return err;
1796 		}
1797 	}
1798 
1799 	return 0;
1800 }
1801 
/* Key of the neighbour hash table: the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1805 
1806 struct mlxsw_sp_neigh_entry {
1807 	struct list_head rif_list_node;
1808 	struct rhash_head ht_node;
1809 	struct mlxsw_sp_neigh_key key;
1810 	u16 rif;
1811 	bool connected;
1812 	unsigned char ha[ETH_ALEN];
1813 	struct list_head nexthop_list; /* list of nexthops using
1814 					* this neigh entry
1815 					*/
1816 	struct list_head nexthop_neighs_list_node;
1817 	unsigned int counter_index;
1818 	bool counter_valid;
1819 };
1820 
1821 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1822 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1823 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1824 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1825 };
1826 
1827 struct mlxsw_sp_neigh_entry *
1828 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1829 			struct mlxsw_sp_neigh_entry *neigh_entry)
1830 {
1831 	if (!neigh_entry) {
1832 		if (list_empty(&rif->neigh_list))
1833 			return NULL;
1834 		else
1835 			return list_first_entry(&rif->neigh_list,
1836 						typeof(*neigh_entry),
1837 						rif_list_node);
1838 	}
1839 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1840 		return NULL;
1841 	return list_next_entry(neigh_entry, rif_list_node);
1842 }
1843 
1844 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1845 {
1846 	return neigh_entry->key.n->tbl->family;
1847 }
1848 
1849 unsigned char *
1850 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1851 {
1852 	return neigh_entry->ha;
1853 }
1854 
1855 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1856 {
1857 	struct neighbour *n;
1858 
1859 	n = neigh_entry->key.n;
1860 	return ntohl(*((__be32 *) n->primary_key));
1861 }
1862 
1863 struct in6_addr *
1864 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1865 {
1866 	struct neighbour *n;
1867 
1868 	n = neigh_entry->key.n;
1869 	return (struct in6_addr *) &n->primary_key;
1870 }
1871 
1872 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1873 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1874 			       u64 *p_counter)
1875 {
1876 	if (!neigh_entry->counter_valid)
1877 		return -EINVAL;
1878 
1879 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1880 					 p_counter, NULL);
1881 }
1882 
1883 static struct mlxsw_sp_neigh_entry *
1884 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1885 			   u16 rif)
1886 {
1887 	struct mlxsw_sp_neigh_entry *neigh_entry;
1888 
1889 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1890 	if (!neigh_entry)
1891 		return NULL;
1892 
1893 	neigh_entry->key.n = n;
1894 	neigh_entry->rif = rif;
1895 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1896 
1897 	return neigh_entry;
1898 }
1899 
/* Release the memory of a neighbour entry. Nothing else is torn down here. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1904 
1905 static int
1906 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1907 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1908 {
1909 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1910 				      &neigh_entry->ht_node,
1911 				      mlxsw_sp_neigh_ht_params);
1912 }
1913 
1914 static void
1915 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1916 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1917 {
1918 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1919 			       &neigh_entry->ht_node,
1920 			       mlxsw_sp_neigh_ht_params);
1921 }
1922 
1923 static bool
1924 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1925 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1926 {
1927 	struct devlink *devlink;
1928 	const char *table_name;
1929 
1930 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1931 	case AF_INET:
1932 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1933 		break;
1934 	case AF_INET6:
1935 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1936 		break;
1937 	default:
1938 		WARN_ON(1);
1939 		return false;
1940 	}
1941 
1942 	devlink = priv_to_devlink(mlxsw_sp->core);
1943 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
1944 }
1945 
1946 static void
1947 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1948 			     struct mlxsw_sp_neigh_entry *neigh_entry)
1949 {
1950 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1951 		return;
1952 
1953 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1954 		return;
1955 
1956 	neigh_entry->counter_valid = true;
1957 }
1958 
1959 static void
1960 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1961 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1962 {
1963 	if (!neigh_entry->counter_valid)
1964 		return;
1965 	mlxsw_sp_flow_counter_free(mlxsw_sp,
1966 				   neigh_entry->counter_index);
1967 	neigh_entry->counter_valid = false;
1968 }
1969 
1970 static struct mlxsw_sp_neigh_entry *
1971 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1972 {
1973 	struct mlxsw_sp_neigh_entry *neigh_entry;
1974 	struct mlxsw_sp_rif *rif;
1975 	int err;
1976 
1977 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1978 	if (!rif)
1979 		return ERR_PTR(-EINVAL);
1980 
1981 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1982 	if (!neigh_entry)
1983 		return ERR_PTR(-ENOMEM);
1984 
1985 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1986 	if (err)
1987 		goto err_neigh_entry_insert;
1988 
1989 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1990 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1991 
1992 	return neigh_entry;
1993 
1994 err_neigh_entry_insert:
1995 	mlxsw_sp_neigh_entry_free(neigh_entry);
1996 	return ERR_PTR(err);
1997 }
1998 
/* Tear down a neighbour entry: unlink it from the RIF neighbour list it was
 * added to on creation, free its flow counter if one is valid, remove it
 * from the neighbour hash table and finally free its memory.
 */
1999 static void
2000 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2001 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2002 {
2003 	list_del(&neigh_entry->rif_list_node);
2004 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2005 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2006 	mlxsw_sp_neigh_entry_free(neigh_entry);
2007 }
2008 
/* Look up the driver's entry for kernel neighbour @n in the router's
 * neighbour hash table. Returns the result of rhashtable_lookup_fast();
 * callers in this file treat a NULL result as "no entry".
 */
2009 static struct mlxsw_sp_neigh_entry *
2010 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2011 {
2012 	struct mlxsw_sp_neigh_key key;
2013 
2014 	key.n = n;
2015 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2016 				      &key, mlxsw_sp_neigh_ht_params);
2017 }
2018 
2019 static void
2020 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2021 {
2022 	unsigned long interval;
2023 
2024 #if IS_ENABLED(CONFIG_IPV6)
2025 	interval = min_t(unsigned long,
2026 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2027 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2028 #else
2029 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2030 #endif
2031 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2032 }
2033 
2034 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2035 						   char *rauhtd_pl,
2036 						   int ent_index)
2037 {
2038 	struct net_device *dev;
2039 	struct neighbour *n;
2040 	__be32 dipn;
2041 	u32 dip;
2042 	u16 rif;
2043 
2044 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2045 
2046 	if (!mlxsw_sp->router->rifs[rif]) {
2047 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2048 		return;
2049 	}
2050 
2051 	dipn = htonl(dip);
2052 	dev = mlxsw_sp->router->rifs[rif]->dev;
2053 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2054 	if (!n)
2055 		return;
2056 
2057 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2058 	neigh_event_send(n, NULL);
2059 	neigh_release(n);
2060 }
2061 
2062 #if IS_ENABLED(CONFIG_IPV6)
2063 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2064 						   char *rauhtd_pl,
2065 						   int rec_index)
2066 {
2067 	struct net_device *dev;
2068 	struct neighbour *n;
2069 	struct in6_addr dip;
2070 	u16 rif;
2071 
2072 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2073 					 (char *) &dip);
2074 
2075 	if (!mlxsw_sp->router->rifs[rif]) {
2076 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2077 		return;
2078 	}
2079 
2080 	dev = mlxsw_sp->router->rifs[rif]->dev;
2081 	n = neigh_lookup(&nd_tbl, &dip, dev);
2082 	if (!n)
2083 		return;
2084 
2085 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2086 	neigh_event_send(n, NULL);
2087 	neigh_release(n);
2088 }
2089 #else
2090 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2091 						   char *rauhtd_pl,
2092 						   int rec_index)
2093 {
2094 }
2095 #endif
2096 
2097 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2098 						   char *rauhtd_pl,
2099 						   int rec_index)
2100 {
2101 	u8 num_entries;
2102 	int i;
2103 
2104 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2105 								rec_index);
2106 	/* Hardware starts counting at 0, so add 1. */
2107 	num_entries++;
2108 
2109 	/* Each record consists of several neighbour entries. */
2110 	for (i = 0; i < num_entries; i++) {
2111 		int ent_index;
2112 
2113 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2114 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2115 						       ent_index);
2116 	}
2117 
2118 }
2119 
/* Handle one IPv6 record of a RAUHTD dump by passing it to the IPv6 entry
 * handler; the record index doubles as the entry index.
 */
2120 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2121 						   char *rauhtd_pl,
2122 						   int rec_index)
2123 {
2124 	/* One record contains one entry. */
2125 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2126 					       rec_index);
2127 }
2128 
/* Dispatch one RAUHTD record to the IPv4 or IPv6 record handler according to
 * the record type read from the payload. Records of any other type are
 * silently skipped (the switch has no default case).
 */
2129 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2130 					      char *rauhtd_pl, int rec_index)
2131 {
2132 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2133 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2134 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2135 						       rec_index);
2136 		break;
2137 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2138 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2139 						       rec_index);
2140 		break;
2141 	}
2142 }
2143 
2144 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2145 {
2146 	u8 num_rec, last_rec_index, num_entries;
2147 
2148 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2149 	last_rec_index = num_rec - 1;
2150 
2151 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2152 		return false;
2153 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2154 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2155 		return true;
2156 
2157 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2158 								last_rec_index);
2159 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2160 		return true;
2161 	return false;
2162 }
2163 
2164 static int
2165 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2166 				       char *rauhtd_pl,
2167 				       enum mlxsw_reg_rauhtd_type type)
2168 {
2169 	int i, num_rec;
2170 	int err;
2171 
2172 	/* Make sure the neighbour's netdev isn't removed in the
2173 	 * process.
2174 	 */
2175 	rtnl_lock();
2176 	do {
2177 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2178 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2179 				      rauhtd_pl);
2180 		if (err) {
2181 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2182 			break;
2183 		}
2184 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2185 		for (i = 0; i < num_rec; i++)
2186 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2187 							  i);
2188 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2189 	rtnl_unlock();
2190 
2191 	return err;
2192 }
2193 
2194 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2195 {
2196 	enum mlxsw_reg_rauhtd_type type;
2197 	char *rauhtd_pl;
2198 	int err;
2199 
2200 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2201 	if (!rauhtd_pl)
2202 		return -ENOMEM;
2203 
2204 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2205 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2206 	if (err)
2207 		goto out;
2208 
2209 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2210 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2211 out:
2212 	kfree(rauhtd_pl);
2213 	return err;
2214 }
2215 
/* Send a neighbour event for every entry on the router's
 * nexthop_neighs_list, with RTNL held for the duration of the walk.
 */
2216 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2217 {
2218 	struct mlxsw_sp_neigh_entry *neigh_entry;
2219 
2220 	/* Take the RTNL mutex here to keep the list from changing */
2221 	rtnl_lock();
2222 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2223 			    nexthop_neighs_list_node)
2224 		/* If this neigh has nexthops, make the kernel think this neigh
2225 		 * is active regardless of the traffic.
2226 		 */
2227 		neigh_event_send(neigh_entry->key.n, NULL);
2228 	rtnl_unlock();
2229 }
2230 
/* Schedule the neighbour activity delayed work. The delay is the stored
 * neighs_update.interval (kept in milliseconds) converted to jiffies.
 */
2231 static void
2232 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2233 {
2234 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2235 
2236 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2237 			       msecs_to_jiffies(interval));
2238 }
2239 
2240 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2241 {
2242 	struct mlxsw_sp_router *router;
2243 	int err;
2244 
2245 	router = container_of(work, struct mlxsw_sp_router,
2246 			      neighs_update.dw.work);
2247 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2248 	if (err)
2249 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2250 
2251 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2252 
2253 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2254 }
2255 
2256 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2257 {
2258 	struct mlxsw_sp_neigh_entry *neigh_entry;
2259 	struct mlxsw_sp_router *router;
2260 
2261 	router = container_of(work, struct mlxsw_sp_router,
2262 			      nexthop_probe_dw.work);
2263 	/* Iterate over nexthop neighbours, find those who are unresolved and
2264 	 * send arp on them. This solves the chicken-egg problem when
2265 	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2266 	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2267 	 * using different nexthop.
2268 	 *
2269 	 * Take RTNL mutex here to prevent lists from changes.
2270 	 */
2271 	rtnl_lock();
2272 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2273 			    nexthop_neighs_list_node)
2274 		if (!neigh_entry->connected)
2275 			neigh_event_send(neigh_entry->key.n, NULL);
2276 	rtnl_unlock();
2277 
2278 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2279 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2280 }
2281 
/* Forward declaration, needed because mlxsw_sp_router_neigh_event_work()
 * calls this function.
 */
2282 static void
2283 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2284 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2285 			      bool removing);
2286 
2287 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2288 {
2289 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2290 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2291 }
2292 
2293 static void
2294 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2295 				struct mlxsw_sp_neigh_entry *neigh_entry,
2296 				enum mlxsw_reg_rauht_op op)
2297 {
2298 	struct neighbour *n = neigh_entry->key.n;
2299 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2300 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2301 
2302 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2303 			      dip);
2304 	if (neigh_entry->counter_valid)
2305 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2306 					     neigh_entry->counter_index);
2307 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2308 }
2309 
2310 static void
2311 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2312 				struct mlxsw_sp_neigh_entry *neigh_entry,
2313 				enum mlxsw_reg_rauht_op op)
2314 {
2315 	struct neighbour *n = neigh_entry->key.n;
2316 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2317 	const char *dip = n->primary_key;
2318 
2319 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2320 			      dip);
2321 	if (neigh_entry->counter_valid)
2322 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2323 					     neigh_entry->counter_index);
2324 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2325 }
2326 
2327 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2328 {
2329 	struct neighbour *n = neigh_entry->key.n;
2330 
2331 	/* Packets with a link-local destination address are trapped
2332 	 * after LPM lookup and never reach the neighbour table, so
2333 	 * there is no need to program such neighbours to the device.
2334 	 */
2335 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2336 	    IPV6_ADDR_LINKLOCAL)
2337 		return true;
2338 	return false;
2339 }
2340 
2341 static void
2342 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2343 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2344 			    bool adding)
2345 {
2346 	if (!adding && !neigh_entry->connected)
2347 		return;
2348 	neigh_entry->connected = adding;
2349 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2350 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2351 						mlxsw_sp_rauht_op(adding));
2352 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2353 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2354 			return;
2355 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2356 						mlxsw_sp_rauht_op(adding));
2357 	} else {
2358 		WARN_ON_ONCE(1);
2359 	}
2360 }
2361 
/* Allocate (@adding) or free (!@adding) the flow counter of @neigh_entry and
 * then rewrite the entry with an add operation. The RAUHT payload built by
 * the write helpers includes the counter only while counter_valid is set.
 */
2362 void
2363 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2364 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2365 				    bool adding)
2366 {
2367 	if (adding)
2368 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2369 	else
2370 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2371 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2372 }
2373 
/* Work item allocated by the netevent notifier to defer event handling; it is
 * freed by the work function that consumes it.
 *
 * @n is set only for neighbour update events; the multipath hash events
 * leave it zeroed by kzalloc().
 */
2374 struct mlxsw_sp_netevent_work {
2375 	struct work_struct work;
2376 	struct mlxsw_sp *mlxsw_sp;
2377 	struct neighbour *n;
2378 };
2379 
/* Deferred handler for NETEVENT_NEIGH_UPDATE.
 *
 * Snapshots the neighbour's hardware address, NUD state and dead flag under
 * n->lock. Then, with RTNL held, it looks up the driver's neighbour entry
 * (creating it when the neighbour is connected and no entry exists), stores
 * the hardware address, updates the entry and its nexthops, and destroys the
 * entry once it is not connected and its nexthop_list is empty.
 *
 * On every path the neighbour reference taken by the notifier is dropped and
 * the work item is freed.
 */
2380 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2381 {
2382 	struct mlxsw_sp_netevent_work *net_work =
2383 		container_of(work, struct mlxsw_sp_netevent_work, work);
2384 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2385 	struct mlxsw_sp_neigh_entry *neigh_entry;
2386 	struct neighbour *n = net_work->n;
2387 	unsigned char ha[ETH_ALEN];
2388 	bool entry_connected;
2389 	u8 nud_state, dead;
2390 
2391 	/* If these parameters are changed after we release the lock,
2392 	 * then we are guaranteed to receive another event letting us
2393 	 * know about it.
2394 	 */
2395 	read_lock_bh(&n->lock);
2396 	memcpy(ha, n->ha, ETH_ALEN);
2397 	nud_state = n->nud_state;
2398 	dead = n->dead;
2399 	read_unlock_bh(&n->lock);
2400 
2401 	rtnl_lock();
2402 	mlxsw_sp_span_respin(mlxsw_sp);
2403 
2404 	entry_connected = nud_state & NUD_VALID && !dead;
2405 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	/* Nothing to remove and nothing to add. */
2406 	if (!entry_connected && !neigh_entry)
2407 		goto out;
2408 	if (!neigh_entry) {
2409 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2410 		if (IS_ERR(neigh_entry))
2411 			goto out;
2412 	}
2413 
2414 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2415 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2416 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2417 
2418 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2419 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2420 
2421 out:
2422 	rtnl_unlock();
2423 	neigh_release(n);
2424 	kfree(net_work);
2425 }
2426 
/* Forward declaration, needed because mlxsw_sp_router_mp_hash_event_work()
 * calls this function.
 */
2427 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2428 
/* Deferred handler for the multipath hash netevents: calls
 * mlxsw_sp_mp_hash_init() and frees the work item. The return value of
 * mlxsw_sp_mp_hash_init() is ignored.
 */
2429 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2430 {
2431 	struct mlxsw_sp_netevent_work *net_work =
2432 		container_of(work, struct mlxsw_sp_netevent_work, work);
2433 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2434 
2435 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2436 	kfree(net_work);
2437 }
2438 
/* Netevent notifier callback.
 *
 * NETEVENT_DELAY_PROBE_TIME_UPDATE: for a per-device IPv4/IPv6 neigh_parms
 * whose device resolves to a port, store the new DELAY_PROBE_TIME (in
 * milliseconds) as the neighbour polling interval.
 *
 * NETEVENT_NEIGH_UPDATE: for an IPv4/IPv6 neighbour whose device resolves to
 * a port, take a reference on the neighbour and queue
 * mlxsw_sp_router_neigh_event_work(), which drops the reference.
 *
 * NETEVENT_IPV4_MPATH_HASH_UPDATE / NETEVENT_IPV6_MPATH_HASH_UPDATE: for
 * init_net only, queue mlxsw_sp_router_mp_hash_event_work().
 *
 * Returns NOTIFY_BAD when a work item cannot be allocated and NOTIFY_DONE in
 * every other case.
 */
2439 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2440 					  unsigned long event, void *ptr)
2441 {
2442 	struct mlxsw_sp_netevent_work *net_work;
2443 	struct mlxsw_sp_port *mlxsw_sp_port;
2444 	struct mlxsw_sp_router *router;
2445 	struct mlxsw_sp *mlxsw_sp;
2446 	unsigned long interval;
2447 	struct neigh_parms *p;
2448 	struct neighbour *n;
2449 	struct net *net;
2450 
2451 	switch (event) {
2452 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2453 		p = ptr;
2454 
2455 		/* We don't care about changes in the default table. */
2456 		if (!p->dev || (p->tbl->family != AF_INET &&
2457 				p->tbl->family != AF_INET6))
2458 			return NOTIFY_DONE;
2459 
2460 		/* We are in atomic context and can't take RTNL mutex,
2461 		 * so use RCU variant to walk the device chain.
2462 		 */
2463 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2464 		if (!mlxsw_sp_port)
2465 			return NOTIFY_DONE;
2466 
2467 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2468 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2469 		mlxsw_sp->router->neighs_update.interval = interval;
2470 
2471 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2472 		break;
2473 	case NETEVENT_NEIGH_UPDATE:
2474 		n = ptr;
2475 
2476 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2477 			return NOTIFY_DONE;
2478 
2479 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2480 		if (!mlxsw_sp_port)
2481 			return NOTIFY_DONE;
2482 
		/* GFP_ATOMIC: the allocation must not sleep here. */
2483 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2484 		if (!net_work) {
2485 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2486 			return NOTIFY_BAD;
2487 		}
2488 
2489 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2490 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2491 		net_work->n = n;
2492 
2493 		/* Take a reference to ensure the neighbour won't be
2494 		 * destructed until we drop the reference in delayed
2495 		 * work.
2496 		 */
2497 		neigh_clone(n);
2498 		mlxsw_core_schedule_work(&net_work->work);
2499 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2500 		break;
2501 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2502 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2503 		net = ptr;
2504 
2505 		if (!net_eq(net, &init_net))
2506 			return NOTIFY_DONE;
2507 
2508 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2509 		if (!net_work)
2510 			return NOTIFY_BAD;
2511 
		/* No port is involved; the router owns the notifier block. */
2512 		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2513 		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2514 		net_work->mlxsw_sp = router->mlxsw_sp;
2515 		mlxsw_core_schedule_work(&net_work->work);
2516 		break;
2517 	}
2518 
2519 	return NOTIFY_DONE;
2520 }
2521 
/* Set up neighbour handling: initialise the neighbour hash table, seed the
 * polling interval, and initialise and immediately schedule (delay 0) the
 * two delayed works, neighbour activity update and unresolved-nexthop probe.
 *
 * Returns 0, or the error from rhashtable_init().
 */
2522 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2523 {
2524 	int err;
2525 
2526 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2527 			      &mlxsw_sp_neigh_ht_params);
2528 	if (err)
2529 		return err;
2530 
2531 	/* Initialize the polling interval according to the default
2532 	 * table.
2533 	 */
2534 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2535 
2536 	/* Create the delayed works for the activity_update */
2537 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2538 			  mlxsw_sp_router_neighs_update_work);
2539 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2540 			  mlxsw_sp_router_probe_unresolved_nexthops);
2541 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2542 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2543 	return 0;
2544 }
2545 
/* Undo mlxsw_sp_neigh_init(): cancel both delayed works, waiting for them to
 * finish, and then destroy the neighbour hash table.
 */
2546 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2547 {
2548 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2549 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2550 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2551 }
2552 
/* Flush every neighbour entry linked on @rif's neigh_list: each entry is
 * updated with adding=false (a no-op if it is not connected) and then
 * destroyed. The _safe iterator is required because destroying an entry
 * unlinks it from the list being walked.
 */
2553 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2554 					 struct mlxsw_sp_rif *rif)
2555 {
2556 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2557 
2558 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2559 				 rif_list_node) {
2560 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2561 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2562 	}
2563 }
2564 
/* Kind of nexthop, stored in struct mlxsw_sp_nexthop::type.
 * NOTE(review): presumably this selects which member of the neigh_entry /
 * ipip_entry union in struct mlxsw_sp_nexthop is valid - confirm.
 */
2565 enum mlxsw_sp_nexthop_type {
2566 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2567 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2568 };
2569 
/* Key of a nexthop in the router's nexthop hash table (see
 * mlxsw_sp_nexthop_ht_params): a pointer to a kernel struct fib_nh.
 */
2570 struct mlxsw_sp_nexthop_key {
2571 	struct fib_nh *fib_nh;
2572 };
2573 
/* A single nexthop. Nexthops are stored in the nexthops[] array of their
 * nexthop group (nh_grp points back to it).
 */
2574 struct mlxsw_sp_nexthop {
2575 	struct list_head neigh_list_node; /* member of neigh entry list */
2576 	struct list_head rif_list_node;
	/* member of the router's nexthop_list, see mlxsw_sp_nexthop_next() */
2577 	struct list_head router_list_node;
2578 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2579 						* this belongs to
2580 						*/
	/* node and key for the router's nexthop_ht hash table */
2581 	struct rhash_head ht_node;
2582 	struct mlxsw_sp_nexthop_key key;
	/* gateway address; the buffer is sized for an IPv6 address */
2583 	unsigned char gw_addr[sizeof(struct in6_addr)];
2584 	int ifindex;
2585 	int nh_weight;
2586 	int norm_nh_weight;
	/* number of consecutive adjacency entries written for this nexthop
	 * by mlxsw_sp_nexthop_update()
	 */
2587 	int num_adj_entries;
2588 	struct mlxsw_sp_rif *rif;
2589 	u8 should_offload:1, /* set indicates this neigh is connected and
2590 			      * should be put to KVD linear area of this group.
2591 			      */
2592 	   offloaded:1, /* set in case the neigh is actually put into
2593 			 * KVD linear area of this group.
2594 			 */
2595 	   update:1; /* set indicates that MAC of this neigh should be
2596 		      * updated in HW
2597 		      */
2598 	enum mlxsw_sp_nexthop_type type;
	/* The two pointers share storage, so only one of them is meaningful
	 * for a given nexthop.
	 */
2599 	union {
2600 		struct mlxsw_sp_neigh_entry *neigh_entry;
2601 		struct mlxsw_sp_ipip_entry *ipip_entry;
2602 	};
	/* flow counter; counter_index is meaningful only while counter_valid */
2603 	unsigned int counter_index;
2604 	bool counter_valid;
2605 };
2606 
/* A group of nexthops shared by the FIB entries on fib_list. The nexthops
 * are stored inline in the trailing nexthops[] array; count is used as the
 * number of array elements by the loops in this file.
 */
2607 struct mlxsw_sp_nexthop_group {
	/* for IPv4 groups this is read back as a struct fib_info *, see
	 * mlxsw_sp_nexthop4_group_fi()
	 */
2608 	void *priv;
2609 	struct rhash_head ht_node;
2610 	struct list_head fib_list; /* list of fib entries that use this group */
	/* its family is reported as the group type, see
	 * mlxsw_sp_nexthop_group_type()
	 */
2611 	struct neigh_table *neigh_tbl;
2612 	u8 adj_index_valid:1,
2613 	   gateway:1; /* routes using the group use a gateway */
2614 	u32 adj_index;
2615 	u16 ecmp_size;
2616 	u16 count;
2617 	int sum_norm_weight;
2618 	struct mlxsw_sp_nexthop nexthops[0];
/* shorthand for the RIF of the group's first nexthop */
2619 #define nh_rif	nexthops[0].rif
2620 };
2621 
2622 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2623 				    struct mlxsw_sp_nexthop *nh)
2624 {
2625 	struct devlink *devlink;
2626 
2627 	devlink = priv_to_devlink(mlxsw_sp->core);
2628 	if (!devlink_dpipe_table_counter_enabled(devlink,
2629 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2630 		return;
2631 
2632 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2633 		return;
2634 
2635 	nh->counter_valid = true;
2636 }
2637 
2638 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2639 				   struct mlxsw_sp_nexthop *nh)
2640 {
2641 	if (!nh->counter_valid)
2642 		return;
2643 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2644 	nh->counter_valid = false;
2645 }
2646 
/* Read the flow counter of nexthop @nh into @p_counter.
 *
 * Returns -EINVAL when the nexthop has no valid counter, otherwise the
 * result of mlxsw_sp_flow_counter_get(). NULL is passed for that function's
 * last argument.
 */
2647 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2648 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2649 {
2650 	if (!nh->counter_valid)
2651 		return -EINVAL;
2652 
2653 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2654 					 p_counter, NULL);
2655 }
2656 
2657 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2658 					       struct mlxsw_sp_nexthop *nh)
2659 {
2660 	if (!nh) {
2661 		if (list_empty(&router->nexthop_list))
2662 			return NULL;
2663 		else
2664 			return list_first_entry(&router->nexthop_list,
2665 						typeof(*nh), router_list_node);
2666 	}
2667 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2668 		return NULL;
2669 	return list_next_entry(nh, router_list_node);
2670 }
2671 
/* Return the nexthop's offloaded flag. */
2672 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2673 {
2674 	return nh->offloaded;
2675 }
2676 
2677 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2678 {
2679 	if (!nh->offloaded)
2680 		return NULL;
2681 	return nh->neigh_entry->ha;
2682 }
2683 
2684 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2685 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2686 {
2687 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2688 	u32 adj_hash_index = 0;
2689 	int i;
2690 
2691 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2692 		return -EINVAL;
2693 
2694 	*p_adj_index = nh_grp->adj_index;
2695 	*p_adj_size = nh_grp->ecmp_size;
2696 
2697 	for (i = 0; i < nh_grp->count; i++) {
2698 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2699 
2700 		if (nh_iter == nh)
2701 			break;
2702 		if (nh_iter->offloaded)
2703 			adj_hash_index += nh_iter->num_adj_entries;
2704 	}
2705 
2706 	*p_adj_hash_index = adj_hash_index;
2707 	return 0;
2708 }
2709 
/* Return the nexthop's rif pointer. */
2710 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2711 {
2712 	return nh->rif;
2713 }
2714 
2715 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2716 {
2717 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2718 	int i;
2719 
2720 	for (i = 0; i < nh_grp->count; i++) {
2721 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2722 
2723 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2724 			return true;
2725 	}
2726 	return false;
2727 }
2728 
/* Return the group's priv pointer as a struct fib_info *. Used in this file
 * on the IPv4 paths of the nexthop group hash and compare callbacks.
 */
2729 static struct fib_info *
2730 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2731 {
2732 	return nh_grp->priv;
2733 }
2734 
/* Lookup key for the nexthop group hash table. proto selects the union
 * member that is set: fi for MLXSW_SP_L3_PROTO_IPV4 lookups and fib6_entry
 * for MLXSW_SP_L3_PROTO_IPV6 lookups.
 */
2735 struct mlxsw_sp_nexthop_group_cmp_arg {
2736 	enum mlxsw_sp_l3proto proto;
2737 	union {
2738 		struct fib_info *fi;
2739 		struct mlxsw_sp_fib6_entry *fib6_entry;
2740 	};
2741 };
2742 
2743 static bool
2744 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2745 				    const struct in6_addr *gw, int ifindex,
2746 				    int weight)
2747 {
2748 	int i;
2749 
2750 	for (i = 0; i < nh_grp->count; i++) {
2751 		const struct mlxsw_sp_nexthop *nh;
2752 
2753 		nh = &nh_grp->nexthops[i];
2754 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2755 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2756 			return true;
2757 	}
2758 
2759 	return false;
2760 }
2761 
2762 static bool
2763 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2764 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2765 {
2766 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2767 
2768 	if (nh_grp->count != fib6_entry->nrt6)
2769 		return false;
2770 
2771 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2772 		struct in6_addr *gw;
2773 		int ifindex, weight;
2774 
2775 		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2776 		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2777 		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2778 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2779 							 weight))
2780 			return false;
2781 	}
2782 
2783 	return true;
2784 }
2785 
2786 static int
2787 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2788 {
2789 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2790 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2791 
2792 	switch (cmp_arg->proto) {
2793 	case MLXSW_SP_L3_PROTO_IPV4:
2794 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2795 	case MLXSW_SP_L3_PROTO_IPV6:
2796 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2797 						    cmp_arg->fib6_entry);
2798 	default:
2799 		WARN_ON(1);
2800 		return 1;
2801 	}
2802 }
2803 
/* Return the address family of the group's neighbour table. Callers in this
 * file compare the result against AF_INET and AF_INET6.
 */
2804 static int
2805 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2806 {
2807 	return nh_grp->neigh_tbl->family;
2808 }
2809 
/* rhashtable obj_hashfn: hash a nexthop group stored in the table.
 *
 * IPv4 groups hash their fib_info pointer. IPv6 groups hash the nexthop
 * count XORed with every nexthop's ifindex. Both forms mirror the key-side
 * hash in mlxsw_sp_nexthop_group_hash(). An unknown family triggers a
 * warning and hashes to 0.
 */
2810 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2811 {
2812 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2813 	const struct mlxsw_sp_nexthop *nh;
2814 	struct fib_info *fi;
2815 	unsigned int val;
2816 	int i;
2817 
2818 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2819 	case AF_INET:
2820 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2821 		return jhash(&fi, sizeof(fi), seed);
2822 	case AF_INET6:
2823 		val = nh_grp->count;
2824 		for (i = 0; i < nh_grp->count; i++) {
2825 			nh = &nh_grp->nexthops[i];
2826 			val ^= nh->ifindex;
2827 		}
2828 		return jhash(&val, sizeof(val), seed);
2829 	default:
2830 		WARN_ON(1);
2831 		return 0;
2832 	}
2833 }
2834 
2835 static u32
2836 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2837 {
2838 	unsigned int val = fib6_entry->nrt6;
2839 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2840 	struct net_device *dev;
2841 
2842 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2843 		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2844 		val ^= dev->ifindex;
2845 	}
2846 
2847 	return jhash(&val, sizeof(val), seed);
2848 }
2849 
/* rhashtable hashfn: hash a lookup key (struct
 * mlxsw_sp_nexthop_group_cmp_arg).
 *
 * IPv4 keys hash the fib_info pointer; IPv6 keys use
 * mlxsw_sp_nexthop6_group_hash(). An unknown protocol triggers a warning and
 * hashes to 0.
 */
2850 static u32
2851 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2852 {
2853 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2854 
2855 	switch (cmp_arg->proto) {
2856 	case MLXSW_SP_L3_PROTO_IPV4:
2857 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2858 	case MLXSW_SP_L3_PROTO_IPV6:
2859 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2860 	default:
2861 		WARN_ON(1);
2862 		return 0;
2863 	}
2864 }
2865 
/* Parameters of the nexthop group hash table. Custom hash and compare
 * callbacks are supplied: one hash for lookup keys, one for stored groups,
 * and a compare function matching a key against a stored group.
 */
2866 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2867 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2868 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2869 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2870 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2871 };
2872 
/* Insert @nh_grp into the router's nexthop group hash table.
 *
 * IPv6 groups without a gateway are not inserted; 0 is returned for them.
 * Otherwise returns the result of rhashtable_insert_fast().
 */
2873 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2874 					 struct mlxsw_sp_nexthop_group *nh_grp)
2875 {
2876 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2877 	    !nh_grp->gateway)
2878 		return 0;
2879 
2880 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2881 				      &nh_grp->ht_node,
2882 				      mlxsw_sp_nexthop_group_ht_params);
2883 }
2884 
/* Remove @nh_grp from the router's nexthop group hash table. IPv6 groups
 * without a gateway are skipped, mirroring the insert path, which never adds
 * them.
 */
2885 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2886 					  struct mlxsw_sp_nexthop_group *nh_grp)
2887 {
2888 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2889 	    !nh_grp->gateway)
2890 		return;
2891 
2892 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2893 			       &nh_grp->ht_node,
2894 			       mlxsw_sp_nexthop_group_ht_params);
2895 }
2896 
/* Look up the IPv4 nexthop group keyed by fib_info @fi in the router's
 * nexthop group hash table. Returns the result of rhashtable_lookup_fast().
 */
2897 static struct mlxsw_sp_nexthop_group *
2898 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2899 			       struct fib_info *fi)
2900 {
2901 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2902 
2903 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2904 	cmp_arg.fi = fi;
2905 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2906 				      &cmp_arg,
2907 				      mlxsw_sp_nexthop_group_ht_params);
2908 }
2909 
/* Look up the IPv6 nexthop group matching the routes of @fib6_entry in the
 * router's nexthop group hash table. Returns the result of
 * rhashtable_lookup_fast().
 */
2910 static struct mlxsw_sp_nexthop_group *
2911 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2912 			       struct mlxsw_sp_fib6_entry *fib6_entry)
2913 {
2914 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2915 
2916 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2917 	cmp_arg.fib6_entry = fib6_entry;
2918 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2919 				      &cmp_arg,
2920 				      mlxsw_sp_nexthop_group_ht_params);
2921 }
2922 
/* Parameters of the nexthop hash table: nexthops are keyed by their embedded
 * struct mlxsw_sp_nexthop_key.
 */
2923 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2924 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2925 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2926 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
2927 };
2928 
/* Insert @nh into the router's nexthop hash table. Returns the result of
 * rhashtable_insert_fast().
 */
2929 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2930 				   struct mlxsw_sp_nexthop *nh)
2931 {
2932 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2933 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2934 }
2935 
/* Remove @nh from the router's nexthop hash table. */
2936 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2937 				    struct mlxsw_sp_nexthop *nh)
2938 {
2939 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2940 			       mlxsw_sp_nexthop_ht_params);
2941 }
2942 
/* Look up a nexthop by @key (passed by value) in the router's nexthop hash
 * table. Returns the result of rhashtable_lookup_fast().
 */
2943 static struct mlxsw_sp_nexthop *
2944 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2945 			struct mlxsw_sp_nexthop_key key)
2946 {
2947 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2948 				      mlxsw_sp_nexthop_ht_params);
2949 }
2950 
/* Write one RALEU register for the protocol and virtual router of @fib,
 * packing the old (@adj_index, @ecmp_size) and new (@new_adj_index,
 * @new_ecmp_size) adjacency index and ECMP size into it.
 *
 * Returns the result of mlxsw_reg_write().
 */
2951 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2952 					     const struct mlxsw_sp_fib *fib,
2953 					     u32 adj_index, u16 ecmp_size,
2954 					     u32 new_adj_index,
2955 					     u16 new_ecmp_size)
2956 {
2957 	char raleu_pl[MLXSW_REG_RALEU_LEN];
2958 
2959 	mlxsw_reg_raleu_pack(raleu_pl,
2960 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
2961 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
2962 			     new_ecmp_size);
2963 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2964 }
2965 
2966 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2967 					  struct mlxsw_sp_nexthop_group *nh_grp,
2968 					  u32 old_adj_index, u16 old_ecmp_size)
2969 {
2970 	struct mlxsw_sp_fib_entry *fib_entry;
2971 	struct mlxsw_sp_fib *fib = NULL;
2972 	int err;
2973 
2974 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2975 		if (fib == fib_entry->fib_node->fib)
2976 			continue;
2977 		fib = fib_entry->fib_node->fib;
2978 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2979 							old_adj_index,
2980 							old_ecmp_size,
2981 							nh_grp->adj_index,
2982 							nh_grp->ecmp_size);
2983 		if (err)
2984 			return err;
2985 	}
2986 	return 0;
2987 }
2988 
2989 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2990 				     struct mlxsw_sp_nexthop *nh)
2991 {
2992 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2993 	char ratr_pl[MLXSW_REG_RATR_LEN];
2994 
2995 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2996 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
2997 			    adj_index, neigh_entry->rif);
2998 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2999 	if (nh->counter_valid)
3000 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3001 	else
3002 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3003 
3004 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3005 }
3006 
3007 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3008 			    struct mlxsw_sp_nexthop *nh)
3009 {
3010 	int i;
3011 
3012 	for (i = 0; i < nh->num_adj_entries; i++) {
3013 		int err;
3014 
3015 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3016 		if (err)
3017 			return err;
3018 	}
3019 
3020 	return 0;
3021 }
3022 
3023 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3024 					  u32 adj_index,
3025 					  struct mlxsw_sp_nexthop *nh)
3026 {
3027 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3028 
3029 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3030 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3031 }
3032 
3033 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3034 					u32 adj_index,
3035 					struct mlxsw_sp_nexthop *nh)
3036 {
3037 	int i;
3038 
3039 	for (i = 0; i < nh->num_adj_entries; i++) {
3040 		int err;
3041 
3042 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3043 						     nh);
3044 		if (err)
3045 			return err;
3046 	}
3047 
3048 	return 0;
3049 }
3050 
3051 static int
3052 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3053 			      struct mlxsw_sp_nexthop_group *nh_grp,
3054 			      bool reallocate)
3055 {
3056 	u32 adj_index = nh_grp->adj_index; /* base */
3057 	struct mlxsw_sp_nexthop *nh;
3058 	int i;
3059 	int err;
3060 
3061 	for (i = 0; i < nh_grp->count; i++) {
3062 		nh = &nh_grp->nexthops[i];
3063 
3064 		if (!nh->should_offload) {
3065 			nh->offloaded = 0;
3066 			continue;
3067 		}
3068 
3069 		if (nh->update || reallocate) {
3070 			switch (nh->type) {
3071 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3072 				err = mlxsw_sp_nexthop_update
3073 					    (mlxsw_sp, adj_index, nh);
3074 				break;
3075 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3076 				err = mlxsw_sp_nexthop_ipip_update
3077 					    (mlxsw_sp, adj_index, nh);
3078 				break;
3079 			}
3080 			if (err)
3081 				return err;
3082 			nh->update = 0;
3083 			nh->offloaded = 1;
3084 		}
3085 		adj_index += nh->num_adj_entries;
3086 	}
3087 	return 0;
3088 }
3089 
3090 static bool
3091 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3092 				 const struct mlxsw_sp_fib_entry *fib_entry);
3093 
3094 static int
3095 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3096 				    struct mlxsw_sp_nexthop_group *nh_grp)
3097 {
3098 	struct mlxsw_sp_fib_entry *fib_entry;
3099 	int err;
3100 
3101 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3102 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3103 						      fib_entry))
3104 			continue;
3105 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3106 		if (err)
3107 			return err;
3108 	}
3109 	return 0;
3110 }
3111 
3112 static void
3113 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3114 				   enum mlxsw_reg_ralue_op op, int err);
3115 
3116 static void
3117 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3118 {
3119 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3120 	struct mlxsw_sp_fib_entry *fib_entry;
3121 
3122 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3123 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3124 						      fib_entry))
3125 			continue;
3126 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3127 	}
3128 }
3129 
/* Round *@p_adj_grp_size up to a valid adjacency group size.
 *
 * Valid sizes for an adjacency group are:
 * 1-64, 512, 1024, 2048 and 4096.
 *
 * Values of at most 64 are left untouched; anything above 2048,
 * including values larger than 4096, becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 rounded;

	if (*p_adj_grp_size <= 64)
		return;

	if (*p_adj_grp_size > 2048)
		rounded = 4096;
	else if (*p_adj_grp_size > 1024)
		rounded = 2048;
	else if (*p_adj_grp_size > 512)
		rounded = 1024;
	else
		rounded = 512;

	*p_adj_grp_size = rounded;
}
3146 
/* Set *@p_adj_grp_size to the largest of 512, 1024, 2048 and 4096 that
 * does not exceed @alloc_size. When @alloc_size is below 512 the value is
 * left as it was.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size < 512)
		return;

	if (alloc_size < 1024)
		*p_adj_grp_size = 512;
	else if (alloc_size < 2048)
		*p_adj_grp_size = 1024;
	else if (alloc_size < 4096)
		*p_adj_grp_size = 2048;
	else
		*p_adj_grp_size = 4096;
}
3159 
3160 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3161 				     u16 *p_adj_grp_size)
3162 {
3163 	unsigned int alloc_size;
3164 	int err;
3165 
3166 	/* Round up the requested group size to the next size supported
3167 	 * by the device and make sure the request can be satisfied.
3168 	 */
3169 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3170 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3171 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3172 					      *p_adj_grp_size, &alloc_size);
3173 	if (err)
3174 		return err;
3175 	/* It is possible the allocation results in more allocated
3176 	 * entries than requested. Try to use as much of them as
3177 	 * possible.
3178 	 */
3179 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3180 
3181 	return 0;
3182 }
3183 
3184 static void
3185 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3186 {
3187 	int i, g = 0, sum_norm_weight = 0;
3188 	struct mlxsw_sp_nexthop *nh;
3189 
3190 	for (i = 0; i < nh_grp->count; i++) {
3191 		nh = &nh_grp->nexthops[i];
3192 
3193 		if (!nh->should_offload)
3194 			continue;
3195 		if (g > 0)
3196 			g = gcd(nh->nh_weight, g);
3197 		else
3198 			g = nh->nh_weight;
3199 	}
3200 
3201 	for (i = 0; i < nh_grp->count; i++) {
3202 		nh = &nh_grp->nexthops[i];
3203 
3204 		if (!nh->should_offload)
3205 			continue;
3206 		nh->norm_nh_weight = nh->nh_weight / g;
3207 		sum_norm_weight += nh->norm_nh_weight;
3208 	}
3209 
3210 	nh_grp->sum_norm_weight = sum_norm_weight;
3211 }
3212 
3213 static void
3214 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3215 {
3216 	int total = nh_grp->sum_norm_weight;
3217 	u16 ecmp_size = nh_grp->ecmp_size;
3218 	int i, weight = 0, lower_bound = 0;
3219 
3220 	for (i = 0; i < nh_grp->count; i++) {
3221 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3222 		int upper_bound;
3223 
3224 		if (!nh->should_offload)
3225 			continue;
3226 		weight += nh->norm_nh_weight;
3227 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3228 		nh->num_adj_entries = upper_bound - lower_bound;
3229 		lower_bound = upper_bound;
3230 	}
3231 }
3232 
3233 static void
3234 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3235 			       struct mlxsw_sp_nexthop_group *nh_grp)
3236 {
3237 	u16 ecmp_size, old_ecmp_size;
3238 	struct mlxsw_sp_nexthop *nh;
3239 	bool offload_change = false;
3240 	u32 adj_index;
3241 	bool old_adj_index_valid;
3242 	u32 old_adj_index;
3243 	int i;
3244 	int err;
3245 
3246 	if (!nh_grp->gateway) {
3247 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3248 		return;
3249 	}
3250 
3251 	for (i = 0; i < nh_grp->count; i++) {
3252 		nh = &nh_grp->nexthops[i];
3253 
3254 		if (nh->should_offload != nh->offloaded) {
3255 			offload_change = true;
3256 			if (nh->should_offload)
3257 				nh->update = 1;
3258 		}
3259 	}
3260 	if (!offload_change) {
3261 		/* Nothing was added or removed, so no need to reallocate. Just
3262 		 * update MAC on existing adjacency indexes.
3263 		 */
3264 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3265 		if (err) {
3266 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3267 			goto set_trap;
3268 		}
3269 		return;
3270 	}
3271 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3272 	if (!nh_grp->sum_norm_weight)
3273 		/* No neigh of this group is connected so we just set
3274 		 * the trap and let everthing flow through kernel.
3275 		 */
3276 		goto set_trap;
3277 
3278 	ecmp_size = nh_grp->sum_norm_weight;
3279 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3280 	if (err)
3281 		/* No valid allocation size available. */
3282 		goto set_trap;
3283 
3284 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3285 				  ecmp_size, &adj_index);
3286 	if (err) {
3287 		/* We ran out of KVD linear space, just set the
3288 		 * trap and let everything flow through kernel.
3289 		 */
3290 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3291 		goto set_trap;
3292 	}
3293 	old_adj_index_valid = nh_grp->adj_index_valid;
3294 	old_adj_index = nh_grp->adj_index;
3295 	old_ecmp_size = nh_grp->ecmp_size;
3296 	nh_grp->adj_index_valid = 1;
3297 	nh_grp->adj_index = adj_index;
3298 	nh_grp->ecmp_size = ecmp_size;
3299 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3300 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3301 	if (err) {
3302 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3303 		goto set_trap;
3304 	}
3305 
3306 	if (!old_adj_index_valid) {
3307 		/* The trap was set for fib entries, so we have to call
3308 		 * fib entry update to unset it and use adjacency index.
3309 		 */
3310 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3311 		if (err) {
3312 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3313 			goto set_trap;
3314 		}
3315 		return;
3316 	}
3317 
3318 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3319 					     old_adj_index, old_ecmp_size);
3320 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3321 			   old_ecmp_size, old_adj_index);
3322 	if (err) {
3323 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3324 		goto set_trap;
3325 	}
3326 
3327 	/* Offload state within the group changed, so update the flags. */
3328 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3329 
3330 	return;
3331 
3332 set_trap:
3333 	old_adj_index_valid = nh_grp->adj_index_valid;
3334 	nh_grp->adj_index_valid = 0;
3335 	for (i = 0; i < nh_grp->count; i++) {
3336 		nh = &nh_grp->nexthops[i];
3337 		nh->offloaded = 0;
3338 	}
3339 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3340 	if (err)
3341 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3342 	if (old_adj_index_valid)
3343 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3344 				   nh_grp->ecmp_size, nh_grp->adj_index);
3345 }
3346 
3347 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3348 					    bool removing)
3349 {
3350 	if (!removing)
3351 		nh->should_offload = 1;
3352 	else
3353 		nh->should_offload = 0;
3354 	nh->update = 1;
3355 }
3356 
3357 static void
3358 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3359 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3360 			      bool removing)
3361 {
3362 	struct mlxsw_sp_nexthop *nh;
3363 
3364 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3365 			    neigh_list_node) {
3366 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3367 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3368 	}
3369 }
3370 
3371 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3372 				      struct mlxsw_sp_rif *rif)
3373 {
3374 	if (nh->rif)
3375 		return;
3376 
3377 	nh->rif = rif;
3378 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3379 }
3380 
3381 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3382 {
3383 	if (!nh->rif)
3384 		return;
3385 
3386 	list_del(&nh->rif_list_node);
3387 	nh->rif = NULL;
3388 }
3389 
3390 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3391 				       struct mlxsw_sp_nexthop *nh)
3392 {
3393 	struct mlxsw_sp_neigh_entry *neigh_entry;
3394 	struct neighbour *n;
3395 	u8 nud_state, dead;
3396 	int err;
3397 
3398 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3399 		return 0;
3400 
3401 	/* Take a reference of neigh here ensuring that neigh would
3402 	 * not be destructed before the nexthop entry is finished.
3403 	 * The reference is taken either in neigh_lookup() or
3404 	 * in neigh_create() in case n is not found.
3405 	 */
3406 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3407 	if (!n) {
3408 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3409 				 nh->rif->dev);
3410 		if (IS_ERR(n))
3411 			return PTR_ERR(n);
3412 		neigh_event_send(n, NULL);
3413 	}
3414 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3415 	if (!neigh_entry) {
3416 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3417 		if (IS_ERR(neigh_entry)) {
3418 			err = -EINVAL;
3419 			goto err_neigh_entry_create;
3420 		}
3421 	}
3422 
3423 	/* If that is the first nexthop connected to that neigh, add to
3424 	 * nexthop_neighs_list
3425 	 */
3426 	if (list_empty(&neigh_entry->nexthop_list))
3427 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3428 			      &mlxsw_sp->router->nexthop_neighs_list);
3429 
3430 	nh->neigh_entry = neigh_entry;
3431 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3432 	read_lock_bh(&n->lock);
3433 	nud_state = n->nud_state;
3434 	dead = n->dead;
3435 	read_unlock_bh(&n->lock);
3436 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3437 
3438 	return 0;
3439 
3440 err_neigh_entry_create:
3441 	neigh_release(n);
3442 	return err;
3443 }
3444 
3445 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3446 					struct mlxsw_sp_nexthop *nh)
3447 {
3448 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3449 	struct neighbour *n;
3450 
3451 	if (!neigh_entry)
3452 		return;
3453 	n = neigh_entry->key.n;
3454 
3455 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3456 	list_del(&nh->neigh_list_node);
3457 	nh->neigh_entry = NULL;
3458 
3459 	/* If that is the last nexthop connected to that neigh, remove from
3460 	 * nexthop_neighs_list
3461 	 */
3462 	if (list_empty(&neigh_entry->nexthop_list))
3463 		list_del(&neigh_entry->nexthop_neighs_list_node);
3464 
3465 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3466 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3467 
3468 	neigh_release(n);
3469 }
3470 
3471 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3472 {
3473 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3474 
3475 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3476 }
3477 
3478 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3479 				       struct mlxsw_sp_nexthop *nh,
3480 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3481 {
3482 	bool removing;
3483 
3484 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3485 		return;
3486 
3487 	nh->ipip_entry = ipip_entry;
3488 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3489 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3490 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3491 }
3492 
3493 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3494 				       struct mlxsw_sp_nexthop *nh)
3495 {
3496 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3497 
3498 	if (!ipip_entry)
3499 		return;
3500 
3501 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3502 	nh->ipip_entry = NULL;
3503 }
3504 
3505 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3506 					const struct fib_nh *fib_nh,
3507 					enum mlxsw_sp_ipip_type *p_ipipt)
3508 {
3509 	struct net_device *dev = fib_nh->nh_dev;
3510 
3511 	return dev &&
3512 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3513 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3514 }
3515 
3516 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3517 				       struct mlxsw_sp_nexthop *nh)
3518 {
3519 	switch (nh->type) {
3520 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3521 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3522 		mlxsw_sp_nexthop_rif_fini(nh);
3523 		break;
3524 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3525 		mlxsw_sp_nexthop_rif_fini(nh);
3526 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3527 		break;
3528 	}
3529 }
3530 
3531 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3532 				       struct mlxsw_sp_nexthop *nh,
3533 				       struct fib_nh *fib_nh)
3534 {
3535 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3536 	struct net_device *dev = fib_nh->nh_dev;
3537 	struct mlxsw_sp_ipip_entry *ipip_entry;
3538 	struct mlxsw_sp_rif *rif;
3539 	int err;
3540 
3541 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3542 	if (ipip_entry) {
3543 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3544 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3545 					  MLXSW_SP_L3_PROTO_IPV4)) {
3546 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3547 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3548 			return 0;
3549 		}
3550 	}
3551 
3552 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3553 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3554 	if (!rif)
3555 		return 0;
3556 
3557 	mlxsw_sp_nexthop_rif_init(nh, rif);
3558 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3559 	if (err)
3560 		goto err_neigh_init;
3561 
3562 	return 0;
3563 
3564 err_neigh_init:
3565 	mlxsw_sp_nexthop_rif_fini(nh);
3566 	return err;
3567 }
3568 
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(): simply forwards to the
 * protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3574 
3575 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3576 				  struct mlxsw_sp_nexthop_group *nh_grp,
3577 				  struct mlxsw_sp_nexthop *nh,
3578 				  struct fib_nh *fib_nh)
3579 {
3580 	struct net_device *dev = fib_nh->nh_dev;
3581 	struct in_device *in_dev;
3582 	int err;
3583 
3584 	nh->nh_grp = nh_grp;
3585 	nh->key.fib_nh = fib_nh;
3586 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3587 	nh->nh_weight = fib_nh->nh_weight;
3588 #else
3589 	nh->nh_weight = 1;
3590 #endif
3591 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3592 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3593 	if (err)
3594 		return err;
3595 
3596 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3597 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3598 
3599 	if (!dev)
3600 		return 0;
3601 
3602 	in_dev = __in_dev_get_rtnl(dev);
3603 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3604 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3605 		return 0;
3606 
3607 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3608 	if (err)
3609 		goto err_nexthop_neigh_init;
3610 
3611 	return 0;
3612 
3613 err_nexthop_neigh_init:
3614 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3615 	return err;
3616 }
3617 
/* Tear down an IPv4 nexthop: undo its type-specific setup, take it off the
 * router's nexthop list, free its counter and remove it from the nexthop
 * hash table.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3626 
3627 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3628 				    unsigned long event, struct fib_nh *fib_nh)
3629 {
3630 	struct mlxsw_sp_nexthop_key key;
3631 	struct mlxsw_sp_nexthop *nh;
3632 
3633 	if (mlxsw_sp->router->aborted)
3634 		return;
3635 
3636 	key.fib_nh = fib_nh;
3637 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3638 	if (WARN_ON_ONCE(!nh))
3639 		return;
3640 
3641 	switch (event) {
3642 	case FIB_EVENT_NH_ADD:
3643 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3644 		break;
3645 	case FIB_EVENT_NH_DEL:
3646 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3647 		break;
3648 	}
3649 
3650 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3651 }
3652 
3653 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3654 					struct mlxsw_sp_rif *rif)
3655 {
3656 	struct mlxsw_sp_nexthop *nh;
3657 	bool removing;
3658 
3659 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3660 		switch (nh->type) {
3661 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3662 			removing = false;
3663 			break;
3664 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3665 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3666 			break;
3667 		default:
3668 			WARN_ON(1);
3669 			continue;
3670 		}
3671 
3672 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3673 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3674 	}
3675 }
3676 
3677 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3678 					 struct mlxsw_sp_rif *old_rif,
3679 					 struct mlxsw_sp_rif *new_rif)
3680 {
3681 	struct mlxsw_sp_nexthop *nh;
3682 
3683 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3684 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3685 		nh->rif = new_rif;
3686 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3687 }
3688 
3689 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3690 					   struct mlxsw_sp_rif *rif)
3691 {
3692 	struct mlxsw_sp_nexthop *nh, *tmp;
3693 
3694 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3695 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3696 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3697 	}
3698 }
3699 
3700 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3701 				   const struct fib_info *fi)
3702 {
3703 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3704 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3705 }
3706 
3707 static struct mlxsw_sp_nexthop_group *
3708 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3709 {
3710 	struct mlxsw_sp_nexthop_group *nh_grp;
3711 	struct mlxsw_sp_nexthop *nh;
3712 	struct fib_nh *fib_nh;
3713 	size_t alloc_size;
3714 	int i;
3715 	int err;
3716 
3717 	alloc_size = sizeof(*nh_grp) +
3718 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3719 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3720 	if (!nh_grp)
3721 		return ERR_PTR(-ENOMEM);
3722 	nh_grp->priv = fi;
3723 	INIT_LIST_HEAD(&nh_grp->fib_list);
3724 	nh_grp->neigh_tbl = &arp_tbl;
3725 
3726 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3727 	nh_grp->count = fi->fib_nhs;
3728 	fib_info_hold(fi);
3729 	for (i = 0; i < nh_grp->count; i++) {
3730 		nh = &nh_grp->nexthops[i];
3731 		fib_nh = &fi->fib_nh[i];
3732 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3733 		if (err)
3734 			goto err_nexthop4_init;
3735 	}
3736 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3737 	if (err)
3738 		goto err_nexthop_group_insert;
3739 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3740 	return nh_grp;
3741 
3742 err_nexthop_group_insert:
3743 err_nexthop4_init:
3744 	for (i--; i >= 0; i--) {
3745 		nh = &nh_grp->nexthops[i];
3746 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3747 	}
3748 	fib_info_put(fi);
3749 	kfree(nh_grp);
3750 	return ERR_PTR(err);
3751 }
3752 
3753 static void
3754 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3755 				struct mlxsw_sp_nexthop_group *nh_grp)
3756 {
3757 	struct mlxsw_sp_nexthop *nh;
3758 	int i;
3759 
3760 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3761 	for (i = 0; i < nh_grp->count; i++) {
3762 		nh = &nh_grp->nexthops[i];
3763 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3764 	}
3765 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3766 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3767 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3768 	kfree(nh_grp);
3769 }
3770 
3771 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3772 				       struct mlxsw_sp_fib_entry *fib_entry,
3773 				       struct fib_info *fi)
3774 {
3775 	struct mlxsw_sp_nexthop_group *nh_grp;
3776 
3777 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3778 	if (!nh_grp) {
3779 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3780 		if (IS_ERR(nh_grp))
3781 			return PTR_ERR(nh_grp);
3782 	}
3783 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3784 	fib_entry->nh_group = nh_grp;
3785 	return 0;
3786 }
3787 
3788 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3789 					struct mlxsw_sp_fib_entry *fib_entry)
3790 {
3791 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3792 
3793 	list_del(&fib_entry->nexthop_group_node);
3794 	if (!list_empty(&nh_grp->fib_list))
3795 		return;
3796 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3797 }
3798 
3799 static bool
3800 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3801 {
3802 	struct mlxsw_sp_fib4_entry *fib4_entry;
3803 
3804 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3805 				  common);
3806 	return !fib4_entry->tos;
3807 }
3808 
3809 static bool
3810 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3811 {
3812 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3813 
3814 	switch (fib_entry->fib_node->fib->proto) {
3815 	case MLXSW_SP_L3_PROTO_IPV4:
3816 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3817 			return false;
3818 		break;
3819 	case MLXSW_SP_L3_PROTO_IPV6:
3820 		break;
3821 	}
3822 
3823 	switch (fib_entry->type) {
3824 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3825 		return !!nh_group->adj_index_valid;
3826 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3827 		return !!nh_group->nh_rif;
3828 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3829 		return true;
3830 	default:
3831 		return false;
3832 	}
3833 }
3834 
3835 static struct mlxsw_sp_nexthop *
3836 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3837 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3838 {
3839 	int i;
3840 
3841 	for (i = 0; i < nh_grp->count; i++) {
3842 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3843 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3844 
3845 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3846 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3847 				    &rt->fib6_nh.nh_gw))
3848 			return nh;
3849 		continue;
3850 	}
3851 
3852 	return NULL;
3853 }
3854 
3855 static void
3856 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3857 {
3858 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3859 	int i;
3860 
3861 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3862 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3863 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3864 		return;
3865 	}
3866 
3867 	for (i = 0; i < nh_grp->count; i++) {
3868 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3869 
3870 		if (nh->offloaded)
3871 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3872 		else
3873 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3874 	}
3875 }
3876 
3877 static void
3878 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3879 {
3880 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3881 	int i;
3882 
3883 	if (!list_is_singular(&nh_grp->fib_list))
3884 		return;
3885 
3886 	for (i = 0; i < nh_grp->count; i++) {
3887 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3888 
3889 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3890 	}
3891 }
3892 
3893 static void
3894 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3895 {
3896 	struct mlxsw_sp_fib6_entry *fib6_entry;
3897 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3898 
3899 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3900 				  common);
3901 
3902 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3903 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3904 				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3905 		return;
3906 	}
3907 
3908 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3909 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3910 		struct mlxsw_sp_nexthop *nh;
3911 
3912 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3913 		if (nh && nh->offloaded)
3914 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3915 		else
3916 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3917 	}
3918 }
3919 
3920 static void
3921 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3922 {
3923 	struct mlxsw_sp_fib6_entry *fib6_entry;
3924 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3925 
3926 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3927 				  common);
3928 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3929 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3930 
3931 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3932 	}
3933 }
3934 
3935 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3936 {
3937 	switch (fib_entry->fib_node->fib->proto) {
3938 	case MLXSW_SP_L3_PROTO_IPV4:
3939 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
3940 		break;
3941 	case MLXSW_SP_L3_PROTO_IPV6:
3942 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
3943 		break;
3944 	}
3945 }
3946 
3947 static void
3948 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3949 {
3950 	switch (fib_entry->fib_node->fib->proto) {
3951 	case MLXSW_SP_L3_PROTO_IPV4:
3952 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3953 		break;
3954 	case MLXSW_SP_L3_PROTO_IPV6:
3955 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3956 		break;
3957 	}
3958 }
3959 
3960 static void
3961 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3962 				   enum mlxsw_reg_ralue_op op, int err)
3963 {
3964 	switch (op) {
3965 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3966 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3967 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3968 		if (err)
3969 			return;
3970 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3971 			mlxsw_sp_fib_entry_offload_set(fib_entry);
3972 		else
3973 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
3974 		return;
3975 	default:
3976 		return;
3977 	}
3978 }
3979 
3980 static void
3981 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3982 			      const struct mlxsw_sp_fib_entry *fib_entry,
3983 			      enum mlxsw_reg_ralue_op op)
3984 {
3985 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3986 	enum mlxsw_reg_ralxx_protocol proto;
3987 	u32 *p_dip;
3988 
3989 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3990 
3991 	switch (fib->proto) {
3992 	case MLXSW_SP_L3_PROTO_IPV4:
3993 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
3994 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3995 				      fib_entry->fib_node->key.prefix_len,
3996 				      *p_dip);
3997 		break;
3998 	case MLXSW_SP_L3_PROTO_IPV6:
3999 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4000 				      fib_entry->fib_node->key.prefix_len,
4001 				      fib_entry->fib_node->key.addr);
4002 		break;
4003 	}
4004 }
4005 
4006 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4007 					struct mlxsw_sp_fib_entry *fib_entry,
4008 					enum mlxsw_reg_ralue_op op)
4009 {
4010 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4011 	enum mlxsw_reg_ralue_trap_action trap_action;
4012 	u16 trap_id = 0;
4013 	u32 adjacency_index = 0;
4014 	u16 ecmp_size = 0;
4015 
4016 	/* In case the nexthop group adjacency index is valid, use it
4017 	 * with provided ECMP size. Otherwise, setup trap and pass
4018 	 * traffic to kernel.
4019 	 */
4020 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4021 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4022 		adjacency_index = fib_entry->nh_group->adj_index;
4023 		ecmp_size = fib_entry->nh_group->ecmp_size;
4024 	} else {
4025 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4026 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4027 	}
4028 
4029 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4030 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4031 					adjacency_index, ecmp_size);
4032 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4033 }
4034 
4035 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4036 				       struct mlxsw_sp_fib_entry *fib_entry,
4037 				       enum mlxsw_reg_ralue_op op)
4038 {
4039 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4040 	enum mlxsw_reg_ralue_trap_action trap_action;
4041 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4042 	u16 trap_id = 0;
4043 	u16 rif_index = 0;
4044 
4045 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4046 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4047 		rif_index = rif->rif_index;
4048 	} else {
4049 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4050 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4051 	}
4052 
4053 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4054 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4055 				       rif_index);
4056 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4057 }
4058 
4059 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4060 				      struct mlxsw_sp_fib_entry *fib_entry,
4061 				      enum mlxsw_reg_ralue_op op)
4062 {
4063 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4064 
4065 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4066 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4067 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4068 }
4069 
4070 static int
4071 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4072 				 struct mlxsw_sp_fib_entry *fib_entry,
4073 				 enum mlxsw_reg_ralue_op op)
4074 {
4075 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4076 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4077 
4078 	if (WARN_ON(!ipip_entry))
4079 		return -EINVAL;
4080 
4081 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4082 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4083 				      fib_entry->decap.tunnel_index);
4084 }
4085 
4086 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4087 				   struct mlxsw_sp_fib_entry *fib_entry,
4088 				   enum mlxsw_reg_ralue_op op)
4089 {
4090 	switch (fib_entry->type) {
4091 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4092 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4093 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4094 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4095 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4096 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4097 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4098 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4099 							fib_entry, op);
4100 	}
4101 	return -EINVAL;
4102 }
4103 
4104 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4105 				 struct mlxsw_sp_fib_entry *fib_entry,
4106 				 enum mlxsw_reg_ralue_op op)
4107 {
4108 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4109 
4110 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4111 
4112 	return err;
4113 }
4114 
4115 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4116 				     struct mlxsw_sp_fib_entry *fib_entry)
4117 {
4118 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4119 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4120 }
4121 
4122 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4123 				  struct mlxsw_sp_fib_entry *fib_entry)
4124 {
4125 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4126 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4127 }
4128 
/* Derive the driver's entry type for an IPv4 route from the kernel's
 * route type in @fen_info and store it in @fib_entry->type.
 *
 * For RTN_LOCAL routes that match an IPIP entry whose overlay device is
 * up, the entry becomes a decap entry and its decap state is initialized
 * as well.
 *
 * Returns 0 on success, the error of mlxsw_sp_fib_entry_decap_init() if
 * decap initialization fails, or -EINVAL for unhandled route types.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			/* Note: the type is set before decap init, so it is
			 * already IPIP_DECAP if the init below fails.
			 */
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes are "remote", the rest are "local". */
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4172 
4173 static struct mlxsw_sp_fib4_entry *
4174 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4175 			   struct mlxsw_sp_fib_node *fib_node,
4176 			   const struct fib_entry_notifier_info *fen_info)
4177 {
4178 	struct mlxsw_sp_fib4_entry *fib4_entry;
4179 	struct mlxsw_sp_fib_entry *fib_entry;
4180 	int err;
4181 
4182 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4183 	if (!fib4_entry)
4184 		return ERR_PTR(-ENOMEM);
4185 	fib_entry = &fib4_entry->common;
4186 
4187 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4188 	if (err)
4189 		goto err_fib4_entry_type_set;
4190 
4191 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4192 	if (err)
4193 		goto err_nexthop4_group_get;
4194 
4195 	fib4_entry->prio = fen_info->fi->fib_priority;
4196 	fib4_entry->tb_id = fen_info->tb_id;
4197 	fib4_entry->type = fen_info->type;
4198 	fib4_entry->tos = fen_info->tos;
4199 
4200 	fib_entry->fib_node = fib_node;
4201 
4202 	return fib4_entry;
4203 
4204 err_nexthop4_group_get:
4205 err_fib4_entry_type_set:
4206 	kfree(fib4_entry);
4207 	return ERR_PTR(err);
4208 }
4209 
4210 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4211 					struct mlxsw_sp_fib4_entry *fib4_entry)
4212 {
4213 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4214 	kfree(fib4_entry);
4215 }
4216 
4217 static struct mlxsw_sp_fib4_entry *
4218 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4219 			   const struct fib_entry_notifier_info *fen_info)
4220 {
4221 	struct mlxsw_sp_fib4_entry *fib4_entry;
4222 	struct mlxsw_sp_fib_node *fib_node;
4223 	struct mlxsw_sp_fib *fib;
4224 	struct mlxsw_sp_vr *vr;
4225 
4226 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4227 	if (!vr)
4228 		return NULL;
4229 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4230 
4231 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4232 					    sizeof(fen_info->dst),
4233 					    fen_info->dst_len);
4234 	if (!fib_node)
4235 		return NULL;
4236 
4237 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4238 		if (fib4_entry->tb_id == fen_info->tb_id &&
4239 		    fib4_entry->tos == fen_info->tos &&
4240 		    fib4_entry->type == fen_info->type &&
4241 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4242 		    fen_info->fi) {
4243 			return fib4_entry;
4244 		}
4245 	}
4246 
4247 	return NULL;
4248 }
4249 
4250 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4251 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4252 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4253 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4254 	.automatic_shrinking = true,
4255 };
4256 
4257 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4258 				    struct mlxsw_sp_fib_node *fib_node)
4259 {
4260 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4261 				      mlxsw_sp_fib_ht_params);
4262 }
4263 
4264 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4265 				     struct mlxsw_sp_fib_node *fib_node)
4266 {
4267 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4268 			       mlxsw_sp_fib_ht_params);
4269 }
4270 
4271 static struct mlxsw_sp_fib_node *
4272 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4273 			 size_t addr_len, unsigned char prefix_len)
4274 {
4275 	struct mlxsw_sp_fib_key key;
4276 
4277 	memset(&key, 0, sizeof(key));
4278 	memcpy(key.addr, addr, addr_len);
4279 	key.prefix_len = prefix_len;
4280 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4281 }
4282 
4283 static struct mlxsw_sp_fib_node *
4284 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4285 			 size_t addr_len, unsigned char prefix_len)
4286 {
4287 	struct mlxsw_sp_fib_node *fib_node;
4288 
4289 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4290 	if (!fib_node)
4291 		return NULL;
4292 
4293 	INIT_LIST_HEAD(&fib_node->entry_list);
4294 	list_add(&fib_node->list, &fib->node_list);
4295 	memcpy(fib_node->key.addr, addr, addr_len);
4296 	fib_node->key.prefix_len = prefix_len;
4297 
4298 	return fib_node;
4299 }
4300 
4301 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4302 {
4303 	list_del(&fib_node->list);
4304 	WARN_ON(!list_empty(&fib_node->entry_list));
4305 	kfree(fib_node);
4306 }
4307 
4308 static bool
4309 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4310 				 const struct mlxsw_sp_fib_entry *fib_entry)
4311 {
4312 	return list_first_entry(&fib_node->entry_list,
4313 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4314 }
4315 
/* Account for @fib_node's prefix length in the LPM tree used for its
 * protocol.
 *
 * If the current tree already counts users of that prefix length, only
 * the per-prefix reference count is incremented. Otherwise a tree whose
 * prefix usage additionally includes this prefix length is requested and
 * installed via mlxsw_sp_vrs_lpm_tree_replace() before the count is
 * incremented on it.
 *
 * Returns 0 on success or a negative error code; on replace failure the
 * newly obtained tree is put back.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	/* Prefix length already in use: no need for a different tree. */
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	/* Required usage = current usage plus this node's prefix length. */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	/* lpm_tree is either the unchanged current tree or the new one. */
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4347 
/* Drop @fib_node's use of its prefix length from the FIB's LPM tree.
 *
 * When the per-prefix reference count reaches zero, a tree without that
 * prefix length is requested and installed. This is best effort: if
 * either step fails the function returns silently and the old tree stays
 * in use.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	/* Other nodes still use this prefix length: nothing more to do. */
	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	/* Give back the tree obtained above; the old one remains active. */
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4378 
4379 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4380 				  struct mlxsw_sp_fib_node *fib_node,
4381 				  struct mlxsw_sp_fib *fib)
4382 {
4383 	int err;
4384 
4385 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4386 	if (err)
4387 		return err;
4388 	fib_node->fib = fib;
4389 
4390 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4391 	if (err)
4392 		goto err_fib_lpm_tree_link;
4393 
4394 	return 0;
4395 
4396 err_fib_lpm_tree_link:
4397 	fib_node->fib = NULL;
4398 	mlxsw_sp_fib_node_remove(fib, fib_node);
4399 	return err;
4400 }
4401 
4402 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4403 				   struct mlxsw_sp_fib_node *fib_node)
4404 {
4405 	struct mlxsw_sp_fib *fib = fib_node->fib;
4406 
4407 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4408 	fib_node->fib = NULL;
4409 	mlxsw_sp_fib_node_remove(fib, fib_node);
4410 }
4411 
/* Get the FIB node for a prefix in table @tb_id, creating it if needed.
 *
 * @addr/@addr_len: destination address bytes of the prefix.
 * @prefix_len:     prefix length.
 * @proto:          selects the FIB of the virtual router.
 *
 * Returns the existing or newly created node, or an ERR_PTR() if the
 * virtual router cannot be obtained, the node cannot be allocated
 * (-ENOMEM) or its initialization fails.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	/* NOTE(review): an existing node is returned without a matching
	 * mlxsw_sp_vr_put(); presumably mlxsw_sp_vr_get() does not take a
	 * per-call reference - confirm against its definition.
	 */
	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
4449 
4450 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4451 				  struct mlxsw_sp_fib_node *fib_node)
4452 {
4453 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4454 
4455 	if (!list_empty(&fib_node->entry_list))
4456 		return;
4457 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4458 	mlxsw_sp_fib_node_destroy(fib_node);
4459 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4460 }
4461 
/* Find the entry in @fib_node's list that @new4_entry relates to, i.e.
 * the first entry of the same table whose TOS is not greater than the
 * new entry's and which either has a priority value greater than or
 * equal to the new entry's or a smaller TOS.
 *
 * Entries with a greater table ID are skipped and the search stops at
 * the first entry with a smaller one, so the list is evidently expected
 * to be ordered by descending table ID.
 *
 * Returns the entry found, or NULL.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		/* Not yet at the new entry's table. */
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		/* Past the new entry's table. */
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4482 
/* Link @new4_entry after the run of entries, starting at @fib4_entry,
 * that share its table ID, TOS and priority.
 *
 * Returns 0 on success or -EINVAL (with a warning) if @fib4_entry is
 * NULL.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	/* Advance to the first entry that differs in table, TOS or prio. */
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	/* Insert before the cursor. If the loop ran off the end of the
	 * list, the cursor's list member is the list head itself, so this
	 * appends at the tail.
	 */
	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4504 
/* Link @new4_entry into the entry list of its FIB node.
 *
 * @replace: the new entry replaces an existing one; it is placed right
 *           before it so the caller can remove the old one afterwards.
 * @append:  the new entry is appended after equivalent entries.
 *
 * Returns 0 on success, -EINVAL (with a warning) if @replace is set but
 * no matching entry exists, or the result of the append helper.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No related entry: find the last entry whose table ID is
		 * not smaller than the new entry's and insert after it.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			/* Empty list, or new entry goes first. */
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4544 
4545 static void
4546 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4547 {
4548 	list_del(&fib4_entry->common.list);
4549 }
4550 
4551 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4552 				       struct mlxsw_sp_fib_entry *fib_entry)
4553 {
4554 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4555 
4556 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4557 		return 0;
4558 
4559 	/* To prevent packet loss, overwrite the previously offloaded
4560 	 * entry.
4561 	 */
4562 	if (!list_is_singular(&fib_node->entry_list)) {
4563 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4564 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4565 
4566 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4567 	}
4568 
4569 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4570 }
4571 
4572 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4573 					struct mlxsw_sp_fib_entry *fib_entry)
4574 {
4575 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4576 
4577 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4578 		return;
4579 
4580 	/* Promote the next entry by overwriting the deleted entry */
4581 	if (!list_is_singular(&fib_node->entry_list)) {
4582 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4583 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4584 
4585 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4586 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4587 		return;
4588 	}
4589 
4590 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4591 }
4592 
4593 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4594 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4595 					 bool replace, bool append)
4596 {
4597 	int err;
4598 
4599 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4600 	if (err)
4601 		return err;
4602 
4603 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4604 	if (err)
4605 		goto err_fib_node_entry_add;
4606 
4607 	return 0;
4608 
4609 err_fib_node_entry_add:
4610 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4611 	return err;
4612 }
4613 
4614 static void
4615 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4616 				struct mlxsw_sp_fib4_entry *fib4_entry)
4617 {
4618 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4619 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4620 
4621 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4622 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4623 }
4624 
4625 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4626 					struct mlxsw_sp_fib4_entry *fib4_entry,
4627 					bool replace)
4628 {
4629 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4630 	struct mlxsw_sp_fib4_entry *replaced;
4631 
4632 	if (!replace)
4633 		return;
4634 
4635 	/* We inserted the new entry before replaced one */
4636 	replaced = list_next_entry(fib4_entry, common.list);
4637 
4638 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4639 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4640 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4641 }
4642 
4643 static int
4644 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4645 			 const struct fib_entry_notifier_info *fen_info,
4646 			 bool replace, bool append)
4647 {
4648 	struct mlxsw_sp_fib4_entry *fib4_entry;
4649 	struct mlxsw_sp_fib_node *fib_node;
4650 	int err;
4651 
4652 	if (mlxsw_sp->router->aborted)
4653 		return 0;
4654 
4655 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4656 					 &fen_info->dst, sizeof(fen_info->dst),
4657 					 fen_info->dst_len,
4658 					 MLXSW_SP_L3_PROTO_IPV4);
4659 	if (IS_ERR(fib_node)) {
4660 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4661 		return PTR_ERR(fib_node);
4662 	}
4663 
4664 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4665 	if (IS_ERR(fib4_entry)) {
4666 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4667 		err = PTR_ERR(fib4_entry);
4668 		goto err_fib4_entry_create;
4669 	}
4670 
4671 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4672 					    append);
4673 	if (err) {
4674 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4675 		goto err_fib4_node_entry_link;
4676 	}
4677 
4678 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4679 
4680 	return 0;
4681 
4682 err_fib4_node_entry_link:
4683 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4684 err_fib4_entry_create:
4685 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4686 	return err;
4687 }
4688 
4689 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4690 				     struct fib_entry_notifier_info *fen_info)
4691 {
4692 	struct mlxsw_sp_fib4_entry *fib4_entry;
4693 	struct mlxsw_sp_fib_node *fib_node;
4694 
4695 	if (mlxsw_sp->router->aborted)
4696 		return;
4697 
4698 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4699 	if (WARN_ON(!fib4_entry))
4700 		return;
4701 	fib_node = fib4_entry->common.fib_node;
4702 
4703 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4704 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4705 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4706 }
4707 
4708 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4709 {
4710 	/* Packets with link-local destination IP arriving to the router
4711 	 * are trapped to the CPU, so no need to program specific routes
4712 	 * for them.
4713 	 */
4714 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4715 		return true;
4716 
4717 	/* Multicast routes aren't supported, so ignore them. Neighbour
4718 	 * Discovery packets are specifically trapped.
4719 	 */
4720 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4721 		return true;
4722 
4723 	/* Cloned routes are irrelevant in the forwarding path. */
4724 	if (rt->fib6_flags & RTF_CACHE)
4725 		return true;
4726 
4727 	return false;
4728 }
4729 
4730 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4731 {
4732 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4733 
4734 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4735 	if (!mlxsw_sp_rt6)
4736 		return ERR_PTR(-ENOMEM);
4737 
4738 	/* In case of route replace, replaced route is deleted with
4739 	 * no notification. Take reference to prevent accessing freed
4740 	 * memory.
4741 	 */
4742 	mlxsw_sp_rt6->rt = rt;
4743 	fib6_info_hold(rt);
4744 
4745 	return mlxsw_sp_rt6;
4746 }
4747 
4748 #if IS_ENABLED(CONFIG_IPV6)
4749 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4750 {
4751 	fib6_info_release(rt);
4752 }
4753 #else
4754 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4755 {
4756 }
4757 #endif
4758 
4759 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4760 {
4761 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4762 	kfree(mlxsw_sp_rt6);
4763 }
4764 
4765 static struct fib6_info *
4766 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4767 {
4768 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4769 				list)->rt;
4770 }
4771 
4772 static struct mlxsw_sp_fib6_entry *
4773 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4774 				 const struct fib6_info *nrt, bool append)
4775 {
4776 	struct mlxsw_sp_fib6_entry *fib6_entry;
4777 
4778 	if (!append)
4779 		return NULL;
4780 
4781 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4782 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4783 
4784 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4785 		 * virtual router.
4786 		 */
4787 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4788 			continue;
4789 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4790 			break;
4791 		if (rt->fib6_metric < nrt->fib6_metric)
4792 			continue;
4793 		if (rt->fib6_metric == nrt->fib6_metric)
4794 			return fib6_entry;
4795 		if (rt->fib6_metric > nrt->fib6_metric)
4796 			break;
4797 	}
4798 
4799 	return NULL;
4800 }
4801 
4802 static struct mlxsw_sp_rt6 *
4803 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4804 			    const struct fib6_info *rt)
4805 {
4806 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4807 
4808 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4809 		if (mlxsw_sp_rt6->rt == rt)
4810 			return mlxsw_sp_rt6;
4811 	}
4812 
4813 	return NULL;
4814 }
4815 
4816 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4817 					const struct fib6_info *rt,
4818 					enum mlxsw_sp_ipip_type *ret)
4819 {
4820 	return rt->fib6_nh.nh_dev &&
4821 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4822 }
4823 
4824 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4825 				       struct mlxsw_sp_nexthop_group *nh_grp,
4826 				       struct mlxsw_sp_nexthop *nh,
4827 				       const struct fib6_info *rt)
4828 {
4829 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4830 	struct mlxsw_sp_ipip_entry *ipip_entry;
4831 	struct net_device *dev = rt->fib6_nh.nh_dev;
4832 	struct mlxsw_sp_rif *rif;
4833 	int err;
4834 
4835 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4836 	if (ipip_entry) {
4837 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4838 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4839 					  MLXSW_SP_L3_PROTO_IPV6)) {
4840 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4841 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4842 			return 0;
4843 		}
4844 	}
4845 
4846 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4847 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4848 	if (!rif)
4849 		return 0;
4850 	mlxsw_sp_nexthop_rif_init(nh, rif);
4851 
4852 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4853 	if (err)
4854 		goto err_nexthop_neigh_init;
4855 
4856 	return 0;
4857 
4858 err_nexthop_neigh_init:
4859 	mlxsw_sp_nexthop_rif_fini(nh);
4860 	return err;
4861 }
4862 
/* Undo the type-specific initialization of the IPv6 nexthop @nh by
 * delegating to the common mlxsw_sp_nexthop_type_fini() helper.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4868 
4869 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4870 				  struct mlxsw_sp_nexthop_group *nh_grp,
4871 				  struct mlxsw_sp_nexthop *nh,
4872 				  const struct fib6_info *rt)
4873 {
4874 	struct net_device *dev = rt->fib6_nh.nh_dev;
4875 
4876 	nh->nh_grp = nh_grp;
4877 	nh->nh_weight = rt->fib6_nh.nh_weight;
4878 	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
4879 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4880 
4881 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4882 
4883 	if (!dev)
4884 		return 0;
4885 	nh->ifindex = dev->ifindex;
4886 
4887 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4888 }
4889 
/* Tear down the IPv6 nexthop @nh: undo its type-specific state, remove
 * it from the router's nexthop list and free its counter.
 */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
4897 
4898 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4899 				    const struct fib6_info *rt)
4900 {
4901 	return rt->fib6_flags & RTF_GATEWAY ||
4902 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
4903 }
4904 
/* Create a nexthop group for @fib6_entry with one nexthop per route in
 * the entry's rt6 list (fib6_entry->nrt6 of them).
 *
 * The group's gateway property is taken from the first route. After all
 * nexthops are initialized the group is inserted via
 * mlxsw_sp_nexthop_group_insert() and refreshed.
 *
 * Returns the new group, or an ERR_PTR() on allocation, nexthop
 * initialization or insertion failure.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* The nexthops are stored in an array trailing the group. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	/* Walk the rt6 list and the nexthop array in lockstep. */
	for (i = 0; i < nh_grp->count; i++) {
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	/* Finalize only nexthops [0, i): i is the index of the nexthop
	 * whose init failed, or the count if the insertion failed.
	 */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
4955 
4956 static void
4957 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
4958 				struct mlxsw_sp_nexthop_group *nh_grp)
4959 {
4960 	struct mlxsw_sp_nexthop *nh;
4961 	int i = nh_grp->count;
4962 
4963 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4964 	for (i--; i >= 0; i--) {
4965 		nh = &nh_grp->nexthops[i];
4966 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4967 	}
4968 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4969 	WARN_ON(nh_grp->adj_index_valid);
4970 	kfree(nh_grp);
4971 }
4972 
4973 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
4974 				       struct mlxsw_sp_fib6_entry *fib6_entry)
4975 {
4976 	struct mlxsw_sp_nexthop_group *nh_grp;
4977 
4978 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
4979 	if (!nh_grp) {
4980 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
4981 		if (IS_ERR(nh_grp))
4982 			return PTR_ERR(nh_grp);
4983 	}
4984 
4985 	list_add_tail(&fib6_entry->common.nexthop_group_node,
4986 		      &nh_grp->fib_list);
4987 	fib6_entry->common.nh_group = nh_grp;
4988 
4989 	return 0;
4990 }
4991 
4992 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
4993 					struct mlxsw_sp_fib_entry *fib_entry)
4994 {
4995 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4996 
4997 	list_del(&fib_entry->nexthop_group_node);
4998 	if (!list_empty(&nh_grp->fib_list))
4999 		return;
5000 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5001 }
5002 
5003 static int
5004 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5005 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5006 {
5007 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5008 	int err;
5009 
5010 	fib6_entry->common.nh_group = NULL;
5011 	list_del(&fib6_entry->common.nexthop_group_node);
5012 
5013 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5014 	if (err)
5015 		goto err_nexthop6_group_get;
5016 
5017 	/* In case this entry is offloaded, then the adjacency index
5018 	 * currently associated with it in the device's table is that
5019 	 * of the old group. Start using the new one instead.
5020 	 */
5021 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5022 	if (err)
5023 		goto err_fib_node_entry_add;
5024 
5025 	if (list_empty(&old_nh_grp->fib_list))
5026 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5027 
5028 	return 0;
5029 
5030 err_fib_node_entry_add:
5031 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5032 err_nexthop6_group_get:
5033 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5034 		      &old_nh_grp->fib_list);
5035 	fib6_entry->common.nh_group = old_nh_grp;
5036 	return err;
5037 }
5038 
5039 static int
5040 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5041 				struct mlxsw_sp_fib6_entry *fib6_entry,
5042 				struct fib6_info *rt)
5043 {
5044 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5045 	int err;
5046 
5047 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5048 	if (IS_ERR(mlxsw_sp_rt6))
5049 		return PTR_ERR(mlxsw_sp_rt6);
5050 
5051 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5052 	fib6_entry->nrt6++;
5053 
5054 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5055 	if (err)
5056 		goto err_nexthop6_group_update;
5057 
5058 	return 0;
5059 
5060 err_nexthop6_group_update:
5061 	fib6_entry->nrt6--;
5062 	list_del(&mlxsw_sp_rt6->list);
5063 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5064 	return err;
5065 }
5066 
5067 static void
5068 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5069 				struct mlxsw_sp_fib6_entry *fib6_entry,
5070 				struct fib6_info *rt)
5071 {
5072 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5073 
5074 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5075 	if (WARN_ON(!mlxsw_sp_rt6))
5076 		return;
5077 
5078 	fib6_entry->nrt6--;
5079 	list_del(&mlxsw_sp_rt6->list);
5080 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5081 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5082 }
5083 
5084 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5085 					 struct mlxsw_sp_fib_entry *fib_entry,
5086 					 const struct fib6_info *rt)
5087 {
5088 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5089 	 * stack. We can rely on their destination device not having a
5090 	 * RIF (it's the loopback device) and can thus use action type
5091 	 * local, which will cause them to be trapped with a lower
5092 	 * priority than packets that need to be locally received.
5093 	 */
5094 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5095 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5096 	else if (rt->fib6_flags & RTF_REJECT)
5097 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5098 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5099 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5100 	else
5101 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5102 }
5103 
5104 static void
5105 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5106 {
5107 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5108 
5109 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5110 				 list) {
5111 		fib6_entry->nrt6--;
5112 		list_del(&mlxsw_sp_rt6->list);
5113 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5114 	}
5115 }
5116 
5117 static struct mlxsw_sp_fib6_entry *
5118 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5119 			   struct mlxsw_sp_fib_node *fib_node,
5120 			   struct fib6_info *rt)
5121 {
5122 	struct mlxsw_sp_fib6_entry *fib6_entry;
5123 	struct mlxsw_sp_fib_entry *fib_entry;
5124 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5125 	int err;
5126 
5127 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5128 	if (!fib6_entry)
5129 		return ERR_PTR(-ENOMEM);
5130 	fib_entry = &fib6_entry->common;
5131 
5132 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5133 	if (IS_ERR(mlxsw_sp_rt6)) {
5134 		err = PTR_ERR(mlxsw_sp_rt6);
5135 		goto err_rt6_create;
5136 	}
5137 
5138 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5139 
5140 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5141 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5142 	fib6_entry->nrt6 = 1;
5143 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5144 	if (err)
5145 		goto err_nexthop6_group_get;
5146 
5147 	fib_entry->fib_node = fib_node;
5148 
5149 	return fib6_entry;
5150 
5151 err_nexthop6_group_get:
5152 	list_del(&mlxsw_sp_rt6->list);
5153 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5154 err_rt6_create:
5155 	kfree(fib6_entry);
5156 	return ERR_PTR(err);
5157 }
5158 
5159 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5160 					struct mlxsw_sp_fib6_entry *fib6_entry)
5161 {
5162 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5163 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5164 	WARN_ON(fib6_entry->nrt6);
5165 	kfree(fib6_entry);
5166 }
5167 
5168 static struct mlxsw_sp_fib6_entry *
5169 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5170 			      const struct fib6_info *nrt, bool replace)
5171 {
5172 	struct mlxsw_sp_fib6_entry *fib6_entry;
5173 
5174 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5175 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5176 
5177 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5178 			continue;
5179 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5180 			break;
5181 		if (replace && rt->fib6_metric == nrt->fib6_metric)
5182 			return fib6_entry;
5183 		if (rt->fib6_metric > nrt->fib6_metric)
5184 			return fib6_entry;
5185 	}
5186 
5187 	return NULL;
5188 }
5189 
5190 static int
5191 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5192 			       bool replace)
5193 {
5194 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5195 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5196 	struct mlxsw_sp_fib6_entry *fib6_entry;
5197 
5198 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5199 
5200 	if (replace && WARN_ON(!fib6_entry))
5201 		return -EINVAL;
5202 
5203 	if (fib6_entry) {
5204 		list_add_tail(&new6_entry->common.list,
5205 			      &fib6_entry->common.list);
5206 	} else {
5207 		struct mlxsw_sp_fib6_entry *last;
5208 
5209 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5210 			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5211 
5212 			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5213 				break;
5214 			fib6_entry = last;
5215 		}
5216 
5217 		if (fib6_entry)
5218 			list_add(&new6_entry->common.list,
5219 				 &fib6_entry->common.list);
5220 		else
5221 			list_add(&new6_entry->common.list,
5222 				 &fib_node->entry_list);
5223 	}
5224 
5225 	return 0;
5226 }
5227 
5228 static void
5229 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5230 {
5231 	list_del(&fib6_entry->common.list);
5232 }
5233 
5234 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5235 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5236 					 bool replace)
5237 {
5238 	int err;
5239 
5240 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5241 	if (err)
5242 		return err;
5243 
5244 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5245 	if (err)
5246 		goto err_fib_node_entry_add;
5247 
5248 	return 0;
5249 
5250 err_fib_node_entry_add:
5251 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5252 	return err;
5253 }
5254 
5255 static void
5256 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5257 				struct mlxsw_sp_fib6_entry *fib6_entry)
5258 {
5259 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5260 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5261 }
5262 
5263 static struct mlxsw_sp_fib6_entry *
5264 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5265 			   const struct fib6_info *rt)
5266 {
5267 	struct mlxsw_sp_fib6_entry *fib6_entry;
5268 	struct mlxsw_sp_fib_node *fib_node;
5269 	struct mlxsw_sp_fib *fib;
5270 	struct mlxsw_sp_vr *vr;
5271 
5272 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5273 	if (!vr)
5274 		return NULL;
5275 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5276 
5277 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5278 					    sizeof(rt->fib6_dst.addr),
5279 					    rt->fib6_dst.plen);
5280 	if (!fib_node)
5281 		return NULL;
5282 
5283 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5284 		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5285 
5286 		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5287 		    rt->fib6_metric == iter_rt->fib6_metric &&
5288 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5289 			return fib6_entry;
5290 	}
5291 
5292 	return NULL;
5293 }
5294 
5295 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5296 					struct mlxsw_sp_fib6_entry *fib6_entry,
5297 					bool replace)
5298 {
5299 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5300 	struct mlxsw_sp_fib6_entry *replaced;
5301 
5302 	if (!replace)
5303 		return;
5304 
5305 	replaced = list_next_entry(fib6_entry, common.list);
5306 
5307 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5308 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5309 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5310 }
5311 
5312 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5313 				    struct fib6_info *rt, bool replace,
5314 				    bool append)
5315 {
5316 	struct mlxsw_sp_fib6_entry *fib6_entry;
5317 	struct mlxsw_sp_fib_node *fib_node;
5318 	int err;
5319 
5320 	if (mlxsw_sp->router->aborted)
5321 		return 0;
5322 
5323 	if (rt->fib6_src.plen)
5324 		return -EINVAL;
5325 
5326 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5327 		return 0;
5328 
5329 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5330 					 &rt->fib6_dst.addr,
5331 					 sizeof(rt->fib6_dst.addr),
5332 					 rt->fib6_dst.plen,
5333 					 MLXSW_SP_L3_PROTO_IPV6);
5334 	if (IS_ERR(fib_node))
5335 		return PTR_ERR(fib_node);
5336 
5337 	/* Before creating a new entry, try to append route to an existing
5338 	 * multipath entry.
5339 	 */
5340 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, append);
5341 	if (fib6_entry) {
5342 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5343 		if (err)
5344 			goto err_fib6_entry_nexthop_add;
5345 		return 0;
5346 	}
5347 
5348 	/* We received an append event, yet did not find any route to
5349 	 * append to.
5350 	 */
5351 	if (WARN_ON(append)) {
5352 		err = -EINVAL;
5353 		goto err_fib6_entry_append;
5354 	}
5355 
5356 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5357 	if (IS_ERR(fib6_entry)) {
5358 		err = PTR_ERR(fib6_entry);
5359 		goto err_fib6_entry_create;
5360 	}
5361 
5362 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5363 	if (err)
5364 		goto err_fib6_node_entry_link;
5365 
5366 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5367 
5368 	return 0;
5369 
5370 err_fib6_node_entry_link:
5371 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5372 err_fib6_entry_create:
5373 err_fib6_entry_append:
5374 err_fib6_entry_nexthop_add:
5375 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5376 	return err;
5377 }
5378 
5379 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5380 				     struct fib6_info *rt)
5381 {
5382 	struct mlxsw_sp_fib6_entry *fib6_entry;
5383 	struct mlxsw_sp_fib_node *fib_node;
5384 
5385 	if (mlxsw_sp->router->aborted)
5386 		return;
5387 
5388 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5389 		return;
5390 
5391 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5392 	if (WARN_ON(!fib6_entry))
5393 		return;
5394 
5395 	/* If route is part of a multipath entry, but not the last one
5396 	 * removed, then only reduce its nexthop group.
5397 	 */
5398 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5399 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5400 		return;
5401 	}
5402 
5403 	fib_node = fib6_entry->common.fib_node;
5404 
5405 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5406 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5407 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5408 }
5409 
5410 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5411 					    enum mlxsw_reg_ralxx_protocol proto,
5412 					    u8 tree_id)
5413 {
5414 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5415 	char ralst_pl[MLXSW_REG_RALST_LEN];
5416 	int i, err;
5417 
5418 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5419 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5420 	if (err)
5421 		return err;
5422 
5423 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5424 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5425 	if (err)
5426 		return err;
5427 
5428 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5429 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5430 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5431 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5432 
5433 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5434 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5435 				      raltb_pl);
5436 		if (err)
5437 			return err;
5438 
5439 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5440 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5441 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5442 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5443 				      ralue_pl);
5444 		if (err)
5445 			return err;
5446 	}
5447 
5448 	return 0;
5449 }
5450 
5451 static struct mlxsw_sp_mr_table *
5452 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5453 {
5454 	if (family == RTNL_FAMILY_IPMR)
5455 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5456 	else
5457 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5458 }
5459 
5460 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5461 				     struct mfc_entry_notifier_info *men_info,
5462 				     bool replace)
5463 {
5464 	struct mlxsw_sp_mr_table *mrt;
5465 	struct mlxsw_sp_vr *vr;
5466 
5467 	if (mlxsw_sp->router->aborted)
5468 		return 0;
5469 
5470 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5471 	if (IS_ERR(vr))
5472 		return PTR_ERR(vr);
5473 
5474 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5475 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5476 }
5477 
5478 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5479 				      struct mfc_entry_notifier_info *men_info)
5480 {
5481 	struct mlxsw_sp_mr_table *mrt;
5482 	struct mlxsw_sp_vr *vr;
5483 
5484 	if (mlxsw_sp->router->aborted)
5485 		return;
5486 
5487 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5488 	if (WARN_ON(!vr))
5489 		return;
5490 
5491 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5492 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5493 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5494 }
5495 
5496 static int
5497 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5498 			      struct vif_entry_notifier_info *ven_info)
5499 {
5500 	struct mlxsw_sp_mr_table *mrt;
5501 	struct mlxsw_sp_rif *rif;
5502 	struct mlxsw_sp_vr *vr;
5503 
5504 	if (mlxsw_sp->router->aborted)
5505 		return 0;
5506 
5507 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5508 	if (IS_ERR(vr))
5509 		return PTR_ERR(vr);
5510 
5511 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5512 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5513 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5514 				   ven_info->vif_index,
5515 				   ven_info->vif_flags, rif);
5516 }
5517 
5518 static void
5519 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5520 			      struct vif_entry_notifier_info *ven_info)
5521 {
5522 	struct mlxsw_sp_mr_table *mrt;
5523 	struct mlxsw_sp_vr *vr;
5524 
5525 	if (mlxsw_sp->router->aborted)
5526 		return;
5527 
5528 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5529 	if (WARN_ON(!vr))
5530 		return;
5531 
5532 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5533 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5534 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5535 }
5536 
5537 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5538 {
5539 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5540 	int err;
5541 
5542 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5543 					       MLXSW_SP_LPM_TREE_MIN);
5544 	if (err)
5545 		return err;
5546 
5547 	/* The multicast router code does not need an abort trap as by default,
5548 	 * packets that don't match any routes are trapped to the CPU.
5549 	 */
5550 
5551 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5552 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5553 						MLXSW_SP_LPM_TREE_MIN + 1);
5554 }
5555 
5556 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5557 				     struct mlxsw_sp_fib_node *fib_node)
5558 {
5559 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5560 
5561 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5562 				 common.list) {
5563 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5564 
5565 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5566 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5567 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5568 		/* Break when entry list is empty and node was freed.
5569 		 * Otherwise, we'll access freed memory in the next
5570 		 * iteration.
5571 		 */
5572 		if (do_break)
5573 			break;
5574 	}
5575 }
5576 
5577 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5578 				     struct mlxsw_sp_fib_node *fib_node)
5579 {
5580 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5581 
5582 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5583 				 common.list) {
5584 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5585 
5586 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5587 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5588 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5589 		if (do_break)
5590 			break;
5591 	}
5592 }
5593 
5594 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5595 				    struct mlxsw_sp_fib_node *fib_node)
5596 {
5597 	switch (fib_node->fib->proto) {
5598 	case MLXSW_SP_L3_PROTO_IPV4:
5599 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5600 		break;
5601 	case MLXSW_SP_L3_PROTO_IPV6:
5602 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5603 		break;
5604 	}
5605 }
5606 
5607 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5608 				  struct mlxsw_sp_vr *vr,
5609 				  enum mlxsw_sp_l3proto proto)
5610 {
5611 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5612 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5613 
5614 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5615 		bool do_break = &tmp->list == &fib->node_list;
5616 
5617 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5618 		if (do_break)
5619 			break;
5620 	}
5621 }
5622 
5623 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5624 {
5625 	int i, j;
5626 
5627 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5628 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5629 
5630 		if (!mlxsw_sp_vr_is_used(vr))
5631 			continue;
5632 
5633 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5634 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5635 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5636 
5637 		/* If virtual router was only used for IPv4, then it's no
5638 		 * longer used.
5639 		 */
5640 		if (!mlxsw_sp_vr_is_used(vr))
5641 			continue;
5642 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5643 	}
5644 }
5645 
5646 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5647 {
5648 	int err;
5649 
5650 	if (mlxsw_sp->router->aborted)
5651 		return;
5652 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5653 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5654 	mlxsw_sp->router->aborted = true;
5655 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5656 	if (err)
5657 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5658 }
5659 
/* Deferred FIB notification: one instance is allocated per notifier event
 * and processed later by one of the *_event_work() handlers, which free it.
 */
struct mlxsw_sp_fib_event_work {
	/* Work item; the handlers recover the container from it. */
	struct work_struct work;
	/* Copy of the notifier info; which member is valid depends on the
	 * address family and on @event.
	 */
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	/* Driver instance the event is processed for. */
	struct mlxsw_sp *mlxsw_sp;
	/* FIB_EVENT_* value passed to the notifier. */
	unsigned long event;
};
5673 
5674 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5675 {
5676 	struct mlxsw_sp_fib_event_work *fib_work =
5677 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5678 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5679 	bool replace, append;
5680 	int err;
5681 
5682 	/* Protect internal structures from changes */
5683 	rtnl_lock();
5684 	mlxsw_sp_span_respin(mlxsw_sp);
5685 
5686 	switch (fib_work->event) {
5687 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5688 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5689 	case FIB_EVENT_ENTRY_ADD:
5690 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5691 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5692 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5693 					       replace, append);
5694 		if (err)
5695 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5696 		fib_info_put(fib_work->fen_info.fi);
5697 		break;
5698 	case FIB_EVENT_ENTRY_DEL:
5699 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5700 		fib_info_put(fib_work->fen_info.fi);
5701 		break;
5702 	case FIB_EVENT_RULE_ADD:
5703 		/* if we get here, a rule was added that we do not support.
5704 		 * just do the fib_abort
5705 		 */
5706 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5707 		break;
5708 	case FIB_EVENT_NH_ADD: /* fall through */
5709 	case FIB_EVENT_NH_DEL:
5710 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5711 					fib_work->fnh_info.fib_nh);
5712 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5713 		break;
5714 	}
5715 	rtnl_unlock();
5716 	kfree(fib_work);
5717 }
5718 
5719 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5720 {
5721 	struct mlxsw_sp_fib_event_work *fib_work =
5722 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5723 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5724 	bool replace, append;
5725 	int err;
5726 
5727 	rtnl_lock();
5728 	mlxsw_sp_span_respin(mlxsw_sp);
5729 
5730 	switch (fib_work->event) {
5731 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5732 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5733 	case FIB_EVENT_ENTRY_ADD:
5734 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5735 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5736 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5737 					       fib_work->fen6_info.rt, replace,
5738 					       append);
5739 		if (err)
5740 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5741 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5742 		break;
5743 	case FIB_EVENT_ENTRY_DEL:
5744 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5745 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5746 		break;
5747 	case FIB_EVENT_RULE_ADD:
5748 		/* if we get here, a rule was added that we do not support.
5749 		 * just do the fib_abort
5750 		 */
5751 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5752 		break;
5753 	}
5754 	rtnl_unlock();
5755 	kfree(fib_work);
5756 }
5757 
5758 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5759 {
5760 	struct mlxsw_sp_fib_event_work *fib_work =
5761 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5762 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5763 	bool replace;
5764 	int err;
5765 
5766 	rtnl_lock();
5767 	switch (fib_work->event) {
5768 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5769 	case FIB_EVENT_ENTRY_ADD:
5770 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5771 
5772 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5773 						replace);
5774 		if (err)
5775 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5776 		mr_cache_put(fib_work->men_info.mfc);
5777 		break;
5778 	case FIB_EVENT_ENTRY_DEL:
5779 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5780 		mr_cache_put(fib_work->men_info.mfc);
5781 		break;
5782 	case FIB_EVENT_VIF_ADD:
5783 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5784 						    &fib_work->ven_info);
5785 		if (err)
5786 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5787 		dev_put(fib_work->ven_info.dev);
5788 		break;
5789 	case FIB_EVENT_VIF_DEL:
5790 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5791 					      &fib_work->ven_info);
5792 		dev_put(fib_work->ven_info.dev);
5793 		break;
5794 	case FIB_EVENT_RULE_ADD:
5795 		/* if we get here, a rule was added that we do not support.
5796 		 * just do the fib_abort
5797 		 */
5798 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5799 		break;
5800 	}
5801 	rtnl_unlock();
5802 	kfree(fib_work);
5803 }
5804 
5805 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5806 				       struct fib_notifier_info *info)
5807 {
5808 	struct fib_entry_notifier_info *fen_info;
5809 	struct fib_nh_notifier_info *fnh_info;
5810 
5811 	switch (fib_work->event) {
5812 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5813 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5814 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5815 	case FIB_EVENT_ENTRY_DEL:
5816 		fen_info = container_of(info, struct fib_entry_notifier_info,
5817 					info);
5818 		fib_work->fen_info = *fen_info;
5819 		/* Take reference on fib_info to prevent it from being
5820 		 * freed while work is queued. Release it afterwards.
5821 		 */
5822 		fib_info_hold(fib_work->fen_info.fi);
5823 		break;
5824 	case FIB_EVENT_NH_ADD: /* fall through */
5825 	case FIB_EVENT_NH_DEL:
5826 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5827 					info);
5828 		fib_work->fnh_info = *fnh_info;
5829 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5830 		break;
5831 	}
5832 }
5833 
5834 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5835 				       struct fib_notifier_info *info)
5836 {
5837 	struct fib6_entry_notifier_info *fen6_info;
5838 
5839 	switch (fib_work->event) {
5840 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5841 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5842 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5843 	case FIB_EVENT_ENTRY_DEL:
5844 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5845 					 info);
5846 		fib_work->fen6_info = *fen6_info;
5847 		fib6_info_hold(fib_work->fen6_info.rt);
5848 		break;
5849 	}
5850 }
5851 
5852 static void
5853 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5854 			    struct fib_notifier_info *info)
5855 {
5856 	switch (fib_work->event) {
5857 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5858 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5859 	case FIB_EVENT_ENTRY_DEL:
5860 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5861 		mr_cache_hold(fib_work->men_info.mfc);
5862 		break;
5863 	case FIB_EVENT_VIF_ADD: /* fall through */
5864 	case FIB_EVENT_VIF_DEL:
5865 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5866 		dev_hold(fib_work->ven_info.dev);
5867 		break;
5868 	}
5869 }
5870 
5871 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5872 					  struct fib_notifier_info *info,
5873 					  struct mlxsw_sp *mlxsw_sp)
5874 {
5875 	struct netlink_ext_ack *extack = info->extack;
5876 	struct fib_rule_notifier_info *fr_info;
5877 	struct fib_rule *rule;
5878 	int err = 0;
5879 
5880 	/* nothing to do at the moment */
5881 	if (event == FIB_EVENT_RULE_DEL)
5882 		return 0;
5883 
5884 	if (mlxsw_sp->router->aborted)
5885 		return 0;
5886 
5887 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
5888 	rule = fr_info->rule;
5889 
5890 	switch (info->family) {
5891 	case AF_INET:
5892 		if (!fib4_rule_default(rule) && !rule->l3mdev)
5893 			err = -EOPNOTSUPP;
5894 		break;
5895 	case AF_INET6:
5896 		if (!fib6_rule_default(rule) && !rule->l3mdev)
5897 			err = -EOPNOTSUPP;
5898 		break;
5899 	case RTNL_FAMILY_IPMR:
5900 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
5901 			err = -EOPNOTSUPP;
5902 		break;
5903 	case RTNL_FAMILY_IP6MR:
5904 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5905 			err = -EOPNOTSUPP;
5906 		break;
5907 	}
5908 
5909 	if (err < 0)
5910 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5911 
5912 	return err;
5913 }
5914 
5915 /* Called with rcu_read_lock() */
5916 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5917 				     unsigned long event, void *ptr)
5918 {
5919 	struct mlxsw_sp_fib_event_work *fib_work;
5920 	struct fib_notifier_info *info = ptr;
5921 	struct mlxsw_sp_router *router;
5922 	int err;
5923 
5924 	if (!net_eq(info->net, &init_net) ||
5925 	    (info->family != AF_INET && info->family != AF_INET6 &&
5926 	     info->family != RTNL_FAMILY_IPMR &&
5927 	     info->family != RTNL_FAMILY_IP6MR))
5928 		return NOTIFY_DONE;
5929 
5930 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5931 
5932 	switch (event) {
5933 	case FIB_EVENT_RULE_ADD: /* fall through */
5934 	case FIB_EVENT_RULE_DEL:
5935 		err = mlxsw_sp_router_fib_rule_event(event, info,
5936 						     router->mlxsw_sp);
5937 		if (!err || info->extack)
5938 			return notifier_from_errno(err);
5939 		break;
5940 	case FIB_EVENT_ENTRY_ADD:
5941 		if (router->aborted) {
5942 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
5943 			return notifier_from_errno(-EINVAL);
5944 		}
5945 		break;
5946 	}
5947 
5948 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5949 	if (WARN_ON(!fib_work))
5950 		return NOTIFY_BAD;
5951 
5952 	fib_work->mlxsw_sp = router->mlxsw_sp;
5953 	fib_work->event = event;
5954 
5955 	switch (info->family) {
5956 	case AF_INET:
5957 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5958 		mlxsw_sp_router_fib4_event(fib_work, info);
5959 		break;
5960 	case AF_INET6:
5961 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5962 		mlxsw_sp_router_fib6_event(fib_work, info);
5963 		break;
5964 	case RTNL_FAMILY_IP6MR:
5965 	case RTNL_FAMILY_IPMR:
5966 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
5967 		mlxsw_sp_router_fibmr_event(fib_work, info);
5968 		break;
5969 	}
5970 
5971 	mlxsw_core_schedule_work(&fib_work->work);
5972 
5973 	return NOTIFY_DONE;
5974 }
5975 
5976 struct mlxsw_sp_rif *
5977 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
5978 			 const struct net_device *dev)
5979 {
5980 	int i;
5981 
5982 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
5983 		if (mlxsw_sp->router->rifs[i] &&
5984 		    mlxsw_sp->router->rifs[i]->dev == dev)
5985 			return mlxsw_sp->router->rifs[i];
5986 
5987 	return NULL;
5988 }
5989 
5990 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
5991 {
5992 	char ritr_pl[MLXSW_REG_RITR_LEN];
5993 	int err;
5994 
5995 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
5996 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
5997 	if (WARN_ON_ONCE(err))
5998 		return err;
5999 
6000 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6001 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6002 }
6003 
6004 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6005 					  struct mlxsw_sp_rif *rif)
6006 {
6007 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6008 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6009 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6010 }
6011 
6012 static bool
6013 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6014 			   unsigned long event)
6015 {
6016 	struct inet6_dev *inet6_dev;
6017 	bool addr_list_empty = true;
6018 	struct in_device *idev;
6019 
6020 	switch (event) {
6021 	case NETDEV_UP:
6022 		return rif == NULL;
6023 	case NETDEV_DOWN:
6024 		idev = __in_dev_get_rtnl(dev);
6025 		if (idev && idev->ifa_list)
6026 			addr_list_empty = false;
6027 
6028 		inet6_dev = __in6_dev_get(dev);
6029 		if (addr_list_empty && inet6_dev &&
6030 		    !list_empty(&inet6_dev->addr_list))
6031 			addr_list_empty = false;
6032 
6033 		/* macvlans do not have a RIF, but rather piggy back on the
6034 		 * RIF of their lower device.
6035 		 */
6036 		if (netif_is_macvlan(dev) && addr_list_empty)
6037 			return true;
6038 
6039 		if (rif && addr_list_empty &&
6040 		    !netif_is_l3_slave(rif->dev))
6041 			return true;
6042 		/* It is possible we already removed the RIF ourselves
6043 		 * if it was assigned to a netdev that is now a bridge
6044 		 * or LAG slave.
6045 		 */
6046 		return false;
6047 	}
6048 
6049 	return false;
6050 }
6051 
6052 static enum mlxsw_sp_rif_type
6053 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6054 		      const struct net_device *dev)
6055 {
6056 	enum mlxsw_sp_fid_type type;
6057 
6058 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6059 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6060 
6061 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6062 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6063 		type = MLXSW_SP_FID_TYPE_8021Q;
6064 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6065 		type = MLXSW_SP_FID_TYPE_8021Q;
6066 	else if (netif_is_bridge_master(dev))
6067 		type = MLXSW_SP_FID_TYPE_8021D;
6068 	else
6069 		type = MLXSW_SP_FID_TYPE_RFID;
6070 
6071 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6072 }
6073 
6074 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6075 {
6076 	int i;
6077 
6078 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6079 		if (!mlxsw_sp->router->rifs[i]) {
6080 			*p_rif_index = i;
6081 			return 0;
6082 		}
6083 	}
6084 
6085 	return -ENOBUFS;
6086 }
6087 
6088 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6089 					       u16 vr_id,
6090 					       struct net_device *l3_dev)
6091 {
6092 	struct mlxsw_sp_rif *rif;
6093 
6094 	rif = kzalloc(rif_size, GFP_KERNEL);
6095 	if (!rif)
6096 		return NULL;
6097 
6098 	INIT_LIST_HEAD(&rif->nexthop_list);
6099 	INIT_LIST_HEAD(&rif->neigh_list);
6100 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6101 	rif->mtu = l3_dev->mtu;
6102 	rif->vr_id = vr_id;
6103 	rif->dev = l3_dev;
6104 	rif->rif_index = rif_index;
6105 
6106 	return rif;
6107 }
6108 
6109 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6110 					   u16 rif_index)
6111 {
6112 	return mlxsw_sp->router->rifs[rif_index];
6113 }
6114 
6115 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6116 {
6117 	return rif->rif_index;
6118 }
6119 
6120 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6121 {
6122 	return lb_rif->common.rif_index;
6123 }
6124 
6125 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6126 {
6127 	return lb_rif->ul_vr_id;
6128 }
6129 
6130 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6131 {
6132 	return rif->dev->ifindex;
6133 }
6134 
6135 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6136 {
6137 	return rif->dev;
6138 }
6139 
6140 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6141 {
6142 	return rif->fid;
6143 }
6144 
/* Create and program a RIF for params->dev.
 *
 * The RIF type (and with it the ops table) is derived from the netdev. The
 * function then takes a reference on the virtual router matching the netdev's
 * l3mdev FIB table (RT_TABLE_MAIN when l3mdev_fib_table() yields 0), picks a
 * free RIF index, allocates the type-specific RIF object, optionally obtains
 * a FID and runs the type's setup hook, configures the RIF through
 * ops->configure(), registers it with every multicast routing table of the
 * virtual router and finally publishes it in the router's RIF table.
 *
 * Returns the new RIF, or an ERR_PTR() on failure after undoing every step
 * that already succeeded. @extack receives an error message for some of the
 * failure cases.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int i, err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	/* Account for this RIF in the VR; undone on every error path below. */
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* fid_get is optional; when absent, fid stays NULL. */
	if (ops->fid_get) {
		fid = ops->fid_get(rif, extack);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			/* Jumps past the fid put: no reference was taken. */
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	/* setup is optional as well; it receives the creation parameters. */
	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
		if (err)
			goto err_mr_rif_add;
	}

	mlxsw_sp_rif_counters_alloc(rif);
	/* Publish the RIF only once it is fully set up. */
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	/* Remove the RIF only from the tables it was already added to. */
	for (i--; i >= 0; i--)
		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6223 
6224 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6225 {
6226 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6227 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6228 	struct mlxsw_sp_fid *fid = rif->fid;
6229 	struct mlxsw_sp_vr *vr;
6230 	int i;
6231 
6232 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6233 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6234 
6235 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6236 	mlxsw_sp_rif_counters_free(rif);
6237 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6238 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6239 	ops->deconfigure(rif);
6240 	if (fid)
6241 		/* Loopback RIFs are not associated with a FID. */
6242 		mlxsw_sp_fid_put(fid);
6243 	kfree(rif);
6244 	vr->rif_count--;
6245 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6246 }
6247 
6248 static void
6249 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6250 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6251 {
6252 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6253 
6254 	params->vid = mlxsw_sp_port_vlan->vid;
6255 	params->lag = mlxsw_sp_port->lagged;
6256 	if (params->lag)
6257 		params->lag_id = mlxsw_sp_port->lag_id;
6258 	else
6259 		params->system_port = mlxsw_sp_port->local_port;
6260 }
6261 
/* Attach the {port, VID} described by @mlxsw_sp_port_vlan to the router.
 *
 * Looks up the RIF bound to @l3_dev and, when there is none, creates one
 * using sub-port parameters taken from @mlxsw_sp_port_vlan. It then takes a
 * reference on the RIF's FID, maps {port, VID} to that FID, turns learning
 * off for the VID and puts the VID into the forwarding STP state. On success
 * the FID is recorded in @mlxsw_sp_port_vlan.
 *
 * Returns 0 on success or a negative errno. On failure the steps performed
 * after the RIF lookup/creation are rolled back and the FID reference taken
 * here is dropped.
 */
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
			       struct net_device *l3_dev,
			       struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	u16 vid = mlxsw_sp_port_vlan->vid;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_fid *fid;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (!rif) {
		struct mlxsw_sp_rif_params params = {
			.dev = l3_dev,
		};

		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
	}

	/* FID was already created, just take a reference */
	/* NOTE(review): the fid_get() result is used without an IS_ERR()
	 * check - presumably it cannot fail for an existing FID; confirm.
	 */
	fid = rif->ops->fid_get(rif, extack);
	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
	if (err)
		goto err_fid_port_vid_map;

	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
	if (err)
		goto err_port_vid_learning_set;

	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
					BR_STATE_FORWARDING);
	if (err)
		goto err_port_vid_stp_set;

	mlxsw_sp_port_vlan->fid = fid;

	return 0;

	/* NOTE(review): a RIF created above is not destroyed by the unwinding
	 * below; only the FID reference taken here is dropped - confirm this
	 * is intended.
	 */
err_port_vid_stp_set:
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
	mlxsw_sp_fid_put(fid);
	return err;
}
6313 
6314 void
6315 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6316 {
6317 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6318 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6319 	u16 vid = mlxsw_sp_port_vlan->vid;
6320 
6321 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6322 		return;
6323 
6324 	mlxsw_sp_port_vlan->fid = NULL;
6325 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6326 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6327 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6328 	/* If router port holds the last reference on the rFID, then the
6329 	 * associated Sub-port RIF will be destroyed.
6330 	 */
6331 	mlxsw_sp_fid_put(fid);
6332 }
6333 
6334 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6335 					     struct net_device *port_dev,
6336 					     unsigned long event, u16 vid,
6337 					     struct netlink_ext_ack *extack)
6338 {
6339 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6340 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6341 
6342 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6343 	if (WARN_ON(!mlxsw_sp_port_vlan))
6344 		return -EINVAL;
6345 
6346 	switch (event) {
6347 	case NETDEV_UP:
6348 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6349 						      l3_dev, extack);
6350 	case NETDEV_DOWN:
6351 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6352 		break;
6353 	}
6354 
6355 	return 0;
6356 }
6357 
6358 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6359 					unsigned long event,
6360 					struct netlink_ext_ack *extack)
6361 {
6362 	if (netif_is_bridge_port(port_dev) ||
6363 	    netif_is_lag_port(port_dev) ||
6364 	    netif_is_ovs_port(port_dev))
6365 		return 0;
6366 
6367 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6368 						 extack);
6369 }
6370 
6371 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6372 					 struct net_device *lag_dev,
6373 					 unsigned long event, u16 vid,
6374 					 struct netlink_ext_ack *extack)
6375 {
6376 	struct net_device *port_dev;
6377 	struct list_head *iter;
6378 	int err;
6379 
6380 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6381 		if (mlxsw_sp_port_dev_check(port_dev)) {
6382 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6383 								port_dev,
6384 								event, vid,
6385 								extack);
6386 			if (err)
6387 				return err;
6388 		}
6389 	}
6390 
6391 	return 0;
6392 }
6393 
/* Handle an address event on a LAG netdev. A LAG that is a bridge port is
 * ignored; otherwise the event is applied to the LAG's members using VID 1.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (!netif_is_bridge_port(lag_dev))
		return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
						     1, extack);

	return 0;
}
6404 
6405 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6406 					  unsigned long event,
6407 					  struct netlink_ext_ack *extack)
6408 {
6409 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6410 	struct mlxsw_sp_rif_params params = {
6411 		.dev = l3_dev,
6412 	};
6413 	struct mlxsw_sp_rif *rif;
6414 
6415 	switch (event) {
6416 	case NETDEV_UP:
6417 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6418 		if (IS_ERR(rif))
6419 			return PTR_ERR(rif);
6420 		break;
6421 	case NETDEV_DOWN:
6422 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6423 		mlxsw_sp_rif_destroy(rif);
6424 		break;
6425 	}
6426 
6427 	return 0;
6428 }
6429 
6430 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6431 					unsigned long event,
6432 					struct netlink_ext_ack *extack)
6433 {
6434 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6435 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6436 
6437 	if (netif_is_bridge_port(vlan_dev))
6438 		return 0;
6439 
6440 	if (mlxsw_sp_port_dev_check(real_dev))
6441 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6442 							 event, vid, extack);
6443 	else if (netif_is_lag_master(real_dev))
6444 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6445 						     vid, extack);
6446 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6447 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6448 
6449 	return 0;
6450 }
6451 
6452 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6453 {
6454 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6455 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6456 
6457 	return ether_addr_equal_masked(mac, vrrp4, mask);
6458 }
6459 
6460 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6461 {
6462 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6463 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6464 
6465 	return ether_addr_equal_masked(mac, vrrp6, mask);
6466 }
6467 
6468 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6469 				const u8 *mac, bool adding)
6470 {
6471 	char ritr_pl[MLXSW_REG_RITR_LEN];
6472 	u8 vrrp_id = adding ? mac[5] : 0;
6473 	int err;
6474 
6475 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6476 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6477 		return 0;
6478 
6479 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6480 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6481 	if (err)
6482 		return err;
6483 
6484 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6485 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6486 	else
6487 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6488 
6489 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6490 }
6491 
6492 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6493 				    const struct net_device *macvlan_dev,
6494 				    struct netlink_ext_ack *extack)
6495 {
6496 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6497 	struct mlxsw_sp_rif *rif;
6498 	int err;
6499 
6500 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6501 	if (!rif) {
6502 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6503 		return -EOPNOTSUPP;
6504 	}
6505 
6506 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6507 				  mlxsw_sp_fid_index(rif->fid), true);
6508 	if (err)
6509 		return err;
6510 
6511 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6512 				   macvlan_dev->dev_addr, true);
6513 	if (err)
6514 		goto err_rif_vrrp_add;
6515 
6516 	/* Make sure the bridge driver does not have this MAC pointing at
6517 	 * some other port.
6518 	 */
6519 	if (rif->ops->fdb_del)
6520 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6521 
6522 	return 0;
6523 
6524 err_rif_vrrp_add:
6525 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6526 			    mlxsw_sp_fid_index(rif->fid), false);
6527 	return err;
6528 }
6529 
6530 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6531 			      const struct net_device *macvlan_dev)
6532 {
6533 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6534 	struct mlxsw_sp_rif *rif;
6535 
6536 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6537 	/* If we do not have a RIF, then we already took care of
6538 	 * removing the macvlan's MAC during RIF deletion.
6539 	 */
6540 	if (!rif)
6541 		return;
6542 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6543 			     false);
6544 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6545 			    mlxsw_sp_fid_index(rif->fid), false);
6546 }
6547 
6548 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6549 					   unsigned long event,
6550 					   struct netlink_ext_ack *extack)
6551 {
6552 	struct mlxsw_sp *mlxsw_sp;
6553 
6554 	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6555 	if (!mlxsw_sp)
6556 		return 0;
6557 
6558 	switch (event) {
6559 	case NETDEV_UP:
6560 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6561 	case NETDEV_DOWN:
6562 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6563 		break;
6564 	}
6565 
6566 	return 0;
6567 }
6568 
/* Dispatch an address event to the handler matching the kind of @dev. The
 * checks are made in this order: physical port, LAG master, bridge master,
 * VLAN device, macvlan. Any other netdev is ignored and 0 is returned.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);

	return 0;
}
6586 
6587 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6588 			    unsigned long event, void *ptr)
6589 {
6590 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6591 	struct net_device *dev = ifa->ifa_dev->dev;
6592 	struct mlxsw_sp *mlxsw_sp;
6593 	struct mlxsw_sp_rif *rif;
6594 	int err = 0;
6595 
6596 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6597 	if (event == NETDEV_UP)
6598 		goto out;
6599 
6600 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6601 	if (!mlxsw_sp)
6602 		goto out;
6603 
6604 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6605 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6606 		goto out;
6607 
6608 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6609 out:
6610 	return notifier_from_errno(err);
6611 }
6612 
6613 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6614 				  unsigned long event, void *ptr)
6615 {
6616 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6617 	struct net_device *dev = ivi->ivi_dev->dev;
6618 	struct mlxsw_sp *mlxsw_sp;
6619 	struct mlxsw_sp_rif *rif;
6620 	int err = 0;
6621 
6622 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6623 	if (!mlxsw_sp)
6624 		goto out;
6625 
6626 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6627 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6628 		goto out;
6629 
6630 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6631 out:
6632 	return notifier_from_errno(err);
6633 }
6634 
/* Context handed from the IPv6 address notifier to its deferred work
 * handler.
 */
struct mlxsw_sp_inet6addr_event_work {
	/* Embedded work item; the handler recovers this struct from it. */
	struct work_struct work;
	/* Netdev the address event was reported for. */
	struct net_device *dev;
	/* Notifier event code to process. */
	unsigned long event;
};
6640 
6641 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6642 {
6643 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6644 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6645 	struct net_device *dev = inet6addr_work->dev;
6646 	unsigned long event = inet6addr_work->event;
6647 	struct mlxsw_sp *mlxsw_sp;
6648 	struct mlxsw_sp_rif *rif;
6649 
6650 	rtnl_lock();
6651 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6652 	if (!mlxsw_sp)
6653 		goto out;
6654 
6655 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6656 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6657 		goto out;
6658 
6659 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6660 out:
6661 	rtnl_unlock();
6662 	dev_put(dev);
6663 	kfree(inet6addr_work);
6664 }
6665 
6666 /* Called with rcu_read_lock() */
6667 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6668 			     unsigned long event, void *ptr)
6669 {
6670 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6671 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6672 	struct net_device *dev = if6->idev->dev;
6673 
6674 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6675 	if (event == NETDEV_UP)
6676 		return NOTIFY_DONE;
6677 
6678 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6679 		return NOTIFY_DONE;
6680 
6681 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6682 	if (!inet6addr_work)
6683 		return NOTIFY_BAD;
6684 
6685 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6686 	inet6addr_work->dev = dev;
6687 	inet6addr_work->event = event;
6688 	dev_hold(dev);
6689 	mlxsw_core_schedule_work(&inet6addr_work->work);
6690 
6691 	return NOTIFY_DONE;
6692 }
6693 
6694 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6695 				   unsigned long event, void *ptr)
6696 {
6697 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6698 	struct net_device *dev = i6vi->i6vi_dev->dev;
6699 	struct mlxsw_sp *mlxsw_sp;
6700 	struct mlxsw_sp_rif *rif;
6701 	int err = 0;
6702 
6703 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6704 	if (!mlxsw_sp)
6705 		goto out;
6706 
6707 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6708 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6709 		goto out;
6710 
6711 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6712 out:
6713 	return notifier_from_errno(err);
6714 }
6715 
6716 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6717 			     const char *mac, int mtu)
6718 {
6719 	char ritr_pl[MLXSW_REG_RITR_LEN];
6720 	int err;
6721 
6722 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6723 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6724 	if (err)
6725 		return err;
6726 
6727 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6728 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6729 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6730 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6731 }
6732 
/* Bring the RIF bound to @dev in sync with the netdev's current MAC address
 * and MTU.
 *
 * The FDB operation for the RIF's old MAC is reverted, the RIF is edited in
 * the device with the new MAC/MTU, and the FDB operation is performed for the
 * new MAC. When the MTU differs from the cached one, the multicast routing
 * tables of the RIF's virtual router are told about it. The values cached in
 * the RIF are updated last.
 *
 * Returns 0 when @dev has no mlxsw_sp lower device or no RIF, or on success;
 * otherwise a negative errno after attempting to restore the previous state.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* rif->addr still holds the MAC that is currently programmed. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;
		int i;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
						   rif, dev->mtu);
	}

	/* Update the cached values only after the device accepted them, so
	 * the error paths below can still restore the old ones.
	 */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	/* Put the old MAC and MTU back into the RIF. */
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	/* Redo the FDB operation for the old MAC. */
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6789 
6790 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6791 				  struct net_device *l3_dev,
6792 				  struct netlink_ext_ack *extack)
6793 {
6794 	struct mlxsw_sp_rif *rif;
6795 
6796 	/* If netdev is already associated with a RIF, then we need to
6797 	 * destroy it and create a new one with the new virtual router ID.
6798 	 */
6799 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6800 	if (rif)
6801 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6802 
6803 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6804 }
6805 
6806 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6807 				    struct net_device *l3_dev)
6808 {
6809 	struct mlxsw_sp_rif *rif;
6810 
6811 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6812 	if (!rif)
6813 		return;
6814 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6815 }
6816 
6817 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6818 				 struct netdev_notifier_changeupper_info *info)
6819 {
6820 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6821 	int err = 0;
6822 
6823 	/* We do not create a RIF for a macvlan, but only use it to
6824 	 * direct more MAC addresses to the router.
6825 	 */
6826 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6827 		return 0;
6828 
6829 	switch (event) {
6830 	case NETDEV_PRECHANGEUPPER:
6831 		return 0;
6832 	case NETDEV_CHANGEUPPER:
6833 		if (info->linking) {
6834 			struct netlink_ext_ack *extack;
6835 
6836 			extack = netdev_notifier_info_to_extack(&info->info);
6837 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6838 		} else {
6839 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6840 		}
6841 		break;
6842 	}
6843 
6844 	return err;
6845 }
6846 
6847 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6848 {
6849 	struct mlxsw_sp_rif *rif = data;
6850 
6851 	if (!netif_is_macvlan(dev))
6852 		return 0;
6853 
6854 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6855 				   mlxsw_sp_fid_index(rif->fid), false);
6856 }
6857 
6858 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6859 {
6860 	if (!netif_is_macvlan_port(rif->dev))
6861 		return 0;
6862 
6863 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
6864 	return netdev_walk_all_upper_dev_rcu(rif->dev,
6865 					     __mlxsw_sp_rif_macvlan_flush, rif);
6866 }
6867 
6868 static struct mlxsw_sp_rif_subport *
6869 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6870 {
6871 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6872 }
6873 
6874 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6875 				       const struct mlxsw_sp_rif_params *params)
6876 {
6877 	struct mlxsw_sp_rif_subport *rif_subport;
6878 
6879 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6880 	rif_subport->vid = params->vid;
6881 	rif_subport->lag = params->lag;
6882 	if (params->lag)
6883 		rif_subport->lag_id = params->lag_id;
6884 	else
6885 		rif_subport->system_port = params->system_port;
6886 }
6887 
6888 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6889 {
6890 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6891 	struct mlxsw_sp_rif_subport *rif_subport;
6892 	char ritr_pl[MLXSW_REG_RITR_LEN];
6893 
6894 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6895 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6896 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
6897 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6898 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6899 				  rif_subport->lag ? rif_subport->lag_id :
6900 						     rif_subport->system_port,
6901 				  rif_subport->vid);
6902 
6903 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6904 }
6905 
6906 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6907 {
6908 	int err;
6909 
6910 	err = mlxsw_sp_rif_subport_op(rif, true);
6911 	if (err)
6912 		return err;
6913 
6914 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6915 				  mlxsw_sp_fid_index(rif->fid), true);
6916 	if (err)
6917 		goto err_rif_fdb_op;
6918 
6919 	mlxsw_sp_fid_rif_set(rif->fid, rif);
6920 	return 0;
6921 
6922 err_rif_fdb_op:
6923 	mlxsw_sp_rif_subport_op(rif, false);
6924 	return err;
6925 }
6926 
6927 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6928 {
6929 	struct mlxsw_sp_fid *fid = rif->fid;
6930 
6931 	mlxsw_sp_fid_rif_set(fid, NULL);
6932 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6933 			    mlxsw_sp_fid_index(fid), false);
6934 	mlxsw_sp_rif_macvlan_flush(rif);
6935 	mlxsw_sp_rif_subport_op(rif, false);
6936 }
6937 
6938 static struct mlxsw_sp_fid *
6939 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
6940 			     struct netlink_ext_ack *extack)
6941 {
6942 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6943 }
6944 
/* Operations table for sub-port RIFs. rif_size is the size of the
 * type-specific structure that embeds the common RIF.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6953 
6954 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6955 				    enum mlxsw_reg_ritr_if_type type,
6956 				    u16 vid_fid, bool enable)
6957 {
6958 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6959 	char ritr_pl[MLXSW_REG_RITR_LEN];
6960 
6961 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6962 			    rif->dev->mtu);
6963 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6964 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6965 
6966 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6967 }
6968 
6969 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6970 {
6971 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6972 }
6973 
/* Configure hook for VLAN RIFs.
 *
 * Enables the RIF in the device for the 802.1Q VID of its FID, adds the
 * router port to the FID's multicast and broadcast flooding, performs the FDB
 * operation for the netdev's MAC on the FID and finally binds the RIF to the
 * FID.
 *
 * Returns 0 on success or a negative errno after undoing, in reverse order,
 * the steps that already succeeded.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

	/* Each label undoes the step preceding the one that failed. */
err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
7012 
7013 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7014 {
7015 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7016 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7017 	struct mlxsw_sp_fid *fid = rif->fid;
7018 
7019 	mlxsw_sp_fid_rif_set(fid, NULL);
7020 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7021 			    mlxsw_sp_fid_index(fid), false);
7022 	mlxsw_sp_rif_macvlan_flush(rif);
7023 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7024 			       mlxsw_sp_router_port(mlxsw_sp), false);
7025 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7026 			       mlxsw_sp_router_port(mlxsw_sp), false);
7027 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7028 }
7029 
7030 static struct mlxsw_sp_fid *
7031 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7032 			  struct netlink_ext_ack *extack)
7033 {
7034 	u16 vid;
7035 	int err;
7036 
7037 	if (is_vlan_dev(rif->dev)) {
7038 		vid = vlan_dev_vlan_id(rif->dev);
7039 	} else {
7040 		err = br_vlan_get_pvid(rif->dev, &vid);
7041 		if (err < 0 || !vid) {
7042 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7043 			return ERR_PTR(-EINVAL);
7044 		}
7045 	}
7046 
7047 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7048 }
7049 
7050 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7051 {
7052 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7053 	struct switchdev_notifier_fdb_info info;
7054 	struct net_device *br_dev;
7055 	struct net_device *dev;
7056 
7057 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7058 	dev = br_fdb_find_port(br_dev, mac, vid);
7059 	if (!dev)
7060 		return;
7061 
7062 	info.addr = mac;
7063 	info.vid = vid;
7064 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7065 }
7066 
7067 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7068 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7069 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7070 	.configure		= mlxsw_sp_rif_vlan_configure,
7071 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7072 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7073 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7074 };
7075 
7076 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7077 {
7078 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7079 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7080 	int err;
7081 
7082 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7083 				       true);
7084 	if (err)
7085 		return err;
7086 
7087 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7088 				     mlxsw_sp_router_port(mlxsw_sp), true);
7089 	if (err)
7090 		goto err_fid_mc_flood_set;
7091 
7092 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7093 				     mlxsw_sp_router_port(mlxsw_sp), true);
7094 	if (err)
7095 		goto err_fid_bc_flood_set;
7096 
7097 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7098 				  mlxsw_sp_fid_index(rif->fid), true);
7099 	if (err)
7100 		goto err_rif_fdb_op;
7101 
7102 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7103 	return 0;
7104 
7105 err_rif_fdb_op:
7106 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7107 			       mlxsw_sp_router_port(mlxsw_sp), false);
7108 err_fid_bc_flood_set:
7109 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7110 			       mlxsw_sp_router_port(mlxsw_sp), false);
7111 err_fid_mc_flood_set:
7112 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7113 	return err;
7114 }
7115 
7116 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7117 {
7118 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7119 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7120 	struct mlxsw_sp_fid *fid = rif->fid;
7121 
7122 	mlxsw_sp_fid_rif_set(fid, NULL);
7123 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7124 			    mlxsw_sp_fid_index(fid), false);
7125 	mlxsw_sp_rif_macvlan_flush(rif);
7126 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7127 			       mlxsw_sp_router_port(mlxsw_sp), false);
7128 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7129 			       mlxsw_sp_router_port(mlxsw_sp), false);
7130 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7131 }
7132 
7133 static struct mlxsw_sp_fid *
7134 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7135 			 struct netlink_ext_ack *extack)
7136 {
7137 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7138 }
7139 
7140 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7141 {
7142 	struct switchdev_notifier_fdb_info info;
7143 	struct net_device *dev;
7144 
7145 	dev = br_fdb_find_port(rif->dev, mac, 0);
7146 	if (!dev)
7147 		return;
7148 
7149 	info.addr = mac;
7150 	info.vid = 0;
7151 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7152 }
7153 
7154 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7155 	.type			= MLXSW_SP_RIF_TYPE_FID,
7156 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7157 	.configure		= mlxsw_sp_rif_fid_configure,
7158 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7159 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7160 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7161 };
7162 
7163 static struct mlxsw_sp_rif_ipip_lb *
7164 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7165 {
7166 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7167 }
7168 
7169 static void
7170 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7171 			   const struct mlxsw_sp_rif_params *params)
7172 {
7173 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7174 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7175 
7176 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7177 				 common);
7178 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7179 	rif_lb->lb_config = params_lb->lb_config;
7180 }
7181 
7182 static int
7183 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7184 {
7185 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7186 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7187 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7188 	struct mlxsw_sp_vr *ul_vr;
7189 	int err;
7190 
7191 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7192 	if (IS_ERR(ul_vr))
7193 		return PTR_ERR(ul_vr);
7194 
7195 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7196 	if (err)
7197 		goto err_loopback_op;
7198 
7199 	lb_rif->ul_vr_id = ul_vr->id;
7200 	++ul_vr->rif_count;
7201 	return 0;
7202 
7203 err_loopback_op:
7204 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7205 	return err;
7206 }
7207 
7208 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7209 {
7210 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7211 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7212 	struct mlxsw_sp_vr *ul_vr;
7213 
7214 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7215 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7216 
7217 	--ul_vr->rif_count;
7218 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7219 }
7220 
7221 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7222 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7223 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7224 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7225 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
7226 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
7227 };
7228 
7229 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7230 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7231 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
7232 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7233 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
7234 };
7235 
7236 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7237 {
7238 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7239 
7240 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7241 					 sizeof(struct mlxsw_sp_rif *),
7242 					 GFP_KERNEL);
7243 	if (!mlxsw_sp->router->rifs)
7244 		return -ENOMEM;
7245 
7246 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7247 
7248 	return 0;
7249 }
7250 
7251 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7252 {
7253 	int i;
7254 
7255 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7256 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7257 
7258 	kfree(mlxsw_sp->router->rifs);
7259 }
7260 
7261 static int
7262 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7263 {
7264 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7265 
7266 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7267 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7268 }
7269 
7270 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7271 {
7272 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7273 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7274 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7275 }
7276 
7277 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7278 {
7279 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7280 }
7281 
7282 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7283 {
7284 	struct mlxsw_sp_router *router;
7285 
7286 	/* Flush pending FIB notifications and then flush the device's
7287 	 * table before requesting another dump. The FIB notification
7288 	 * block is unregistered, so no need to take RTNL.
7289 	 */
7290 	mlxsw_core_flush_owq();
7291 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7292 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7293 }
7294 
7295 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7296 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7297 {
7298 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7299 }
7300 
7301 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7302 {
7303 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7304 }
7305 
7306 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7307 {
7308 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7309 
7310 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7311 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7312 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7313 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7314 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7315 	if (only_l3)
7316 		return;
7317 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7318 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7319 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7320 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7321 }
7322 
7323 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7324 {
7325 	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7326 
7327 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7328 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7329 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7330 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7331 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7332 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7333 	if (only_l3) {
7334 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7335 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7336 	} else {
7337 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7338 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7339 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7340 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7341 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7342 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7343 	}
7344 }
7345 
7346 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7347 {
7348 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7349 	u32 seed;
7350 
7351 	get_random_bytes(&seed, sizeof(seed));
7352 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7353 	mlxsw_sp_mp4_hash_init(recr2_pl);
7354 	mlxsw_sp_mp6_hash_init(recr2_pl);
7355 
7356 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7357 }
7358 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to program. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	int err = 0;

	return err;
}
7363 #endif
7364 
7365 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7366 {
7367 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7368 	unsigned int i;
7369 
7370 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7371 
7372 	/* HW is determining switch priority based on DSCP-bits, but the
7373 	 * kernel is still doing that based on the ToS. Since there's a
7374 	 * mismatch in bits we need to make sure to translate the right
7375 	 * value ToS would observe, skipping the 2 least-significant ECN bits.
7376 	 */
7377 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7378 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7379 
7380 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7381 }
7382 
7383 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7384 {
7385 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7386 	u64 max_rifs;
7387 	int err;
7388 
7389 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7390 		return -EIO;
7391 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7392 
7393 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7394 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7395 	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7396 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7397 	if (err)
7398 		return err;
7399 	return 0;
7400 }
7401 
7402 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7403 {
7404 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7405 
7406 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7407 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7408 }
7409 
7410 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7411 {
7412 	struct mlxsw_sp_router *router;
7413 	int err;
7414 
7415 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7416 	if (!router)
7417 		return -ENOMEM;
7418 	mlxsw_sp->router = router;
7419 	router->mlxsw_sp = mlxsw_sp;
7420 
7421 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7422 	err = __mlxsw_sp_router_init(mlxsw_sp);
7423 	if (err)
7424 		goto err_router_init;
7425 
7426 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7427 	if (err)
7428 		goto err_rifs_init;
7429 
7430 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7431 	if (err)
7432 		goto err_ipips_init;
7433 
7434 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7435 			      &mlxsw_sp_nexthop_ht_params);
7436 	if (err)
7437 		goto err_nexthop_ht_init;
7438 
7439 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7440 			      &mlxsw_sp_nexthop_group_ht_params);
7441 	if (err)
7442 		goto err_nexthop_group_ht_init;
7443 
7444 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7445 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7446 	if (err)
7447 		goto err_lpm_init;
7448 
7449 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7450 	if (err)
7451 		goto err_mr_init;
7452 
7453 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7454 	if (err)
7455 		goto err_vrs_init;
7456 
7457 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7458 	if (err)
7459 		goto err_neigh_init;
7460 
7461 	mlxsw_sp->router->netevent_nb.notifier_call =
7462 		mlxsw_sp_router_netevent_event;
7463 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7464 	if (err)
7465 		goto err_register_netevent_notifier;
7466 
7467 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7468 	if (err)
7469 		goto err_mp_hash_init;
7470 
7471 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7472 	if (err)
7473 		goto err_dscp_init;
7474 
7475 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7476 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7477 				    mlxsw_sp_router_fib_dump_flush);
7478 	if (err)
7479 		goto err_register_fib_notifier;
7480 
7481 	return 0;
7482 
7483 err_register_fib_notifier:
7484 err_dscp_init:
7485 err_mp_hash_init:
7486 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7487 err_register_netevent_notifier:
7488 	mlxsw_sp_neigh_fini(mlxsw_sp);
7489 err_neigh_init:
7490 	mlxsw_sp_vrs_fini(mlxsw_sp);
7491 err_vrs_init:
7492 	mlxsw_sp_mr_fini(mlxsw_sp);
7493 err_mr_init:
7494 	mlxsw_sp_lpm_fini(mlxsw_sp);
7495 err_lpm_init:
7496 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7497 err_nexthop_group_ht_init:
7498 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7499 err_nexthop_ht_init:
7500 	mlxsw_sp_ipips_fini(mlxsw_sp);
7501 err_ipips_init:
7502 	mlxsw_sp_rifs_fini(mlxsw_sp);
7503 err_rifs_init:
7504 	__mlxsw_sp_router_fini(mlxsw_sp);
7505 err_router_init:
7506 	kfree(mlxsw_sp->router);
7507 	return err;
7508 }
7509 
7510 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7511 {
7512 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7513 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7514 	mlxsw_sp_neigh_fini(mlxsw_sp);
7515 	mlxsw_sp_vrs_fini(mlxsw_sp);
7516 	mlxsw_sp_mr_fini(mlxsw_sp);
7517 	mlxsw_sp_lpm_fini(mlxsw_sp);
7518 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7519 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7520 	mlxsw_sp_ipips_fini(mlxsw_sp);
7521 	mlxsw_sp_rifs_fini(mlxsw_sp);
7522 	__mlxsw_sp_router_fini(mlxsw_sp);
7523 	kfree(mlxsw_sp->router);
7524 }
7525