// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/if_macvlan.h>
#include <linux/refcount.h>
#include <linux/jhash.h>
#include <linux/net_namespace.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>
#include <net/switchdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
#include "spectrum_span.h"

struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	struct notifier_block inetaddr_nb;
	struct notifier_block inet6addr_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
	u32 adj_discard_index;
	bool adj_discard_index_valid;
};

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev; /* NULL for underlay RIF */
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	refcount_t ref_count;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
	u16 ul_rif_id; /* Reserved for Spectrum. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};

static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

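/* Bind a counter from the RIF counter sub-pool to the given RIF: allocate an
 * index, clear the counter, and point the RIF at it via RITR. The counter is
 * marked valid only once all three steps have succeeded.
 */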
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

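/* One bit for each possible prefix length: 0 through 128 for IPv6, which also
 * covers IPv4, hence the extra bit on top of the address width.
 */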
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};

struct mlxsw_sp_nexthop_group;
struct mlxsw_sp_fib_entry;

struct mlxsw_sp_fib_node {
	struct mlxsw_sp_fib_entry *fib_entry;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
	struct mlxsw_sp_rif *ul_rif;
	refcount_t ul_rif_refcnt;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

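/* Program the tree's "left structure": the longest used prefix length is the
 * root bin, and each other used length (zero excepted) points through its
 * left child at the next shorter used length, forming a single chain of bins.
 */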
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				u16 *vr_id)
{
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return -ESRCH;
	*vr_id = vr->id;

	return 0;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

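/* A virtual router is created with the full set of tables it may need: an
 * IPv4 and an IPv6 FIB, plus a multicast routing table per protocol. Creation
 * fails if no unused VR is left or any of the tables cannot be set up.
 */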
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

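/* Rebind every virtual router currently using the per-protocol tree to the
 * new tree, then retire the old tree. On failure, move the routers that were
 * already rebound back to the old tree.
 */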
static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

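/* Return the table in which the tunnel's underlay is resolved: the table of
 * the device the tunnel is bound to, if any, and the main table otherwise.
 */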
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return RT_TABLE_MAIN;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

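/* Set up decap for a FIB entry: allocate one adjacency entry in the KVD
 * linear area to serve as the tunnel index and cross-link the FIB entry with
 * its IPIP entry.
 */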
static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   1, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

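/* Find the local host route (/32 for IPv4) for the given address in the given
 * table and make sure it is of the expected type. IPv6 is currently not
 * supported.
 */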
static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || fib_node->fib_entry->type != type)
		return NULL;

	return fib_node->fib_entry;
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node ||
	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_node->fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* For deciding whether decap should be offloaded, we don't care about
	 * overlay protocol, so ask whether either one is supported.
	 */
	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
			u16 ul_rif_id, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
						 struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif_ipip_lb *lb_rif;
	int err = 0;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry) {
		lb_rif = ipip_entry->ol_lb;
		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
					      lb_rif->ul_rif_id, true);
		if (err)
			goto out;
		lb_rif->common.mtu = ol_dev->mtu;
	}

out:
	return err;
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}

static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload related to an IPIP
 *	entry.
 * @mlxsw_sp: mlxsw_sp.
 * @ipip_entry: IPIP entry.
 * @recreate_loopback: Recreates the associated loopback RIF.
 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 * @extack: extack.
 *
 * This always updates decap; which of the other updates is done is controlled
 * by the flags above.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}

static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (!ipip_entry)
		return 0;

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     bool *demote_this,
				     struct netlink_ext_ack *extack)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;

	/* Moving underlay to a different VRF might cause local address
	 * conflict, and the conflicting tunnels need to be demoted.
	 */
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		*demote_this = true;
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in the argument
 * `except'.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}

static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}

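/* Handle netdevice events on the tunnel (overlay) device itself: offload on
 * register, tear down on unregister, and promote or demote the decap route as
 * the device goes up or down.
 */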
1733 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1734 				     struct net_device *ol_dev,
1735 				     unsigned long event,
1736 				     struct netdev_notifier_info *info)
1737 {
1738 	struct netdev_notifier_changeupper_info *chup;
1739 	struct netlink_ext_ack *extack;
1740 
1741 	switch (event) {
1742 	case NETDEV_REGISTER:
1743 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1744 	case NETDEV_UNREGISTER:
1745 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1746 		return 0;
1747 	case NETDEV_UP:
1748 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1749 		return 0;
1750 	case NETDEV_DOWN:
1751 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1752 		return 0;
1753 	case NETDEV_CHANGEUPPER:
1754 		chup = container_of(info, typeof(*chup), info);
1755 		extack = info->extack;
1756 		if (netif_is_l3_master(chup->upper_dev))
1757 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1758 								    ol_dev,
1759 								    extack);
1760 		return 0;
1761 	case NETDEV_CHANGE:
1762 		extack = info->extack;
1763 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1764 							       ol_dev, extack);
1765 	case NETDEV_CHANGEMTU:
1766 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1767 	}
1768 	return 0;
1769 }
1770 
1771 static int
1772 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1773 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1774 				   struct net_device *ul_dev,
1775 				   bool *demote_this,
1776 				   unsigned long event,
1777 				   struct netdev_notifier_info *info)
1778 {
1779 	struct netdev_notifier_changeupper_info *chup;
1780 	struct netlink_ext_ack *extack;
1781 
1782 	switch (event) {
1783 	case NETDEV_CHANGEUPPER:
1784 		chup = container_of(info, typeof(*chup), info);
1785 		extack = info->extack;
1786 		if (netif_is_l3_master(chup->upper_dev))
1787 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1788 								    ipip_entry,
1789 								    ul_dev,
1790 								    demote_this,
1791 								    extack);
1792 		break;
1793 
1794 	case NETDEV_UP:
1795 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1796 							   ul_dev);
1797 	case NETDEV_DOWN:
1798 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1799 							     ipip_entry,
1800 							     ul_dev);
1801 	}
1802 	return 0;
1803 }
1804 
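/* Deliver an underlay device event to every tunnel whose underlay is @ul_dev.
 * A failed update demotes all tunnels on the underlay device. A tunnel that
 * the handler flagged with demote_this is demoted individually, and the walk
 * continues from its predecessor, which must be looked up anew because the
 * list may have changed in the meantime.
 */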
1805 int
1806 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1807 				 struct net_device *ul_dev,
1808 				 unsigned long event,
1809 				 struct netdev_notifier_info *info)
1810 {
1811 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1812 	int err;
1813 
1814 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1815 								ul_dev,
1816 								ipip_entry))) {
1817 		struct mlxsw_sp_ipip_entry *prev;
1818 		bool demote_this = false;
1819 
1820 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1821 							 ul_dev, &demote_this,
1822 							 event, info);
1823 		if (err) {
1824 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1825 								 ul_dev);
1826 			return err;
1827 		}
1828 
1829 		if (demote_this) {
1830 			if (list_is_first(&ipip_entry->ipip_list_node,
1831 					  &mlxsw_sp->router->ipip_list))
1832 				prev = NULL;
1833 			else
				/* This can't be cached from the previous
				 * iteration, because that entry could be gone
				 * now.
				 */
1837 				prev = list_prev_entry(ipip_entry,
1838 						       ipip_list_node);
1839 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1840 			ipip_entry = prev;
1841 		}
1842 	}
1843 
1844 	return 0;
1845 }
1846 
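/* Promote the local route matching the tunnel's underlay source IP from a
 * trap entry to an NVE decapsulation entry using @tunnel_index, so that
 * matching packets are decapsulated in HW. On failure, the entry is reverted
 * to a trap entry.
 */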
1847 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1848 				      enum mlxsw_sp_l3proto ul_proto,
1849 				      const union mlxsw_sp_l3addr *ul_sip,
1850 				      u32 tunnel_index)
1851 {
1852 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1853 	struct mlxsw_sp_fib_entry *fib_entry;
1854 	int err;
1855 
	/* It is valid to create a tunnel with a local IP and only later
	 * assign this IP address to a local interface.
	 */
1859 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1860 							 ul_proto, ul_sip,
1861 							 type);
1862 	if (!fib_entry)
1863 		return 0;
1864 
1865 	fib_entry->decap.tunnel_index = tunnel_index;
1866 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1867 
1868 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1869 	if (err)
1870 		goto err_fib_entry_update;
1871 
1872 	return 0;
1873 
1874 err_fib_entry_update:
1875 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1876 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1877 	return err;
1878 }
1879 
1880 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1881 				      enum mlxsw_sp_l3proto ul_proto,
1882 				      const union mlxsw_sp_l3addr *ul_sip)
1883 {
1884 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1885 	struct mlxsw_sp_fib_entry *fib_entry;
1886 
1887 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1888 							 ul_proto, ul_sip,
1889 							 type);
1890 	if (!fib_entry)
1891 		return;
1892 
1893 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1894 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1895 }
1896 
1897 struct mlxsw_sp_neigh_key {
1898 	struct neighbour *n;
1899 };
1900 
1901 struct mlxsw_sp_neigh_entry {
1902 	struct list_head rif_list_node;
1903 	struct rhash_head ht_node;
1904 	struct mlxsw_sp_neigh_key key;
1905 	u16 rif;
1906 	bool connected;
1907 	unsigned char ha[ETH_ALEN];
1908 	struct list_head nexthop_list; /* list of nexthops using
1909 					* this neigh entry
1910 					*/
1911 	struct list_head nexthop_neighs_list_node;
1912 	unsigned int counter_index;
1913 	bool counter_valid;
1914 };
1915 
1916 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1917 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1918 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1919 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1920 };
1921 
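/* Iterate over the neighbour entries of a RIF: return the first entry when
 * @neigh_entry is NULL, the next entry otherwise, and NULL once the list is
 * exhausted.
 */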
1922 struct mlxsw_sp_neigh_entry *
1923 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1924 			struct mlxsw_sp_neigh_entry *neigh_entry)
1925 {
1926 	if (!neigh_entry) {
1927 		if (list_empty(&rif->neigh_list))
1928 			return NULL;
1929 		else
1930 			return list_first_entry(&rif->neigh_list,
1931 						typeof(*neigh_entry),
1932 						rif_list_node);
1933 	}
1934 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1935 		return NULL;
1936 	return list_next_entry(neigh_entry, rif_list_node);
1937 }
1938 
1939 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1940 {
1941 	return neigh_entry->key.n->tbl->family;
1942 }
1943 
1944 unsigned char *
1945 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1946 {
1947 	return neigh_entry->ha;
1948 }
1949 
1950 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1951 {
1952 	struct neighbour *n;
1953 
1954 	n = neigh_entry->key.n;
1955 	return ntohl(*((__be32 *) n->primary_key));
1956 }
1957 
1958 struct in6_addr *
1959 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1960 {
1961 	struct neighbour *n;
1962 
1963 	n = neigh_entry->key.n;
1964 	return (struct in6_addr *) &n->primary_key;
1965 }
1966 
1967 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1968 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1969 			       u64 *p_counter)
1970 {
1971 	if (!neigh_entry->counter_valid)
1972 		return -EINVAL;
1973 
1974 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1975 					 p_counter, NULL);
1976 }
1977 
1978 static struct mlxsw_sp_neigh_entry *
1979 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1980 			   u16 rif)
1981 {
1982 	struct mlxsw_sp_neigh_entry *neigh_entry;
1983 
1984 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1985 	if (!neigh_entry)
1986 		return NULL;
1987 
1988 	neigh_entry->key.n = n;
1989 	neigh_entry->rif = rif;
1990 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1991 
1992 	return neigh_entry;
1993 }
1994 
1995 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1996 {
1997 	kfree(neigh_entry);
1998 }
1999 
2000 static int
2001 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2002 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2003 {
2004 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2005 				      &neigh_entry->ht_node,
2006 				      mlxsw_sp_neigh_ht_params);
2007 }
2008 
2009 static void
2010 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2011 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2012 {
2013 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2014 			       &neigh_entry->ht_node,
2015 			       mlxsw_sp_neigh_ht_params);
2016 }
2017 
2018 static bool
2019 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2020 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2021 {
2022 	struct devlink *devlink;
2023 	const char *table_name;
2024 
2025 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2026 	case AF_INET:
2027 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2028 		break;
2029 	case AF_INET6:
2030 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2031 		break;
2032 	default:
2033 		WARN_ON(1);
2034 		return false;
2035 	}
2036 
2037 	devlink = priv_to_devlink(mlxsw_sp->core);
2038 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2039 }
2040 
2041 static void
2042 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2043 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2044 {
2045 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2046 		return;
2047 
2048 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2049 		return;
2050 
2051 	neigh_entry->counter_valid = true;
2052 }
2053 
2054 static void
2055 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2056 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2057 {
2058 	if (!neigh_entry->counter_valid)
2059 		return;
2060 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2061 				   neigh_entry->counter_index);
2062 	neigh_entry->counter_valid = false;
2063 }
2064 
2065 static struct mlxsw_sp_neigh_entry *
2066 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2067 {
2068 	struct mlxsw_sp_neigh_entry *neigh_entry;
2069 	struct mlxsw_sp_rif *rif;
2070 	int err;
2071 
2072 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2073 	if (!rif)
2074 		return ERR_PTR(-EINVAL);
2075 
2076 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2077 	if (!neigh_entry)
2078 		return ERR_PTR(-ENOMEM);
2079 
2080 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2081 	if (err)
2082 		goto err_neigh_entry_insert;
2083 
2084 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2085 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2086 
2087 	return neigh_entry;
2088 
2089 err_neigh_entry_insert:
2090 	mlxsw_sp_neigh_entry_free(neigh_entry);
2091 	return ERR_PTR(err);
2092 }
2093 
2094 static void
2095 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2096 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2097 {
2098 	list_del(&neigh_entry->rif_list_node);
2099 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2100 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2101 	mlxsw_sp_neigh_entry_free(neigh_entry);
2102 }
2103 
2104 static struct mlxsw_sp_neigh_entry *
2105 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2106 {
2107 	struct mlxsw_sp_neigh_key key;
2108 
2109 	key.n = n;
2110 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2111 				      &key, mlxsw_sp_neigh_ht_params);
2112 }
2113 
2114 static void
2115 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2116 {
2117 	unsigned long interval;
2118 
2119 #if IS_ENABLED(CONFIG_IPV6)
2120 	interval = min_t(unsigned long,
2121 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2122 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2123 #else
2124 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2125 #endif
2126 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2127 }
2128 
2129 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2130 						   char *rauhtd_pl,
2131 						   int ent_index)
2132 {
2133 	struct net_device *dev;
2134 	struct neighbour *n;
2135 	__be32 dipn;
2136 	u32 dip;
2137 	u16 rif;
2138 
2139 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2140 
2141 	if (!mlxsw_sp->router->rifs[rif]) {
2142 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2143 		return;
2144 	}
2145 
2146 	dipn = htonl(dip);
2147 	dev = mlxsw_sp->router->rifs[rif]->dev;
2148 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2149 	if (!n)
2150 		return;
2151 
2152 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2153 	neigh_event_send(n, NULL);
2154 	neigh_release(n);
2155 }
2156 
2157 #if IS_ENABLED(CONFIG_IPV6)
2158 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2159 						   char *rauhtd_pl,
2160 						   int rec_index)
2161 {
2162 	struct net_device *dev;
2163 	struct neighbour *n;
2164 	struct in6_addr dip;
2165 	u16 rif;
2166 
2167 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2168 					 (char *) &dip);
2169 
2170 	if (!mlxsw_sp->router->rifs[rif]) {
2171 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2172 		return;
2173 	}
2174 
2175 	dev = mlxsw_sp->router->rifs[rif]->dev;
2176 	n = neigh_lookup(&nd_tbl, &dip, dev);
2177 	if (!n)
2178 		return;
2179 
2180 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2181 	neigh_event_send(n, NULL);
2182 	neigh_release(n);
2183 }
2184 #else
2185 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2186 						   char *rauhtd_pl,
2187 						   int rec_index)
2188 {
2189 }
2190 #endif
2191 
2192 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2193 						   char *rauhtd_pl,
2194 						   int rec_index)
2195 {
2196 	u8 num_entries;
2197 	int i;
2198 
2199 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2200 								rec_index);
2201 	/* Hardware starts counting at 0, so add 1. */
2202 	num_entries++;
2203 
2204 	/* Each record consists of several neighbour entries. */
2205 	for (i = 0; i < num_entries; i++) {
2206 		int ent_index;
2207 
2208 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2209 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2210 						       ent_index);
	}
}
2214 
2215 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2216 						   char *rauhtd_pl,
2217 						   int rec_index)
2218 {
2219 	/* One record contains one entry. */
2220 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2221 					       rec_index);
2222 }
2223 
2224 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2225 					      char *rauhtd_pl, int rec_index)
2226 {
2227 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2228 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2229 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2230 						       rec_index);
2231 		break;
2232 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2233 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2234 						       rec_index);
2235 		break;
2236 	}
2237 }
2238 
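/* The dump is considered full - and another iteration is needed - when the
 * maximum number of records was returned and the last record is itself full.
 * An IPv6 record always holds a single entry, whereas an IPv4 record is only
 * full when all of its entry slots are used.
 */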
2239 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2240 {
2241 	u8 num_rec, last_rec_index, num_entries;
2242 
2243 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2244 	last_rec_index = num_rec - 1;
2245 
2246 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2247 		return false;
2248 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2249 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2250 		return true;
2251 
2252 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2253 								last_rec_index);
2254 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2255 		return true;
2256 	return false;
2257 }
2258 
2259 static int
2260 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2261 				       char *rauhtd_pl,
2262 				       enum mlxsw_reg_rauhtd_type type)
2263 {
2264 	int i, num_rec;
2265 	int err;
2266 
2267 	/* Make sure the neighbour's netdev isn't removed in the
2268 	 * process.
2269 	 */
2270 	rtnl_lock();
2271 	do {
2272 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2273 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2274 				      rauhtd_pl);
2275 		if (err) {
2276 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2277 			break;
2278 		}
2279 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2280 		for (i = 0; i < num_rec; i++)
2281 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2282 							  i);
2283 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2284 	rtnl_unlock();
2285 
2286 	return err;
2287 }
2288 
2289 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2290 {
2291 	enum mlxsw_reg_rauhtd_type type;
2292 	char *rauhtd_pl;
2293 	int err;
2294 
2295 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2296 	if (!rauhtd_pl)
2297 		return -ENOMEM;
2298 
2299 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2300 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2301 	if (err)
2302 		goto out;
2303 
2304 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2305 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2306 out:
2307 	kfree(rauhtd_pl);
2308 	return err;
2309 }
2310 
2311 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2312 {
2313 	struct mlxsw_sp_neigh_entry *neigh_entry;
2314 
	/* Take the RTNL mutex here to prevent the lists from changing. */
2316 	rtnl_lock();
2317 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2318 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
2322 		neigh_event_send(neigh_entry->key.n, NULL);
2323 	rtnl_unlock();
2324 }
2325 
2326 static void
2327 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2328 {
2329 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2330 
2331 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2332 			       msecs_to_jiffies(interval));
2333 }
2334 
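/* Periodic work that keeps the kernel in sync with neighbour activity in the
 * device: dump the active neighbours from HW, make sure neighbours used by
 * nexthops are kept alive and re-arm the work according to the configured
 * interval.
 */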
2335 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2336 {
2337 	struct mlxsw_sp_router *router;
2338 	int err;
2339 
2340 	router = container_of(work, struct mlxsw_sp_router,
2341 			      neighs_update.dw.work);
2342 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2343 	if (err)
2344 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2345 
2346 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2347 
2348 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2349 }
2350 
2351 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2352 {
2353 	struct mlxsw_sp_neigh_entry *neigh_entry;
2354 	struct mlxsw_sp_router *router;
2355 
2356 	router = container_of(work, struct mlxsw_sp_router,
2357 			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those that are unresolved and
	 * send ARP probes for them. This solves the chicken-and-egg problem
	 * where a nexthop is not offloaded until its neighbour is resolved,
	 * but the neighbour is never resolved if traffic flows in HW via a
	 * different nexthop.
	 *
	 * Take the RTNL mutex here to prevent the lists from changing.
	 */
2366 	rtnl_lock();
2367 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2368 			    nexthop_neighs_list_node)
2369 		if (!neigh_entry->connected)
2370 			neigh_event_send(neigh_entry->key.n, NULL);
2371 	rtnl_unlock();
2372 
2373 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2374 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2375 }
2376 
2377 static void
2378 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2379 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2380 			      bool removing, bool dead);
2381 
2382 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2383 {
2384 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2385 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2386 }
2387 
2388 static int
2389 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2390 				struct mlxsw_sp_neigh_entry *neigh_entry,
2391 				enum mlxsw_reg_rauht_op op)
2392 {
2393 	struct neighbour *n = neigh_entry->key.n;
2394 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2395 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2396 
2397 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2398 			      dip);
2399 	if (neigh_entry->counter_valid)
2400 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2401 					     neigh_entry->counter_index);
2402 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2403 }
2404 
2405 static int
2406 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2407 				struct mlxsw_sp_neigh_entry *neigh_entry,
2408 				enum mlxsw_reg_rauht_op op)
2409 {
2410 	struct neighbour *n = neigh_entry->key.n;
2411 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2412 	const char *dip = n->primary_key;
2413 
2414 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2415 			      dip);
2416 	if (neigh_entry->counter_valid)
2417 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2418 					     neigh_entry->counter_index);
2419 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2420 }
2421 
2422 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2423 {
2424 	struct neighbour *n = neigh_entry->key.n;
2425 
2426 	/* Packets with a link-local destination address are trapped
2427 	 * after LPM lookup and never reach the neighbour table, so
2428 	 * there is no need to program such neighbours to the device.
2429 	 */
2430 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2431 	    IPV6_ADDR_LINKLOCAL)
2432 		return true;
2433 	return false;
2434 }
2435 
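/* Program the neighbour entry to the device when @adding is set, remove it
 * otherwise, and reflect the result to the kernel via the NTF_OFFLOADED
 * flag.
 */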
2436 static void
2437 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2438 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2439 			    bool adding)
2440 {
2441 	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2442 	int err;
2443 
2444 	if (!adding && !neigh_entry->connected)
2445 		return;
2446 	neigh_entry->connected = adding;
2447 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2448 		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2449 						      op);
2450 		if (err)
2451 			return;
2452 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2453 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2454 			return;
2455 		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2456 						      op);
2457 		if (err)
2458 			return;
2459 	} else {
2460 		WARN_ON_ONCE(1);
2461 		return;
2462 	}
2463 
2464 	if (adding)
2465 		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2466 	else
2467 		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2468 }
2469 
2470 void
2471 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2472 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2473 				    bool adding)
2474 {
2475 	if (adding)
2476 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2477 	else
2478 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2479 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2480 }
2481 
2482 struct mlxsw_sp_netevent_work {
2483 	struct work_struct work;
2484 	struct mlxsw_sp *mlxsw_sp;
2485 	struct neighbour *n;
2486 };
2487 
2488 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2489 {
2490 	struct mlxsw_sp_netevent_work *net_work =
2491 		container_of(work, struct mlxsw_sp_netevent_work, work);
2492 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2493 	struct mlxsw_sp_neigh_entry *neigh_entry;
2494 	struct neighbour *n = net_work->n;
2495 	unsigned char ha[ETH_ALEN];
2496 	bool entry_connected;
2497 	u8 nud_state, dead;
2498 
2499 	/* If these parameters are changed after we release the lock,
2500 	 * then we are guaranteed to receive another event letting us
2501 	 * know about it.
2502 	 */
2503 	read_lock_bh(&n->lock);
2504 	memcpy(ha, n->ha, ETH_ALEN);
2505 	nud_state = n->nud_state;
2506 	dead = n->dead;
2507 	read_unlock_bh(&n->lock);
2508 
2509 	rtnl_lock();
2510 	mlxsw_sp_span_respin(mlxsw_sp);
2511 
2512 	entry_connected = nud_state & NUD_VALID && !dead;
2513 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2514 	if (!entry_connected && !neigh_entry)
2515 		goto out;
2516 	if (!neigh_entry) {
2517 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2518 		if (IS_ERR(neigh_entry))
2519 			goto out;
2520 	}
2521 
2522 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2523 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2524 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2525 				      dead);
2526 
2527 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2528 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2529 
2530 out:
2531 	rtnl_unlock();
2532 	neigh_release(n);
2533 	kfree(net_work);
2534 }
2535 
2536 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2537 
2538 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2539 {
2540 	struct mlxsw_sp_netevent_work *net_work =
2541 		container_of(work, struct mlxsw_sp_netevent_work, work);
2542 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2543 
2544 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2545 	kfree(net_work);
2546 }
2547 
2548 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2549 
2550 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2551 {
2552 	struct mlxsw_sp_netevent_work *net_work =
2553 		container_of(work, struct mlxsw_sp_netevent_work, work);
2554 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2555 
2556 	__mlxsw_sp_router_init(mlxsw_sp);
2557 	kfree(net_work);
2558 }
2559 
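/* Netevent notifiers run in atomic context, so defer the actual processing
 * to process context using a work item.
 */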
2560 static int mlxsw_sp_router_schedule_work(struct net *net,
2561 					 struct notifier_block *nb,
2562 					 void (*cb)(struct work_struct *))
2563 {
2564 	struct mlxsw_sp_netevent_work *net_work;
2565 	struct mlxsw_sp_router *router;
2566 
2567 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2568 	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2569 		return NOTIFY_DONE;
2570 
2571 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2572 	if (!net_work)
2573 		return NOTIFY_BAD;
2574 
2575 	INIT_WORK(&net_work->work, cb);
2576 	net_work->mlxsw_sp = router->mlxsw_sp;
2577 	mlxsw_core_schedule_work(&net_work->work);
2578 	return NOTIFY_DONE;
2579 }
2580 
2581 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2582 					  unsigned long event, void *ptr)
2583 {
2584 	struct mlxsw_sp_netevent_work *net_work;
2585 	struct mlxsw_sp_port *mlxsw_sp_port;
2586 	struct mlxsw_sp *mlxsw_sp;
2587 	unsigned long interval;
2588 	struct neigh_parms *p;
2589 	struct neighbour *n;
2590 
2591 	switch (event) {
2592 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2593 		p = ptr;
2594 
2595 		/* We don't care about changes in the default table. */
2596 		if (!p->dev || (p->tbl->family != AF_INET &&
2597 				p->tbl->family != AF_INET6))
2598 			return NOTIFY_DONE;
2599 
		/* We are in atomic context and can't take the RTNL mutex,
		 * so use the RCU variant to walk the device chain.
		 */
2603 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2604 		if (!mlxsw_sp_port)
2605 			return NOTIFY_DONE;
2606 
2607 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2608 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2609 		mlxsw_sp->router->neighs_update.interval = interval;
2610 
2611 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2612 		break;
2613 	case NETEVENT_NEIGH_UPDATE:
2614 		n = ptr;
2615 
2616 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2617 			return NOTIFY_DONE;
2618 
2619 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2620 		if (!mlxsw_sp_port)
2621 			return NOTIFY_DONE;
2622 
2623 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2624 		if (!net_work) {
2625 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2626 			return NOTIFY_BAD;
2627 		}
2628 
2629 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2630 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2631 		net_work->n = n;
2632 
		/* Take a reference to ensure the neighbour won't be
		 * destroyed until we drop the reference in the delayed
		 * work.
		 */
2637 		neigh_clone(n);
2638 		mlxsw_core_schedule_work(&net_work->work);
2639 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2640 		break;
2641 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2642 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2643 		return mlxsw_sp_router_schedule_work(ptr, nb,
2644 				mlxsw_sp_router_mp_hash_event_work);
2645 
2646 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2647 		return mlxsw_sp_router_schedule_work(ptr, nb,
2648 				mlxsw_sp_router_update_priority_work);
2649 	}
2650 
2651 	return NOTIFY_DONE;
2652 }
2653 
2654 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2655 {
2656 	int err;
2657 
2658 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2659 			      &mlxsw_sp_neigh_ht_params);
2660 	if (err)
2661 		return err;
2662 
2663 	/* Initialize the polling interval according to the default
2664 	 * table.
2665 	 */
2666 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2667 
	/* Create the delayed works for the neighbour activity update and for
	 * probing unresolved nexthops.
	 */
2669 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2670 			  mlxsw_sp_router_neighs_update_work);
2671 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2672 			  mlxsw_sp_router_probe_unresolved_nexthops);
2673 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2674 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2675 	return 0;
2676 }
2677 
2678 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2679 {
2680 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2681 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2682 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2683 }
2684 
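/* Called when a RIF is gone: unprogram all the neighbour entries using it
 * from the device and destroy them.
 */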
2685 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2686 					 struct mlxsw_sp_rif *rif)
2687 {
2688 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2689 
2690 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2691 				 rif_list_node) {
2692 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2693 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2694 	}
2695 }
2696 
2697 enum mlxsw_sp_nexthop_type {
2698 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2699 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2700 };
2701 
2702 struct mlxsw_sp_nexthop_key {
2703 	struct fib_nh *fib_nh;
2704 };
2705 
2706 struct mlxsw_sp_nexthop {
2707 	struct list_head neigh_list_node; /* member of neigh entry list */
2708 	struct list_head rif_list_node;
2709 	struct list_head router_list_node;
2710 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2711 						* this belongs to
2712 						*/
2713 	struct rhash_head ht_node;
2714 	struct mlxsw_sp_nexthop_key key;
2715 	unsigned char gw_addr[sizeof(struct in6_addr)];
2716 	int ifindex;
2717 	int nh_weight;
2718 	int norm_nh_weight;
2719 	int num_adj_entries;
2720 	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put into the KVD linear area of this
			      * group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * the KVD linear area of this group.
			 */
	   update:1; /* set indicates that the MAC of this neigh should be
		      * updated in HW
		      */
2730 	enum mlxsw_sp_nexthop_type type;
2731 	union {
2732 		struct mlxsw_sp_neigh_entry *neigh_entry;
2733 		struct mlxsw_sp_ipip_entry *ipip_entry;
2734 	};
2735 	unsigned int counter_index;
2736 	bool counter_valid;
2737 };
2738 
2739 struct mlxsw_sp_nexthop_group {
2740 	void *priv;
2741 	struct rhash_head ht_node;
2742 	struct list_head fib_list; /* list of fib entries that use this group */
2743 	struct neigh_table *neigh_tbl;
2744 	u8 adj_index_valid:1,
2745 	   gateway:1; /* routes using the group use a gateway */
2746 	u32 adj_index;
2747 	u16 ecmp_size;
2748 	u16 count;
2749 	int sum_norm_weight;
2750 	struct mlxsw_sp_nexthop nexthops[0];
2751 #define nh_rif	nexthops[0].rif
2752 };
2753 
2754 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2755 				    struct mlxsw_sp_nexthop *nh)
2756 {
2757 	struct devlink *devlink;
2758 
2759 	devlink = priv_to_devlink(mlxsw_sp->core);
2760 	if (!devlink_dpipe_table_counter_enabled(devlink,
2761 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2762 		return;
2763 
2764 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2765 		return;
2766 
2767 	nh->counter_valid = true;
2768 }
2769 
2770 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2771 				   struct mlxsw_sp_nexthop *nh)
2772 {
2773 	if (!nh->counter_valid)
2774 		return;
2775 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2776 	nh->counter_valid = false;
2777 }
2778 
2779 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2780 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2781 {
2782 	if (!nh->counter_valid)
2783 		return -EINVAL;
2784 
2785 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2786 					 p_counter, NULL);
2787 }
2788 
2789 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2790 					       struct mlxsw_sp_nexthop *nh)
2791 {
2792 	if (!nh) {
2793 		if (list_empty(&router->nexthop_list))
2794 			return NULL;
2795 		else
2796 			return list_first_entry(&router->nexthop_list,
2797 						typeof(*nh), router_list_node);
2798 	}
2799 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2800 		return NULL;
2801 	return list_next_entry(nh, router_list_node);
2802 }
2803 
2804 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2805 {
2806 	return nh->offloaded;
2807 }
2808 
2809 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2810 {
2811 	if (!nh->offloaded)
2812 		return NULL;
2813 	return nh->neigh_entry->ha;
2814 }
2815 
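/* Report where the nexthop is programmed in the adjacency table: the group's
 * base adjacency index, the group size and the nexthop's offset within the
 * group, computed by summing the adjacency entries of the offloaded nexthops
 * preceding it.
 */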
2816 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2817 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2818 {
2819 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2820 	u32 adj_hash_index = 0;
2821 	int i;
2822 
2823 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2824 		return -EINVAL;
2825 
2826 	*p_adj_index = nh_grp->adj_index;
2827 	*p_adj_size = nh_grp->ecmp_size;
2828 
2829 	for (i = 0; i < nh_grp->count; i++) {
2830 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2831 
2832 		if (nh_iter == nh)
2833 			break;
2834 		if (nh_iter->offloaded)
2835 			adj_hash_index += nh_iter->num_adj_entries;
2836 	}
2837 
2838 	*p_adj_hash_index = adj_hash_index;
2839 	return 0;
2840 }
2841 
2842 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2843 {
2844 	return nh->rif;
2845 }
2846 
2847 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2848 {
2849 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2850 	int i;
2851 
2852 	for (i = 0; i < nh_grp->count; i++) {
2853 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2854 
2855 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2856 			return true;
2857 	}
2858 	return false;
2859 }
2860 
2861 static struct fib_info *
2862 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2863 {
2864 	return nh_grp->priv;
2865 }
2866 
2867 struct mlxsw_sp_nexthop_group_cmp_arg {
2868 	enum mlxsw_sp_l3proto proto;
2869 	union {
2870 		struct fib_info *fi;
2871 		struct mlxsw_sp_fib6_entry *fib6_entry;
2872 	};
2873 };
2874 
2875 static bool
2876 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2877 				    const struct in6_addr *gw, int ifindex,
2878 				    int weight)
2879 {
2880 	int i;
2881 
2882 	for (i = 0; i < nh_grp->count; i++) {
2883 		const struct mlxsw_sp_nexthop *nh;
2884 
2885 		nh = &nh_grp->nexthops[i];
2886 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2887 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2888 			return true;
2889 	}
2890 
2891 	return false;
2892 }
2893 
2894 static bool
2895 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2896 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2897 {
2898 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2899 
2900 	if (nh_grp->count != fib6_entry->nrt6)
2901 		return false;
2902 
2903 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2904 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
2905 		struct in6_addr *gw;
2906 		int ifindex, weight;
2907 
2908 		ifindex = fib6_nh->fib_nh_dev->ifindex;
2909 		weight = fib6_nh->fib_nh_weight;
2910 		gw = &fib6_nh->fib_nh_gw6;
2911 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2912 							 weight))
2913 			return false;
2914 	}
2915 
2916 	return true;
2917 }
2918 
2919 static int
2920 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2921 {
2922 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2923 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2924 
2925 	switch (cmp_arg->proto) {
2926 	case MLXSW_SP_L3_PROTO_IPV4:
2927 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2928 	case MLXSW_SP_L3_PROTO_IPV6:
2929 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2930 						    cmp_arg->fib6_entry);
2931 	default:
2932 		WARN_ON(1);
2933 		return 1;
2934 	}
2935 }
2936 
2937 static int
2938 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2939 {
2940 	return nh_grp->neigh_tbl->family;
2941 }
2942 
2943 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2944 {
2945 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2946 	const struct mlxsw_sp_nexthop *nh;
2947 	struct fib_info *fi;
2948 	unsigned int val;
2949 	int i;
2950 
2951 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2952 	case AF_INET:
2953 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2954 		return jhash(&fi, sizeof(fi), seed);
2955 	case AF_INET6:
2956 		val = nh_grp->count;
2957 		for (i = 0; i < nh_grp->count; i++) {
2958 			nh = &nh_grp->nexthops[i];
2959 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
2960 		}
2961 		return jhash(&val, sizeof(val), seed);
2962 	default:
2963 		WARN_ON(1);
2964 		return 0;
2965 	}
2966 }
2967 
2968 static u32
2969 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2970 {
2971 	unsigned int val = fib6_entry->nrt6;
2972 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2973 	struct net_device *dev;
2974 
2975 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2976 		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
2977 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
2978 	}
2979 
2980 	return jhash(&val, sizeof(val), seed);
2981 }
2982 
2983 static u32
2984 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2985 {
2986 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2987 
2988 	switch (cmp_arg->proto) {
2989 	case MLXSW_SP_L3_PROTO_IPV4:
2990 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2991 	case MLXSW_SP_L3_PROTO_IPV6:
2992 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2993 	default:
2994 		WARN_ON(1);
2995 		return 0;
2996 	}
2997 }
2998 
2999 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3000 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3001 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3002 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3003 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3004 };
3005 
3006 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3007 					 struct mlxsw_sp_nexthop_group *nh_grp)
3008 {
3009 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3010 	    !nh_grp->gateway)
3011 		return 0;
3012 
3013 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3014 				      &nh_grp->ht_node,
3015 				      mlxsw_sp_nexthop_group_ht_params);
3016 }
3017 
3018 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3019 					  struct mlxsw_sp_nexthop_group *nh_grp)
3020 {
3021 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3022 	    !nh_grp->gateway)
3023 		return;
3024 
3025 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3026 			       &nh_grp->ht_node,
3027 			       mlxsw_sp_nexthop_group_ht_params);
3028 }
3029 
3030 static struct mlxsw_sp_nexthop_group *
3031 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3032 			       struct fib_info *fi)
3033 {
3034 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3035 
3036 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3037 	cmp_arg.fi = fi;
3038 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3039 				      &cmp_arg,
3040 				      mlxsw_sp_nexthop_group_ht_params);
3041 }
3042 
3043 static struct mlxsw_sp_nexthop_group *
3044 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3045 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3046 {
3047 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3048 
3049 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3050 	cmp_arg.fib6_entry = fib6_entry;
3051 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3052 				      &cmp_arg,
3053 				      mlxsw_sp_nexthop_group_ht_params);
3054 }
3055 
3056 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3057 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3058 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3059 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3060 };
3061 
3062 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3063 				   struct mlxsw_sp_nexthop *nh)
3064 {
3065 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3066 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3067 }
3068 
3069 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3070 				    struct mlxsw_sp_nexthop *nh)
3071 {
3072 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3073 			       mlxsw_sp_nexthop_ht_params);
3074 }
3075 
3076 static struct mlxsw_sp_nexthop *
3077 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3078 			struct mlxsw_sp_nexthop_key key)
3079 {
3080 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3081 				      mlxsw_sp_nexthop_ht_params);
3082 }
3083 
3084 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3085 					     const struct mlxsw_sp_fib *fib,
3086 					     u32 adj_index, u16 ecmp_size,
3087 					     u32 new_adj_index,
3088 					     u16 new_ecmp_size)
3089 {
3090 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3091 
3092 	mlxsw_reg_raleu_pack(raleu_pl,
3093 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3094 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3095 			     new_ecmp_size);
3096 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3097 }
3098 
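/* Re-point the FIB entries using this group from the old adjacency block to
 * the new one. The update is done per virtual router, skipping consecutive
 * entries that belong to the same FIB.
 */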
3099 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3100 					  struct mlxsw_sp_nexthop_group *nh_grp,
3101 					  u32 old_adj_index, u16 old_ecmp_size)
3102 {
3103 	struct mlxsw_sp_fib_entry *fib_entry;
3104 	struct mlxsw_sp_fib *fib = NULL;
3105 	int err;
3106 
3107 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3108 		if (fib == fib_entry->fib_node->fib)
3109 			continue;
3110 		fib = fib_entry->fib_node->fib;
3111 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3112 							old_adj_index,
3113 							old_ecmp_size,
3114 							nh_grp->adj_index,
3115 							nh_grp->ecmp_size);
3116 		if (err)
3117 			return err;
3118 	}
3119 	return 0;
3120 }
3121 
3122 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3123 				     struct mlxsw_sp_nexthop *nh)
3124 {
3125 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3126 	char ratr_pl[MLXSW_REG_RATR_LEN];
3127 
3128 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3129 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3130 			    adj_index, neigh_entry->rif);
3131 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3132 	if (nh->counter_valid)
3133 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3134 	else
3135 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3136 
3137 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3138 }
3139 
3140 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3141 			    struct mlxsw_sp_nexthop *nh)
3142 {
3143 	int i;
3144 
3145 	for (i = 0; i < nh->num_adj_entries; i++) {
3146 		int err;
3147 
3148 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3149 		if (err)
3150 			return err;
3151 	}
3152 
3153 	return 0;
3154 }
3155 
3156 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3157 					  u32 adj_index,
3158 					  struct mlxsw_sp_nexthop *nh)
3159 {
3160 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3161 
3162 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3163 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3164 }
3165 
3166 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3167 					u32 adj_index,
3168 					struct mlxsw_sp_nexthop *nh)
3169 {
3170 	int i;
3171 
3172 	for (i = 0; i < nh->num_adj_entries; i++) {
3173 		int err;
3174 
3175 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3176 						     nh);
3177 		if (err)
3178 			return err;
3179 	}
3180 
3181 	return 0;
3182 }
3183 
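/* Write the adjacency entries of the group's offloadable nexthops at
 * consecutive indices, starting at the group's base adjacency index. An
 * entry is only rewritten when marked for update, unless @reallocate forces
 * a full rewrite.
 */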
3184 static int
3185 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3186 			      struct mlxsw_sp_nexthop_group *nh_grp,
3187 			      bool reallocate)
3188 {
3189 	u32 adj_index = nh_grp->adj_index; /* base */
3190 	struct mlxsw_sp_nexthop *nh;
3191 	int i;
3192 	int err;
3193 
3194 	for (i = 0; i < nh_grp->count; i++) {
3195 		nh = &nh_grp->nexthops[i];
3196 
3197 		if (!nh->should_offload) {
3198 			nh->offloaded = 0;
3199 			continue;
3200 		}
3201 
3202 		if (nh->update || reallocate) {
3203 			switch (nh->type) {
3204 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3205 				err = mlxsw_sp_nexthop_update
3206 					    (mlxsw_sp, adj_index, nh);
3207 				break;
3208 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3209 				err = mlxsw_sp_nexthop_ipip_update
3210 					    (mlxsw_sp, adj_index, nh);
3211 				break;
3212 			}
3213 			if (err)
3214 				return err;
3215 			nh->update = 0;
3216 			nh->offloaded = 1;
3217 		}
3218 		adj_index += nh->num_adj_entries;
3219 	}
3220 	return 0;
3221 }
3222 
3223 static int
3224 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3225 				    struct mlxsw_sp_nexthop_group *nh_grp)
3226 {
3227 	struct mlxsw_sp_fib_entry *fib_entry;
3228 	int err;
3229 
3230 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3231 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3232 		if (err)
3233 			return err;
3234 	}
3235 	return 0;
3236 }
3237 
3238 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3239 {
3240 	/* Valid sizes for an adjacency group are:
3241 	 * 1-64, 512, 1024, 2048 and 4096.
3242 	 */
3243 	if (*p_adj_grp_size <= 64)
3244 		return;
3245 	else if (*p_adj_grp_size <= 512)
3246 		*p_adj_grp_size = 512;
3247 	else if (*p_adj_grp_size <= 1024)
3248 		*p_adj_grp_size = 1024;
3249 	else if (*p_adj_grp_size <= 2048)
3250 		*p_adj_grp_size = 2048;
3251 	else
3252 		*p_adj_grp_size = 4096;
3253 }
3254 
3255 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3256 					     unsigned int alloc_size)
3257 {
3258 	if (alloc_size >= 4096)
3259 		*p_adj_grp_size = 4096;
3260 	else if (alloc_size >= 2048)
3261 		*p_adj_grp_size = 2048;
3262 	else if (alloc_size >= 1024)
3263 		*p_adj_grp_size = 1024;
3264 	else if (alloc_size >= 512)
3265 		*p_adj_grp_size = 512;
3266 }
3267 
3268 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3269 				     u16 *p_adj_grp_size)
3270 {
3271 	unsigned int alloc_size;
3272 	int err;
3273 
3274 	/* Round up the requested group size to the next size supported
3275 	 * by the device and make sure the request can be satisfied.
3276 	 */
3277 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3278 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3279 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3280 					      *p_adj_grp_size, &alloc_size);
3281 	if (err)
3282 		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as many of them as
	 * possible.
	 */
3287 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3288 
3289 	return 0;
3290 }
3291 
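/* Normalize the nexthop weights by the greatest common divisor of the
 * weights of the offloadable nexthops and record the sum of the normalized
 * weights. For example, weights of 6 and 4 (gcd 2) are normalized to 3 and
 * 2, for a sum of 5.
 */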
3292 static void
3293 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3294 {
3295 	int i, g = 0, sum_norm_weight = 0;
3296 	struct mlxsw_sp_nexthop *nh;
3297 
3298 	for (i = 0; i < nh_grp->count; i++) {
3299 		nh = &nh_grp->nexthops[i];
3300 
3301 		if (!nh->should_offload)
3302 			continue;
3303 		if (g > 0)
3304 			g = gcd(nh->nh_weight, g);
3305 		else
3306 			g = nh->nh_weight;
3307 	}
3308 
3309 	for (i = 0; i < nh_grp->count; i++) {
3310 		nh = &nh_grp->nexthops[i];
3311 
3312 		if (!nh->should_offload)
3313 			continue;
3314 		nh->norm_nh_weight = nh->nh_weight / g;
3315 		sum_norm_weight += nh->norm_nh_weight;
3316 	}
3317 
3318 	nh_grp->sum_norm_weight = sum_norm_weight;
3319 }
3320 
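/* Distribute the group's adjacency entries among the offloadable nexthops in
 * proportion to their normalized weights. For example, with normalized
 * weights of 3 and 2 the group has an ECMP size of 5 and the nexthops are
 * assigned 3 and 2 entries, respectively; when the ECMP size is rounded up
 * to a supported value, the entries are scaled proportionally using
 * DIV_ROUND_CLOSEST().
 */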
3321 static void
3322 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3323 {
3324 	int total = nh_grp->sum_norm_weight;
3325 	u16 ecmp_size = nh_grp->ecmp_size;
3326 	int i, weight = 0, lower_bound = 0;
3327 
3328 	for (i = 0; i < nh_grp->count; i++) {
3329 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3330 		int upper_bound;
3331 
3332 		if (!nh->should_offload)
3333 			continue;
3334 		weight += nh->norm_nh_weight;
3335 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3336 		nh->num_adj_entries = upper_bound - lower_bound;
3337 		lower_bound = upper_bound;
3338 	}
3339 }
3340 
3341 static struct mlxsw_sp_nexthop *
3342 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3343 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3344 
3345 static void
3346 mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3347 					struct mlxsw_sp_nexthop_group *nh_grp)
3348 {
3349 	int i;
3350 
3351 	for (i = 0; i < nh_grp->count; i++) {
3352 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3353 
3354 		if (nh->offloaded)
3355 			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3356 		else
3357 			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3358 	}
3359 }
3360 
3361 static void
3362 __mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3363 					  struct mlxsw_sp_fib6_entry *fib6_entry)
3364 {
3365 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3366 
3367 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3368 		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3369 		struct mlxsw_sp_nexthop *nh;
3370 
3371 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3372 		if (nh && nh->offloaded)
3373 			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3374 		else
3375 			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3376 	}
3377 }
3378 
3379 static void
3380 mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3381 					struct mlxsw_sp_nexthop_group *nh_grp)
3382 {
3383 	struct mlxsw_sp_fib6_entry *fib6_entry;
3384 
3385 	/* Unfortunately, in IPv6 the route and the nexthop are described by
3386 	 * the same struct, so we need to iterate over all the routes using the
3387 	 * nexthop group and set / clear the offload indication for them.
3388 	 */
3389 	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3390 			    common.nexthop_group_node)
3391 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3392 }
3393 
3394 static void
3395 mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3396 				       struct mlxsw_sp_nexthop_group *nh_grp)
3397 {
3398 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
3399 	case AF_INET:
3400 		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3401 		break;
3402 	case AF_INET6:
3403 		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3404 		break;
3405 	}
3406 }
3407 
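/* Reflect the current state of the group's nexthops in the device: normalize
 * the weights, allocate an adjacency block of a suitable size, write the
 * nexthops into it and re-point or mass-update the FIB entries using the
 * group. On any failure, fall back to trapping the traffic to the kernel.
 */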
3408 static void
3409 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3410 			       struct mlxsw_sp_nexthop_group *nh_grp)
3411 {
3412 	u16 ecmp_size, old_ecmp_size;
3413 	struct mlxsw_sp_nexthop *nh;
3414 	bool offload_change = false;
3415 	u32 adj_index;
3416 	bool old_adj_index_valid;
3417 	u32 old_adj_index;
3418 	int i;
3419 	int err;
3420 
3421 	if (!nh_grp->gateway) {
3422 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3423 		return;
3424 	}
3425 
3426 	for (i = 0; i < nh_grp->count; i++) {
3427 		nh = &nh_grp->nexthops[i];
3428 
3429 		if (nh->should_offload != nh->offloaded) {
3430 			offload_change = true;
3431 			if (nh->should_offload)
3432 				nh->update = 1;
3433 		}
3434 	}
3435 	if (!offload_change) {
3436 		/* Nothing was added or removed, so no need to reallocate. Just
3437 		 * update MAC on existing adjacency indexes.
3438 		 */
3439 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3440 		if (err) {
3441 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3442 			goto set_trap;
3443 		}
3444 		return;
3445 	}
3446 	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected, so we just set
		 * the trap and let everything flow through the kernel.
		 */
		goto set_trap;
3452 
3453 	ecmp_size = nh_grp->sum_norm_weight;
3454 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3455 	if (err)
3456 		/* No valid allocation size available. */
3457 		goto set_trap;
3458 
3459 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3460 				  ecmp_size, &adj_index);
3461 	if (err) {
		/* We ran out of KVD linear space, so just set the
		 * trap and let everything flow through the kernel.
		 */
3465 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3466 		goto set_trap;
3467 	}
3468 	old_adj_index_valid = nh_grp->adj_index_valid;
3469 	old_adj_index = nh_grp->adj_index;
3470 	old_ecmp_size = nh_grp->ecmp_size;
3471 	nh_grp->adj_index_valid = 1;
3472 	nh_grp->adj_index = adj_index;
3473 	nh_grp->ecmp_size = ecmp_size;
3474 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3475 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3476 	if (err) {
3477 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3478 		goto set_trap;
3479 	}
3480 
3481 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3482 
3483 	if (!old_adj_index_valid) {
		/* The trap was set for the fib entries, so we have to call
		 * fib entry update to unset it and use the adjacency index.
		 */
3487 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3488 		if (err) {
3489 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3490 			goto set_trap;
3491 		}
3492 		return;
3493 	}
3494 
3495 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3496 					     old_adj_index, old_ecmp_size);
3497 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3498 			   old_ecmp_size, old_adj_index);
3499 	if (err) {
3500 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3501 		goto set_trap;
3502 	}
3503 
3504 	return;
3505 
3506 set_trap:
3507 	old_adj_index_valid = nh_grp->adj_index_valid;
3508 	nh_grp->adj_index_valid = 0;
3509 	for (i = 0; i < nh_grp->count; i++) {
3510 		nh = &nh_grp->nexthops[i];
3511 		nh->offloaded = 0;
3512 	}
3513 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3514 	if (err)
3515 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3516 	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3517 	if (old_adj_index_valid)
3518 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3519 				   nh_grp->ecmp_size, nh_grp->adj_index);
3520 }
3521 
3522 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3523 					    bool removing)
3524 {
3525 	if (!removing)
3526 		nh->should_offload = 1;
3527 	else
3528 		nh->should_offload = 0;
3529 	nh->update = 1;
3530 }
3531 
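/* The kernel declared the neighbour dead, but nexthops still reference it.
 * Look up - or create - a live neighbour for the same address, re-key the
 * neigh entry with it and transfer the nexthop references from the old
 * neighbour to the new one.
 */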
3532 static int
3533 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3534 				    struct mlxsw_sp_neigh_entry *neigh_entry)
3535 {
3536 	struct neighbour *n, *old_n = neigh_entry->key.n;
3537 	struct mlxsw_sp_nexthop *nh;
3538 	bool entry_connected;
3539 	u8 nud_state, dead;
3540 	int err;
3541 
3542 	nh = list_first_entry(&neigh_entry->nexthop_list,
3543 			      struct mlxsw_sp_nexthop, neigh_list_node);
3544 
3545 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3546 	if (!n) {
3547 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3548 				 nh->rif->dev);
3549 		if (IS_ERR(n))
3550 			return PTR_ERR(n);
3551 		neigh_event_send(n, NULL);
3552 	}
3553 
3554 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3555 	neigh_entry->key.n = n;
3556 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3557 	if (err)
3558 		goto err_neigh_entry_insert;
3559 
3560 	read_lock_bh(&n->lock);
3561 	nud_state = n->nud_state;
3562 	dead = n->dead;
3563 	read_unlock_bh(&n->lock);
3564 	entry_connected = nud_state & NUD_VALID && !dead;
3565 
3566 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3567 			    neigh_list_node) {
3568 		neigh_release(old_n);
3569 		neigh_clone(n);
3570 		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3571 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3572 	}
3573 
3574 	neigh_release(n);
3575 
3576 	return 0;
3577 
3578 err_neigh_entry_insert:
3579 	neigh_entry->key.n = old_n;
3580 	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3581 	neigh_release(n);
3582 	return err;
3583 }
3584 
3585 static void
3586 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3587 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3588 			      bool removing, bool dead)
3589 {
3590 	struct mlxsw_sp_nexthop *nh;
3591 
3592 	if (list_empty(&neigh_entry->nexthop_list))
3593 		return;
3594 
3595 	if (dead) {
3596 		int err;
3597 
3598 		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3599 							  neigh_entry);
3600 		if (err)
3601 			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3602 		return;
3603 	}
3604 
3605 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3606 			    neigh_list_node) {
3607 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3608 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3609 	}
3610 }
3611 
3612 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3613 				      struct mlxsw_sp_rif *rif)
3614 {
3615 	if (nh->rif)
3616 		return;
3617 
3618 	nh->rif = rif;
3619 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3620 }
3621 
3622 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3623 {
3624 	if (!nh->rif)
3625 		return;
3626 
3627 	list_del(&nh->rif_list_node);
3628 	nh->rif = NULL;
3629 }
3630 
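/* Resolve the nexthop's gateway address to a neighbour entry and link the
 * nexthop to it, so that neighbour state changes can be reflected in the
 * adjacency table.
 */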
3631 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3632 				       struct mlxsw_sp_nexthop *nh)
3633 {
3634 	struct mlxsw_sp_neigh_entry *neigh_entry;
3635 	struct neighbour *n;
3636 	u8 nud_state, dead;
3637 	int err;
3638 
3639 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3640 		return 0;
3641 
	/* Take a reference on the neighbour to make sure it is not
	 * destroyed before the nexthop entry is finished with it.
	 * The reference is taken either by neigh_lookup() or by
	 * neigh_create() in case n is not found.
	 */
3647 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3648 	if (!n) {
3649 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3650 				 nh->rif->dev);
3651 		if (IS_ERR(n))
3652 			return PTR_ERR(n);
3653 		neigh_event_send(n, NULL);
3654 	}
3655 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3656 	if (!neigh_entry) {
3657 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3658 		if (IS_ERR(neigh_entry)) {
			err = PTR_ERR(neigh_entry);
3660 			goto err_neigh_entry_create;
3661 		}
3662 	}
3663 
	/* If this is the first nexthop connected to this neighbour, add
	 * the neighbour to the nexthop_neighs_list.
	 */
3667 	if (list_empty(&neigh_entry->nexthop_list))
3668 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3669 			      &mlxsw_sp->router->nexthop_neighs_list);
3670 
3671 	nh->neigh_entry = neigh_entry;
3672 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3673 	read_lock_bh(&n->lock);
3674 	nud_state = n->nud_state;
3675 	dead = n->dead;
3676 	read_unlock_bh(&n->lock);
3677 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3678 
3679 	return 0;
3680 
3681 err_neigh_entry_create:
3682 	neigh_release(n);
3683 	return err;
3684 }
3685 
3686 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3687 					struct mlxsw_sp_nexthop *nh)
3688 {
3689 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3690 	struct neighbour *n;
3691 
3692 	if (!neigh_entry)
3693 		return;
3694 	n = neigh_entry->key.n;
3695 
3696 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3697 	list_del(&nh->neigh_list_node);
3698 	nh->neigh_entry = NULL;
3699 
	/* If this is the last nexthop connected to this neighbour, remove
	 * the neighbour from the nexthop_neighs_list.
	 */
3703 	if (list_empty(&neigh_entry->nexthop_list))
3704 		list_del(&neigh_entry->nexthop_neighs_list_node);
3705 
3706 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3707 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3708 
3709 	neigh_release(n);
3710 }
3711 
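/* An IP-in-IP nexthop is only usable when the tunnel's underlay device is
 * up. Tunnels that are not bound to an underlay device are considered to
 * always be up.
 */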
3712 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3713 {
3714 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3715 
3716 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3717 }
3718 
3719 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3720 				       struct mlxsw_sp_nexthop *nh,
3721 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3722 {
3723 	bool removing;
3724 
3725 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3726 		return;
3727 
3728 	nh->ipip_entry = ipip_entry;
3729 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3730 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3731 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3732 }
3733 
3734 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3735 				       struct mlxsw_sp_nexthop *nh)
3736 {
3737 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3738 
3739 	if (!ipip_entry)
3740 		return;
3741 
3742 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3743 	nh->ipip_entry = NULL;
3744 }
3745 
3746 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3747 					const struct fib_nh *fib_nh,
3748 					enum mlxsw_sp_ipip_type *p_ipipt)
3749 {
3750 	struct net_device *dev = fib_nh->fib_nh_dev;
3751 
3752 	return dev &&
3753 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3754 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3755 }
3756 
3757 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3758 				       struct mlxsw_sp_nexthop *nh)
3759 {
3760 	switch (nh->type) {
3761 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3762 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3763 		mlxsw_sp_nexthop_rif_fini(nh);
3764 		break;
3765 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3766 		mlxsw_sp_nexthop_rif_fini(nh);
3767 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3768 		break;
3769 	}
3770 }
3771 
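/* Initialize the nexthop as an IP-in-IP nexthop if its device is an
 * offloadable tunnel, and as a regular Ethernet nexthop otherwise.
 */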
3772 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3773 				       struct mlxsw_sp_nexthop *nh,
3774 				       struct fib_nh *fib_nh)
3775 {
3776 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3777 	struct net_device *dev = fib_nh->fib_nh_dev;
3778 	struct mlxsw_sp_ipip_entry *ipip_entry;
3779 	struct mlxsw_sp_rif *rif;
3780 	int err;
3781 
3782 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3783 	if (ipip_entry) {
3784 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3785 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3786 					  MLXSW_SP_L3_PROTO_IPV4)) {
3787 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3788 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3789 			return 0;
3790 		}
3791 	}
3792 
3793 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3794 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3795 	if (!rif)
3796 		return 0;
3797 
3798 	mlxsw_sp_nexthop_rif_init(nh, rif);
3799 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3800 	if (err)
3801 		goto err_neigh_init;
3802 
3803 	return 0;
3804 
3805 err_neigh_init:
3806 	mlxsw_sp_nexthop_rif_fini(nh);
3807 	return err;
3808 }
3809 
3810 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3811 					struct mlxsw_sp_nexthop *nh)
3812 {
3813 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3814 }
3815 
3816 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3817 				  struct mlxsw_sp_nexthop_group *nh_grp,
3818 				  struct mlxsw_sp_nexthop *nh,
3819 				  struct fib_nh *fib_nh)
3820 {
3821 	struct net_device *dev = fib_nh->fib_nh_dev;
3822 	struct in_device *in_dev;
3823 	int err;
3824 
3825 	nh->nh_grp = nh_grp;
3826 	nh->key.fib_nh = fib_nh;
3827 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3828 	nh->nh_weight = fib_nh->fib_nh_weight;
3829 #else
3830 	nh->nh_weight = 1;
3831 #endif
3832 	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3833 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3834 	if (err)
3835 		return err;
3836 
3837 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3838 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3839 
3840 	if (!dev)
3841 		return 0;
3842 
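	/* Leave the nexthop unresolved if the kernel was told to ignore
	 * routes whose nexthop device is down and this nexthop is flagged
	 * as such.
	 */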
3843 	in_dev = __in_dev_get_rtnl(dev);
3844 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3845 	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3846 		return 0;
3847 
3848 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3849 	if (err)
3850 		goto err_nexthop_neigh_init;
3851 
3852 	return 0;
3853 
3854 err_nexthop_neigh_init:
3855 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3856 	return err;
3857 }
3858 
3859 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3860 				   struct mlxsw_sp_nexthop *nh)
3861 {
3862 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3863 	list_del(&nh->router_list_node);
3864 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3865 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3866 }
3867 
3868 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3869 				    unsigned long event, struct fib_nh *fib_nh)
3870 {
3871 	struct mlxsw_sp_nexthop_key key;
3872 	struct mlxsw_sp_nexthop *nh;
3873 
3874 	if (mlxsw_sp->router->aborted)
3875 		return;
3876 
3877 	key.fib_nh = fib_nh;
3878 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3879 	if (!nh)
3880 		return;
3881 
3882 	switch (event) {
3883 	case FIB_EVENT_NH_ADD:
3884 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3885 		break;
3886 	case FIB_EVENT_NH_DEL:
3887 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3888 		break;
3889 	}
3890 
3891 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3892 }
3893 
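/* Re-evaluate whether the nexthops using this RIF should be offloaded,
 * e.g. after the underlay device of an IP-in-IP tunnel changed state.
 */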
3894 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3895 					struct mlxsw_sp_rif *rif)
3896 {
3897 	struct mlxsw_sp_nexthop *nh;
3898 	bool removing;
3899 
3900 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3901 		switch (nh->type) {
3902 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3903 			removing = false;
3904 			break;
3905 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3906 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3907 			break;
3908 		default:
3909 			WARN_ON(1);
3910 			continue;
3911 		}
3912 
3913 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3914 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3915 	}
3916 }
3917 
3918 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3919 					 struct mlxsw_sp_rif *old_rif,
3920 					 struct mlxsw_sp_rif *new_rif)
3921 {
3922 	struct mlxsw_sp_nexthop *nh;
3923 
3924 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3925 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3926 		nh->rif = new_rif;
3927 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3928 }
3929 
3930 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3931 					   struct mlxsw_sp_rif *rif)
3932 {
3933 	struct mlxsw_sp_nexthop *nh, *tmp;
3934 
3935 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3936 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3937 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3938 	}
3939 }
3940 
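/* A route is a gateway route if its first nexthop goes through a gateway
 * (indicated by a link-scope nexthop) or through an IP-in-IP tunnel.
 */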
3941 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3942 				   struct fib_info *fi)
3943 {
3944 	const struct fib_nh *nh = fib_info_nh(fi, 0);
3945 
3946 	return nh->fib_nh_scope == RT_SCOPE_LINK ||
3947 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
3948 }
3949 
3950 static struct mlxsw_sp_nexthop_group *
3951 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3952 {
3953 	unsigned int nhs = fib_info_num_path(fi);
3954 	struct mlxsw_sp_nexthop_group *nh_grp;
3955 	struct mlxsw_sp_nexthop *nh;
3956 	struct fib_nh *fib_nh;
3957 	int i;
3958 	int err;
3959 
3960 	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
3961 	if (!nh_grp)
3962 		return ERR_PTR(-ENOMEM);
3963 	nh_grp->priv = fi;
3964 	INIT_LIST_HEAD(&nh_grp->fib_list);
3965 	nh_grp->neigh_tbl = &arp_tbl;
3966 
3967 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3968 	nh_grp->count = nhs;
3969 	fib_info_hold(fi);
3970 	for (i = 0; i < nh_grp->count; i++) {
3971 		nh = &nh_grp->nexthops[i];
3972 		fib_nh = fib_info_nh(fi, i);
3973 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3974 		if (err)
3975 			goto err_nexthop4_init;
3976 	}
3977 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3978 	if (err)
3979 		goto err_nexthop_group_insert;
3980 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3981 	return nh_grp;
3982 
3983 err_nexthop_group_insert:
3984 err_nexthop4_init:
3985 	for (i--; i >= 0; i--) {
3986 		nh = &nh_grp->nexthops[i];
3987 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3988 	}
3989 	fib_info_put(fi);
3990 	kfree(nh_grp);
3991 	return ERR_PTR(err);
3992 }
3993 
3994 static void
3995 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3996 				struct mlxsw_sp_nexthop_group *nh_grp)
3997 {
3998 	struct mlxsw_sp_nexthop *nh;
3999 	int i;
4000 
4001 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4002 	for (i = 0; i < nh_grp->count; i++) {
4003 		nh = &nh_grp->nexthops[i];
4004 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4005 	}
4006 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4007 	WARN_ON_ONCE(nh_grp->adj_index_valid);
4008 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
4009 	kfree(nh_grp);
4010 }
4011 
4012 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
4013 				       struct mlxsw_sp_fib_entry *fib_entry,
4014 				       struct fib_info *fi)
4015 {
4016 	struct mlxsw_sp_nexthop_group *nh_grp;
4017 
4018 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
4019 	if (!nh_grp) {
4020 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
4021 		if (IS_ERR(nh_grp))
4022 			return PTR_ERR(nh_grp);
4023 	}
4024 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
4025 	fib_entry->nh_group = nh_grp;
4026 	return 0;
4027 }
4028 
4029 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
4030 					struct mlxsw_sp_fib_entry *fib_entry)
4031 {
4032 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4033 
4034 	list_del(&fib_entry->nexthop_group_node);
4035 	if (!list_empty(&nh_grp->fib_list))
4036 		return;
4037 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
4038 }
4039 
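/* Only routes with a zero TOS are offloaded, since the device is assumed
 * not to take the TOS into account during lookup.
 */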
4040 static bool
4041 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4042 {
4043 	struct mlxsw_sp_fib4_entry *fib4_entry;
4044 
4045 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4046 				  common);
4047 	return !fib4_entry->tos;
4048 }
4049 
4050 static bool
4051 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4052 {
4053 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4054 
4055 	switch (fib_entry->fib_node->fib->proto) {
4056 	case MLXSW_SP_L3_PROTO_IPV4:
4057 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
4058 			return false;
4059 		break;
4060 	case MLXSW_SP_L3_PROTO_IPV6:
4061 		break;
4062 	}
4063 
4064 	switch (fib_entry->type) {
4065 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4066 		return !!nh_group->adj_index_valid;
4067 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4068 		return !!nh_group->nh_rif;
4069 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4070 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4071 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4072 		return true;
4073 	default:
4074 		return false;
4075 	}
4076 }
4077 
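/* Find the nexthop in the group that corresponds to the given IPv6 route,
 * by matching on the nexthop device and gateway address.
 */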
4078 static struct mlxsw_sp_nexthop *
4079 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4080 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4081 {
4082 	int i;
4083 
4084 	for (i = 0; i < nh_grp->count; i++) {
4085 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4086 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4087 
4088 		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
4089 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4090 				    &rt->fib6_nh->fib_nh_gw6))
4091 			return nh;
4093 	}
4094 
4095 	return NULL;
4096 }
4097 
4098 static void
4099 mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4100 				 struct mlxsw_sp_fib_entry *fib_entry)
4101 {
4102 	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4103 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4104 	int dst_len = fib_entry->fib_node->key.prefix_len;
4105 	struct mlxsw_sp_fib4_entry *fib4_entry;
4106 	struct fib_rt_info fri;
4107 	bool should_offload;
4108 
4109 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4110 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4111 				  common);
4112 	fri.fi = fi;
4113 	fri.tb_id = fib4_entry->tb_id;
4114 	fri.dst = cpu_to_be32(*p_dst);
4115 	fri.dst_len = dst_len;
4116 	fri.tos = fib4_entry->tos;
4117 	fri.type = fib4_entry->type;
4118 	fri.offload = should_offload;
4119 	fri.trap = !should_offload;
4120 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4121 }
4122 
4123 static void
4124 mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4125 				   struct mlxsw_sp_fib_entry *fib_entry)
4126 {
4127 	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4128 	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4129 	int dst_len = fib_entry->fib_node->key.prefix_len;
4130 	struct mlxsw_sp_fib4_entry *fib4_entry;
4131 	struct fib_rt_info fri;
4132 
4133 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4134 				  common);
4135 	fri.fi = fi;
4136 	fri.tb_id = fib4_entry->tb_id;
4137 	fri.dst = cpu_to_be32(*p_dst);
4138 	fri.dst_len = dst_len;
4139 	fri.tos = fib4_entry->tos;
4140 	fri.type = fib4_entry->type;
4141 	fri.offload = false;
4142 	fri.trap = false;
4143 	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4144 }
4145 
4146 static void
4147 mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4148 				 struct mlxsw_sp_fib_entry *fib_entry)
4149 {
4150 	struct mlxsw_sp_fib6_entry *fib6_entry;
4151 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4152 	bool should_offload;
4153 
4154 	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4155 
4156 	/* In IPv6 a multipath route is represented using multiple routes, so
4157 	 * we need to set the flags on all of them.
4158 	 */
4159 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4160 				  common);
4161 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4162 		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
4163 				       !should_offload);
4164 }
4165 
4166 static void
4167 mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4168 				   struct mlxsw_sp_fib_entry *fib_entry)
4169 {
4170 	struct mlxsw_sp_fib6_entry *fib6_entry;
4171 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4172 
4173 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4174 				  common);
4175 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4176 		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
4177 }
4178 
4179 static void
4180 mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4181 				struct mlxsw_sp_fib_entry *fib_entry)
4182 {
4183 	switch (fib_entry->fib_node->fib->proto) {
4184 	case MLXSW_SP_L3_PROTO_IPV4:
4185 		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
4186 		break;
4187 	case MLXSW_SP_L3_PROTO_IPV6:
4188 		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
4189 		break;
4190 	}
4191 }
4192 
4193 static void
4194 mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4195 				  struct mlxsw_sp_fib_entry *fib_entry)
4196 {
4197 	switch (fib_entry->fib_node->fib->proto) {
4198 	case MLXSW_SP_L3_PROTO_IPV4:
4199 		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4200 		break;
4201 	case MLXSW_SP_L3_PROTO_IPV6:
4202 		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4203 		break;
4204 	}
4205 }
4206 
4207 static void
4208 mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
4209 				    struct mlxsw_sp_fib_entry *fib_entry,
4210 				    enum mlxsw_reg_ralue_op op)
4211 {
4212 	switch (op) {
4213 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4214 		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
4215 		break;
4216 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4217 		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4218 		break;
4219 	default:
4220 		break;
4221 	}
4222 }
4223 
4224 static void
4225 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4226 			      const struct mlxsw_sp_fib_entry *fib_entry,
4227 			      enum mlxsw_reg_ralue_op op)
4228 {
4229 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4230 	enum mlxsw_reg_ralxx_protocol proto;
4231 	u32 *p_dip;
4232 
4233 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4234 
4235 	switch (fib->proto) {
4236 	case MLXSW_SP_L3_PROTO_IPV4:
4237 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4238 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4239 				      fib_entry->fib_node->key.prefix_len,
4240 				      *p_dip);
4241 		break;
4242 	case MLXSW_SP_L3_PROTO_IPV6:
4243 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4244 				      fib_entry->fib_node->key.prefix_len,
4245 				      fib_entry->fib_node->key.addr);
4246 		break;
4247 	}
4248 }
4249 
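/* Allocate and write a single adjacency entry whose action is to discard
 * packets. It is used by routes whose nexthop group has no valid adjacency
 * index, and is allocated once and then reused.
 */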
4250 static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
4251 {
4252 	enum mlxsw_reg_ratr_trap_action trap_action;
4253 	char ratr_pl[MLXSW_REG_RATR_LEN];
4254 	int err;
4255 
4256 	if (mlxsw_sp->router->adj_discard_index_valid)
4257 		return 0;
4258 
4259 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4260 				  &mlxsw_sp->router->adj_discard_index);
4261 	if (err)
4262 		return err;
4263 
4264 	trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
4265 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4266 			    MLXSW_REG_RATR_TYPE_ETHERNET,
4267 			    mlxsw_sp->router->adj_discard_index, rif_index);
4268 	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4269 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4270 	if (err)
4271 		goto err_ratr_write;
4272 
4273 	mlxsw_sp->router->adj_discard_index_valid = true;
4274 
4275 	return 0;
4276 
4277 err_ratr_write:
4278 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4279 			   mlxsw_sp->router->adj_discard_index);
4280 	return err;
4281 }
4282 
4283 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4284 					struct mlxsw_sp_fib_entry *fib_entry,
4285 					enum mlxsw_reg_ralue_op op)
4286 {
4287 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4288 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4289 	enum mlxsw_reg_ralue_trap_action trap_action;
4290 	u16 trap_id = 0;
4291 	u32 adjacency_index = 0;
4292 	u16 ecmp_size = 0;
4293 	int err;
4294 
	/* If the nexthop group adjacency index is valid, use it with the
	 * provided ECMP size. If the index is not valid, but the group
	 * does have nexthops and a RIF, point the route at the discard
	 * adjacency entry. Otherwise, set up a trap and pass traffic to
	 * the kernel.
	 */
4299 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4300 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4301 		adjacency_index = fib_entry->nh_group->adj_index;
4302 		ecmp_size = fib_entry->nh_group->ecmp_size;
4303 	} else if (!nh_group->adj_index_valid && nh_group->count &&
4304 		   nh_group->nh_rif) {
4305 		err = mlxsw_sp_adj_discard_write(mlxsw_sp,
4306 						 nh_group->nh_rif->rif_index);
4307 		if (err)
4308 			return err;
4309 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4310 		adjacency_index = mlxsw_sp->router->adj_discard_index;
4311 		ecmp_size = 1;
4312 	} else {
4313 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4314 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4315 	}
4316 
4317 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4318 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4319 					adjacency_index, ecmp_size);
4320 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4321 }
4322 
4323 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4324 				       struct mlxsw_sp_fib_entry *fib_entry,
4325 				       enum mlxsw_reg_ralue_op op)
4326 {
4327 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4328 	enum mlxsw_reg_ralue_trap_action trap_action;
4329 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4330 	u16 trap_id = 0;
4331 	u16 rif_index = 0;
4332 
4333 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4334 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4335 		rif_index = rif->rif_index;
4336 	} else {
4337 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4338 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4339 	}
4340 
4341 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4342 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4343 				       rif_index);
4344 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4345 }
4346 
4347 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4348 				      struct mlxsw_sp_fib_entry *fib_entry,
4349 				      enum mlxsw_reg_ralue_op op)
4350 {
4351 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4352 
4353 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4354 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4355 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4356 }
4357 
4358 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4359 					   struct mlxsw_sp_fib_entry *fib_entry,
4360 					   enum mlxsw_reg_ralue_op op)
4361 {
4362 	enum mlxsw_reg_ralue_trap_action trap_action;
4363 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4364 
4365 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4366 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4367 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4368 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4369 }
4370 
4371 static int
4372 mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
4373 				  struct mlxsw_sp_fib_entry *fib_entry,
4374 				  enum mlxsw_reg_ralue_op op)
4375 {
4376 	enum mlxsw_reg_ralue_trap_action trap_action;
4377 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4378 	u16 trap_id;
4379 
4380 	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4381 	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
4382 
4383 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4384 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
4385 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4386 }
4387 
4388 static int
4389 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4390 				 struct mlxsw_sp_fib_entry *fib_entry,
4391 				 enum mlxsw_reg_ralue_op op)
4392 {
4393 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4394 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4395 
4396 	if (WARN_ON(!ipip_entry))
4397 		return -EINVAL;
4398 
4399 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4400 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4401 				      fib_entry->decap.tunnel_index);
4402 }
4403 
4404 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4405 					   struct mlxsw_sp_fib_entry *fib_entry,
4406 					   enum mlxsw_reg_ralue_op op)
4407 {
4408 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4409 
4410 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4411 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4412 					   fib_entry->decap.tunnel_index);
4413 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4414 }
4415 
4416 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4417 				   struct mlxsw_sp_fib_entry *fib_entry,
4418 				   enum mlxsw_reg_ralue_op op)
4419 {
4420 	switch (fib_entry->type) {
4421 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4422 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4423 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4424 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4425 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4426 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4427 	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4428 		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4429 	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
4430 		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
4431 							 op);
4432 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4433 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4434 							fib_entry, op);
4435 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4436 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4437 	}
4438 	return -EINVAL;
4439 }
4440 
4441 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4442 				 struct mlxsw_sp_fib_entry *fib_entry,
4443 				 enum mlxsw_reg_ralue_op op)
4444 {
4445 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4446 
4447 	if (err)
4448 		return err;
4449 
4450 	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
4451 
	return 0;
4453 }
4454 
4455 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4456 				     struct mlxsw_sp_fib_entry *fib_entry)
4457 {
4458 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4459 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4460 }
4461 
4462 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4463 				  struct mlxsw_sp_fib_entry *fib_entry)
4464 {
4465 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4466 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4467 }
4468 
4469 static int
4470 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4471 			     const struct fib_entry_notifier_info *fen_info,
4472 			     struct mlxsw_sp_fib_entry *fib_entry)
4473 {
4474 	struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
4475 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4476 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4477 	struct mlxsw_sp_ipip_entry *ipip_entry;
4478 	struct fib_info *fi = fen_info->fi;
4479 
4480 	switch (fen_info->type) {
4481 	case RTN_LOCAL:
4482 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4483 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4484 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4485 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4486 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4487 							     fib_entry,
4488 							     ipip_entry);
4489 		}
4490 		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4491 						     dip.addr4)) {
4492 			u32 t_index;
4493 
4494 			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4495 			fib_entry->decap.tunnel_index = t_index;
4496 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4497 			return 0;
4498 		}
4499 		/* fall through */
4500 	case RTN_BROADCAST:
4501 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4502 		return 0;
4503 	case RTN_BLACKHOLE:
4504 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4505 		return 0;
4506 	case RTN_UNREACHABLE: /* fall through */
4507 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * they can be trapped with a lower priority than packets
		 * directed at the host, so use action type local instead
		 * of trap.
		 */
4512 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
4513 		return 0;
4514 	case RTN_UNICAST:
4515 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4516 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4517 		else
4518 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4519 		return 0;
4520 	default:
4521 		return -EINVAL;
4522 	}
4523 }
4524 
4525 static void
4526 mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
4527 			       struct mlxsw_sp_fib_entry *fib_entry)
4528 {
4529 	switch (fib_entry->type) {
4530 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4531 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
4532 		break;
4533 	default:
4534 		break;
4535 	}
4536 }
4537 
4538 static struct mlxsw_sp_fib4_entry *
4539 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4540 			   struct mlxsw_sp_fib_node *fib_node,
4541 			   const struct fib_entry_notifier_info *fen_info)
4542 {
4543 	struct mlxsw_sp_fib4_entry *fib4_entry;
4544 	struct mlxsw_sp_fib_entry *fib_entry;
4545 	int err;
4546 
4547 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4548 	if (!fib4_entry)
4549 		return ERR_PTR(-ENOMEM);
4550 	fib_entry = &fib4_entry->common;
4551 
4552 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4553 	if (err)
4554 		goto err_fib4_entry_type_set;
4555 
4556 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4557 	if (err)
4558 		goto err_nexthop4_group_get;
4559 
4560 	fib4_entry->prio = fen_info->fi->fib_priority;
4561 	fib4_entry->tb_id = fen_info->tb_id;
4562 	fib4_entry->type = fen_info->type;
4563 	fib4_entry->tos = fen_info->tos;
4564 
4565 	fib_entry->fib_node = fib_node;
4566 
4567 	return fib4_entry;
4568 
4569 err_nexthop4_group_get:
4570 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry);
4571 err_fib4_entry_type_set:
4572 	kfree(fib4_entry);
4573 	return ERR_PTR(err);
4574 }
4575 
4576 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4577 					struct mlxsw_sp_fib4_entry *fib4_entry)
4578 {
4579 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4580 	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
4581 	kfree(fib4_entry);
4582 }
4583 
4584 static struct mlxsw_sp_fib4_entry *
4585 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4586 			   const struct fib_entry_notifier_info *fen_info)
4587 {
4588 	struct mlxsw_sp_fib4_entry *fib4_entry;
4589 	struct mlxsw_sp_fib_node *fib_node;
4590 	struct mlxsw_sp_fib *fib;
4591 	struct mlxsw_sp_vr *vr;
4592 
4593 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4594 	if (!vr)
4595 		return NULL;
4596 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4597 
4598 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4599 					    sizeof(fen_info->dst),
4600 					    fen_info->dst_len);
4601 	if (!fib_node)
4602 		return NULL;
4603 
4604 	fib4_entry = container_of(fib_node->fib_entry,
4605 				  struct mlxsw_sp_fib4_entry, common);
4606 	if (fib4_entry->tb_id == fen_info->tb_id &&
4607 	    fib4_entry->tos == fen_info->tos &&
4608 	    fib4_entry->type == fen_info->type &&
4609 	    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4610 	    fen_info->fi)
4611 		return fib4_entry;
4612 
4613 	return NULL;
4614 }
4615 
4616 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4617 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4618 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4619 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4620 	.automatic_shrinking = true,
4621 };
4622 
4623 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4624 				    struct mlxsw_sp_fib_node *fib_node)
4625 {
4626 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4627 				      mlxsw_sp_fib_ht_params);
4628 }
4629 
4630 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4631 				     struct mlxsw_sp_fib_node *fib_node)
4632 {
4633 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4634 			       mlxsw_sp_fib_ht_params);
4635 }
4636 
4637 static struct mlxsw_sp_fib_node *
4638 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4639 			 size_t addr_len, unsigned char prefix_len)
4640 {
4641 	struct mlxsw_sp_fib_key key;
4642 
4643 	memset(&key, 0, sizeof(key));
4644 	memcpy(key.addr, addr, addr_len);
4645 	key.prefix_len = prefix_len;
4646 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4647 }
4648 
4649 static struct mlxsw_sp_fib_node *
4650 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4651 			 size_t addr_len, unsigned char prefix_len)
4652 {
4653 	struct mlxsw_sp_fib_node *fib_node;
4654 
4655 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4656 	if (!fib_node)
4657 		return NULL;
4658 
4659 	list_add(&fib_node->list, &fib->node_list);
4660 	memcpy(fib_node->key.addr, addr, addr_len);
4661 	fib_node->key.prefix_len = prefix_len;
4662 
4663 	return fib_node;
4664 }
4665 
4666 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4667 {
4668 	list_del(&fib_node->list);
4669 	kfree(fib_node);
4670 }
4671 
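/* Make sure the LPM tree used for this FIB covers the node's prefix
 * length. If it does not, get a tree that does and rebind the virtual
 * routers to it.
 */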
4672 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4673 				      struct mlxsw_sp_fib_node *fib_node)
4674 {
4675 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4676 	struct mlxsw_sp_fib *fib = fib_node->fib;
4677 	struct mlxsw_sp_lpm_tree *lpm_tree;
4678 	int err;
4679 
4680 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4681 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4682 		goto out;
4683 
4684 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4685 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4686 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4687 					 fib->proto);
4688 	if (IS_ERR(lpm_tree))
4689 		return PTR_ERR(lpm_tree);
4690 
4691 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4692 	if (err)
4693 		goto err_lpm_tree_replace;
4694 
4695 out:
4696 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4697 	return 0;
4698 
4699 err_lpm_tree_replace:
4700 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4701 	return err;
4702 }
4703 
4704 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4705 					 struct mlxsw_sp_fib_node *fib_node)
4706 {
4707 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4708 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4709 	struct mlxsw_sp_fib *fib = fib_node->fib;
4710 	int err;
4711 
4712 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4713 		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused prefix length. If we fail, continue using the
	 * old tree.
	 */
4717 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4718 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4719 				    fib_node->key.prefix_len);
4720 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4721 					 fib->proto);
4722 	if (IS_ERR(lpm_tree))
4723 		return;
4724 
4725 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4726 	if (err)
4727 		goto err_lpm_tree_replace;
4728 
4729 	return;
4730 
4731 err_lpm_tree_replace:
4732 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4733 }
4734 
4735 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4736 				  struct mlxsw_sp_fib_node *fib_node,
4737 				  struct mlxsw_sp_fib *fib)
4738 {
4739 	int err;
4740 
4741 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4742 	if (err)
4743 		return err;
4744 	fib_node->fib = fib;
4745 
4746 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4747 	if (err)
4748 		goto err_fib_lpm_tree_link;
4749 
4750 	return 0;
4751 
4752 err_fib_lpm_tree_link:
4753 	fib_node->fib = NULL;
4754 	mlxsw_sp_fib_node_remove(fib, fib_node);
4755 	return err;
4756 }
4757 
4758 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4759 				   struct mlxsw_sp_fib_node *fib_node)
4760 {
4761 	struct mlxsw_sp_fib *fib = fib_node->fib;
4762 
4763 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4764 	fib_node->fib = NULL;
4765 	mlxsw_sp_fib_node_remove(fib, fib_node);
4766 }
4767 
4768 static struct mlxsw_sp_fib_node *
4769 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4770 		      size_t addr_len, unsigned char prefix_len,
4771 		      enum mlxsw_sp_l3proto proto)
4772 {
4773 	struct mlxsw_sp_fib_node *fib_node;
4774 	struct mlxsw_sp_fib *fib;
4775 	struct mlxsw_sp_vr *vr;
4776 	int err;
4777 
4778 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4779 	if (IS_ERR(vr))
4780 		return ERR_CAST(vr);
4781 	fib = mlxsw_sp_vr_fib(vr, proto);
4782 
4783 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4784 	if (fib_node)
4785 		return fib_node;
4786 
4787 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4788 	if (!fib_node) {
4789 		err = -ENOMEM;
4790 		goto err_fib_node_create;
4791 	}
4792 
4793 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4794 	if (err)
4795 		goto err_fib_node_init;
4796 
4797 	return fib_node;
4798 
4799 err_fib_node_init:
4800 	mlxsw_sp_fib_node_destroy(fib_node);
4801 err_fib_node_create:
4802 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4803 	return ERR_PTR(err);
4804 }
4805 
4806 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4807 				  struct mlxsw_sp_fib_node *fib_node)
4808 {
4809 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4810 
4811 	if (fib_node->fib_entry)
4812 		return;
4813 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4814 	mlxsw_sp_fib_node_destroy(fib_node);
4815 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4816 }
4817 
4818 static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4819 					struct mlxsw_sp_fib_entry *fib_entry)
4820 {
4821 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4822 	int err;
4823 
4824 	fib_node->fib_entry = fib_entry;
4825 
4826 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4827 	if (err)
4828 		goto err_fib_entry_update;
4829 
4830 	return 0;
4831 
4832 err_fib_entry_update:
4833 	fib_node->fib_entry = NULL;
4834 	return err;
4835 }
4836 
4837 static void
4838 mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4839 			       struct mlxsw_sp_fib_entry *fib_entry)
4840 {
4841 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4842 
4843 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4844 	fib_node->fib_entry = NULL;
4845 }
4846 
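/* Do not allow a route from the main table to replace one from the local
 * table, as the local table is consulted first during lookup.
 */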
4847 static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
4848 {
4849 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4850 	struct mlxsw_sp_fib4_entry *fib4_replaced;
4851 
4852 	if (!fib_node->fib_entry)
4853 		return true;
4854 
4855 	fib4_replaced = container_of(fib_node->fib_entry,
4856 				     struct mlxsw_sp_fib4_entry, common);
4857 	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
4858 	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
4859 		return false;
4860 
4861 	return true;
4862 }
4863 
4864 static int
4865 mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
4866 			     const struct fib_entry_notifier_info *fen_info)
4867 {
4868 	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
4869 	struct mlxsw_sp_fib_entry *replaced;
4870 	struct mlxsw_sp_fib_node *fib_node;
4871 	int err;
4872 
4873 	if (mlxsw_sp->router->aborted)
4874 		return 0;
4875 
4876 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4877 					 &fen_info->dst, sizeof(fen_info->dst),
4878 					 fen_info->dst_len,
4879 					 MLXSW_SP_L3_PROTO_IPV4);
4880 	if (IS_ERR(fib_node)) {
4881 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4882 		return PTR_ERR(fib_node);
4883 	}
4884 
4885 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4886 	if (IS_ERR(fib4_entry)) {
4887 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4888 		err = PTR_ERR(fib4_entry);
4889 		goto err_fib4_entry_create;
4890 	}
4891 
4892 	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
4893 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4894 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4895 		return 0;
4896 	}
4897 
4898 	replaced = fib_node->fib_entry;
4899 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
4900 	if (err) {
4901 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4902 		goto err_fib_node_entry_link;
4903 	}
4904 
4905 	/* Nothing to replace */
4906 	if (!replaced)
4907 		return 0;
4908 
4909 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
4910 	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
4911 				     common);
4912 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
4913 
4914 	return 0;
4915 
4916 err_fib_node_entry_link:
4917 	fib_node->fib_entry = replaced;
4918 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4919 err_fib4_entry_create:
4920 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4921 	return err;
4922 }
4923 
4924 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4925 				     struct fib_entry_notifier_info *fen_info)
4926 {
4927 	struct mlxsw_sp_fib4_entry *fib4_entry;
4928 	struct mlxsw_sp_fib_node *fib_node;
4929 
4930 	if (mlxsw_sp->router->aborted)
4931 		return;
4932 
4933 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4934 	if (!fib4_entry)
4935 		return;
4936 	fib_node = fib4_entry->common.fib_node;
4937 
4938 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
4939 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4940 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4941 }
4942 
4943 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4944 {
4945 	/* Packets with link-local destination IP arriving to the router
4946 	 * are trapped to the CPU, so no need to program specific routes
4947 	 * for them.
4948 	 */
4949 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4950 		return true;
4951 
4952 	/* Multicast routes aren't supported, so ignore them. Neighbour
4953 	 * Discovery packets are specifically trapped.
4954 	 */
4955 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4956 		return true;
4957 
4958 	/* Cloned routes are irrelevant in the forwarding path. */
4959 	if (rt->fib6_flags & RTF_CACHE)
4960 		return true;
4961 
4962 	return false;
4963 }
4964 
4965 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4966 {
4967 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4968 
4969 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4970 	if (!mlxsw_sp_rt6)
4971 		return ERR_PTR(-ENOMEM);
4972 
	/* In case of route replace, the replaced route is deleted with
	 * no notification. Take a reference to prevent accessing freed
	 * memory.
	 */
4977 	mlxsw_sp_rt6->rt = rt;
4978 	fib6_info_hold(rt);
4979 
4980 	return mlxsw_sp_rt6;
4981 }
4982 
4983 #if IS_ENABLED(CONFIG_IPV6)
4984 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4985 {
4986 	fib6_info_release(rt);
4987 }
4988 #else
4989 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4990 {
4991 }
4992 #endif
4993 
4994 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4995 {
4996 	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
4997 
4998 	fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4999 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
5000 	kfree(mlxsw_sp_rt6);
5001 }
5002 
5003 static struct fib6_info *
5004 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
5005 {
5006 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
5007 				list)->rt;
5008 }
5009 
5010 static struct mlxsw_sp_rt6 *
5011 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
5012 			    const struct fib6_info *rt)
5013 {
5014 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5015 
5016 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
5017 		if (mlxsw_sp_rt6->rt == rt)
5018 			return mlxsw_sp_rt6;
5019 	}
5020 
5021 	return NULL;
5022 }
5023 
5024 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
5025 					const struct fib6_info *rt,
5026 					enum mlxsw_sp_ipip_type *ret)
5027 {
5028 	return rt->fib6_nh->fib_nh_dev &&
5029 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
5030 }
5031 
5032 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
5033 				       struct mlxsw_sp_nexthop_group *nh_grp,
5034 				       struct mlxsw_sp_nexthop *nh,
5035 				       const struct fib6_info *rt)
5036 {
5037 	const struct mlxsw_sp_ipip_ops *ipip_ops;
5038 	struct mlxsw_sp_ipip_entry *ipip_entry;
5039 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5040 	struct mlxsw_sp_rif *rif;
5041 	int err;
5042 
5043 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5044 	if (ipip_entry) {
5045 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5046 		if (ipip_ops->can_offload(mlxsw_sp, dev,
5047 					  MLXSW_SP_L3_PROTO_IPV6)) {
5048 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5049 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5050 			return 0;
5051 		}
5052 	}
5053 
5054 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5055 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5056 	if (!rif)
5057 		return 0;
5058 	mlxsw_sp_nexthop_rif_init(nh, rif);
5059 
5060 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5061 	if (err)
5062 		goto err_nexthop_neigh_init;
5063 
5064 	return 0;
5065 
5066 err_nexthop_neigh_init:
5067 	mlxsw_sp_nexthop_rif_fini(nh);
5068 	return err;
5069 }
5070 
5071 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
5072 					struct mlxsw_sp_nexthop *nh)
5073 {
5074 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5075 }
5076 
5077 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5078 				  struct mlxsw_sp_nexthop_group *nh_grp,
5079 				  struct mlxsw_sp_nexthop *nh,
5080 				  const struct fib6_info *rt)
5081 {
5082 	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5083 
5084 	nh->nh_grp = nh_grp;
5085 	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
5086 	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
5087 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5088 
5089 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5090 
5091 	if (!dev)
5092 		return 0;
5093 	nh->ifindex = dev->ifindex;
5094 
5095 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5096 }
5097 
5098 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5099 				   struct mlxsw_sp_nexthop *nh)
5100 {
5101 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5102 	list_del(&nh->router_list_node);
5103 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5104 }
5105 
5106 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5107 				    const struct fib6_info *rt)
5108 {
5109 	return rt->fib6_nh->fib_nh_gw_family ||
5110 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5111 }
5112 
5113 static struct mlxsw_sp_nexthop_group *
5114 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5115 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5116 {
5117 	struct mlxsw_sp_nexthop_group *nh_grp;
5118 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5119 	struct mlxsw_sp_nexthop *nh;
5120 	int i = 0;
5121 	int err;
5122 
5123 	nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5124 			 GFP_KERNEL);
5125 	if (!nh_grp)
5126 		return ERR_PTR(-ENOMEM);
5127 	INIT_LIST_HEAD(&nh_grp->fib_list);
5128 #if IS_ENABLED(CONFIG_IPV6)
5129 	nh_grp->neigh_tbl = &nd_tbl;
5130 #endif
5131 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5132 					struct mlxsw_sp_rt6, list);
5133 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5134 	nh_grp->count = fib6_entry->nrt6;
5135 	for (i = 0; i < nh_grp->count; i++) {
5136 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5137 
5138 		nh = &nh_grp->nexthops[i];
5139 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5140 		if (err)
5141 			goto err_nexthop6_init;
5142 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5143 	}
5144 
5145 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5146 	if (err)
5147 		goto err_nexthop_group_insert;
5148 
5149 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5150 	return nh_grp;
5151 
5152 err_nexthop_group_insert:
5153 err_nexthop6_init:
5154 	for (i--; i >= 0; i--) {
5155 		nh = &nh_grp->nexthops[i];
5156 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5157 	}
5158 	kfree(nh_grp);
5159 	return ERR_PTR(err);
5160 }
5161 
5162 static void
5163 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5164 				struct mlxsw_sp_nexthop_group *nh_grp)
5165 {
5166 	struct mlxsw_sp_nexthop *nh;
5167 	int i = nh_grp->count;
5168 
5169 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5170 	for (i--; i >= 0; i--) {
5171 		nh = &nh_grp->nexthops[i];
5172 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5173 	}
5174 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5175 	WARN_ON(nh_grp->adj_index_valid);
5176 	kfree(nh_grp);
5177 }
5178 
5179 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5180 				       struct mlxsw_sp_fib6_entry *fib6_entry)
5181 {
5182 	struct mlxsw_sp_nexthop_group *nh_grp;
5183 
5184 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5185 	if (!nh_grp) {
5186 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5187 		if (IS_ERR(nh_grp))
5188 			return PTR_ERR(nh_grp);
5189 	}
5190 
5191 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5192 		      &nh_grp->fib_list);
5193 	fib6_entry->common.nh_group = nh_grp;
5194 
	/* The route and the nexthop are described by the same struct, so we
	 * need to update the nexthop offload indication for the new route.
	 */
5198 	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
5199 
5200 	return 0;
5201 }
5202 
5203 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5204 					struct mlxsw_sp_fib_entry *fib_entry)
5205 {
5206 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5207 
5208 	list_del(&fib_entry->nexthop_group_node);
5209 	if (!list_empty(&nh_grp->fib_list))
5210 		return;
5211 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5212 }
5213 
5214 static int
5215 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5216 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5217 {
5218 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5219 	int err;
5220 
5221 	fib6_entry->common.nh_group = NULL;
5222 	list_del(&fib6_entry->common.nexthop_group_node);
5223 
5224 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5225 	if (err)
5226 		goto err_nexthop6_group_get;
5227 
	/* If this entry is offloaded, then the adjacency index currently
	 * associated with it in the device's table is that of the old
	 * group. Start using the new one instead.
	 */
5232 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
5233 	if (err)
5234 		goto err_fib_entry_update;
5235 
5236 	if (list_empty(&old_nh_grp->fib_list))
5237 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5238 
5239 	return 0;
5240 
5241 err_fib_entry_update:
5242 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5243 err_nexthop6_group_get:
5244 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5245 		      &old_nh_grp->fib_list);
5246 	fib6_entry->common.nh_group = old_nh_grp;
5247 	return err;
5248 }
5249 
5250 static int
5251 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5252 				struct mlxsw_sp_fib6_entry *fib6_entry,
5253 				struct fib6_info **rt_arr, unsigned int nrt6)
5254 {
5255 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5256 	int err, i;
5257 
5258 	for (i = 0; i < nrt6; i++) {
5259 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5260 		if (IS_ERR(mlxsw_sp_rt6)) {
5261 			err = PTR_ERR(mlxsw_sp_rt6);
5262 			goto err_rt6_create;
5263 		}
5264 
5265 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5266 		fib6_entry->nrt6++;
5267 	}
5268 
5269 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5270 	if (err)
5271 		goto err_nexthop6_group_update;
5272 
5273 	return 0;
5274 
5275 err_nexthop6_group_update:
5276 	i = nrt6;
5277 err_rt6_create:
5278 	for (i--; i >= 0; i--) {
5279 		fib6_entry->nrt6--;
5280 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5281 					       struct mlxsw_sp_rt6, list);
5282 		list_del(&mlxsw_sp_rt6->list);
5283 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5284 	}
5285 	return err;
5286 }
5287 
5288 static void
5289 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5290 				struct mlxsw_sp_fib6_entry *fib6_entry,
5291 				struct fib6_info **rt_arr, unsigned int nrt6)
5292 {
5293 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5294 	int i;
5295 
5296 	for (i = 0; i < nrt6; i++) {
5297 		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
5298 							   rt_arr[i]);
5299 		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
5300 			continue;
5301 
5302 		fib6_entry->nrt6--;
5303 		list_del(&mlxsw_sp_rt6->list);
5304 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5305 	}
5306 
5307 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5308 }
5309 
5310 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5311 					 struct mlxsw_sp_fib_entry *fib_entry,
5312 					 const struct fib6_info *rt)
5313 {
5314 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5315 	 * stack. We can rely on their destination device not having a
5316 	 * RIF (it's the loopback device) and can thus use action type
5317 	 * local, which will cause them to be trapped with a lower
5318 	 * priority than packets that need to be locally received.
5319 	 */
5320 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5321 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5322 	else if (rt->fib6_type == RTN_BLACKHOLE)
5323 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5324 	else if (rt->fib6_flags & RTF_REJECT)
5325 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
5326 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5327 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5328 	else
5329 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5330 }
5331 
5332 static void
5333 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5334 {
5335 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5336 
5337 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5338 				 list) {
5339 		fib6_entry->nrt6--;
5340 		list_del(&mlxsw_sp_rt6->list);
5341 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5342 	}
5343 }
5344 
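/* Create a FIB entry from an array of sibling routes sharing the same
 * prefix. The first route determines the entry type, and a nexthop group
 * is obtained for the entry only after all the routes were wrapped in
 * mlxsw_sp_rt6 structures.
 */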
5345 static struct mlxsw_sp_fib6_entry *
5346 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5347 			   struct mlxsw_sp_fib_node *fib_node,
5348 			   struct fib6_info **rt_arr, unsigned int nrt6)
5349 {
5350 	struct mlxsw_sp_fib6_entry *fib6_entry;
5351 	struct mlxsw_sp_fib_entry *fib_entry;
5352 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5353 	int err, i;
5354 
5355 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5356 	if (!fib6_entry)
5357 		return ERR_PTR(-ENOMEM);
5358 	fib_entry = &fib6_entry->common;
5359 
5360 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5361 
5362 	for (i = 0; i < nrt6; i++) {
5363 		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5364 		if (IS_ERR(mlxsw_sp_rt6)) {
5365 			err = PTR_ERR(mlxsw_sp_rt6);
5366 			goto err_rt6_create;
5367 		}
5368 		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5369 		fib6_entry->nrt6++;
5370 	}
5371 
5372 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
5373 
5374 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5375 	if (err)
5376 		goto err_nexthop6_group_get;
5377 
5378 	fib_entry->fib_node = fib_node;
5379 
5380 	return fib6_entry;
5381 
5382 err_nexthop6_group_get:
5383 	i = nrt6;
5384 err_rt6_create:
5385 	for (i--; i >= 0; i--) {
5386 		fib6_entry->nrt6--;
5387 		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5388 					       struct mlxsw_sp_rt6, list);
5389 		list_del(&mlxsw_sp_rt6->list);
5390 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5391 	}
5392 	kfree(fib6_entry);
5393 	return ERR_PTR(err);
5394 }
5395 
5396 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5397 					struct mlxsw_sp_fib6_entry *fib6_entry)
5398 {
5399 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5400 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5401 	WARN_ON(fib6_entry->nrt6);
5402 	kfree(fib6_entry);
5403 }
5404 
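/* Find the driver's FIB entry corresponding to a kernel route: resolve
 * the virtual router from the route's table, look up the fib_node by
 * prefix, and only return the entry if the table ID and metric match and
 * the route is actually linked to it.
 */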
5405 static struct mlxsw_sp_fib6_entry *
5406 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5407 			   const struct fib6_info *rt)
5408 {
5409 	struct mlxsw_sp_fib6_entry *fib6_entry;
5410 	struct mlxsw_sp_fib_node *fib_node;
5411 	struct mlxsw_sp_fib *fib;
5412 	struct fib6_info *cmp_rt;
5413 	struct mlxsw_sp_vr *vr;
5414 
5415 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5416 	if (!vr)
5417 		return NULL;
5418 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5419 
5420 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5421 					    sizeof(rt->fib6_dst.addr),
5422 					    rt->fib6_dst.plen);
5423 	if (!fib_node)
5424 		return NULL;
5425 
5426 	fib6_entry = container_of(fib_node->fib_entry,
5427 				  struct mlxsw_sp_fib6_entry, common);
5428 	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5429 	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
5430 	    rt->fib6_metric == cmp_rt->fib6_metric &&
5431 	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5432 		return fib6_entry;
5433 
5434 	return NULL;
5435 }
5436 
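/* Forbid a route from the main table from replacing an offloaded route
 * from the local table, since the local table is consulted before the
 * main table during lookup and its routes must keep precedence in the
 * device.
 */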
5437 static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
5438 {
5439 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5440 	struct mlxsw_sp_fib6_entry *fib6_replaced;
5441 	struct fib6_info *rt, *rt_replaced;
5442 
5443 	if (!fib_node->fib_entry)
5444 		return true;
5445 
5446 	fib6_replaced = container_of(fib_node->fib_entry,
5447 				     struct mlxsw_sp_fib6_entry,
5448 				     common);
5449 	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5450 	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
5451 	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
5452 	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
5453 		return false;
5454 
5455 	return true;
5456 }
5457 
5458 static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
5459 					struct fib6_info **rt_arr,
5460 					unsigned int nrt6)
5461 {
5462 	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
5463 	struct mlxsw_sp_fib_entry *replaced;
5464 	struct mlxsw_sp_fib_node *fib_node;
5465 	struct fib6_info *rt = rt_arr[0];
5466 	int err;
5467 
5468 	if (mlxsw_sp->router->aborted)
5469 		return 0;
5470 
5471 	if (rt->fib6_src.plen)
5472 		return -EINVAL;
5473 
5474 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5475 		return 0;
5476 
5477 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5478 					 &rt->fib6_dst.addr,
5479 					 sizeof(rt->fib6_dst.addr),
5480 					 rt->fib6_dst.plen,
5481 					 MLXSW_SP_L3_PROTO_IPV6);
5482 	if (IS_ERR(fib_node))
5483 		return PTR_ERR(fib_node);
5484 
5485 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
5486 						nrt6);
5487 	if (IS_ERR(fib6_entry)) {
5488 		err = PTR_ERR(fib6_entry);
5489 		goto err_fib6_entry_create;
5490 	}
5491 
5492 	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
5493 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5494 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5495 		return 0;
5496 	}
5497 
5498 	replaced = fib_node->fib_entry;
5499 	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
5500 	if (err)
5501 		goto err_fib_node_entry_link;
5502 
5503 	/* Nothing to replace */
5504 	if (!replaced)
5505 		return 0;
5506 
5507 	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
5508 	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
5509 				     common);
5510 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
5511 
5512 	return 0;
5513 
5514 err_fib_node_entry_link:
5515 	fib_node->fib_entry = replaced;
5516 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5517 err_fib6_entry_create:
5518 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5519 	return err;
5520 }
5521 
5522 static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
5523 				       struct fib6_info **rt_arr,
5524 				       unsigned int nrt6)
5525 {
5526 	struct mlxsw_sp_fib6_entry *fib6_entry;
5527 	struct mlxsw_sp_fib_node *fib_node;
5528 	struct fib6_info *rt = rt_arr[0];
5529 	int err;
5530 
5531 	if (mlxsw_sp->router->aborted)
5532 		return 0;
5533 
5534 	if (rt->fib6_src.plen)
5535 		return -EINVAL;
5536 
5537 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5538 		return 0;
5539 
5540 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5541 					 &rt->fib6_dst.addr,
5542 					 sizeof(rt->fib6_dst.addr),
5543 					 rt->fib6_dst.plen,
5544 					 MLXSW_SP_L3_PROTO_IPV6);
5545 	if (IS_ERR(fib_node))
5546 		return PTR_ERR(fib_node);
5547 
5548 	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
5549 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5550 		return -EINVAL;
5551 	}
5552 
5553 	fib6_entry = container_of(fib_node->fib_entry,
5554 				  struct mlxsw_sp_fib6_entry, common);
5555 	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
5556 					      nrt6);
5557 	if (err)
5558 		goto err_fib6_entry_nexthop_add;
5559 
5560 	return 0;
5561 
5562 err_fib6_entry_nexthop_add:
5563 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5564 	return err;
5565 }
5566 
5567 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5568 				     struct fib6_info **rt_arr,
5569 				     unsigned int nrt6)
5570 {
5571 	struct mlxsw_sp_fib6_entry *fib6_entry;
5572 	struct mlxsw_sp_fib_node *fib_node;
5573 	struct fib6_info *rt = rt_arr[0];
5574 
5575 	if (mlxsw_sp->router->aborted)
5576 		return;
5577 
5578 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5579 		return;
5580 
5581 	/* Multipath routes are first added to the FIB trie and only then
5582 	 * notified. If we vetoed the addition, we will get a delete
5583 	 * notification for a route we do not have. Therefore, do not warn if
5584 	 * the route was not found.
5585 	 */
5586 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5587 	if (!fib6_entry)
5588 		return;
5589 
5590 	/* If not all the nexthops are deleted, then only reduce the nexthop
5591 	 * group.
5592 	 */
5593 	if (nrt6 != fib6_entry->nrt6) {
5594 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
5595 						nrt6);
5596 		return;
5597 	}
5598 
5599 	fib_node = fib6_entry->common.fib_node;
5600 
5601 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
5602 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5603 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5604 }
5605 
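/* Set the abort trap for one protocol: bind an otherwise unused LPM tree
 * and, in every virtual router, write a default (zero prefix length)
 * route whose action traps packets to the CPU, so that routing falls
 * back to the kernel.
 */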
5606 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5607 					    enum mlxsw_reg_ralxx_protocol proto,
5608 					    u8 tree_id)
5609 {
5610 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5611 	char ralst_pl[MLXSW_REG_RALST_LEN];
5612 	int i, err;
5613 
5614 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5615 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5616 	if (err)
5617 		return err;
5618 
5619 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5620 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5621 	if (err)
5622 		return err;
5623 
5624 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5625 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5626 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5627 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5628 
5629 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5630 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5631 				      raltb_pl);
5632 		if (err)
5633 			return err;
5634 
5635 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5636 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5637 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5638 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5639 				      ralue_pl);
5640 		if (err)
5641 			return err;
5642 	}
5643 
5644 	return 0;
5645 }
5646 
5647 static struct mlxsw_sp_mr_table *
5648 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5649 {
5650 	if (family == RTNL_FAMILY_IPMR)
5651 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5652 	else
5653 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5654 }
5655 
5656 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5657 				     struct mfc_entry_notifier_info *men_info,
5658 				     bool replace)
5659 {
5660 	struct mlxsw_sp_mr_table *mrt;
5661 	struct mlxsw_sp_vr *vr;
5662 
5663 	if (mlxsw_sp->router->aborted)
5664 		return 0;
5665 
5666 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5667 	if (IS_ERR(vr))
5668 		return PTR_ERR(vr);
5669 
5670 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5671 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5672 }
5673 
5674 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5675 				      struct mfc_entry_notifier_info *men_info)
5676 {
5677 	struct mlxsw_sp_mr_table *mrt;
5678 	struct mlxsw_sp_vr *vr;
5679 
5680 	if (mlxsw_sp->router->aborted)
5681 		return;
5682 
5683 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5684 	if (WARN_ON(!vr))
5685 		return;
5686 
5687 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5688 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5689 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5690 }
5691 
5692 static int
5693 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5694 			      struct vif_entry_notifier_info *ven_info)
5695 {
5696 	struct mlxsw_sp_mr_table *mrt;
5697 	struct mlxsw_sp_rif *rif;
5698 	struct mlxsw_sp_vr *vr;
5699 
5700 	if (mlxsw_sp->router->aborted)
5701 		return 0;
5702 
5703 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5704 	if (IS_ERR(vr))
5705 		return PTR_ERR(vr);
5706 
5707 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5708 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5709 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5710 				   ven_info->vif_index,
5711 				   ven_info->vif_flags, rif);
5712 }
5713 
5714 static void
5715 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5716 			      struct vif_entry_notifier_info *ven_info)
5717 {
5718 	struct mlxsw_sp_mr_table *mrt;
5719 	struct mlxsw_sp_vr *vr;
5720 
5721 	if (mlxsw_sp->router->aborted)
5722 		return;
5723 
5724 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5725 	if (WARN_ON(!vr))
5726 		return;
5727 
5728 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5729 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5730 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5731 }
5732 
5733 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5734 {
5735 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5736 	int err;
5737 
5738 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5739 					       MLXSW_SP_LPM_TREE_MIN);
5740 	if (err)
5741 		return err;
5742 
5743 	/* The multicast router code does not need an abort trap as, by
5744 	 * default, packets that do not match any route are trapped to the CPU.
5745 	 */
5746 
5747 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5748 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5749 						MLXSW_SP_LPM_TREE_MIN + 1);
5750 }
5751 
5752 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5753 				     struct mlxsw_sp_fib_node *fib_node)
5754 {
5755 	struct mlxsw_sp_fib4_entry *fib4_entry;
5756 
5757 	fib4_entry = container_of(fib_node->fib_entry,
5758 				  struct mlxsw_sp_fib4_entry, common);
5759 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5760 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5761 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5762 }
5763 
5764 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5765 				     struct mlxsw_sp_fib_node *fib_node)
5766 {
5767 	struct mlxsw_sp_fib6_entry *fib6_entry;
5768 
5769 	fib6_entry = container_of(fib_node->fib_entry,
5770 				  struct mlxsw_sp_fib6_entry, common);
5771 	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5772 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5773 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5774 }
5775 
5776 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5777 				    struct mlxsw_sp_fib_node *fib_node)
5778 {
5779 	switch (fib_node->fib->proto) {
5780 	case MLXSW_SP_L3_PROTO_IPV4:
5781 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5782 		break;
5783 	case MLXSW_SP_L3_PROTO_IPV6:
5784 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5785 		break;
5786 	}
5787 }
5788 
5789 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5790 				  struct mlxsw_sp_vr *vr,
5791 				  enum mlxsw_sp_l3proto proto)
5792 {
5793 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5794 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5795 
5796 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
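		/* Flushing the last node may free the FIB itself (when its
		 * virtual router is released), so decide whether to stop
		 * before the list head can no longer be safely dereferenced.
		 */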
5797 		bool do_break = &tmp->list == &fib->node_list;
5798 
5799 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5800 		if (do_break)
5801 			break;
5802 	}
5803 }
5804 
5805 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5806 {
5807 	int i, j;
5808 
5809 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5810 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5811 
5812 		if (!mlxsw_sp_vr_is_used(vr))
5813 			continue;
5814 
5815 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5816 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5817 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5818 
5819 		/* If the virtual router was only used for IPv4, then it is no
5820 		 * longer in use and can be skipped.
5821 		 */
5822 		if (!mlxsw_sp_vr_is_used(vr))
5823 			continue;
5824 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5825 	}
5826 
5827 	/* After flushing all the routes, no one can still be using the
5828 	 * adjacency index that discards packets, so free it in case it was
5829 	 * allocated.
5830 	 */
5831 	if (!mlxsw_sp->router->adj_discard_index_valid)
5832 		return;
5833 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5834 			   mlxsw_sp->router->adj_discard_index);
5835 	mlxsw_sp->router->adj_discard_index_valid = false;
5836 }
5837 
5838 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5839 {
5840 	int err;
5841 
5842 	if (mlxsw_sp->router->aborted)
5843 		return;
5844 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5845 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5846 	mlxsw_sp->router->aborted = true;
5847 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5848 	if (err)
5849 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5850 }
5851 
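/* FIB notifications arrive in atomic context, so the relevant event
 * information is copied (with references taken) into a work struct and
 * processing is deferred to process context, where RTNL can be taken and
 * the device programmed.
 */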
5852 struct mlxsw_sp_fib6_event_work {
5853 	struct fib6_info **rt_arr;
5854 	unsigned int nrt6;
5855 };
5856 
5857 struct mlxsw_sp_fib_event_work {
5858 	struct work_struct work;
5859 	union {
5860 		struct mlxsw_sp_fib6_event_work fib6_work;
5861 		struct fib_entry_notifier_info fen_info;
5862 		struct fib_rule_notifier_info fr_info;
5863 		struct fib_nh_notifier_info fnh_info;
5864 		struct mfc_entry_notifier_info men_info;
5865 		struct vif_entry_notifier_info ven_info;
5866 	};
5867 	struct mlxsw_sp *mlxsw_sp;
5868 	unsigned long event;
5869 };
5870 
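/* Snapshot the route and its siblings into an array for the deferred
 * work, taking a reference on each fib6_info so that it remains valid
 * until the work item runs.
 */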
5871 static int
5872 mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
5873 			       struct fib6_entry_notifier_info *fen6_info)
5874 {
5875 	struct fib6_info *rt = fen6_info->rt;
5876 	struct fib6_info **rt_arr;
5877 	struct fib6_info *iter;
5878 	unsigned int nrt6;
5879 	int i = 0;
5880 
5881 	nrt6 = fen6_info->nsiblings + 1;
5882 
5883 	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
5884 	if (!rt_arr)
5885 		return -ENOMEM;
5886 
5887 	fib6_work->rt_arr = rt_arr;
5888 	fib6_work->nrt6 = nrt6;
5889 
5890 	rt_arr[0] = rt;
5891 	fib6_info_hold(rt);
5892 
5893 	if (!fen6_info->nsiblings)
5894 		return 0;
5895 
5896 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
5897 		if (i == fen6_info->nsiblings)
5898 			break;
5899 
5900 		rt_arr[i + 1] = iter;
5901 		fib6_info_hold(iter);
5902 		i++;
5903 	}
5904 	WARN_ON_ONCE(i != fen6_info->nsiblings);
5905 
5906 	return 0;
5907 }
5908 
5909 static void
5910 mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
5911 {
5912 	int i;
5913 
5914 	for (i = 0; i < fib6_work->nrt6; i++)
5915 		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
5916 	kfree(fib6_work->rt_arr);
5917 }
5918 
5919 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5920 {
5921 	struct mlxsw_sp_fib_event_work *fib_work =
5922 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5923 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5924 	int err;
5925 
5926 	/* Protect internal structures from changes */
5927 	rtnl_lock();
5928 	mlxsw_sp_span_respin(mlxsw_sp);
5929 
5930 	switch (fib_work->event) {
5931 	case FIB_EVENT_ENTRY_REPLACE:
5932 		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
5933 						   &fib_work->fen_info);
5934 		if (err)
5935 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5936 		fib_info_put(fib_work->fen_info.fi);
5937 		break;
5938 	case FIB_EVENT_ENTRY_DEL:
5939 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5940 		fib_info_put(fib_work->fen_info.fi);
5941 		break;
5942 	case FIB_EVENT_NH_ADD: /* fall through */
5943 	case FIB_EVENT_NH_DEL:
5944 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5945 					fib_work->fnh_info.fib_nh);
5946 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5947 		break;
5948 	}
5949 	rtnl_unlock();
5950 	kfree(fib_work);
5951 }
5952 
5953 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5954 {
5955 	struct mlxsw_sp_fib_event_work *fib_work =
5956 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5957 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5958 	int err;
5959 
5960 	rtnl_lock();
5961 	mlxsw_sp_span_respin(mlxsw_sp);
5962 
5963 	switch (fib_work->event) {
5964 	case FIB_EVENT_ENTRY_REPLACE:
5965 		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
5966 						   fib_work->fib6_work.rt_arr,
5967 						   fib_work->fib6_work.nrt6);
5968 		if (err)
5969 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5970 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
5971 		break;
5972 	case FIB_EVENT_ENTRY_APPEND:
5973 		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
5974 						  fib_work->fib6_work.rt_arr,
5975 						  fib_work->fib6_work.nrt6);
5976 		if (err)
5977 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5978 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
5979 		break;
5980 	case FIB_EVENT_ENTRY_DEL:
5981 		mlxsw_sp_router_fib6_del(mlxsw_sp,
5982 					 fib_work->fib6_work.rt_arr,
5983 					 fib_work->fib6_work.nrt6);
5984 		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
5985 		break;
5986 	}
5987 	rtnl_unlock();
5988 	kfree(fib_work);
5989 }
5990 
5991 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5992 {
5993 	struct mlxsw_sp_fib_event_work *fib_work =
5994 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5995 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5996 	bool replace;
5997 	int err;
5998 
5999 	rtnl_lock();
6000 	switch (fib_work->event) {
6001 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6002 	case FIB_EVENT_ENTRY_ADD:
6003 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6004 
6005 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
6006 						replace);
6007 		if (err)
6008 			mlxsw_sp_router_fib_abort(mlxsw_sp);
6009 		mr_cache_put(fib_work->men_info.mfc);
6010 		break;
6011 	case FIB_EVENT_ENTRY_DEL:
6012 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
6013 		mr_cache_put(fib_work->men_info.mfc);
6014 		break;
6015 	case FIB_EVENT_VIF_ADD:
6016 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
6017 						    &fib_work->ven_info);
6018 		if (err)
6019 			mlxsw_sp_router_fib_abort(mlxsw_sp);
6020 		dev_put(fib_work->ven_info.dev);
6021 		break;
6022 	case FIB_EVENT_VIF_DEL:
6023 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
6024 					      &fib_work->ven_info);
6025 		dev_put(fib_work->ven_info.dev);
6026 		break;
6027 	}
6028 	rtnl_unlock();
6029 	kfree(fib_work);
6030 }
6031 
6032 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
6033 				       struct fib_notifier_info *info)
6034 {
6035 	struct fib_entry_notifier_info *fen_info;
6036 	struct fib_nh_notifier_info *fnh_info;
6037 
6038 	switch (fib_work->event) {
6039 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6040 	case FIB_EVENT_ENTRY_DEL:
6041 		fen_info = container_of(info, struct fib_entry_notifier_info,
6042 					info);
6043 		fib_work->fen_info = *fen_info;
6044 		/* Take a reference on the fib_info to prevent it from being
6045 		 * freed while the work is queued. Release it afterwards.
6046 		 */
6047 		fib_info_hold(fib_work->fen_info.fi);
6048 		break;
6049 	case FIB_EVENT_NH_ADD: /* fall through */
6050 	case FIB_EVENT_NH_DEL:
6051 		fnh_info = container_of(info, struct fib_nh_notifier_info,
6052 					info);
6053 		fib_work->fnh_info = *fnh_info;
6054 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
6055 		break;
6056 	}
6057 }
6058 
6059 static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6060 				      struct fib_notifier_info *info)
6061 {
6062 	struct fib6_entry_notifier_info *fen6_info;
6063 	int err;
6064 
6065 	switch (fib_work->event) {
6066 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6067 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
6068 	case FIB_EVENT_ENTRY_DEL:
6069 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
6070 					 info);
6071 		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
6072 						     fen6_info);
6073 		if (err)
6074 			return err;
6075 		break;
6076 	}
6077 
6078 	return 0;
6079 }
6080 
6081 static void
6082 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6083 			    struct fib_notifier_info *info)
6084 {
6085 	switch (fib_work->event) {
6086 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6087 	case FIB_EVENT_ENTRY_ADD: /* fall through */
6088 	case FIB_EVENT_ENTRY_DEL:
6089 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6090 		mr_cache_hold(fib_work->men_info.mfc);
6091 		break;
6092 	case FIB_EVENT_VIF_ADD: /* fall through */
6093 	case FIB_EVENT_VIF_DEL:
6094 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6095 		dev_hold(fib_work->ven_info.dev);
6096 		break;
6097 	}
6098 }
6099 
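/* Only default FIB rules and l3mdev (VRF) rules can be honoured by the
 * device. The addition of any other rule is vetoed with an extack
 * message.
 */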
6100 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6101 					  struct fib_notifier_info *info,
6102 					  struct mlxsw_sp *mlxsw_sp)
6103 {
6104 	struct netlink_ext_ack *extack = info->extack;
6105 	struct fib_rule_notifier_info *fr_info;
6106 	struct fib_rule *rule;
6107 	int err = 0;
6108 
6109 	/* nothing to do at the moment */
6110 	if (event == FIB_EVENT_RULE_DEL)
6111 		return 0;
6112 
6113 	if (mlxsw_sp->router->aborted)
6114 		return 0;
6115 
6116 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6117 	rule = fr_info->rule;
6118 
6119 	/* Rule only affects locally generated traffic */
6120 	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
6121 		return 0;
6122 
6123 	switch (info->family) {
6124 	case AF_INET:
6125 		if (!fib4_rule_default(rule) && !rule->l3mdev)
6126 			err = -EOPNOTSUPP;
6127 		break;
6128 	case AF_INET6:
6129 		if (!fib6_rule_default(rule) && !rule->l3mdev)
6130 			err = -EOPNOTSUPP;
6131 		break;
6132 	case RTNL_FAMILY_IPMR:
6133 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6134 			err = -EOPNOTSUPP;
6135 		break;
6136 	case RTNL_FAMILY_IP6MR:
6137 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6138 			err = -EOPNOTSUPP;
6139 		break;
6140 	}
6141 
6142 	if (err < 0)
6143 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6144 
6145 	return err;
6146 }
6147 
6148 /* Called with rcu_read_lock() */
6149 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6150 				     unsigned long event, void *ptr)
6151 {
6152 	struct mlxsw_sp_fib_event_work *fib_work;
6153 	struct fib_notifier_info *info = ptr;
6154 	struct mlxsw_sp_router *router;
6155 	int err;
6156 
6157 	if ((info->family != AF_INET && info->family != AF_INET6 &&
6158 	     info->family != RTNL_FAMILY_IPMR &&
6159 	     info->family != RTNL_FAMILY_IP6MR))
6160 		return NOTIFY_DONE;
6161 
6162 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6163 
6164 	switch (event) {
6165 	case FIB_EVENT_RULE_ADD: /* fall through */
6166 	case FIB_EVENT_RULE_DEL:
6167 		err = mlxsw_sp_router_fib_rule_event(event, info,
6168 						     router->mlxsw_sp);
6169 		return notifier_from_errno(err);
6170 	case FIB_EVENT_ENTRY_ADD: /* fall through */
6171 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6172 	case FIB_EVENT_ENTRY_APPEND:
6173 		if (router->aborted) {
6174 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6175 			return notifier_from_errno(-EINVAL);
6176 		}
6177 		if (info->family == AF_INET) {
6178 			struct fib_entry_notifier_info *fen_info = ptr;
6179 
6180 			if (fen_info->fi->fib_nh_is_v6) {
6181 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6182 				return notifier_from_errno(-EINVAL);
6183 			}
6184 			if (fen_info->fi->nh) {
6185 				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
6186 				return notifier_from_errno(-EINVAL);
6187 			}
6188 		} else if (info->family == AF_INET6) {
6189 			struct fib6_entry_notifier_info *fen6_info;
6190 
6191 			fen6_info = container_of(info,
6192 						 struct fib6_entry_notifier_info,
6193 						 info);
6194 			if (fen6_info->rt->nh) {
6195 				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
6196 				return notifier_from_errno(-EINVAL);
6197 			}
6198 		}
6199 		break;
6200 	}
6201 
6202 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6203 	if (WARN_ON(!fib_work))
6204 		return NOTIFY_BAD;
6205 
6206 	fib_work->mlxsw_sp = router->mlxsw_sp;
6207 	fib_work->event = event;
6208 
6209 	switch (info->family) {
6210 	case AF_INET:
6211 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6212 		mlxsw_sp_router_fib4_event(fib_work, info);
6213 		break;
6214 	case AF_INET6:
6215 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6216 		err = mlxsw_sp_router_fib6_event(fib_work, info);
6217 		if (err)
6218 			goto err_fib_event;
6219 		break;
6220 	case RTNL_FAMILY_IP6MR:
6221 	case RTNL_FAMILY_IPMR:
6222 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6223 		mlxsw_sp_router_fibmr_event(fib_work, info);
6224 		break;
6225 	}
6226 
6227 	mlxsw_core_schedule_work(&fib_work->work);
6228 
6229 	return NOTIFY_DONE;
6230 
6231 err_fib_event:
6232 	kfree(fib_work);
6233 	return NOTIFY_BAD;
6234 }
6235 
6236 struct mlxsw_sp_rif *
6237 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6238 			 const struct net_device *dev)
6239 {
6240 	int i;
6241 
6242 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6243 		if (mlxsw_sp->router->rifs[i] &&
6244 		    mlxsw_sp->router->rifs[i]->dev == dev)
6245 			return mlxsw_sp->router->rifs[i];
6246 
6247 	return NULL;
6248 }
6249 
6250 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6251 {
6252 	char ritr_pl[MLXSW_REG_RITR_LEN];
6253 	int err;
6254 
6255 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6256 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6257 	if (err)
6258 		return err;
6259 
6260 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6261 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6262 }
6263 
6264 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6265 					  struct mlxsw_sp_rif *rif)
6266 {
6267 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6268 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6269 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6270 }
6271 
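/* Decide whether an address event should (de)configure a RIF for 'dev':
 * configure one on NETDEV_UP only if none exists yet, and unconfigure on
 * NETDEV_DOWN only once the device has neither IPv4 nor IPv6 addresses
 * left.
 */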
6272 static bool
6273 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6274 			   unsigned long event)
6275 {
6276 	struct inet6_dev *inet6_dev;
6277 	bool addr_list_empty = true;
6278 	struct in_device *idev;
6279 
6280 	switch (event) {
6281 	case NETDEV_UP:
6282 		return rif == NULL;
6283 	case NETDEV_DOWN:
6284 		idev = __in_dev_get_rtnl(dev);
6285 		if (idev && idev->ifa_list)
6286 			addr_list_empty = false;
6287 
6288 		inet6_dev = __in6_dev_get(dev);
6289 		if (addr_list_empty && inet6_dev &&
6290 		    !list_empty(&inet6_dev->addr_list))
6291 			addr_list_empty = false;
6292 
6293 		/* macvlans do not have a RIF, but rather piggyback on the
6294 		 * RIF of their lower device.
6295 		 */
6296 		if (netif_is_macvlan(dev) && addr_list_empty)
6297 			return true;
6298 
6299 		if (rif && addr_list_empty &&
6300 		    !netif_is_l3_slave(rif->dev))
6301 			return true;
6302 		/* It is possible we already removed the RIF ourselves
6303 		 * if it was assigned to a netdev that is now a bridge
6304 		 * or LAG slave.
6305 		 */
6306 		return false;
6307 	}
6308 
6309 	return false;
6310 }
6311 
6312 static enum mlxsw_sp_rif_type
6313 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6314 		      const struct net_device *dev)
6315 {
6316 	enum mlxsw_sp_fid_type type;
6317 
6318 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6319 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6320 
6321 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6322 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6323 		type = MLXSW_SP_FID_TYPE_8021Q;
6324 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6325 		type = MLXSW_SP_FID_TYPE_8021Q;
6326 	else if (netif_is_bridge_master(dev))
6327 		type = MLXSW_SP_FID_TYPE_8021D;
6328 	else
6329 		type = MLXSW_SP_FID_TYPE_RFID;
6330 
6331 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6332 }
6333 
6334 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6335 {
6336 	int i;
6337 
6338 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6339 		if (!mlxsw_sp->router->rifs[i]) {
6340 			*p_rif_index = i;
6341 			return 0;
6342 		}
6343 	}
6344 
6345 	return -ENOBUFS;
6346 }
6347 
6348 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6349 					       u16 vr_id,
6350 					       struct net_device *l3_dev)
6351 {
6352 	struct mlxsw_sp_rif *rif;
6353 
6354 	rif = kzalloc(rif_size, GFP_KERNEL);
6355 	if (!rif)
6356 		return NULL;
6357 
6358 	INIT_LIST_HEAD(&rif->nexthop_list);
6359 	INIT_LIST_HEAD(&rif->neigh_list);
6360 	if (l3_dev) {
6361 		ether_addr_copy(rif->addr, l3_dev->dev_addr);
6362 		rif->mtu = l3_dev->mtu;
6363 		rif->dev = l3_dev;
6364 	}
6365 	rif->vr_id = vr_id;
6366 	rif->rif_index = rif_index;
6367 
6368 	return rif;
6369 }
6370 
6371 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6372 					   u16 rif_index)
6373 {
6374 	return mlxsw_sp->router->rifs[rif_index];
6375 }
6376 
6377 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6378 {
6379 	return rif->rif_index;
6380 }
6381 
6382 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6383 {
6384 	return lb_rif->common.rif_index;
6385 }
6386 
6387 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6388 {
6389 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6390 	struct mlxsw_sp_vr *ul_vr;
6391 
6392 	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6393 	if (WARN_ON(IS_ERR(ul_vr)))
6394 		return 0;
6395 
6396 	return ul_vr->id;
6397 }
6398 
6399 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6400 {
6401 	return lb_rif->ul_rif_id;
6402 }
6403 
6404 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6405 {
6406 	return rif->dev->ifindex;
6407 }
6408 
6409 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6410 {
6411 	return rif->dev;
6412 }
6413 
6414 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6415 {
6416 	return rif->fid;
6417 }
6418 
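/* Create a RIF for a netdev: derive the RIF type and operations from the
 * netdev, bind the RIF to the virtual router of the netdev's FIB table
 * (the main table, unless an l3mdev dictates otherwise), associate a FID
 * where relevant and register the RIF with the multicast routing tables.
 */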
6419 static struct mlxsw_sp_rif *
6420 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6421 		    const struct mlxsw_sp_rif_params *params,
6422 		    struct netlink_ext_ack *extack)
6423 {
6424 	u32 tb_id = l3mdev_fib_table(params->dev);
6425 	const struct mlxsw_sp_rif_ops *ops;
6426 	struct mlxsw_sp_fid *fid = NULL;
6427 	enum mlxsw_sp_rif_type type;
6428 	struct mlxsw_sp_rif *rif;
6429 	struct mlxsw_sp_vr *vr;
6430 	u16 rif_index;
6431 	int i, err;
6432 
6433 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6434 	ops = mlxsw_sp->rif_ops_arr[type];
6435 
6436 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6437 	if (IS_ERR(vr))
6438 		return ERR_CAST(vr);
6439 	vr->rif_count++;
6440 
6441 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6442 	if (err) {
6443 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6444 		goto err_rif_index_alloc;
6445 	}
6446 
6447 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6448 	if (!rif) {
6449 		err = -ENOMEM;
6450 		goto err_rif_alloc;
6451 	}
6452 	dev_hold(rif->dev);
6453 	mlxsw_sp->router->rifs[rif_index] = rif;
6454 	rif->mlxsw_sp = mlxsw_sp;
6455 	rif->ops = ops;
6456 
6457 	if (ops->fid_get) {
6458 		fid = ops->fid_get(rif, extack);
6459 		if (IS_ERR(fid)) {
6460 			err = PTR_ERR(fid);
6461 			goto err_fid_get;
6462 		}
6463 		rif->fid = fid;
6464 	}
6465 
6466 	if (ops->setup)
6467 		ops->setup(rif, params);
6468 
6469 	err = ops->configure(rif);
6470 	if (err)
6471 		goto err_configure;
6472 
6473 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6474 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6475 		if (err)
6476 			goto err_mr_rif_add;
6477 	}
6478 
6479 	mlxsw_sp_rif_counters_alloc(rif);
6480 
6481 	return rif;
6482 
6483 err_mr_rif_add:
6484 	for (i--; i >= 0; i--)
6485 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6486 	ops->deconfigure(rif);
6487 err_configure:
6488 	if (fid)
6489 		mlxsw_sp_fid_put(fid);
6490 err_fid_get:
6491 	mlxsw_sp->router->rifs[rif_index] = NULL;
6492 	dev_put(rif->dev);
6493 	kfree(rif);
6494 err_rif_alloc:
6495 err_rif_index_alloc:
6496 	vr->rif_count--;
6497 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6498 	return ERR_PTR(err);
6499 }
6500 
6501 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6502 {
6503 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6504 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6505 	struct mlxsw_sp_fid *fid = rif->fid;
6506 	struct mlxsw_sp_vr *vr;
6507 	int i;
6508 
6509 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6510 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6511 
6512 	mlxsw_sp_rif_counters_free(rif);
6513 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6514 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6515 	ops->deconfigure(rif);
6516 	if (fid)
6517 		/* Loopback RIFs are not associated with a FID. */
6518 		mlxsw_sp_fid_put(fid);
6519 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6520 	dev_put(rif->dev);
6521 	kfree(rif);
6522 	vr->rif_count--;
6523 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6524 }
6525 
6526 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6527 				 struct net_device *dev)
6528 {
6529 	struct mlxsw_sp_rif *rif;
6530 
6531 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6532 	if (!rif)
6533 		return;
6534 	mlxsw_sp_rif_destroy(rif);
6535 }
6536 
6537 static void
6538 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6539 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6540 {
6541 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6542 
6543 	params->vid = mlxsw_sp_port_vlan->vid;
6544 	params->lag = mlxsw_sp_port->lagged;
6545 	if (params->lag)
6546 		params->lag_id = mlxsw_sp_port->lag_id;
6547 	else
6548 		params->system_port = mlxsw_sp_port->local_port;
6549 }
6550 
6551 static struct mlxsw_sp_rif_subport *
6552 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6553 {
6554 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6555 }
6556 
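/* Sub-port RIFs are reference counted, as the same RIF can be shared by
 * several users of the underlying netdev, e.g., all the member ports of
 * a LAG device.
 */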
6557 static struct mlxsw_sp_rif *
6558 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6559 			 const struct mlxsw_sp_rif_params *params,
6560 			 struct netlink_ext_ack *extack)
6561 {
6562 	struct mlxsw_sp_rif_subport *rif_subport;
6563 	struct mlxsw_sp_rif *rif;
6564 
6565 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6566 	if (!rif)
6567 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6568 
6569 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6570 	refcount_inc(&rif_subport->ref_count);
6571 	return rif;
6572 }
6573 
6574 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6575 {
6576 	struct mlxsw_sp_rif_subport *rif_subport;
6577 
6578 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6579 	if (!refcount_dec_and_test(&rif_subport->ref_count))
6580 		return;
6581 
6582 	mlxsw_sp_rif_destroy(rif);
6583 }
6584 
6585 static int
6586 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6587 			       struct net_device *l3_dev,
6588 			       struct netlink_ext_ack *extack)
6589 {
6590 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6591 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6592 	struct mlxsw_sp_rif_params params = {
6593 		.dev = l3_dev,
6594 	};
6595 	u16 vid = mlxsw_sp_port_vlan->vid;
6596 	struct mlxsw_sp_rif *rif;
6597 	struct mlxsw_sp_fid *fid;
6598 	int err;
6599 
6600 	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6601 	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6602 	if (IS_ERR(rif))
6603 		return PTR_ERR(rif);
6604 
6605 	/* The FID was already created; just take a reference */
6606 	fid = rif->ops->fid_get(rif, extack);
6607 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6608 	if (err)
6609 		goto err_fid_port_vid_map;
6610 
6611 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6612 	if (err)
6613 		goto err_port_vid_learning_set;
6614 
6615 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6616 					BR_STATE_FORWARDING);
6617 	if (err)
6618 		goto err_port_vid_stp_set;
6619 
6620 	mlxsw_sp_port_vlan->fid = fid;
6621 
6622 	return 0;
6623 
6624 err_port_vid_stp_set:
6625 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6626 err_port_vid_learning_set:
6627 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6628 err_fid_port_vid_map:
6629 	mlxsw_sp_fid_put(fid);
6630 	mlxsw_sp_rif_subport_put(rif);
6631 	return err;
6632 }
6633 
6634 void
6635 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6636 {
6637 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6638 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6639 	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6640 	u16 vid = mlxsw_sp_port_vlan->vid;
6641 
6642 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6643 		return;
6644 
6645 	mlxsw_sp_port_vlan->fid = NULL;
6646 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6647 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6648 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6649 	mlxsw_sp_fid_put(fid);
6650 	mlxsw_sp_rif_subport_put(rif);
6651 }
6652 
6653 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6654 					     struct net_device *port_dev,
6655 					     unsigned long event, u16 vid,
6656 					     struct netlink_ext_ack *extack)
6657 {
6658 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6659 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6660 
6661 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6662 	if (WARN_ON(!mlxsw_sp_port_vlan))
6663 		return -EINVAL;
6664 
6665 	switch (event) {
6666 	case NETDEV_UP:
6667 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6668 						      l3_dev, extack);
6669 	case NETDEV_DOWN:
6670 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6671 		break;
6672 	}
6673 
6674 	return 0;
6675 }
6676 
6677 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6678 					unsigned long event,
6679 					struct netlink_ext_ack *extack)
6680 {
6681 	if (netif_is_bridge_port(port_dev) ||
6682 	    netif_is_lag_port(port_dev) ||
6683 	    netif_is_ovs_port(port_dev))
6684 		return 0;
6685 
6686 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6687 						 MLXSW_SP_DEFAULT_VID, extack);
6688 }
6689 
6690 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6691 					 struct net_device *lag_dev,
6692 					 unsigned long event, u16 vid,
6693 					 struct netlink_ext_ack *extack)
6694 {
6695 	struct net_device *port_dev;
6696 	struct list_head *iter;
6697 	int err;
6698 
6699 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6700 		if (mlxsw_sp_port_dev_check(port_dev)) {
6701 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6702 								port_dev,
6703 								event, vid,
6704 								extack);
6705 			if (err)
6706 				return err;
6707 		}
6708 	}
6709 
6710 	return 0;
6711 }
6712 
6713 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6714 				       unsigned long event,
6715 				       struct netlink_ext_ack *extack)
6716 {
6717 	if (netif_is_bridge_port(lag_dev))
6718 		return 0;
6719 
6720 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6721 					     MLXSW_SP_DEFAULT_VID, extack);
6722 }
6723 
6724 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6725 					  struct net_device *l3_dev,
6726 					  unsigned long event,
6727 					  struct netlink_ext_ack *extack)
6728 {
6729 	struct mlxsw_sp_rif_params params = {
6730 		.dev = l3_dev,
6731 	};
6732 	struct mlxsw_sp_rif *rif;
6733 
6734 	switch (event) {
6735 	case NETDEV_UP:
6736 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6737 		if (IS_ERR(rif))
6738 			return PTR_ERR(rif);
6739 		break;
6740 	case NETDEV_DOWN:
6741 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6742 		mlxsw_sp_rif_destroy(rif);
6743 		break;
6744 	}
6745 
6746 	return 0;
6747 }
6748 
6749 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6750 					struct net_device *vlan_dev,
6751 					unsigned long event,
6752 					struct netlink_ext_ack *extack)
6753 {
6754 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6755 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6756 
6757 	if (netif_is_bridge_port(vlan_dev))
6758 		return 0;
6759 
6760 	if (mlxsw_sp_port_dev_check(real_dev))
6761 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6762 							 event, vid, extack);
6763 	else if (netif_is_lag_master(real_dev))
6764 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6765 						     vid, extack);
6766 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6767 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6768 						      extack);
6769 
6770 	return 0;
6771 }
6772 
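/* VRRP virtual routers use well-known MAC addresses of the form
 * 00:00:5e:00:01:XX (IPv4) and 00:00:5e:00:02:XX (IPv6), where XX is the
 * virtual router identifier (VRID). Match on everything but the last
 * byte.
 */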
6773 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6774 {
6775 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6776 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6777 
6778 	return ether_addr_equal_masked(mac, vrrp4, mask);
6779 }
6780 
6781 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6782 {
6783 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6784 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6785 
6786 	return ether_addr_equal_masked(mac, vrrp6, mask);
6787 }
6788 
6789 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6790 				const u8 *mac, bool adding)
6791 {
6792 	char ritr_pl[MLXSW_REG_RITR_LEN];
6793 	u8 vrrp_id = adding ? mac[5] : 0;
6794 	int err;
6795 
6796 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6797 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6798 		return 0;
6799 
6800 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6801 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6802 	if (err)
6803 		return err;
6804 
6805 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6806 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6807 	else
6808 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6809 
6810 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6811 }
6812 
6813 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6814 				    const struct net_device *macvlan_dev,
6815 				    struct netlink_ext_ack *extack)
6816 {
6817 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6818 	struct mlxsw_sp_rif *rif;
6819 	int err;
6820 
6821 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6822 	if (!rif) {
6823 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6824 		return -EOPNOTSUPP;
6825 	}
6826 
6827 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6828 				  mlxsw_sp_fid_index(rif->fid), true);
6829 	if (err)
6830 		return err;
6831 
6832 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6833 				   macvlan_dev->dev_addr, true);
6834 	if (err)
6835 		goto err_rif_vrrp_add;
6836 
6837 	/* Make sure the bridge driver does not have this MAC pointing at
6838 	 * some other port.
6839 	 */
6840 	if (rif->ops->fdb_del)
6841 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6842 
6843 	return 0;
6844 
6845 err_rif_vrrp_add:
6846 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6847 			    mlxsw_sp_fid_index(rif->fid), false);
6848 	return err;
6849 }
6850 
6851 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6852 			      const struct net_device *macvlan_dev)
6853 {
6854 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6855 	struct mlxsw_sp_rif *rif;
6856 
6857 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6858 	/* If we do not have a RIF, then we already took care of
6859 	 * removing the macvlan's MAC during RIF deletion.
6860 	 */
6861 	if (!rif)
6862 		return;
6863 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6864 			     false);
6865 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6866 			    mlxsw_sp_fid_index(rif->fid), false);
6867 }
6868 
6869 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6870 					   struct net_device *macvlan_dev,
6871 					   unsigned long event,
6872 					   struct netlink_ext_ack *extack)
6873 {
6874 	switch (event) {
6875 	case NETDEV_UP:
6876 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6877 	case NETDEV_DOWN:
6878 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6879 		break;
6880 	}
6881 
6882 	return 0;
6883 }
6884 
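/* The device requires the MAC addresses of all router interfaces to
 * share the same prefix: the bits covered by 'mac_mask' must be
 * identical across RIFs, and only the remaining bits may differ.
 */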
6885 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6886 					       struct net_device *dev,
6887 					       const unsigned char *dev_addr,
6888 					       struct netlink_ext_ack *extack)
6889 {
6890 	struct mlxsw_sp_rif *rif;
6891 	int i;
6892 
6893 	/* A RIF is not created for macvlan netdevs. Their MAC is used to
6894 	 * populate the FDB.
6895 	 */
6896 	if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6897 		return 0;
6898 
6899 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6900 		rif = mlxsw_sp->router->rifs[i];
6901 		if (rif && rif->ops &&
6902 		    rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
6903 			continue;
6904 		if (rif && rif->dev && rif->dev != dev &&
6905 		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6906 					     mlxsw_sp->mac_mask)) {
6907 			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6908 			return -EINVAL;
6909 		}
6910 	}
6911 
6912 	return 0;
6913 }
6914 
6915 static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
6916 				     struct net_device *dev,
6917 				     unsigned long event,
6918 				     struct netlink_ext_ack *extack)
6919 {
6920 	if (mlxsw_sp_port_dev_check(dev))
6921 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6922 	else if (netif_is_lag_master(dev))
6923 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6924 	else if (netif_is_bridge_master(dev))
6925 		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
6926 						      extack);
6927 	else if (is_vlan_dev(dev))
6928 		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
6929 						    extack);
6930 	else if (netif_is_macvlan(dev))
6931 		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
6932 						       extack);
6933 	else
6934 		return 0;
6935 }
6936 
6937 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6938 				   unsigned long event, void *ptr)
6939 {
6940 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6941 	struct net_device *dev = ifa->ifa_dev->dev;
6942 	struct mlxsw_sp_router *router;
6943 	struct mlxsw_sp_rif *rif;
6944 	int err = 0;
6945 
6946 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6947 	if (event == NETDEV_UP)
6948 		goto out;
6949 
6950 	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6951 	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6952 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6953 		goto out;
6954 
6955 	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6956 out:
6957 	return notifier_from_errno(err);
6958 }
6959 
6960 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6961 				  unsigned long event, void *ptr)
6962 {
6963 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6964 	struct net_device *dev = ivi->ivi_dev->dev;
6965 	struct mlxsw_sp *mlxsw_sp;
6966 	struct mlxsw_sp_rif *rif;
6967 	int err = 0;
6968 
6969 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6970 	if (!mlxsw_sp)
6971 		goto out;
6972 
6973 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6974 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6975 		goto out;
6976 
6977 	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6978 						  ivi->extack);
6979 	if (err)
6980 		goto out;
6981 
6982 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6983 out:
6984 	return notifier_from_errno(err);
6985 }
6986 
6987 struct mlxsw_sp_inet6addr_event_work {
6988 	struct work_struct work;
6989 	struct mlxsw_sp *mlxsw_sp;
6990 	struct net_device *dev;
6991 	unsigned long event;
6992 };
6993 
6994 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6995 {
6996 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6997 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6998 	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6999 	struct net_device *dev = inet6addr_work->dev;
7000 	unsigned long event = inet6addr_work->event;
7001 	struct mlxsw_sp_rif *rif;
7002 
7003 	rtnl_lock();
7004 
7005 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7006 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7007 		goto out;
7008 
7009 	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
7010 out:
7011 	rtnl_unlock();
7012 	dev_put(dev);
7013 	kfree(inet6addr_work);
7014 }
7015 
7016 /* Called with rcu_read_lock() */
7017 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
7018 				    unsigned long event, void *ptr)
7019 {
7020 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
7021 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
7022 	struct net_device *dev = if6->idev->dev;
7023 	struct mlxsw_sp_router *router;
7024 
7025 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
7026 	if (event == NETDEV_UP)
7027 		return NOTIFY_DONE;
7028 
7029 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
7030 	if (!inet6addr_work)
7031 		return NOTIFY_BAD;
7032 
7033 	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
7034 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
7035 	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
7036 	inet6addr_work->dev = dev;
7037 	inet6addr_work->event = event;
7038 	dev_hold(dev);
7039 	mlxsw_core_schedule_work(&inet6addr_work->work);
7040 
7041 	return NOTIFY_DONE;
7042 }
7043 
7044 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
7045 				   unsigned long event, void *ptr)
7046 {
7047 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
7048 	struct net_device *dev = i6vi->i6vi_dev->dev;
7049 	struct mlxsw_sp *mlxsw_sp;
7050 	struct mlxsw_sp_rif *rif;
7051 	int err = 0;
7052 
7053 	mlxsw_sp = mlxsw_sp_lower_get(dev);
7054 	if (!mlxsw_sp)
7055 		goto out;
7056 
7057 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7058 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7059 		goto out;
7060 
7061 	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7062 						  i6vi->extack);
7063 	if (err)
7064 		goto out;
7065 
7066 	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
7067 out:
7068 	return notifier_from_errno(err);
7069 }
7070 
7071 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7072 			     const char *mac, int mtu)
7073 {
7074 	char ritr_pl[MLXSW_REG_RITR_LEN];
7075 	int err;
7076 
7077 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7078 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7079 	if (err)
7080 		return err;
7081 
7082 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7083 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7084 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7085 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7086 }
7087 
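/* Sync the RIF with its netdev after a MAC or MTU change: withdraw the
 * old MAC from the FDB, edit the device's RIF record with the new
 * parameters, re-insert the MAC and propagate the new MTU to the
 * multicast routing tables. Roll everything back on failure.
 */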
7088 static int
7089 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7090 				  struct mlxsw_sp_rif *rif)
7091 {
7092 	struct net_device *dev = rif->dev;
7093 	u16 fid_index;
7094 	int err;
7095 
7096 	fid_index = mlxsw_sp_fid_index(rif->fid);
7097 
7098 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7099 	if (err)
7100 		return err;
7101 
7102 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7103 				dev->mtu);
7104 	if (err)
7105 		goto err_rif_edit;
7106 
7107 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7108 	if (err)
7109 		goto err_rif_fdb_op;
7110 
7111 	if (rif->mtu != dev->mtu) {
7112 		struct mlxsw_sp_vr *vr;
7113 		int i;
7114 
7115 		/* The RIF is relevant only to its mr_table instance, since
7116 		 * unlike in unicast routing, a RIF cannot be shared between
7117 		 * several multicast routing tables.
7118 		 */
7119 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
7120 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7121 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7122 						   rif, dev->mtu);
7123 	}
7124 
7125 	ether_addr_copy(rif->addr, dev->dev_addr);
7126 	rif->mtu = dev->mtu;
7127 
7128 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7129 
7130 	return 0;
7131 
7132 err_rif_fdb_op:
7133 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7134 err_rif_edit:
7135 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7136 	return err;
7137 }
7138 
7139 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7140 			    struct netdev_notifier_pre_changeaddr_info *info)
7141 {
7142 	struct netlink_ext_ack *extack;
7143 
7144 	extack = netdev_notifier_info_to_extack(&info->info);
7145 	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7146 						   info->dev_addr, extack);
7147 }
7148 
7149 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7150 					 unsigned long event, void *ptr)
7151 {
7152 	struct mlxsw_sp *mlxsw_sp;
7153 	struct mlxsw_sp_rif *rif;
7154 
7155 	mlxsw_sp = mlxsw_sp_lower_get(dev);
7156 	if (!mlxsw_sp)
7157 		return 0;
7158 
7159 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7160 	if (!rif)
7161 		return 0;
7162 
7163 	switch (event) {
7164 	case NETDEV_CHANGEMTU: /* fall through */
7165 	case NETDEV_CHANGEADDR:
7166 		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7167 	case NETDEV_PRE_CHANGEADDR:
7168 		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7169 	}
7170 
7171 	return 0;
7172 }
7173 
7174 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7175 				  struct net_device *l3_dev,
7176 				  struct netlink_ext_ack *extack)
7177 {
7178 	struct mlxsw_sp_rif *rif;
7179 
7180 	/* If netdev is already associated with a RIF, then we need to
7181 	 * destroy it and create a new one with the new virtual router ID.
7182 	 */
7183 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7184 	if (rif)
7185 		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7186 					  extack);
7187 
7188 	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7189 }
7190 
7191 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7192 				    struct net_device *l3_dev)
7193 {
7194 	struct mlxsw_sp_rif *rif;
7195 
7196 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7197 	if (!rif)
7198 		return;
7199 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7200 }
7201 
7202 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7203 				 struct netdev_notifier_changeupper_info *info)
7204 {
7205 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7206 	int err = 0;
7207 
7208 	/* We do not create a RIF for a macvlan, but only use it to
7209 	 * direct more MAC addresses to the router.
7210 	 */
7211 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7212 		return 0;
7213 
7214 	switch (event) {
7215 	case NETDEV_PRECHANGEUPPER:
7216 		return 0;
7217 	case NETDEV_CHANGEUPPER:
7218 		if (info->linking) {
7219 			struct netlink_ext_ack *extack;
7220 
7221 			extack = netdev_notifier_info_to_extack(&info->info);
7222 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7223 		} else {
7224 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7225 		}
7226 		break;
7227 	}
7228 
7229 	return err;
7230 }
7231 
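/* netdev_walk_all_upper_dev_rcu() callback: remove the FDB entries that
 * direct the MAC addresses of macvlan uppers to the router. Used below
 * when the RIF underneath them is deconfigured.
 */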
7232 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7233 {
7234 	struct mlxsw_sp_rif *rif = data;
7235 
7236 	if (!netif_is_macvlan(dev))
7237 		return 0;
7238 
7239 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7240 				   mlxsw_sp_fid_index(rif->fid), false);
7241 }
7242 
7243 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7244 {
7245 	if (!netif_is_macvlan_port(rif->dev))
7246 		return 0;
7247 
	netdev_warn(rif->dev, "Router interface is being deleted, upper macvlans will not work\n");
7249 	return netdev_walk_all_upper_dev_rcu(rif->dev,
7250 					     __mlxsw_sp_rif_macvlan_flush, rif);
7251 }
7252 
7253 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7254 				       const struct mlxsw_sp_rif_params *params)
7255 {
7256 	struct mlxsw_sp_rif_subport *rif_subport;
7257 
7258 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7259 	refcount_set(&rif_subport->ref_count, 1);
7260 	rif_subport->vid = params->vid;
7261 	rif_subport->lag = params->lag;
7262 	if (params->lag)
7263 		rif_subport->lag_id = params->lag_id;
7264 	else
7265 		rif_subport->system_port = params->system_port;
7266 }
7267 
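/* Write the RITR entry of a sub-port RIF, which binds the router
 * interface to a {port, VID} or {LAG, VID} pair.
 */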
7268 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7269 {
7270 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7271 	struct mlxsw_sp_rif_subport *rif_subport;
7272 	char ritr_pl[MLXSW_REG_RITR_LEN];
7273 
7274 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7275 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7276 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7277 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7278 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7279 				  rif_subport->lag ? rif_subport->lag_id :
7280 						     rif_subport->system_port,
7281 				  rif_subport->vid);
7282 
7283 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7284 }
7285 
7286 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7287 {
7288 	int err;
7289 
7290 	err = mlxsw_sp_rif_subport_op(rif, true);
7291 	if (err)
7292 		return err;
7293 
7294 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7295 				  mlxsw_sp_fid_index(rif->fid), true);
7296 	if (err)
7297 		goto err_rif_fdb_op;
7298 
7299 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7300 	return 0;
7301 
7302 err_rif_fdb_op:
7303 	mlxsw_sp_rif_subport_op(rif, false);
7304 	return err;
7305 }
7306 
7307 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7308 {
7309 	struct mlxsw_sp_fid *fid = rif->fid;
7310 
7311 	mlxsw_sp_fid_rif_set(fid, NULL);
7312 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7313 			    mlxsw_sp_fid_index(fid), false);
7314 	mlxsw_sp_rif_macvlan_flush(rif);
7315 	mlxsw_sp_rif_subport_op(rif, false);
7316 }
7317 
7318 static struct mlxsw_sp_fid *
7319 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7320 			     struct netlink_ext_ack *extack)
7321 {
7322 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7323 }
7324 
7325 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7326 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7327 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7328 	.setup			= mlxsw_sp_rif_subport_setup,
7329 	.configure		= mlxsw_sp_rif_subport_configure,
7330 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7331 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7332 };
7333 
7334 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7335 				    enum mlxsw_reg_ritr_if_type type,
7336 				    u16 vid_fid, bool enable)
7337 {
7338 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7339 	char ritr_pl[MLXSW_REG_RITR_LEN];
7340 
7341 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7342 			    rif->dev->mtu);
7343 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7344 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7345 
7346 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7347 }
7348 
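/* The "router port" is a virtual port one past the maximal physical
 * port. FIDs flood multicast and broadcast traffic to it in order to
 * deliver such traffic to the router.
 */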
7349 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7350 {
7351 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7352 }
7353 
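/* Configure a VLAN RIF on top of an 802.1Q FID: create the RITR entry,
 * let the FID flood multicast and broadcast traffic to the router port,
 * and install an FDB entry so packets for the netdev's MAC address are
 * directed to the router.
 */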
7354 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7355 {
7356 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7357 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7358 	int err;
7359 
7360 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7361 	if (err)
7362 		return err;
7363 
7364 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7365 				     mlxsw_sp_router_port(mlxsw_sp), true);
7366 	if (err)
7367 		goto err_fid_mc_flood_set;
7368 
7369 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7370 				     mlxsw_sp_router_port(mlxsw_sp), true);
7371 	if (err)
7372 		goto err_fid_bc_flood_set;
7373 
7374 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7375 				  mlxsw_sp_fid_index(rif->fid), true);
7376 	if (err)
7377 		goto err_rif_fdb_op;
7378 
7379 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7380 	return 0;
7381 
7382 err_rif_fdb_op:
7383 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7384 			       mlxsw_sp_router_port(mlxsw_sp), false);
7385 err_fid_bc_flood_set:
7386 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7387 			       mlxsw_sp_router_port(mlxsw_sp), false);
7388 err_fid_mc_flood_set:
7389 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7390 	return err;
7391 }
7392 
7393 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7394 {
7395 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7396 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7397 	struct mlxsw_sp_fid *fid = rif->fid;
7398 
7399 	mlxsw_sp_fid_rif_set(fid, NULL);
7400 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7401 			    mlxsw_sp_fid_index(fid), false);
7402 	mlxsw_sp_rif_macvlan_flush(rif);
7403 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7404 			       mlxsw_sp_router_port(mlxsw_sp), false);
7405 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7406 			       mlxsw_sp_router_port(mlxsw_sp), false);
7407 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7408 }
7409 
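/* A VLAN RIF can be configured either on a VLAN upper of a bridge, in
 * which case the VID is taken from the VLAN device, or on the bridge
 * itself, in which case the bridge's PVID is used.
 */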
7410 static struct mlxsw_sp_fid *
7411 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7412 			  struct netlink_ext_ack *extack)
7413 {
7414 	struct net_device *br_dev = rif->dev;
7415 	u16 vid;
7416 	int err;
7417 
7418 	if (is_vlan_dev(rif->dev)) {
7419 		vid = vlan_dev_vlan_id(rif->dev);
7420 		br_dev = vlan_dev_real_dev(rif->dev);
7421 		if (WARN_ON(!netif_is_bridge_master(br_dev)))
7422 			return ERR_PTR(-EINVAL);
7423 	} else {
7424 		err = br_vlan_get_pvid(rif->dev, &vid);
7425 		if (err < 0 || !vid) {
7426 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7427 			return ERR_PTR(-EINVAL);
7428 		}
7429 	}
7430 
7431 	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7432 }
7433 
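/* Find the bridge port behind @mac in the RIF's VLAN and ask the bridge
 * driver, via SWITCHDEV_FDB_DEL_TO_BRIDGE, to delete the corresponding
 * FDB entry.
 */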
7434 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7435 {
7436 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7437 	struct switchdev_notifier_fdb_info info;
7438 	struct net_device *br_dev;
7439 	struct net_device *dev;
7440 
7441 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7442 	dev = br_fdb_find_port(br_dev, mac, vid);
7443 	if (!dev)
7444 		return;
7445 
7446 	info.addr = mac;
7447 	info.vid = vid;
7448 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7449 				 NULL);
7450 }
7451 
7452 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7453 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7454 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7455 	.configure		= mlxsw_sp_rif_vlan_configure,
7456 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7457 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7458 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7459 };
7460 
7461 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7462 {
7463 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7464 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7465 	int err;
7466 
7467 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7468 				       true);
7469 	if (err)
7470 		return err;
7471 
7472 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7473 				     mlxsw_sp_router_port(mlxsw_sp), true);
7474 	if (err)
7475 		goto err_fid_mc_flood_set;
7476 
7477 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7478 				     mlxsw_sp_router_port(mlxsw_sp), true);
7479 	if (err)
7480 		goto err_fid_bc_flood_set;
7481 
7482 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7483 				  mlxsw_sp_fid_index(rif->fid), true);
7484 	if (err)
7485 		goto err_rif_fdb_op;
7486 
7487 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7488 	return 0;
7489 
7490 err_rif_fdb_op:
7491 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7492 			       mlxsw_sp_router_port(mlxsw_sp), false);
7493 err_fid_bc_flood_set:
7494 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7495 			       mlxsw_sp_router_port(mlxsw_sp), false);
7496 err_fid_mc_flood_set:
7497 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7498 	return err;
7499 }
7500 
7501 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7502 {
7503 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7504 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7505 	struct mlxsw_sp_fid *fid = rif->fid;
7506 
7507 	mlxsw_sp_fid_rif_set(fid, NULL);
7508 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7509 			    mlxsw_sp_fid_index(fid), false);
7510 	mlxsw_sp_rif_macvlan_flush(rif);
7511 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7512 			       mlxsw_sp_router_port(mlxsw_sp), false);
7513 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7514 			       mlxsw_sp_router_port(mlxsw_sp), false);
7515 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7516 }
7517 
7518 static struct mlxsw_sp_fid *
7519 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7520 			 struct netlink_ext_ack *extack)
7521 {
7522 	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7523 }
7524 
7525 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7526 {
7527 	struct switchdev_notifier_fdb_info info;
7528 	struct net_device *dev;
7529 
7530 	dev = br_fdb_find_port(rif->dev, mac, 0);
7531 	if (!dev)
7532 		return;
7533 
7534 	info.addr = mac;
7535 	info.vid = 0;
7536 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7537 				 NULL);
7538 }
7539 
7540 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7541 	.type			= MLXSW_SP_RIF_TYPE_FID,
7542 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7543 	.configure		= mlxsw_sp_rif_fid_configure,
7544 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7545 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7546 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7547 };
7548 
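/* VLAN RIFs are emulated on top of FID RIFs: configuration is shared
 * with mlxsw_sp_rif_fid_ops, while FID lookup and FDB deletion remain
 * VLAN-aware.
 */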
7549 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7550 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7551 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7552 	.configure		= mlxsw_sp_rif_fid_configure,
7553 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7554 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7555 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7556 };
7557 
7558 static struct mlxsw_sp_rif_ipip_lb *
7559 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7560 {
7561 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7562 }
7563 
7564 static void
7565 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7566 			   const struct mlxsw_sp_rif_params *params)
7567 {
7568 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7569 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7570 
7571 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7572 				 common);
7573 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7574 	rif_lb->lb_config = params_lb->lb_config;
7575 }
7576 
7577 static int
7578 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7579 {
7580 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7581 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7582 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7583 	struct mlxsw_sp_vr *ul_vr;
7584 	int err;
7585 
7586 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7587 	if (IS_ERR(ul_vr))
7588 		return PTR_ERR(ul_vr);
7589 
7590 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7591 	if (err)
7592 		goto err_loopback_op;
7593 
7594 	lb_rif->ul_vr_id = ul_vr->id;
7595 	lb_rif->ul_rif_id = 0;
7596 	++ul_vr->rif_count;
7597 	return 0;
7598 
7599 err_loopback_op:
7600 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7601 	return err;
7602 }
7603 
7604 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7605 {
7606 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7607 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7608 	struct mlxsw_sp_vr *ul_vr;
7609 
7610 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7611 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7612 
7613 	--ul_vr->rif_count;
7614 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7615 }
7616 
7617 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7618 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7619 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7620 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7621 	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
7622 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
7623 };
7624 
7625 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7626 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7627 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7628 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7629 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
7630 };
7631 
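/* On Spectrum-2 the underlay is modeled as a generic loopback RIF with
 * the maximal MTU. IP-in-IP loopbacks then reference it by RIF index
 * (ul_rif_id) instead of binding an underlay virtual router (ul_vr_id)
 * as done in mlxsw_sp1_rif_ipip_lb_configure() above.
 */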
7632 static int
7633 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7634 {
7635 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7636 	char ritr_pl[MLXSW_REG_RITR_LEN];
7637 
7638 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7639 			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7640 	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7641 					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
7642 
7643 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7644 }
7645 
7646 static struct mlxsw_sp_rif *
7647 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7648 		       struct netlink_ext_ack *extack)
7649 {
7650 	struct mlxsw_sp_rif *ul_rif;
7651 	u16 rif_index;
7652 	int err;
7653 
7654 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7655 	if (err) {
7656 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7657 		return ERR_PTR(err);
7658 	}
7659 
7660 	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7661 	if (!ul_rif)
7662 		return ERR_PTR(-ENOMEM);
7663 
7664 	mlxsw_sp->router->rifs[rif_index] = ul_rif;
7665 	ul_rif->mlxsw_sp = mlxsw_sp;
7666 	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7667 	if (err)
7668 		goto ul_rif_op_err;
7669 
7670 	return ul_rif;
7671 
7672 ul_rif_op_err:
7673 	mlxsw_sp->router->rifs[rif_index] = NULL;
7674 	kfree(ul_rif);
7675 	return ERR_PTR(err);
7676 }
7677 
7678 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7679 {
7680 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7681 
7682 	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7683 	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7684 	kfree(ul_rif);
7685 }
7686 
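/* Get a reference on the underlay RIF of the given table, creating it
 * on first use. Callers hold RTNL, which serializes creation and
 * destruction against the refcount checks here and in the put path.
 */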
7687 static struct mlxsw_sp_rif *
7688 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7689 		    struct netlink_ext_ack *extack)
7690 {
7691 	struct mlxsw_sp_vr *vr;
7692 	int err;
7693 
7694 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7695 	if (IS_ERR(vr))
7696 		return ERR_CAST(vr);
7697 
7698 	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7699 		return vr->ul_rif;
7700 
7701 	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7702 	if (IS_ERR(vr->ul_rif)) {
7703 		err = PTR_ERR(vr->ul_rif);
7704 		goto err_ul_rif_create;
7705 	}
7706 
7707 	vr->rif_count++;
7708 	refcount_set(&vr->ul_rif_refcnt, 1);
7709 
7710 	return vr->ul_rif;
7711 
7712 err_ul_rif_create:
7713 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7714 	return ERR_PTR(err);
7715 }
7716 
7717 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7718 {
7719 	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7720 	struct mlxsw_sp_vr *vr;
7721 
7722 	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7723 
7724 	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7725 		return;
7726 
7727 	vr->rif_count--;
7728 	mlxsw_sp_ul_rif_destroy(ul_rif);
7729 	mlxsw_sp_vr_put(mlxsw_sp, vr);
7730 }
7731 
7732 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7733 			       u16 *ul_rif_index)
7734 {
7735 	struct mlxsw_sp_rif *ul_rif;
7736 
7737 	ASSERT_RTNL();
7738 
7739 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7740 	if (IS_ERR(ul_rif))
7741 		return PTR_ERR(ul_rif);
7742 	*ul_rif_index = ul_rif->rif_index;
7743 
7744 	return 0;
7745 }
7746 
7747 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7748 {
7749 	struct mlxsw_sp_rif *ul_rif;
7750 
7751 	ASSERT_RTNL();
7752 
7753 	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7754 	if (WARN_ON(!ul_rif))
7755 		return;
7756 
7757 	mlxsw_sp_ul_rif_put(ul_rif);
7758 }
7759 
7760 static int
7761 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7762 {
7763 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7764 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7765 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7766 	struct mlxsw_sp_rif *ul_rif;
7767 	int err;
7768 
7769 	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7770 	if (IS_ERR(ul_rif))
7771 		return PTR_ERR(ul_rif);
7772 
7773 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7774 	if (err)
7775 		goto err_loopback_op;
7776 
7777 	lb_rif->ul_vr_id = 0;
7778 	lb_rif->ul_rif_id = ul_rif->rif_index;
7779 
7780 	return 0;
7781 
7782 err_loopback_op:
7783 	mlxsw_sp_ul_rif_put(ul_rif);
7784 	return err;
7785 }
7786 
7787 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7788 {
7789 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7790 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7791 	struct mlxsw_sp_rif *ul_rif;
7792 
7793 	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7794 	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7795 	mlxsw_sp_ul_rif_put(ul_rif);
7796 }
7797 
7798 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7799 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7800 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7801 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7802 	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
7803 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
7804 };
7805 
7806 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7807 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7808 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7809 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7810 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
7811 };
7812 
7813 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7814 {
7815 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7816 
7817 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7818 					 sizeof(struct mlxsw_sp_rif *),
7819 					 GFP_KERNEL);
7820 	if (!mlxsw_sp->router->rifs)
7821 		return -ENOMEM;
7822 
7823 	return 0;
7824 }
7825 
7826 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7827 {
7828 	int i;
7829 
7830 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7831 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7832 
7833 	kfree(mlxsw_sp->router->rifs);
7834 }
7835 
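/* Global IP-in-IP tunneling configuration (TIGCR). As far as the
 * register definition goes, the arguments to the pack helper select
 * copying the TTL from the overlay packet on encapsulation.
 */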
7836 static int
7837 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7838 {
7839 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7840 
7841 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7842 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7843 }
7844 
7845 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7846 {
7847 	int err;
7848 
7849 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7850 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7851 
7852 	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
7853 	if (err)
7854 		return err;
7855 	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
7856 	if (err)
7857 		return err;
7858 
7859 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7860 }
7861 
7862 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7863 {
7864 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7865 }
7866 
7867 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7868 {
7869 	struct mlxsw_sp_router *router;
7870 
7871 	/* Flush pending FIB notifications and then flush the device's
7872 	 * table before requesting another dump. The FIB notification
7873 	 * block is unregistered, so no need to take RTNL.
7874 	 */
7875 	mlxsw_core_flush_owq();
7876 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7877 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7878 }
7879 
7880 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7881 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7882 {
7883 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7884 }
7885 
7886 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7887 {
7888 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7889 }
7890 
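/* only_l3 mirrors the net.ipv4.fib_multipath_hash_policy sysctl: with
 * the default policy the ECMP hash covers the L3 header only, otherwise
 * the protocol and the L4 ports are hashed as well.
 */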
7891 static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7892 {
7893 	struct net *net = mlxsw_sp_net(mlxsw_sp);
7894 	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
7895 
7896 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7897 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7898 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7899 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7900 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7901 	if (only_l3)
7902 		return;
7903 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7904 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7905 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7906 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7907 }
7908 
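/* As above, but keyed on net.ipv6.fib_multipath_hash_policy. With
 * L3-only hashing the IPv6 flow label participates instead of the L4
 * ports.
 */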
7909 static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7910 {
7911 	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
7912 
7913 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7914 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7915 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7916 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7917 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7918 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7919 	if (only_l3) {
7920 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7921 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7922 	} else {
7923 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7924 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7925 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7926 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7927 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7928 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7929 	}
7930 }
7931 
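/* Seed the ECMP hash from the device's base MAC, so that different
 * switches hash the same flow differently, which helps avoid traffic
 * polarization across a multi-stage topology.
 */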
7932 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7933 {
7934 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7935 	u32 seed;
7936 
7937 	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
7938 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7939 	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
7940 	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
7941 
7942 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7943 }
7944 #else
7945 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7946 {
7947 	return 0;
7948 }
7949 #endif
7950 
7951 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7952 {
7953 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7954 	unsigned int i;
7955 
7956 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7957 
	/* HW determines switch priority based on the DSCP bits, but the
	 * kernel still does so based on the ToS. Since the bits do not
	 * line up, translate each DSCP value to the ToS the kernel would
	 * observe, skipping the two least-significant ECN bits.
	 */
7963 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7964 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7965 
7966 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7967 }
7968 
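/* Enable IPv4 and IPv6 routing via the RGCR register, cap the number of
 * router interfaces at the MAX_RIFS resource and configure whether
 * switch priority is updated after routing (usp), mirroring the
 * net.ipv4.ip_forward_update_priority sysctl.
 */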
7969 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7970 {
7971 	struct net *net = mlxsw_sp_net(mlxsw_sp);
7972 	bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
7973 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7974 	u64 max_rifs;
7976 
7977 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7978 		return -EIO;
7979 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7980 
7981 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7982 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7983 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7988 }
7989 
7990 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7991 {
7992 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7993 
7994 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7995 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7996 }
7997 
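/* Initialization order matters: the FIB notifier is registered last,
 * since registration replays the kernel's existing FIB tables into the
 * driver and therefore requires all other router state to be ready.
 */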
7998 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
7999 			 struct netlink_ext_ack *extack)
8000 {
8001 	struct mlxsw_sp_router *router;
8002 	int err;
8003 
8004 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
8005 	if (!router)
8006 		return -ENOMEM;
8007 	mlxsw_sp->router = router;
8008 	router->mlxsw_sp = mlxsw_sp;
8009 
8010 	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
8011 	err = register_inetaddr_notifier(&router->inetaddr_nb);
8012 	if (err)
8013 		goto err_register_inetaddr_notifier;
8014 
8015 	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
8016 	err = register_inet6addr_notifier(&router->inet6addr_nb);
8017 	if (err)
8018 		goto err_register_inet6addr_notifier;
8019 
8020 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
8021 	err = __mlxsw_sp_router_init(mlxsw_sp);
8022 	if (err)
8023 		goto err_router_init;
8024 
8025 	err = mlxsw_sp_rifs_init(mlxsw_sp);
8026 	if (err)
8027 		goto err_rifs_init;
8028 
8029 	err = mlxsw_sp_ipips_init(mlxsw_sp);
8030 	if (err)
8031 		goto err_ipips_init;
8032 
8033 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
8034 			      &mlxsw_sp_nexthop_ht_params);
8035 	if (err)
8036 		goto err_nexthop_ht_init;
8037 
8038 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
8039 			      &mlxsw_sp_nexthop_group_ht_params);
8040 	if (err)
8041 		goto err_nexthop_group_ht_init;
8042 
8043 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
8044 	err = mlxsw_sp_lpm_init(mlxsw_sp);
8045 	if (err)
8046 		goto err_lpm_init;
8047 
8048 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
8049 	if (err)
8050 		goto err_mr_init;
8051 
8052 	err = mlxsw_sp_vrs_init(mlxsw_sp);
8053 	if (err)
8054 		goto err_vrs_init;
8055 
8056 	err = mlxsw_sp_neigh_init(mlxsw_sp);
8057 	if (err)
8058 		goto err_neigh_init;
8059 
8060 	mlxsw_sp->router->netevent_nb.notifier_call =
8061 		mlxsw_sp_router_netevent_event;
8062 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8063 	if (err)
8064 		goto err_register_netevent_notifier;
8065 
8066 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
8067 	if (err)
8068 		goto err_mp_hash_init;
8069 
8070 	err = mlxsw_sp_dscp_init(mlxsw_sp);
8071 	if (err)
8072 		goto err_dscp_init;
8073 
8074 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
8075 	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8076 				    &mlxsw_sp->router->fib_nb,
8077 				    mlxsw_sp_router_fib_dump_flush, extack);
8078 	if (err)
8079 		goto err_register_fib_notifier;
8080 
8081 	return 0;
8082 
8083 err_register_fib_notifier:
8084 err_dscp_init:
8085 err_mp_hash_init:
8086 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8087 err_register_netevent_notifier:
8088 	mlxsw_sp_neigh_fini(mlxsw_sp);
8089 err_neigh_init:
8090 	mlxsw_sp_vrs_fini(mlxsw_sp);
8091 err_vrs_init:
8092 	mlxsw_sp_mr_fini(mlxsw_sp);
8093 err_mr_init:
8094 	mlxsw_sp_lpm_fini(mlxsw_sp);
8095 err_lpm_init:
8096 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8097 err_nexthop_group_ht_init:
8098 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8099 err_nexthop_ht_init:
8100 	mlxsw_sp_ipips_fini(mlxsw_sp);
8101 err_ipips_init:
8102 	mlxsw_sp_rifs_fini(mlxsw_sp);
8103 err_rifs_init:
8104 	__mlxsw_sp_router_fini(mlxsw_sp);
8105 err_router_init:
8106 	unregister_inet6addr_notifier(&router->inet6addr_nb);
8107 err_register_inet6addr_notifier:
8108 	unregister_inetaddr_notifier(&router->inetaddr_nb);
8109 err_register_inetaddr_notifier:
8110 	kfree(mlxsw_sp->router);
8111 	return err;
8112 }
8113 
8114 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8115 {
8116 	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8117 				&mlxsw_sp->router->fib_nb);
8118 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8119 	mlxsw_sp_neigh_fini(mlxsw_sp);
8120 	mlxsw_sp_vrs_fini(mlxsw_sp);
8121 	mlxsw_sp_mr_fini(mlxsw_sp);
8122 	mlxsw_sp_lpm_fini(mlxsw_sp);
8123 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8124 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8125 	mlxsw_sp_ipips_fini(mlxsw_sp);
8126 	mlxsw_sp_rifs_fini(mlxsw_sp);
8127 	__mlxsw_sp_router_fini(mlxsw_sp);
8128 	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8129 	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8130 	kfree(mlxsw_sp->router);
8131 }
8132