1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <net/netevent.h>
19 #include <net/neighbour.h>
20 #include <net/arp.h>
21 #include <net/ip_fib.h>
22 #include <net/ip6_fib.h>
23 #include <net/fib_rules.h>
24 #include <net/ip_tunnels.h>
25 #include <net/l3mdev.h>
26 #include <net/addrconf.h>
27 #include <net/ndisc.h>
28 #include <net/ipv6.h>
29 #include <net/fib_notifier.h>
30 #include <net/switchdev.h>
31 
32 #include "spectrum.h"
33 #include "core.h"
34 #include "reg.h"
35 #include "spectrum_cnt.h"
36 #include "spectrum_dpipe.h"
37 #include "spectrum_ipip.h"
38 #include "spectrum_mr.h"
39 #include "spectrum_mr_tcam.h"
40 #include "spectrum_router.h"
41 #include "spectrum_span.h"
42 
43 struct mlxsw_sp_fib;
44 struct mlxsw_sp_vr;
45 struct mlxsw_sp_lpm_tree;
46 struct mlxsw_sp_rif_ops;
47 
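/* Per-instance router state. The RIF and virtual router arrays are sized
 * according to device resources, the rhashtables track neighbours and
 * next hops, and the lpm block manages the pool of hardware LPM trees.
 */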
48 struct mlxsw_sp_router {
49 	struct mlxsw_sp *mlxsw_sp;
50 	struct mlxsw_sp_rif **rifs;
51 	struct mlxsw_sp_vr *vrs;
52 	struct rhashtable neigh_ht;
53 	struct rhashtable nexthop_group_ht;
54 	struct rhashtable nexthop_ht;
55 	struct list_head nexthop_list;
56 	struct {
57 		/* One tree for each protocol: IPv4 and IPv6 */
58 		struct mlxsw_sp_lpm_tree *proto_trees[2];
59 		struct mlxsw_sp_lpm_tree *trees;
60 		unsigned int tree_count;
61 	} lpm;
62 	struct {
63 		struct delayed_work dw;
64 		unsigned long interval;	/* ms */
65 	} neighs_update;
66 	struct delayed_work nexthop_probe_dw;
67 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
68 	struct list_head nexthop_neighs_list;
69 	struct list_head ipip_list;
70 	bool aborted;
71 	struct notifier_block fib_nb;
72 	struct notifier_block netevent_nb;
73 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
74 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
75 };
76 
77 struct mlxsw_sp_rif {
78 	struct list_head nexthop_list;
79 	struct list_head neigh_list;
80 	struct net_device *dev;
81 	struct mlxsw_sp_fid *fid;
82 	unsigned char addr[ETH_ALEN];
83 	int mtu;
84 	u16 rif_index;
85 	u16 vr_id;
86 	const struct mlxsw_sp_rif_ops *ops;
87 	struct mlxsw_sp *mlxsw_sp;
88 
89 	unsigned int counter_ingress;
90 	bool counter_ingress_valid;
91 	unsigned int counter_egress;
92 	bool counter_egress_valid;
93 };
94 
95 struct mlxsw_sp_rif_params {
96 	struct net_device *dev;
97 	union {
98 		u16 system_port;
99 		u16 lag_id;
100 	};
101 	u16 vid;
102 	bool lag;
103 };
104 
105 struct mlxsw_sp_rif_subport {
106 	struct mlxsw_sp_rif common;
107 	union {
108 		u16 system_port;
109 		u16 lag_id;
110 	};
111 	u16 vid;
112 	bool lag;
113 };
114 
115 struct mlxsw_sp_rif_ipip_lb {
116 	struct mlxsw_sp_rif common;
117 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
118 	u16 ul_vr_id; /* Reserved for Spectrum-2. */
119 };
120 
121 struct mlxsw_sp_rif_params_ipip_lb {
122 	struct mlxsw_sp_rif_params common;
123 	struct mlxsw_sp_rif_ipip_lb_config lb_config;
124 };
125 
126 struct mlxsw_sp_rif_ops {
127 	enum mlxsw_sp_rif_type type;
128 	size_t rif_size;
129 
130 	void (*setup)(struct mlxsw_sp_rif *rif,
131 		      const struct mlxsw_sp_rif_params *params);
132 	int (*configure)(struct mlxsw_sp_rif *rif);
133 	void (*deconfigure)(struct mlxsw_sp_rif *rif);
134 	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
135 					 struct netlink_ext_ack *extack);
136 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
137 };
138 
139 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
140 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
141 				  struct mlxsw_sp_lpm_tree *lpm_tree);
142 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
143 				     const struct mlxsw_sp_fib *fib,
144 				     u8 tree_id);
145 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
146 				       const struct mlxsw_sp_fib *fib);
147 
148 static unsigned int *
149 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
150 			   enum mlxsw_sp_rif_counter_dir dir)
151 {
152 	switch (dir) {
153 	case MLXSW_SP_RIF_COUNTER_EGRESS:
154 		return &rif->counter_egress;
155 	case MLXSW_SP_RIF_COUNTER_INGRESS:
156 		return &rif->counter_ingress;
157 	}
158 	return NULL;
159 }
160 
161 static bool
162 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
163 			       enum mlxsw_sp_rif_counter_dir dir)
164 {
165 	switch (dir) {
166 	case MLXSW_SP_RIF_COUNTER_EGRESS:
167 		return rif->counter_egress_valid;
168 	case MLXSW_SP_RIF_COUNTER_INGRESS:
169 		return rif->counter_ingress_valid;
170 	}
171 	return false;
172 }
173 
174 static void
175 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
176 			       enum mlxsw_sp_rif_counter_dir dir,
177 			       bool valid)
178 {
179 	switch (dir) {
180 	case MLXSW_SP_RIF_COUNTER_EGRESS:
181 		rif->counter_egress_valid = valid;
182 		break;
183 	case MLXSW_SP_RIF_COUNTER_INGRESS:
184 		rif->counter_ingress_valid = valid;
185 		break;
186 	}
187 }
188 
189 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
190 				     unsigned int counter_index, bool enable,
191 				     enum mlxsw_sp_rif_counter_dir dir)
192 {
193 	char ritr_pl[MLXSW_REG_RITR_LEN];
194 	bool is_egress = false;
195 	int err;
196 
197 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
198 		is_egress = true;
199 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
200 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
201 	if (err)
202 		return err;
203 
204 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
205 				    is_egress);
206 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207 }
208 
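/**
 * mlxsw_sp_rif_counter_value_get - Read the current value of a RIF counter.
 * @mlxsw_sp: mlxsw_sp.
 * @rif: RIF whose counter should be read.
 * @dir: Counter direction, ingress or egress.
 * @cnt: Where the good-unicast packet count is stored.
 *
 * Returns 0 on success, -EINVAL if no valid counter is bound in this
 * direction.
 */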
209 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
210 				   struct mlxsw_sp_rif *rif,
211 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
212 {
213 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
214 	unsigned int *p_counter_index;
215 	bool valid;
216 	int err;
217 
218 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
219 	if (!valid)
220 		return -EINVAL;
221 
222 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
223 	if (!p_counter_index)
224 		return -EINVAL;
225 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
226 			     MLXSW_REG_RICNT_OPCODE_NOP);
227 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
228 	if (err)
229 		return err;
230 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
231 	return 0;
232 }
233 
234 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
235 				      unsigned int counter_index)
236 {
237 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
238 
239 	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
240 			     MLXSW_REG_RICNT_OPCODE_CLEAR);
241 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
242 }
243 
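/**
 * mlxsw_sp_rif_counter_alloc - Allocate and bind a counter to a RIF.
 * @mlxsw_sp: mlxsw_sp.
 * @rif: RIF to bind the counter to.
 * @dir: Counter direction, ingress or egress.
 *
 * Allocates an index from the RIF counter sub-pool, clears the counter and
 * then binds it to the RIF through the RITR register.
 */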
244 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
245 			       struct mlxsw_sp_rif *rif,
246 			       enum mlxsw_sp_rif_counter_dir dir)
247 {
248 	unsigned int *p_counter_index;
249 	int err;
250 
251 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 	if (!p_counter_index)
253 		return -EINVAL;
254 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
255 				     p_counter_index);
256 	if (err)
257 		return err;
258 
259 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
260 	if (err)
261 		goto err_counter_clear;
262 
263 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
264 					*p_counter_index, true, dir);
265 	if (err)
266 		goto err_counter_edit;
267 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
268 	return 0;
269 
270 err_counter_edit:
271 err_counter_clear:
272 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
273 			      *p_counter_index);
274 	return err;
275 }
276 
277 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
278 			       struct mlxsw_sp_rif *rif,
279 			       enum mlxsw_sp_rif_counter_dir dir)
280 {
281 	unsigned int *p_counter_index;
282 
283 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
284 		return;
285 
286 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
287 	if (WARN_ON(!p_counter_index))
288 		return;
289 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
290 				  *p_counter_index, false, dir);
291 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
292 			      *p_counter_index);
293 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
294 }
295 
296 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
297 {
298 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
299 	struct devlink *devlink;
300 
301 	devlink = priv_to_devlink(mlxsw_sp->core);
302 	if (!devlink_dpipe_table_counter_enabled(devlink,
303 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
304 		return;
305 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
306 }
307 
308 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
309 {
310 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
311 
312 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
313 }
314 
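/* One bit for each possible prefix length, /0 through /128 in the IPv6
 * case, hence sizeof(struct in6_addr) * BITS_PER_BYTE + 1 == 129 bits.
 */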
315 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
316 
317 struct mlxsw_sp_prefix_usage {
318 	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
319 };
320 
321 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
322 	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
323 
324 static bool
325 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
326 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
327 {
328 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
329 }
330 
331 static void
332 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
333 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
334 {
335 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
336 }
337 
338 static void
339 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
340 			  unsigned char prefix_len)
341 {
342 	set_bit(prefix_len, prefix_usage->b);
343 }
344 
345 static void
346 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
347 			    unsigned char prefix_len)
348 {
349 	clear_bit(prefix_len, prefix_usage->b);
350 }
351 
352 struct mlxsw_sp_fib_key {
353 	unsigned char addr[sizeof(struct in6_addr)];
354 	unsigned char prefix_len;
355 };
356 
357 enum mlxsw_sp_fib_entry_type {
358 	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
359 	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
360 	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
361 
362 	/* This is a special case of local delivery, where a packet should be
363 	 * decapsulated on reception. Note that there is no corresponding ENCAP,
364 	 * because that's a type of next hop, not of FIB entry. (There can be
365 	 * several next hops in a REMOTE entry, and some of them may be
366 	 * encapsulating entries.)
367 	 */
368 	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
369 	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
370 };
371 
372 struct mlxsw_sp_nexthop_group;
373 
374 struct mlxsw_sp_fib_node {
375 	struct list_head entry_list;
376 	struct list_head list;
377 	struct rhash_head ht_node;
378 	struct mlxsw_sp_fib *fib;
379 	struct mlxsw_sp_fib_key key;
380 };
381 
382 struct mlxsw_sp_fib_entry_decap {
383 	struct mlxsw_sp_ipip_entry *ipip_entry;
384 	u32 tunnel_index;
385 };
386 
387 struct mlxsw_sp_fib_entry {
388 	struct list_head list;
389 	struct mlxsw_sp_fib_node *fib_node;
390 	enum mlxsw_sp_fib_entry_type type;
391 	struct list_head nexthop_group_node;
392 	struct mlxsw_sp_nexthop_group *nh_group;
393 	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
394 };
395 
396 struct mlxsw_sp_fib4_entry {
397 	struct mlxsw_sp_fib_entry common;
398 	u32 tb_id;
399 	u32 prio;
400 	u8 tos;
401 	u8 type;
402 };
403 
404 struct mlxsw_sp_fib6_entry {
405 	struct mlxsw_sp_fib_entry common;
406 	struct list_head rt6_list;
407 	unsigned int nrt6;
408 };
409 
410 struct mlxsw_sp_rt6 {
411 	struct list_head list;
412 	struct fib6_info *rt;
413 };
414 
415 struct mlxsw_sp_lpm_tree {
416 	u8 id; /* tree ID */
417 	unsigned int ref_count;
418 	enum mlxsw_sp_l3proto proto;
419 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
420 	struct mlxsw_sp_prefix_usage prefix_usage;
421 };
422 
423 struct mlxsw_sp_fib {
424 	struct rhashtable ht;
425 	struct list_head node_list;
426 	struct mlxsw_sp_vr *vr;
427 	struct mlxsw_sp_lpm_tree *lpm_tree;
428 	enum mlxsw_sp_l3proto proto;
429 };
430 
431 struct mlxsw_sp_vr {
432 	u16 id; /* virtual router ID */
433 	u32 tb_id; /* kernel fib table id */
434 	unsigned int rif_count;
435 	struct mlxsw_sp_fib *fib4;
436 	struct mlxsw_sp_fib *fib6;
437 	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
438 };
439 
440 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
441 
442 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
443 						struct mlxsw_sp_vr *vr,
444 						enum mlxsw_sp_l3proto proto)
445 {
446 	struct mlxsw_sp_lpm_tree *lpm_tree;
447 	struct mlxsw_sp_fib *fib;
448 	int err;
449 
450 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
451 	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
452 	if (!fib)
453 		return ERR_PTR(-ENOMEM);
454 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
455 	if (err)
456 		goto err_rhashtable_init;
457 	INIT_LIST_HEAD(&fib->node_list);
458 	fib->proto = proto;
459 	fib->vr = vr;
460 	fib->lpm_tree = lpm_tree;
461 	mlxsw_sp_lpm_tree_hold(lpm_tree);
462 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
463 	if (err)
464 		goto err_lpm_tree_bind;
465 	return fib;
466 
467 err_lpm_tree_bind:
468 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
469 err_rhashtable_init:
470 	kfree(fib);
471 	return ERR_PTR(err);
472 }
473 
474 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
475 				 struct mlxsw_sp_fib *fib)
476 {
477 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
478 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
479 	WARN_ON(!list_empty(&fib->node_list));
480 	rhashtable_destroy(&fib->ht);
481 	kfree(fib);
482 }
483 
484 static struct mlxsw_sp_lpm_tree *
485 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
486 {
	struct mlxsw_sp_lpm_tree *lpm_tree;
488 	int i;
489 
490 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
491 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
492 		if (lpm_tree->ref_count == 0)
493 			return lpm_tree;
494 	}
495 	return NULL;
496 }
497 
498 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
499 				   struct mlxsw_sp_lpm_tree *lpm_tree)
500 {
501 	char ralta_pl[MLXSW_REG_RALTA_LEN];
502 
503 	mlxsw_reg_ralta_pack(ralta_pl, true,
504 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
505 			     lpm_tree->id);
506 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
507 }
508 
509 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
510 				   struct mlxsw_sp_lpm_tree *lpm_tree)
511 {
512 	char ralta_pl[MLXSW_REG_RALTA_LEN];
513 
514 	mlxsw_reg_ralta_pack(ralta_pl, false,
515 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
516 			     lpm_tree->id);
517 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
518 }
519 
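/* Program the tree structure through the RALST register: the longest used
 * prefix length serves as the root bin, and each used bin is chained to the
 * next shorter used prefix length through its left child.
 */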
520 static int
521 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
522 				  struct mlxsw_sp_prefix_usage *prefix_usage,
523 				  struct mlxsw_sp_lpm_tree *lpm_tree)
524 {
525 	char ralst_pl[MLXSW_REG_RALST_LEN];
526 	u8 root_bin = 0;
527 	u8 prefix;
528 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
529 
530 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
531 		root_bin = prefix;
532 
533 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
534 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
535 		if (prefix == 0)
536 			continue;
537 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
538 					 MLXSW_REG_RALST_BIN_NO_CHILD);
539 		last_prefix = prefix;
540 	}
541 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
542 }
543 
544 static struct mlxsw_sp_lpm_tree *
545 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
546 			 struct mlxsw_sp_prefix_usage *prefix_usage,
547 			 enum mlxsw_sp_l3proto proto)
548 {
549 	struct mlxsw_sp_lpm_tree *lpm_tree;
550 	int err;
551 
552 	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
553 	if (!lpm_tree)
554 		return ERR_PTR(-EBUSY);
555 	lpm_tree->proto = proto;
556 	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
557 	if (err)
558 		return ERR_PTR(err);
559 
560 	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
561 						lpm_tree);
562 	if (err)
563 		goto err_left_struct_set;
564 	memcpy(&lpm_tree->prefix_usage, prefix_usage,
565 	       sizeof(lpm_tree->prefix_usage));
566 	memset(&lpm_tree->prefix_ref_count, 0,
567 	       sizeof(lpm_tree->prefix_ref_count));
568 	lpm_tree->ref_count = 1;
569 	return lpm_tree;
570 
571 err_left_struct_set:
572 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
573 	return ERR_PTR(err);
574 }
575 
576 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
577 				      struct mlxsw_sp_lpm_tree *lpm_tree)
578 {
579 	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
580 }
581 
582 static struct mlxsw_sp_lpm_tree *
583 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
584 		      struct mlxsw_sp_prefix_usage *prefix_usage,
585 		      enum mlxsw_sp_l3proto proto)
586 {
587 	struct mlxsw_sp_lpm_tree *lpm_tree;
588 	int i;
589 
590 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
591 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
592 		if (lpm_tree->ref_count != 0 &&
593 		    lpm_tree->proto == proto &&
594 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
595 					     prefix_usage)) {
596 			mlxsw_sp_lpm_tree_hold(lpm_tree);
597 			return lpm_tree;
598 		}
599 	}
600 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
601 }
602 
603 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
604 {
605 	lpm_tree->ref_count++;
606 }
607 
608 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
609 				  struct mlxsw_sp_lpm_tree *lpm_tree)
610 {
611 	if (--lpm_tree->ref_count == 0)
612 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
613 }
614 
615 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
616 
617 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
618 {
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 }};
620 	struct mlxsw_sp_lpm_tree *lpm_tree;
621 	u64 max_trees;
622 	int err, i;
623 
624 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
625 		return -EIO;
626 
627 	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
628 	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
629 	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
630 					     sizeof(struct mlxsw_sp_lpm_tree),
631 					     GFP_KERNEL);
632 	if (!mlxsw_sp->router->lpm.trees)
633 		return -ENOMEM;
634 
635 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
636 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
637 		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
638 	}
639 
640 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
641 					 MLXSW_SP_L3_PROTO_IPV4);
642 	if (IS_ERR(lpm_tree)) {
643 		err = PTR_ERR(lpm_tree);
644 		goto err_ipv4_tree_get;
645 	}
646 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
647 
648 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
649 					 MLXSW_SP_L3_PROTO_IPV6);
650 	if (IS_ERR(lpm_tree)) {
651 		err = PTR_ERR(lpm_tree);
652 		goto err_ipv6_tree_get;
653 	}
654 	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
655 
656 	return 0;
657 
658 err_ipv6_tree_get:
659 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
660 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
661 err_ipv4_tree_get:
662 	kfree(mlxsw_sp->router->lpm.trees);
663 	return err;
664 }
665 
666 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
667 {
668 	struct mlxsw_sp_lpm_tree *lpm_tree;
669 
670 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
671 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
672 
673 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
674 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
675 
676 	kfree(mlxsw_sp->router->lpm.trees);
677 }
678 
679 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
680 {
681 	return !!vr->fib4 || !!vr->fib6 ||
682 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
683 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
684 }
685 
686 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
687 {
688 	struct mlxsw_sp_vr *vr;
689 	int i;
690 
691 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
692 		vr = &mlxsw_sp->router->vrs[i];
693 		if (!mlxsw_sp_vr_is_used(vr))
694 			return vr;
695 	}
696 	return NULL;
697 }
698 
699 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
700 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
701 {
702 	char raltb_pl[MLXSW_REG_RALTB_LEN];
703 
704 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
705 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
706 			     tree_id);
707 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
708 }
709 
710 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
711 				       const struct mlxsw_sp_fib *fib)
712 {
713 	char raltb_pl[MLXSW_REG_RALTB_LEN];
714 
715 	/* Bind to tree 0 which is default */
716 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
717 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
718 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
719 }
720 
721 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
722 {
723 	/* For our purpose, squash main, default and local tables into one */
724 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
725 		tb_id = RT_TABLE_MAIN;
726 	return tb_id;
727 }
728 
729 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
730 					    u32 tb_id)
731 {
732 	struct mlxsw_sp_vr *vr;
733 	int i;
734 
735 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
736 
737 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
738 		vr = &mlxsw_sp->router->vrs[i];
739 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
740 			return vr;
741 	}
742 	return NULL;
743 }
744 
745 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
746 				u16 *vr_id)
747 {
748 	struct mlxsw_sp_vr *vr;
749 
750 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
751 	if (!vr)
752 		return -ESRCH;
753 	*vr_id = vr->id;
754 
755 	return 0;
756 }
757 
758 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
759 					    enum mlxsw_sp_l3proto proto)
760 {
761 	switch (proto) {
762 	case MLXSW_SP_L3_PROTO_IPV4:
763 		return vr->fib4;
764 	case MLXSW_SP_L3_PROTO_IPV6:
765 		return vr->fib6;
766 	}
767 	return NULL;
768 }
769 
770 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
771 					      u32 tb_id,
772 					      struct netlink_ext_ack *extack)
773 {
774 	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
775 	struct mlxsw_sp_fib *fib4;
776 	struct mlxsw_sp_fib *fib6;
777 	struct mlxsw_sp_vr *vr;
778 	int err;
779 
780 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
781 	if (!vr) {
782 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
783 		return ERR_PTR(-EBUSY);
784 	}
785 	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
786 	if (IS_ERR(fib4))
787 		return ERR_CAST(fib4);
788 	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
789 	if (IS_ERR(fib6)) {
790 		err = PTR_ERR(fib6);
791 		goto err_fib6_create;
792 	}
793 	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
794 					     MLXSW_SP_L3_PROTO_IPV4);
795 	if (IS_ERR(mr4_table)) {
796 		err = PTR_ERR(mr4_table);
797 		goto err_mr4_table_create;
798 	}
799 	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
800 					     MLXSW_SP_L3_PROTO_IPV6);
801 	if (IS_ERR(mr6_table)) {
802 		err = PTR_ERR(mr6_table);
803 		goto err_mr6_table_create;
804 	}
805 
806 	vr->fib4 = fib4;
807 	vr->fib6 = fib6;
808 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
809 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
810 	vr->tb_id = tb_id;
811 	return vr;
812 
813 err_mr6_table_create:
814 	mlxsw_sp_mr_table_destroy(mr4_table);
815 err_mr4_table_create:
816 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
817 err_fib6_create:
818 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
819 	return ERR_PTR(err);
820 }
821 
822 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
823 				struct mlxsw_sp_vr *vr)
824 {
825 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
826 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
827 	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
828 	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
829 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
830 	vr->fib6 = NULL;
831 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
832 	vr->fib4 = NULL;
833 }
834 
835 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
836 					   struct netlink_ext_ack *extack)
837 {
838 	struct mlxsw_sp_vr *vr;
839 
840 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
841 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
842 	if (!vr)
843 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
844 	return vr;
845 }
846 
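/* VRs are not reference counted explicitly; a VR is released once no RIF is
 * bound to it and all of its unicast and multicast tables are empty.
 */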
847 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
848 {
849 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
850 	    list_empty(&vr->fib6->node_list) &&
851 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
852 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
853 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
854 }
855 
856 static bool
857 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
858 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
859 {
860 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
861 
862 	if (!mlxsw_sp_vr_is_used(vr))
863 		return false;
864 	if (fib->lpm_tree->id == tree_id)
865 		return true;
866 	return false;
867 }
868 
869 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
870 					struct mlxsw_sp_fib *fib,
871 					struct mlxsw_sp_lpm_tree *new_tree)
872 {
873 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
874 	int err;
875 
876 	fib->lpm_tree = new_tree;
877 	mlxsw_sp_lpm_tree_hold(new_tree);
878 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
879 	if (err)
880 		goto err_tree_bind;
881 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
882 	return 0;
883 
884 err_tree_bind:
885 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
886 	fib->lpm_tree = old_tree;
887 	return err;
888 }
889 
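/* Rebind all virtual routers currently bound to the default tree of this
 * protocol to new_tree. On failure, roll the already rebound VRs back to
 * the old tree.
 */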
890 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
891 					 struct mlxsw_sp_fib *fib,
892 					 struct mlxsw_sp_lpm_tree *new_tree)
893 {
894 	enum mlxsw_sp_l3proto proto = fib->proto;
895 	struct mlxsw_sp_lpm_tree *old_tree;
896 	u8 old_id, new_id = new_tree->id;
897 	struct mlxsw_sp_vr *vr;
898 	int i, err;
899 
900 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
901 	old_id = old_tree->id;
902 
903 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
904 		vr = &mlxsw_sp->router->vrs[i];
905 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
906 			continue;
907 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
908 						   mlxsw_sp_vr_fib(vr, proto),
909 						   new_tree);
910 		if (err)
911 			goto err_tree_replace;
912 	}
913 
914 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
915 	       sizeof(new_tree->prefix_ref_count));
916 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
917 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
918 
919 	return 0;
920 
921 err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
925 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
926 					     mlxsw_sp_vr_fib(vr, proto),
927 					     old_tree);
928 	}
929 	return err;
930 }
931 
932 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
933 {
934 	struct mlxsw_sp_vr *vr;
935 	u64 max_vrs;
936 	int i;
937 
938 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
939 		return -EIO;
940 
941 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
942 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
943 					GFP_KERNEL);
944 	if (!mlxsw_sp->router->vrs)
945 		return -ENOMEM;
946 
947 	for (i = 0; i < max_vrs; i++) {
948 		vr = &mlxsw_sp->router->vrs[i];
949 		vr->id = i;
950 	}
951 
952 	return 0;
953 }
954 
955 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
956 
957 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
958 {
959 	/* At this stage we're guaranteed not to have new incoming
960 	 * FIB notifications and the work queue is free from FIBs
961 	 * sitting on top of mlxsw netdevs. However, we can still
962 	 * have other FIBs queued. Flush the queue before flushing
963 	 * the device's tables. No need for locks, as we're the only
964 	 * writer.
965 	 */
966 	mlxsw_core_flush_owq();
967 	mlxsw_sp_router_fib_flush(mlxsw_sp);
968 	kfree(mlxsw_sp->router->vrs);
969 }
970 
971 static struct net_device *
972 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
973 {
974 	struct ip_tunnel *tun = netdev_priv(ol_dev);
975 	struct net *net = dev_net(ol_dev);
976 
977 	return __dev_get_by_index(net, tun->parms.link);
978 }
979 
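/* Underlay lookups of a tunnel take place in the table of the device that
 * the tunnel is bound to, or in the main table when the tunnel is not bound
 * to any device.
 */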
980 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
981 {
982 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
983 
984 	if (d)
985 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
986 	else
		return RT_TABLE_MAIN;
988 }
989 
990 static struct mlxsw_sp_rif *
991 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
992 		    const struct mlxsw_sp_rif_params *params,
993 		    struct netlink_ext_ack *extack);
994 
995 static struct mlxsw_sp_rif_ipip_lb *
996 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
997 				enum mlxsw_sp_ipip_type ipipt,
998 				struct net_device *ol_dev,
999 				struct netlink_ext_ack *extack)
1000 {
1001 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1002 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1003 	struct mlxsw_sp_rif *rif;
1004 
1005 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1006 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1007 		.common.dev = ol_dev,
1008 		.common.lag = false,
1009 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1010 	};
1011 
1012 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1013 	if (IS_ERR(rif))
1014 		return ERR_CAST(rif);
1015 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1016 }
1017 
1018 static struct mlxsw_sp_ipip_entry *
1019 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1020 			  enum mlxsw_sp_ipip_type ipipt,
1021 			  struct net_device *ol_dev)
1022 {
1023 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1024 	struct mlxsw_sp_ipip_entry *ipip_entry;
1025 	struct mlxsw_sp_ipip_entry *ret = NULL;
1026 
1027 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1028 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1029 	if (!ipip_entry)
1030 		return ERR_PTR(-ENOMEM);
1031 
1032 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1033 							    ol_dev, NULL);
1034 	if (IS_ERR(ipip_entry->ol_lb)) {
1035 		ret = ERR_CAST(ipip_entry->ol_lb);
1036 		goto err_ol_ipip_lb_create;
1037 	}
1038 
1039 	ipip_entry->ipipt = ipipt;
1040 	ipip_entry->ol_dev = ol_dev;
1041 
1042 	switch (ipip_ops->ul_proto) {
1043 	case MLXSW_SP_L3_PROTO_IPV4:
1044 		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1045 		break;
1046 	case MLXSW_SP_L3_PROTO_IPV6:
1047 		WARN_ON(1);
1048 		break;
1049 	}
1050 
1051 	return ipip_entry;
1052 
1053 err_ol_ipip_lb_create:
1054 	kfree(ipip_entry);
1055 	return ret;
1056 }
1057 
1058 static void
1059 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1060 {
1061 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1062 	kfree(ipip_entry);
1063 }
1064 
1065 static bool
1066 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1067 				  const enum mlxsw_sp_l3proto ul_proto,
1068 				  union mlxsw_sp_l3addr saddr,
1069 				  u32 ul_tb_id,
1070 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1071 {
1072 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1073 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1074 	union mlxsw_sp_l3addr tun_saddr;
1075 
1076 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1077 		return false;
1078 
1079 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1080 	return tun_ul_tb_id == ul_tb_id &&
1081 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1082 }
1083 
1084 static int
1085 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1086 			      struct mlxsw_sp_fib_entry *fib_entry,
1087 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1088 {
1089 	u32 tunnel_index;
1090 	int err;
1091 
1092 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1093 				  1, &tunnel_index);
1094 	if (err)
1095 		return err;
1096 
1097 	ipip_entry->decap_fib_entry = fib_entry;
1098 	fib_entry->decap.ipip_entry = ipip_entry;
1099 	fib_entry->decap.tunnel_index = tunnel_index;
1100 	return 0;
1101 }
1102 
1103 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1104 					  struct mlxsw_sp_fib_entry *fib_entry)
1105 {
1106 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1107 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1108 	fib_entry->decap.ipip_entry = NULL;
1109 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1110 			   1, fib_entry->decap.tunnel_index);
1111 }
1112 
1113 static struct mlxsw_sp_fib_node *
1114 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1115 			 size_t addr_len, unsigned char prefix_len);
1116 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1117 				     struct mlxsw_sp_fib_entry *fib_entry);
1118 
1119 static void
1120 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1121 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1122 {
1123 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1124 
1125 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1126 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1127 
1128 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1129 }
1130 
1131 static void
1132 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1133 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1134 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1135 {
1136 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1137 					  ipip_entry))
1138 		return;
1139 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1140 
1141 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1142 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1143 }
1144 
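/* Find the local host route of the given type for addr in table tb_id.
 * Only IPv4 is currently supported.
 */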
1145 static struct mlxsw_sp_fib_entry *
1146 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1147 				     enum mlxsw_sp_l3proto proto,
1148 				     const union mlxsw_sp_l3addr *addr,
1149 				     enum mlxsw_sp_fib_entry_type type)
1150 {
1151 	struct mlxsw_sp_fib_entry *fib_entry;
1152 	struct mlxsw_sp_fib_node *fib_node;
1153 	unsigned char addr_prefix_len;
1154 	struct mlxsw_sp_fib *fib;
1155 	struct mlxsw_sp_vr *vr;
1156 	const void *addrp;
1157 	size_t addr_len;
1158 	u32 addr4;
1159 
1160 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1161 	if (!vr)
1162 		return NULL;
1163 	fib = mlxsw_sp_vr_fib(vr, proto);
1164 
1165 	switch (proto) {
1166 	case MLXSW_SP_L3_PROTO_IPV4:
1167 		addr4 = be32_to_cpu(addr->addr4);
1168 		addrp = &addr4;
1169 		addr_len = 4;
1170 		addr_prefix_len = 32;
1171 		break;
1172 	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1173 	default:
1174 		WARN_ON(1);
1175 		return NULL;
1176 	}
1177 
1178 	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1179 					    addr_prefix_len);
1180 	if (!fib_node || list_empty(&fib_node->entry_list))
1181 		return NULL;
1182 
1183 	fib_entry = list_first_entry(&fib_node->entry_list,
1184 				     struct mlxsw_sp_fib_entry, list);
1185 	if (fib_entry->type != type)
1186 		return NULL;
1187 
1188 	return fib_entry;
1189 }
1190 
1191 /* Given an IPIP entry, find the corresponding decap route. */
1192 static struct mlxsw_sp_fib_entry *
1193 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1194 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1195 {
	struct mlxsw_sp_fib_node *fib_node;
1197 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1198 	struct mlxsw_sp_fib_entry *fib_entry;
1199 	unsigned char saddr_prefix_len;
1200 	union mlxsw_sp_l3addr saddr;
1201 	struct mlxsw_sp_fib *ul_fib;
1202 	struct mlxsw_sp_vr *ul_vr;
1203 	const void *saddrp;
1204 	size_t saddr_len;
1205 	u32 ul_tb_id;
1206 	u32 saddr4;
1207 
1208 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1209 
1210 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1211 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1212 	if (!ul_vr)
1213 		return NULL;
1214 
1215 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1216 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1217 					   ipip_entry->ol_dev);
1218 
1219 	switch (ipip_ops->ul_proto) {
1220 	case MLXSW_SP_L3_PROTO_IPV4:
1221 		saddr4 = be32_to_cpu(saddr.addr4);
1222 		saddrp = &saddr4;
1223 		saddr_len = 4;
1224 		saddr_prefix_len = 32;
1225 		break;
1226 	case MLXSW_SP_L3_PROTO_IPV6:
1227 		WARN_ON(1);
1228 		return NULL;
1229 	}
1230 
1231 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1232 					    saddr_prefix_len);
1233 	if (!fib_node || list_empty(&fib_node->entry_list))
1234 		return NULL;
1235 
1236 	fib_entry = list_first_entry(&fib_node->entry_list,
1237 				     struct mlxsw_sp_fib_entry, list);
1238 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1239 		return NULL;
1240 
1241 	return fib_entry;
1242 }
1243 
1244 static struct mlxsw_sp_ipip_entry *
1245 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1246 			   enum mlxsw_sp_ipip_type ipipt,
1247 			   struct net_device *ol_dev)
1248 {
1249 	struct mlxsw_sp_ipip_entry *ipip_entry;
1250 
1251 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1252 	if (IS_ERR(ipip_entry))
1253 		return ipip_entry;
1254 
1255 	list_add_tail(&ipip_entry->ipip_list_node,
1256 		      &mlxsw_sp->router->ipip_list);
1257 
1258 	return ipip_entry;
1259 }
1260 
1261 static void
1262 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1263 			    struct mlxsw_sp_ipip_entry *ipip_entry)
1264 {
1265 	list_del(&ipip_entry->ipip_list_node);
1266 	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1267 }
1268 
1269 static bool
1270 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1271 				  const struct net_device *ul_dev,
1272 				  enum mlxsw_sp_l3proto ul_proto,
1273 				  union mlxsw_sp_l3addr ul_dip,
1274 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1275 {
1276 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1277 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1278 	struct net_device *ipip_ul_dev;
1279 
1280 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1281 		return false;
1282 
1283 	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1284 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1285 						 ul_tb_id, ipip_entry) &&
1286 	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
1287 }
1288 
1289 /* Given decap parameters, find the corresponding IPIP entry. */
1290 static struct mlxsw_sp_ipip_entry *
1291 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1292 				  const struct net_device *ul_dev,
1293 				  enum mlxsw_sp_l3proto ul_proto,
1294 				  union mlxsw_sp_l3addr ul_dip)
1295 {
1296 	struct mlxsw_sp_ipip_entry *ipip_entry;
1297 
1298 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1299 			    ipip_list_node)
1300 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1301 						      ul_proto, ul_dip,
1302 						      ipip_entry))
1303 			return ipip_entry;
1304 
1305 	return NULL;
1306 }
1307 
1308 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1309 				      const struct net_device *dev,
1310 				      enum mlxsw_sp_ipip_type *p_type)
1311 {
1312 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1313 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1314 	enum mlxsw_sp_ipip_type ipipt;
1315 
1316 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1317 		ipip_ops = router->ipip_ops_arr[ipipt];
1318 		if (dev->type == ipip_ops->dev_type) {
1319 			if (p_type)
1320 				*p_type = ipipt;
1321 			return true;
1322 		}
1323 	}
1324 	return false;
1325 }
1326 
1327 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1328 				const struct net_device *dev)
1329 {
1330 	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1331 }
1332 
1333 static struct mlxsw_sp_ipip_entry *
1334 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1335 				   const struct net_device *ol_dev)
1336 {
1337 	struct mlxsw_sp_ipip_entry *ipip_entry;
1338 
1339 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1340 			    ipip_list_node)
1341 		if (ipip_entry->ol_dev == ol_dev)
1342 			return ipip_entry;
1343 
1344 	return NULL;
1345 }
1346 
1347 static struct mlxsw_sp_ipip_entry *
1348 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1349 				   const struct net_device *ul_dev,
1350 				   struct mlxsw_sp_ipip_entry *start)
1351 {
1352 	struct mlxsw_sp_ipip_entry *ipip_entry;
1353 
1354 	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1355 					ipip_list_node);
1356 	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1357 				     ipip_list_node) {
1358 		struct net_device *ipip_ul_dev =
1359 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1360 
1361 		if (ipip_ul_dev == ul_dev)
1362 			return ipip_entry;
1363 	}
1364 
1365 	return NULL;
1366 }
1367 
1368 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1369 				const struct net_device *dev)
1370 {
1371 	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1372 }
1373 
1374 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1375 						const struct net_device *ol_dev,
1376 						enum mlxsw_sp_ipip_type ipipt)
1377 {
1378 	const struct mlxsw_sp_ipip_ops *ops
1379 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1380 
1381 	/* For deciding whether decap should be offloaded, we don't care about
1382 	 * overlay protocol, so ask whether either one is supported.
1383 	 */
1384 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1385 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1386 }
1387 
1388 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1389 						struct net_device *ol_dev)
1390 {
1391 	struct mlxsw_sp_ipip_entry *ipip_entry;
1392 	enum mlxsw_sp_l3proto ul_proto;
1393 	enum mlxsw_sp_ipip_type ipipt;
1394 	union mlxsw_sp_l3addr saddr;
1395 	u32 ul_tb_id;
1396 
1397 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1398 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1399 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1400 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1401 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1402 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1403 							  saddr, ul_tb_id,
1404 							  NULL)) {
1405 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1406 								ol_dev);
1407 			if (IS_ERR(ipip_entry))
1408 				return PTR_ERR(ipip_entry);
1409 		}
1410 	}
1411 
1412 	return 0;
1413 }
1414 
1415 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1416 						   struct net_device *ol_dev)
1417 {
1418 	struct mlxsw_sp_ipip_entry *ipip_entry;
1419 
1420 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1421 	if (ipip_entry)
1422 		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1423 }
1424 
1425 static void
1426 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1427 				struct mlxsw_sp_ipip_entry *ipip_entry)
1428 {
1429 	struct mlxsw_sp_fib_entry *decap_fib_entry;
1430 
1431 	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1432 	if (decap_fib_entry)
1433 		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1434 						  decap_fib_entry);
1435 }
1436 
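/* Enable or disable the loopback RIF backing an IPIP tunnel. The RITR
 * register carries the loopback configuration: IPIP type, underlay VR,
 * tunnel source address and output key.
 */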
1437 static int
1438 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1439 			struct mlxsw_sp_vr *ul_vr, bool enable)
1440 {
1441 	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1442 	struct mlxsw_sp_rif *rif = &lb_rif->common;
1443 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1444 	char ritr_pl[MLXSW_REG_RITR_LEN];
1445 	u32 saddr4;
1446 
1447 	switch (lb_cf.ul_protocol) {
1448 	case MLXSW_SP_L3_PROTO_IPV4:
1449 		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1450 		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1451 				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1452 		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1453 			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1454 			    ul_vr->id, saddr4, lb_cf.okey);
1455 		break;
1456 
1457 	case MLXSW_SP_L3_PROTO_IPV6:
1458 		return -EAFNOSUPPORT;
1459 	}
1460 
1461 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1462 }
1463 
1464 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1465 						 struct net_device *ol_dev)
1466 {
1467 	struct mlxsw_sp_ipip_entry *ipip_entry;
1468 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1469 	struct mlxsw_sp_vr *ul_vr;
1470 	int err = 0;
1471 
1472 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1473 	if (ipip_entry) {
1474 		lb_rif = ipip_entry->ol_lb;
1475 		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1476 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1477 		if (err)
1478 			goto out;
1479 		lb_rif->common.mtu = ol_dev->mtu;
1480 	}
1481 
1482 out:
1483 	return err;
1484 }
1485 
1486 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1487 						struct net_device *ol_dev)
1488 {
1489 	struct mlxsw_sp_ipip_entry *ipip_entry;
1490 
1491 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1492 	if (ipip_entry)
1493 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1494 }
1495 
1496 static void
1497 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1498 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1499 {
1500 	if (ipip_entry->decap_fib_entry)
1501 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1502 }
1503 
1504 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1505 						  struct net_device *ol_dev)
1506 {
1507 	struct mlxsw_sp_ipip_entry *ipip_entry;
1508 
1509 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1510 	if (ipip_entry)
1511 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1512 }
1513 
1514 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1515 					 struct mlxsw_sp_rif *old_rif,
1516 					 struct mlxsw_sp_rif *new_rif);
1517 static int
1518 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1519 				 struct mlxsw_sp_ipip_entry *ipip_entry,
1520 				 bool keep_encap,
1521 				 struct netlink_ext_ack *extack)
1522 {
1523 	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1524 	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1525 
1526 	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1527 						     ipip_entry->ipipt,
1528 						     ipip_entry->ol_dev,
1529 						     extack);
1530 	if (IS_ERR(new_lb_rif))
1531 		return PTR_ERR(new_lb_rif);
1532 	ipip_entry->ol_lb = new_lb_rif;
1533 
1534 	if (keep_encap)
1535 		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1536 					     &new_lb_rif->common);
1537 
1538 	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1539 
1540 	return 0;
1541 }
1542 
1543 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1544 					struct mlxsw_sp_rif *rif);
1545 
1546 /**
1547  * Update the offload related to an IPIP entry. This always updates decap, and
1548  * in addition to that it also:
1549  * @recreate_loopback: recreates the associated loopback RIF
1550  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1551  *              relevant when recreate_loopback is true.
1552  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1553  *                   is only relevant when recreate_loopback is false.
1554  */
1555 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1556 					struct mlxsw_sp_ipip_entry *ipip_entry,
1557 					bool recreate_loopback,
1558 					bool keep_encap,
1559 					bool update_nexthops,
1560 					struct netlink_ext_ack *extack)
1561 {
1562 	int err;
1563 
1564 	/* RIFs can't be edited, so to update loopback, we need to destroy and
1565 	 * recreate it. That creates a window of opportunity where RALUE and
1566 	 * RATR registers end up referencing a RIF that's already gone. RATRs
1567 	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1568 	 * of RALUE, demote the decap route back.
1569 	 */
1570 	if (ipip_entry->decap_fib_entry)
1571 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1572 
1573 	if (recreate_loopback) {
1574 		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1575 						       keep_encap, extack);
1576 		if (err)
1577 			return err;
1578 	} else if (update_nexthops) {
1579 		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1580 					    &ipip_entry->ol_lb->common);
1581 	}
1582 
1583 	if (ipip_entry->ol_dev->flags & IFF_UP)
1584 		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1585 
1586 	return 0;
1587 }
1588 
1589 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1590 						struct net_device *ol_dev,
1591 						struct netlink_ext_ack *extack)
1592 {
1593 	struct mlxsw_sp_ipip_entry *ipip_entry =
1594 		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1595 	enum mlxsw_sp_l3proto ul_proto;
1596 	union mlxsw_sp_l3addr saddr;
1597 	u32 ul_tb_id;
1598 
1599 	if (!ipip_entry)
1600 		return 0;
1601 
1602 	/* For flat configuration cases, moving overlay to a different VRF might
1603 	 * cause local address conflict, and the conflicting tunnels need to be
1604 	 * demoted.
1605 	 */
1606 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1607 	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1608 	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1609 	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1610 						 saddr, ul_tb_id,
1611 						 ipip_entry)) {
1612 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1613 		return 0;
1614 	}
1615 
1616 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1617 						   true, false, false, extack);
1618 }
1619 
1620 static int
1621 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1622 				     struct mlxsw_sp_ipip_entry *ipip_entry,
1623 				     struct net_device *ul_dev,
1624 				     struct netlink_ext_ack *extack)
1625 {
1626 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1627 						   true, true, false, extack);
1628 }
1629 
1630 static int
1631 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1632 				    struct mlxsw_sp_ipip_entry *ipip_entry,
1633 				    struct net_device *ul_dev)
1634 {
1635 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1636 						   false, false, true, NULL);
1637 }
1638 
1639 static int
1640 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1641 				      struct mlxsw_sp_ipip_entry *ipip_entry,
1642 				      struct net_device *ul_dev)
1643 {
1644 	/* A down underlay device causes encapsulated packets to not be
1645 	 * forwarded, but decap still works. So refresh next hops without
1646 	 * touching anything else.
1647 	 */
1648 	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1649 						   false, false, true, NULL);
1650 }
1651 
1652 static int
1653 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1654 					struct net_device *ol_dev,
1655 					struct netlink_ext_ack *extack)
1656 {
1657 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1658 	struct mlxsw_sp_ipip_entry *ipip_entry;
1659 	int err;
1660 
1661 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1662 	if (!ipip_entry)
1663 		/* A change might make a tunnel eligible for offloading, but
1664 		 * that is currently not implemented. What falls to slow path
1665 		 * stays there.
1666 		 */
1667 		return 0;
1668 
1669 	/* A change might make a tunnel not eligible for offloading. */
1670 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1671 						 ipip_entry->ipipt)) {
1672 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1673 		return 0;
1674 	}
1675 
1676 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1677 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1678 	return err;
1679 }
1680 
1681 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1682 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1683 {
1684 	struct net_device *ol_dev = ipip_entry->ol_dev;
1685 
1686 	if (ol_dev->flags & IFF_UP)
1687 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1688 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1689 }
1690 
1691 /* The configuration where several tunnels have the same local address in the
1692  * same underlay table needs special treatment in the HW. That is currently not
1693  * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in the `except'
 * argument.
1696  */
1697 bool
1698 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1699 				     enum mlxsw_sp_l3proto ul_proto,
1700 				     union mlxsw_sp_l3addr saddr,
1701 				     u32 ul_tb_id,
1702 				     const struct mlxsw_sp_ipip_entry *except)
1703 {
1704 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1705 
1706 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1707 				 ipip_list_node) {
1708 		if (ipip_entry != except &&
1709 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1710 						      ul_tb_id, ipip_entry)) {
1711 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1712 			return true;
1713 		}
1714 	}
1715 
1716 	return false;
1717 }
1718 
1719 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1720 						     struct net_device *ul_dev)
1721 {
1722 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1723 
1724 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1725 				 ipip_list_node) {
1726 		struct net_device *ipip_ul_dev =
1727 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1728 
1729 		if (ipip_ul_dev == ul_dev)
1730 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1731 	}
1732 }
1733 
1734 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1735 				     struct net_device *ol_dev,
1736 				     unsigned long event,
1737 				     struct netdev_notifier_info *info)
1738 {
1739 	struct netdev_notifier_changeupper_info *chup;
1740 	struct netlink_ext_ack *extack;
1741 
1742 	switch (event) {
1743 	case NETDEV_REGISTER:
1744 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1745 	case NETDEV_UNREGISTER:
1746 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1747 		return 0;
1748 	case NETDEV_UP:
1749 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1750 		return 0;
1751 	case NETDEV_DOWN:
1752 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1753 		return 0;
1754 	case NETDEV_CHANGEUPPER:
1755 		chup = container_of(info, typeof(*chup), info);
1756 		extack = info->extack;
1757 		if (netif_is_l3_master(chup->upper_dev))
1758 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1759 								    ol_dev,
1760 								    extack);
1761 		return 0;
1762 	case NETDEV_CHANGE:
1763 		extack = info->extack;
1764 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1765 							       ol_dev, extack);
1766 	case NETDEV_CHANGEMTU:
1767 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1768 	}
1769 	return 0;
1770 }
1771 
1772 static int
1773 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1774 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1775 				   struct net_device *ul_dev,
1776 				   unsigned long event,
1777 				   struct netdev_notifier_info *info)
1778 {
1779 	struct netdev_notifier_changeupper_info *chup;
1780 	struct netlink_ext_ack *extack;
1781 
1782 	switch (event) {
1783 	case NETDEV_CHANGEUPPER:
1784 		chup = container_of(info, typeof(*chup), info);
1785 		extack = info->extack;
1786 		if (netif_is_l3_master(chup->upper_dev))
1787 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1788 								    ipip_entry,
1789 								    ul_dev,
1790 								    extack);
1791 		break;
1792 
1793 	case NETDEV_UP:
1794 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1795 							   ul_dev);
1796 	case NETDEV_DOWN:
1797 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1798 							     ipip_entry,
1799 							     ul_dev);
1800 	}
1801 	return 0;
1802 }
1803 
1804 int
1805 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1806 				 struct net_device *ul_dev,
1807 				 unsigned long event,
1808 				 struct netdev_notifier_info *info)
1809 {
1810 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1811 	int err;
1812 
1813 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1814 								ul_dev,
1815 								ipip_entry))) {
1816 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1817 							 ul_dev, event, info);
1818 		if (err) {
1819 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1820 								 ul_dev);
1821 			return err;
1822 		}
1823 	}
1824 
1825 	return 0;
1826 }
1827 
1828 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1829 				      enum mlxsw_sp_l3proto ul_proto,
1830 				      const union mlxsw_sp_l3addr *ul_sip,
1831 				      u32 tunnel_index)
1832 {
1833 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1834 	struct mlxsw_sp_fib_entry *fib_entry;
1835 	int err;
1836 
	/* It is valid to create a tunnel with a local IP and only later
	 * assign this IP address to a local interface.
	 */
1840 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1841 							 ul_proto, ul_sip,
1842 							 type);
1843 	if (!fib_entry)
1844 		return 0;
1845 
1846 	fib_entry->decap.tunnel_index = tunnel_index;
1847 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1848 
1849 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1850 	if (err)
1851 		goto err_fib_entry_update;
1852 
1853 	return 0;
1854 
1855 err_fib_entry_update:
1856 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1857 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1858 	return err;
1859 }
1860 
1861 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1862 				      enum mlxsw_sp_l3proto ul_proto,
1863 				      const union mlxsw_sp_l3addr *ul_sip)
1864 {
1865 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1866 	struct mlxsw_sp_fib_entry *fib_entry;
1867 
1868 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1869 							 ul_proto, ul_sip,
1870 							 type);
1871 	if (!fib_entry)
1872 		return;
1873 
1874 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1875 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1876 }
1877 
1878 struct mlxsw_sp_neigh_key {
1879 	struct neighbour *n;
1880 };
1881 
1882 struct mlxsw_sp_neigh_entry {
1883 	struct list_head rif_list_node;
1884 	struct rhash_head ht_node;
1885 	struct mlxsw_sp_neigh_key key;
1886 	u16 rif;
1887 	bool connected;
1888 	unsigned char ha[ETH_ALEN];
1889 	struct list_head nexthop_list; /* list of nexthops using
1890 					* this neigh entry
1891 					*/
1892 	struct list_head nexthop_neighs_list_node;
1893 	unsigned int counter_index;
1894 	bool counter_valid;
1895 };
1896 
1897 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1898 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1899 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1900 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1901 };
1902 
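/* Iterate over the neighbour entries of a RIF: pass NULL to get the first
 * entry, and NULL is returned past the last one. An illustrative traversal
 * (not taken from this file):
 *
 *	struct mlxsw_sp_neigh_entry *neigh_entry = NULL;
 *
 *	while ((neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)))
 *		...;
 */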
1903 struct mlxsw_sp_neigh_entry *
1904 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1905 			struct mlxsw_sp_neigh_entry *neigh_entry)
1906 {
1907 	if (!neigh_entry) {
1908 		if (list_empty(&rif->neigh_list))
1909 			return NULL;
1910 		else
1911 			return list_first_entry(&rif->neigh_list,
1912 						typeof(*neigh_entry),
1913 						rif_list_node);
1914 	}
1915 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1916 		return NULL;
1917 	return list_next_entry(neigh_entry, rif_list_node);
1918 }
1919 
1920 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1921 {
1922 	return neigh_entry->key.n->tbl->family;
1923 }
1924 
1925 unsigned char *
1926 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1927 {
1928 	return neigh_entry->ha;
1929 }
1930 
1931 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1932 {
1933 	struct neighbour *n;
1934 
1935 	n = neigh_entry->key.n;
1936 	return ntohl(*((__be32 *) n->primary_key));
1937 }
1938 
1939 struct in6_addr *
1940 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1941 {
1942 	struct neighbour *n;
1943 
1944 	n = neigh_entry->key.n;
1945 	return (struct in6_addr *) &n->primary_key;
1946 }
1947 
1948 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1949 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1950 			       u64 *p_counter)
1951 {
1952 	if (!neigh_entry->counter_valid)
1953 		return -EINVAL;
1954 
1955 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1956 					 p_counter, NULL);
1957 }
1958 
1959 static struct mlxsw_sp_neigh_entry *
1960 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1961 			   u16 rif)
1962 {
1963 	struct mlxsw_sp_neigh_entry *neigh_entry;
1964 
1965 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1966 	if (!neigh_entry)
1967 		return NULL;
1968 
1969 	neigh_entry->key.n = n;
1970 	neigh_entry->rif = rif;
1971 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1972 
1973 	return neigh_entry;
1974 }
1975 
1976 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1977 {
1978 	kfree(neigh_entry);
1979 }
1980 
1981 static int
1982 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1983 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1984 {
1985 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1986 				      &neigh_entry->ht_node,
1987 				      mlxsw_sp_neigh_ht_params);
1988 }
1989 
1990 static void
1991 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1992 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1993 {
1994 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1995 			       &neigh_entry->ht_node,
1996 			       mlxsw_sp_neigh_ht_params);
1997 }
1998 
1999 static bool
2000 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2001 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2002 {
2003 	struct devlink *devlink;
2004 	const char *table_name;
2005 
2006 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2007 	case AF_INET:
2008 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2009 		break;
2010 	case AF_INET6:
2011 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2012 		break;
2013 	default:
2014 		WARN_ON(1);
2015 		return false;
2016 	}
2017 
2018 	devlink = priv_to_devlink(mlxsw_sp->core);
2019 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2020 }
2021 
2022 static void
2023 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2024 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2025 {
2026 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2027 		return;
2028 
2029 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2030 		return;
2031 
2032 	neigh_entry->counter_valid = true;
2033 }
2034 
2035 static void
2036 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2037 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2038 {
2039 	if (!neigh_entry->counter_valid)
2040 		return;
2041 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2042 				   neigh_entry->counter_index);
2043 	neigh_entry->counter_valid = false;
2044 }
2045 
2046 static struct mlxsw_sp_neigh_entry *
2047 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2048 {
2049 	struct mlxsw_sp_neigh_entry *neigh_entry;
2050 	struct mlxsw_sp_rif *rif;
2051 	int err;
2052 
2053 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2054 	if (!rif)
2055 		return ERR_PTR(-EINVAL);
2056 
2057 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2058 	if (!neigh_entry)
2059 		return ERR_PTR(-ENOMEM);
2060 
2061 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2062 	if (err)
2063 		goto err_neigh_entry_insert;
2064 
2065 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2066 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2067 
2068 	return neigh_entry;
2069 
2070 err_neigh_entry_insert:
2071 	mlxsw_sp_neigh_entry_free(neigh_entry);
2072 	return ERR_PTR(err);
2073 }
2074 
2075 static void
2076 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2077 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2078 {
2079 	list_del(&neigh_entry->rif_list_node);
2080 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2081 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2082 	mlxsw_sp_neigh_entry_free(neigh_entry);
2083 }
2084 
2085 static struct mlxsw_sp_neigh_entry *
2086 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2087 {
2088 	struct mlxsw_sp_neigh_key key;
2089 
2090 	key.n = n;
2091 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2092 				      &key, mlxsw_sp_neigh_ht_params);
2093 }
2094 
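/* The polling interval is derived from the tables' DELAY_PROBE_TIME so that
 * activity learned from the device reaches the kernel before a neighbour
 * would otherwise transition to the delay/probe state.
 */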
2095 static void
2096 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2097 {
2098 	unsigned long interval;
2099 
2100 #if IS_ENABLED(CONFIG_IPV6)
2101 	interval = min_t(unsigned long,
2102 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2103 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2104 #else
2105 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2106 #endif
2107 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2108 }
2109 
2110 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2111 						   char *rauhtd_pl,
2112 						   int ent_index)
2113 {
2114 	struct net_device *dev;
2115 	struct neighbour *n;
2116 	__be32 dipn;
2117 	u32 dip;
2118 	u16 rif;
2119 
2120 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2121 
2122 	if (!mlxsw_sp->router->rifs[rif]) {
2123 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2124 		return;
2125 	}
2126 
2127 	dipn = htonl(dip);
2128 	dev = mlxsw_sp->router->rifs[rif]->dev;
2129 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2130 	if (!n)
2131 		return;
2132 
2133 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2134 	neigh_event_send(n, NULL);
2135 	neigh_release(n);
2136 }
2137 
2138 #if IS_ENABLED(CONFIG_IPV6)
2139 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2140 						   char *rauhtd_pl,
2141 						   int rec_index)
2142 {
2143 	struct net_device *dev;
2144 	struct neighbour *n;
2145 	struct in6_addr dip;
2146 	u16 rif;
2147 
2148 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2149 					 (char *) &dip);
2150 
2151 	if (!mlxsw_sp->router->rifs[rif]) {
2152 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2153 		return;
2154 	}
2155 
2156 	dev = mlxsw_sp->router->rifs[rif]->dev;
2157 	n = neigh_lookup(&nd_tbl, &dip, dev);
2158 	if (!n)
2159 		return;
2160 
2161 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2162 	neigh_event_send(n, NULL);
2163 	neigh_release(n);
2164 }
2165 #else
2166 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2167 						   char *rauhtd_pl,
2168 						   int rec_index)
2169 {
2170 }
2171 #endif
2172 
2173 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2174 						   char *rauhtd_pl,
2175 						   int rec_index)
2176 {
2177 	u8 num_entries;
2178 	int i;
2179 
2180 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2181 								rec_index);
2182 	/* Hardware starts counting at 0, so add 1. */
2183 	num_entries++;
2184 
2185 	/* Each record consists of several neighbour entries. */
2186 	for (i = 0; i < num_entries; i++) {
2187 		int ent_index;
2188 
2189 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2190 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2191 						       ent_index);
	}
}
2195 
2196 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2197 						   char *rauhtd_pl,
2198 						   int rec_index)
2199 {
2200 	/* One record contains one entry. */
2201 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2202 					       rec_index);
2203 }
2204 
2205 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2206 					      char *rauhtd_pl, int rec_index)
2207 {
2208 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2209 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2210 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2211 						       rec_index);
2212 		break;
2213 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2214 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2215 						       rec_index);
2216 		break;
2217 	}
2218 }
2219 
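/* A response is deemed full when it holds the maximum number of records and
 * its last record cannot accept further entries: an IPv6 record always
 * carries a single entry, and an IPv4 record is full once all
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC entries are in use. A full response
 * suggests more records may be pending, so the dump is then repeated.
 */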
2220 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2221 {
2222 	u8 num_rec, last_rec_index, num_entries;
2223 
2224 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2225 	last_rec_index = num_rec - 1;
2226 
2227 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2228 		return false;
2229 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2230 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2231 		return true;
2232 
2233 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2234 								last_rec_index);
2235 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2236 		return true;
2237 	return false;
2238 }
2239 
2240 static int
2241 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2242 				       char *rauhtd_pl,
2243 				       enum mlxsw_reg_rauhtd_type type)
2244 {
2245 	int i, num_rec;
2246 	int err;
2247 
2248 	/* Make sure the neighbour's netdev isn't removed in the
2249 	 * process.
2250 	 */
2251 	rtnl_lock();
2252 	do {
2253 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2254 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2255 				      rauhtd_pl);
2256 		if (err) {
2257 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2258 			break;
2259 		}
2260 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2261 		for (i = 0; i < num_rec; i++)
2262 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2263 							  i);
2264 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2265 	rtnl_unlock();
2266 
2267 	return err;
2268 }
2269 
2270 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2271 {
2272 	enum mlxsw_reg_rauhtd_type type;
2273 	char *rauhtd_pl;
2274 	int err;
2275 
2276 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2277 	if (!rauhtd_pl)
2278 		return -ENOMEM;
2279 
2280 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2281 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2282 	if (err)
2283 		goto out;
2284 
2285 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2286 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2287 out:
2288 	kfree(rauhtd_pl);
2289 	return err;
2290 }
2291 
2292 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2293 {
2294 	struct mlxsw_sp_neigh_entry *neigh_entry;
2295 
	/* Take the RTNL mutex here to prevent the lists from changing. */
2297 	rtnl_lock();
2298 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2299 			    nexthop_neighs_list_node)
		/* If this neigh has nexthops, make the kernel think it is
		 * active regardless of the traffic.
		 */
2303 		neigh_event_send(neigh_entry->key.n, NULL);
2304 	rtnl_unlock();
2305 }
2306 
2307 static void
2308 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2309 {
2310 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2311 
2312 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2313 			       msecs_to_jiffies(interval));
2314 }
2315 
2316 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2317 {
2318 	struct mlxsw_sp_router *router;
2319 	int err;
2320 
2321 	router = container_of(work, struct mlxsw_sp_router,
2322 			      neighs_update.dw.work);
2323 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2324 	if (err)
2325 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2326 
2327 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2328 
2329 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2330 }
2331 
2332 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2333 {
2334 	struct mlxsw_sp_neigh_entry *neigh_entry;
2335 	struct mlxsw_sp_router *router;
2336 
2337 	router = container_of(work, struct mlxsw_sp_router,
2338 			      nexthop_probe_dw.work);
	/* Iterate over the nexthop neighbours and send an ARP/ND probe for
	 * each unresolved one. This solves the chicken-and-egg problem
	 * where a nexthop is not offloaded until its neighbour is resolved,
	 * but the neighbour is never resolved if traffic only flows in HW
	 * via a different nexthop.
	 *
	 * Take the RTNL mutex here to prevent the lists from changing.
	 */
2347 	rtnl_lock();
2348 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2349 			    nexthop_neighs_list_node)
2350 		if (!neigh_entry->connected)
2351 			neigh_event_send(neigh_entry->key.n, NULL);
2352 	rtnl_unlock();
2353 
2354 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2355 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2356 }
2357 
2358 static void
2359 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2360 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2361 			      bool removing);
2362 
2363 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2364 {
2365 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2366 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2367 }
2368 
2369 static void
2370 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2371 				struct mlxsw_sp_neigh_entry *neigh_entry,
2372 				enum mlxsw_reg_rauht_op op)
2373 {
2374 	struct neighbour *n = neigh_entry->key.n;
2375 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2376 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2377 
2378 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2379 			      dip);
2380 	if (neigh_entry->counter_valid)
2381 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2382 					     neigh_entry->counter_index);
2383 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2384 }
2385 
2386 static void
2387 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2388 				struct mlxsw_sp_neigh_entry *neigh_entry,
2389 				enum mlxsw_reg_rauht_op op)
2390 {
2391 	struct neighbour *n = neigh_entry->key.n;
2392 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2393 	const char *dip = n->primary_key;
2394 
2395 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2396 			      dip);
2397 	if (neigh_entry->counter_valid)
2398 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2399 					     neigh_entry->counter_index);
2400 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2401 }
2402 
2403 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2404 {
2405 	struct neighbour *n = neigh_entry->key.n;
2406 
2407 	/* Packets with a link-local destination address are trapped
2408 	 * after LPM lookup and never reach the neighbour table, so
2409 	 * there is no need to program such neighbours to the device.
2410 	 */
2411 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2412 	    IPV6_ADDR_LINKLOCAL)
2413 		return true;
2414 	return false;
2415 }
2416 
2417 static void
2418 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2419 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2420 			    bool adding)
2421 {
2422 	if (!adding && !neigh_entry->connected)
2423 		return;
2424 	neigh_entry->connected = adding;
2425 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2426 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2427 						mlxsw_sp_rauht_op(adding));
2428 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2429 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2430 			return;
2431 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2432 						mlxsw_sp_rauht_op(adding));
2433 	} else {
2434 		WARN_ON_ONCE(1);
2435 	}
2436 }
2437 
2438 void
2439 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2440 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2441 				    bool adding)
2442 {
2443 	if (adding)
2444 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2445 	else
2446 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2447 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2448 }
2449 
2450 struct mlxsw_sp_netevent_work {
2451 	struct work_struct work;
2452 	struct mlxsw_sp *mlxsw_sp;
2453 	struct neighbour *n;
2454 };
2455 
2456 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2457 {
2458 	struct mlxsw_sp_netevent_work *net_work =
2459 		container_of(work, struct mlxsw_sp_netevent_work, work);
2460 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2461 	struct mlxsw_sp_neigh_entry *neigh_entry;
2462 	struct neighbour *n = net_work->n;
2463 	unsigned char ha[ETH_ALEN];
2464 	bool entry_connected;
2465 	u8 nud_state, dead;
2466 
2467 	/* If these parameters are changed after we release the lock,
2468 	 * then we are guaranteed to receive another event letting us
2469 	 * know about it.
2470 	 */
2471 	read_lock_bh(&n->lock);
2472 	memcpy(ha, n->ha, ETH_ALEN);
2473 	nud_state = n->nud_state;
2474 	dead = n->dead;
2475 	read_unlock_bh(&n->lock);
2476 
2477 	rtnl_lock();
2478 	mlxsw_sp_span_respin(mlxsw_sp);
2479 
2480 	entry_connected = nud_state & NUD_VALID && !dead;
2481 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2482 	if (!entry_connected && !neigh_entry)
2483 		goto out;
2484 	if (!neigh_entry) {
2485 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2486 		if (IS_ERR(neigh_entry))
2487 			goto out;
2488 	}
2489 
2490 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2491 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2492 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2493 
2494 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2495 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2496 
2497 out:
2498 	rtnl_unlock();
2499 	neigh_release(n);
2500 	kfree(net_work);
2501 }
2502 
2503 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2504 
2505 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2506 {
2507 	struct mlxsw_sp_netevent_work *net_work =
2508 		container_of(work, struct mlxsw_sp_netevent_work, work);
2509 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2510 
2511 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2512 	kfree(net_work);
2513 }
2514 
2515 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2516 
2517 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2518 {
2519 	struct mlxsw_sp_netevent_work *net_work =
2520 		container_of(work, struct mlxsw_sp_netevent_work, work);
2521 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2522 
2523 	__mlxsw_sp_router_init(mlxsw_sp);
2524 	kfree(net_work);
2525 }
2526 
2527 static int mlxsw_sp_router_schedule_work(struct net *net,
2528 					 struct notifier_block *nb,
2529 					 void (*cb)(struct work_struct *))
2530 {
2531 	struct mlxsw_sp_netevent_work *net_work;
2532 	struct mlxsw_sp_router *router;
2533 
2534 	if (!net_eq(net, &init_net))
2535 		return NOTIFY_DONE;
2536 
2537 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2538 	if (!net_work)
2539 		return NOTIFY_BAD;
2540 
2541 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2542 	INIT_WORK(&net_work->work, cb);
2543 	net_work->mlxsw_sp = router->mlxsw_sp;
2544 	mlxsw_core_schedule_work(&net_work->work);
2545 	return NOTIFY_DONE;
2546 }
2547 
2548 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2549 					  unsigned long event, void *ptr)
2550 {
2551 	struct mlxsw_sp_netevent_work *net_work;
2552 	struct mlxsw_sp_port *mlxsw_sp_port;
2553 	struct mlxsw_sp *mlxsw_sp;
2554 	unsigned long interval;
2555 	struct neigh_parms *p;
2556 	struct neighbour *n;
2557 
2558 	switch (event) {
2559 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2560 		p = ptr;
2561 
2562 		/* We don't care about changes in the default table. */
2563 		if (!p->dev || (p->tbl->family != AF_INET &&
2564 				p->tbl->family != AF_INET6))
2565 			return NOTIFY_DONE;
2566 
		/* We are in an atomic context and can't take the RTNL
		 * mutex, so use the RCU variant to walk the device chain.
		 */
2570 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2571 		if (!mlxsw_sp_port)
2572 			return NOTIFY_DONE;
2573 
2574 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2575 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2576 		mlxsw_sp->router->neighs_update.interval = interval;
2577 
2578 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2579 		break;
2580 	case NETEVENT_NEIGH_UPDATE:
2581 		n = ptr;
2582 
2583 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2584 			return NOTIFY_DONE;
2585 
2586 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2587 		if (!mlxsw_sp_port)
2588 			return NOTIFY_DONE;
2589 
2590 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2591 		if (!net_work) {
2592 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2593 			return NOTIFY_BAD;
2594 		}
2595 
2596 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2597 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2598 		net_work->n = n;
2599 
		/* Take a reference to ensure the neighbour won't be
		 * destroyed until we drop the reference in the work
		 * item.
		 */
2604 		neigh_clone(n);
2605 		mlxsw_core_schedule_work(&net_work->work);
2606 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2607 		break;
2608 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2609 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2610 		return mlxsw_sp_router_schedule_work(ptr, nb,
2611 				mlxsw_sp_router_mp_hash_event_work);
2612 
2613 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2614 		return mlxsw_sp_router_schedule_work(ptr, nb,
2615 				mlxsw_sp_router_update_priority_work);
2616 	}
2617 
2618 	return NOTIFY_DONE;
2619 }
2620 
2621 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2622 {
2623 	int err;
2624 
2625 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2626 			      &mlxsw_sp_neigh_ht_params);
2627 	if (err)
2628 		return err;
2629 
2630 	/* Initialize the polling interval according to the default
2631 	 * table.
2632 	 */
2633 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2634 
	/* Create the delayed works for neighbour activity updates and for
	 * probing of unresolved nexthops.
	 */
2636 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2637 			  mlxsw_sp_router_neighs_update_work);
2638 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2639 			  mlxsw_sp_router_probe_unresolved_nexthops);
2640 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2641 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2642 	return 0;
2643 }
2644 
2645 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2646 {
2647 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2648 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2649 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2650 }
2651 
2652 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2653 					 struct mlxsw_sp_rif *rif)
2654 {
2655 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2656 
2657 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2658 				 rif_list_node) {
2659 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2660 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2661 	}
2662 }
2663 
2664 enum mlxsw_sp_nexthop_type {
2665 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2666 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2667 };
2668 
2669 struct mlxsw_sp_nexthop_key {
2670 	struct fib_nh *fib_nh;
2671 };
2672 
2673 struct mlxsw_sp_nexthop {
2674 	struct list_head neigh_list_node; /* member of neigh entry list */
2675 	struct list_head rif_list_node;
2676 	struct list_head router_list_node;
2677 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2678 						* this belongs to
2679 						*/
2680 	struct rhash_head ht_node;
2681 	struct mlxsw_sp_nexthop_key key;
2682 	unsigned char gw_addr[sizeof(struct in6_addr)];
2683 	int ifindex;
2684 	int nh_weight;
2685 	int norm_nh_weight;
2686 	int num_adj_entries;
2687 	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put into the KVD linear area of
			      * this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * the KVD linear area of this group.
			 */
	   update:1; /* set indicates that the MAC of this neigh should
		      * be updated in HW.
		      */
2697 	enum mlxsw_sp_nexthop_type type;
2698 	union {
2699 		struct mlxsw_sp_neigh_entry *neigh_entry;
2700 		struct mlxsw_sp_ipip_entry *ipip_entry;
2701 	};
2702 	unsigned int counter_index;
2703 	bool counter_valid;
2704 };
2705 
2706 struct mlxsw_sp_nexthop_group {
2707 	void *priv;
2708 	struct rhash_head ht_node;
2709 	struct list_head fib_list; /* list of fib entries that use this group */
2710 	struct neigh_table *neigh_tbl;
2711 	u8 adj_index_valid:1,
2712 	   gateway:1; /* routes using the group use a gateway */
2713 	u32 adj_index;
2714 	u16 ecmp_size;
2715 	u16 count;
2716 	int sum_norm_weight;
2717 	struct mlxsw_sp_nexthop nexthops[0];
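/* Shorthand for the RIF of the first nexthop. */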
2718 #define nh_rif	nexthops[0].rif
2719 };
2720 
2721 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2722 				    struct mlxsw_sp_nexthop *nh)
2723 {
2724 	struct devlink *devlink;
2725 
2726 	devlink = priv_to_devlink(mlxsw_sp->core);
2727 	if (!devlink_dpipe_table_counter_enabled(devlink,
2728 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2729 		return;
2730 
2731 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2732 		return;
2733 
2734 	nh->counter_valid = true;
2735 }
2736 
2737 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2738 				   struct mlxsw_sp_nexthop *nh)
2739 {
2740 	if (!nh->counter_valid)
2741 		return;
2742 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2743 	nh->counter_valid = false;
2744 }
2745 
2746 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2747 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2748 {
2749 	if (!nh->counter_valid)
2750 		return -EINVAL;
2751 
2752 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2753 					 p_counter, NULL);
2754 }
2755 
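/* Same NULL-started iteration pattern as mlxsw_sp_rif_neigh_next() above,
 * but walking all nexthops known to the router.
 */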
2756 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2757 					       struct mlxsw_sp_nexthop *nh)
2758 {
2759 	if (!nh) {
2760 		if (list_empty(&router->nexthop_list))
2761 			return NULL;
2762 		else
2763 			return list_first_entry(&router->nexthop_list,
2764 						typeof(*nh), router_list_node);
2765 	}
2766 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2767 		return NULL;
2768 	return list_next_entry(nh, router_list_node);
2769 }
2770 
2771 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2772 {
2773 	return nh->offloaded;
2774 }
2775 
2776 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2777 {
2778 	if (!nh->offloaded)
2779 		return NULL;
2780 	return nh->neigh_entry->ha;
2781 }
2782 
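/* Resolve the location of a nexthop's adjacency entries: the group's base
 * adjacency index, the group (ECMP) size and the offset of this nexthop
 * within the group. The offset is the sum of the adjacency entries of the
 * offloaded nexthops that precede it.
 */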
2783 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2784 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2785 {
2786 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2787 	u32 adj_hash_index = 0;
2788 	int i;
2789 
2790 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2791 		return -EINVAL;
2792 
2793 	*p_adj_index = nh_grp->adj_index;
2794 	*p_adj_size = nh_grp->ecmp_size;
2795 
2796 	for (i = 0; i < nh_grp->count; i++) {
2797 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2798 
2799 		if (nh_iter == nh)
2800 			break;
2801 		if (nh_iter->offloaded)
2802 			adj_hash_index += nh_iter->num_adj_entries;
2803 	}
2804 
2805 	*p_adj_hash_index = adj_hash_index;
2806 	return 0;
2807 }
2808 
2809 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2810 {
2811 	return nh->rif;
2812 }
2813 
2814 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2815 {
2816 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2817 	int i;
2818 
2819 	for (i = 0; i < nh_grp->count; i++) {
2820 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2821 
2822 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2823 			return true;
2824 	}
2825 	return false;
2826 }
2827 
2828 static struct fib_info *
2829 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2830 {
2831 	return nh_grp->priv;
2832 }
2833 
2834 struct mlxsw_sp_nexthop_group_cmp_arg {
2835 	enum mlxsw_sp_l3proto proto;
2836 	union {
2837 		struct fib_info *fi;
2838 		struct mlxsw_sp_fib6_entry *fib6_entry;
2839 	};
2840 };
2841 
2842 static bool
2843 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2844 				    const struct in6_addr *gw, int ifindex,
2845 				    int weight)
2846 {
2847 	int i;
2848 
2849 	for (i = 0; i < nh_grp->count; i++) {
2850 		const struct mlxsw_sp_nexthop *nh;
2851 
2852 		nh = &nh_grp->nexthops[i];
2853 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2854 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2855 			return true;
2856 	}
2857 
2858 	return false;
2859 }
2860 
2861 static bool
2862 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2863 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2864 {
2865 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2866 
2867 	if (nh_grp->count != fib6_entry->nrt6)
2868 		return false;
2869 
2870 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2871 		struct in6_addr *gw;
2872 		int ifindex, weight;
2873 
2874 		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2875 		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2876 		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2877 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2878 							 weight))
2879 			return false;
2880 	}
2881 
2882 	return true;
2883 }
2884 
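/* rhashtable obj_cmpfn: return zero if the key matches the object,
 * non-zero otherwise.
 */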
2885 static int
2886 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2887 {
2888 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2889 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2890 
2891 	switch (cmp_arg->proto) {
2892 	case MLXSW_SP_L3_PROTO_IPV4:
2893 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2894 	case MLXSW_SP_L3_PROTO_IPV6:
2895 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2896 						    cmp_arg->fib6_entry);
2897 	default:
2898 		WARN_ON(1);
2899 		return 1;
2900 	}
2901 }
2902 
2903 static int
2904 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2905 {
2906 	return nh_grp->neigh_tbl->family;
2907 }
2908 
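/* With both hashfn and obj_hashfn defined, rhashtable requires that a key
 * and its matching object hash to the same value, so the cases here must
 * mirror mlxsw_sp_nexthop_group_hash() below.
 */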
2909 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2910 {
2911 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2912 	const struct mlxsw_sp_nexthop *nh;
2913 	struct fib_info *fi;
2914 	unsigned int val;
2915 	int i;
2916 
2917 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2918 	case AF_INET:
2919 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2920 		return jhash(&fi, sizeof(fi), seed);
2921 	case AF_INET6:
2922 		val = nh_grp->count;
2923 		for (i = 0; i < nh_grp->count; i++) {
2924 			nh = &nh_grp->nexthops[i];
2925 			val ^= nh->ifindex;
2926 		}
2927 		return jhash(&val, sizeof(val), seed);
2928 	default:
2929 		WARN_ON(1);
2930 		return 0;
2931 	}
2932 }
2933 
2934 static u32
2935 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2936 {
2937 	unsigned int val = fib6_entry->nrt6;
2938 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2939 	struct net_device *dev;
2940 
2941 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2942 		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2943 		val ^= dev->ifindex;
2944 	}
2945 
2946 	return jhash(&val, sizeof(val), seed);
2947 }
2948 
2949 static u32
2950 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2951 {
2952 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2953 
2954 	switch (cmp_arg->proto) {
2955 	case MLXSW_SP_L3_PROTO_IPV4:
2956 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2957 	case MLXSW_SP_L3_PROTO_IPV6:
2958 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2959 	default:
2960 		WARN_ON(1);
2961 		return 0;
2962 	}
2963 }
2964 
2965 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2966 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2967 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2968 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2969 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2970 };
2971 
2972 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2973 					 struct mlxsw_sp_nexthop_group *nh_grp)
2974 {
2975 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2976 	    !nh_grp->gateway)
2977 		return 0;
2978 
2979 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2980 				      &nh_grp->ht_node,
2981 				      mlxsw_sp_nexthop_group_ht_params);
2982 }
2983 
2984 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2985 					  struct mlxsw_sp_nexthop_group *nh_grp)
2986 {
2987 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2988 	    !nh_grp->gateway)
2989 		return;
2990 
2991 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2992 			       &nh_grp->ht_node,
2993 			       mlxsw_sp_nexthop_group_ht_params);
2994 }
2995 
2996 static struct mlxsw_sp_nexthop_group *
2997 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2998 			       struct fib_info *fi)
2999 {
3000 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3001 
3002 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3003 	cmp_arg.fi = fi;
3004 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3005 				      &cmp_arg,
3006 				      mlxsw_sp_nexthop_group_ht_params);
3007 }
3008 
3009 static struct mlxsw_sp_nexthop_group *
3010 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3011 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3012 {
3013 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3014 
3015 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3016 	cmp_arg.fib6_entry = fib6_entry;
3017 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3018 				      &cmp_arg,
3019 				      mlxsw_sp_nexthop_group_ht_params);
3020 }
3021 
3022 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3023 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3024 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3025 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3026 };
3027 
3028 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3029 				   struct mlxsw_sp_nexthop *nh)
3030 {
3031 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3032 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3033 }
3034 
3035 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3036 				    struct mlxsw_sp_nexthop *nh)
3037 {
3038 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3039 			       mlxsw_sp_nexthop_ht_params);
3040 }
3041 
3042 static struct mlxsw_sp_nexthop *
3043 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3044 			struct mlxsw_sp_nexthop_key key)
3045 {
3046 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3047 				      mlxsw_sp_nexthop_ht_params);
3048 }
3049 
3050 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3051 					     const struct mlxsw_sp_fib *fib,
3052 					     u32 adj_index, u16 ecmp_size,
3053 					     u32 new_adj_index,
3054 					     u16 new_ecmp_size)
3055 {
3056 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3057 
3058 	mlxsw_reg_raleu_pack(raleu_pl,
3059 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3060 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3061 			     new_ecmp_size);
3062 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3063 }
3064 
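/* Repoint all routes using this group from the old adjacency range to the
 * new one. A single RALEU write covers every route of a virtual router, so
 * the loop only issues one write per FIB table it walks over.
 */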
3065 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3066 					  struct mlxsw_sp_nexthop_group *nh_grp,
3067 					  u32 old_adj_index, u16 old_ecmp_size)
3068 {
3069 	struct mlxsw_sp_fib_entry *fib_entry;
3070 	struct mlxsw_sp_fib *fib = NULL;
3071 	int err;
3072 
3073 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3074 		if (fib == fib_entry->fib_node->fib)
3075 			continue;
3076 		fib = fib_entry->fib_node->fib;
3077 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3078 							old_adj_index,
3079 							old_ecmp_size,
3080 							nh_grp->adj_index,
3081 							nh_grp->ecmp_size);
3082 		if (err)
3083 			return err;
3084 	}
3085 	return 0;
3086 }
3087 
3088 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3089 				     struct mlxsw_sp_nexthop *nh)
3090 {
3091 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3092 	char ratr_pl[MLXSW_REG_RATR_LEN];
3093 
3094 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3095 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3096 			    adj_index, neigh_entry->rif);
3097 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3098 	if (nh->counter_valid)
3099 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3100 	else
3101 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3102 
3103 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3104 }
3105 
3106 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3107 			    struct mlxsw_sp_nexthop *nh)
3108 {
3109 	int i;
3110 
3111 	for (i = 0; i < nh->num_adj_entries; i++) {
3112 		int err;
3113 
3114 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3115 		if (err)
3116 			return err;
3117 	}
3118 
3119 	return 0;
3120 }
3121 
3122 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3123 					  u32 adj_index,
3124 					  struct mlxsw_sp_nexthop *nh)
3125 {
3126 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3127 
3128 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3129 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3130 }
3131 
3132 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3133 					u32 adj_index,
3134 					struct mlxsw_sp_nexthop *nh)
3135 {
3136 	int i;
3137 
3138 	for (i = 0; i < nh->num_adj_entries; i++) {
3139 		int err;
3140 
3141 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3142 						     nh);
3143 		if (err)
3144 			return err;
3145 	}
3146 
3147 	return 0;
3148 }
3149 
3150 static int
3151 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3152 			      struct mlxsw_sp_nexthop_group *nh_grp,
3153 			      bool reallocate)
3154 {
3155 	u32 adj_index = nh_grp->adj_index; /* base */
3156 	struct mlxsw_sp_nexthop *nh;
3157 	int i;
3158 	int err;
3159 
3160 	for (i = 0; i < nh_grp->count; i++) {
3161 		nh = &nh_grp->nexthops[i];
3162 
3163 		if (!nh->should_offload) {
3164 			nh->offloaded = 0;
3165 			continue;
3166 		}
3167 
3168 		if (nh->update || reallocate) {
3169 			switch (nh->type) {
3170 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3171 				err = mlxsw_sp_nexthop_update
3172 					    (mlxsw_sp, adj_index, nh);
3173 				break;
3174 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3175 				err = mlxsw_sp_nexthop_ipip_update
3176 					    (mlxsw_sp, adj_index, nh);
3177 				break;
3178 			}
3179 			if (err)
3180 				return err;
3181 			nh->update = 0;
3182 			nh->offloaded = 1;
3183 		}
3184 		adj_index += nh->num_adj_entries;
3185 	}
3186 	return 0;
3187 }
3188 
3189 static bool
3190 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3191 				 const struct mlxsw_sp_fib_entry *fib_entry);
3192 
3193 static int
3194 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3195 				    struct mlxsw_sp_nexthop_group *nh_grp)
3196 {
3197 	struct mlxsw_sp_fib_entry *fib_entry;
3198 	int err;
3199 
3200 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3201 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3202 						      fib_entry))
3203 			continue;
3204 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3205 		if (err)
3206 			return err;
3207 	}
3208 	return 0;
3209 }
3210 
3211 static void
3212 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3213 				   enum mlxsw_reg_ralue_op op, int err);
3214 
3215 static void
3216 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3217 {
3218 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3219 	struct mlxsw_sp_fib_entry *fib_entry;
3220 
3221 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3222 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3223 						      fib_entry))
3224 			continue;
3225 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3226 	}
3227 }
3228 
3229 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3230 {
3231 	/* Valid sizes for an adjacency group are:
3232 	 * 1-64, 512, 1024, 2048 and 4096.
3233 	 */
3234 	if (*p_adj_grp_size <= 64)
3235 		return;
3236 	else if (*p_adj_grp_size <= 512)
3237 		*p_adj_grp_size = 512;
3238 	else if (*p_adj_grp_size <= 1024)
3239 		*p_adj_grp_size = 1024;
3240 	else if (*p_adj_grp_size <= 2048)
3241 		*p_adj_grp_size = 2048;
3242 	else
3243 		*p_adj_grp_size = 4096;
3244 }
3245 
3246 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3247 					     unsigned int alloc_size)
3248 {
3249 	if (alloc_size >= 4096)
3250 		*p_adj_grp_size = 4096;
3251 	else if (alloc_size >= 2048)
3252 		*p_adj_grp_size = 2048;
3253 	else if (alloc_size >= 1024)
3254 		*p_adj_grp_size = 1024;
3255 	else if (alloc_size >= 512)
3256 		*p_adj_grp_size = 512;
3257 }
3258 
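/* Pick a usable adjacency group size. For example, a request for 90
 * entries is first rounded up to 512; if the KVDL allocator can, say, only
 * satisfy that from a 2048-entry partition, the size is then grown to 2048
 * to use the whole allocation.
 */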
3259 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3260 				     u16 *p_adj_grp_size)
3261 {
3262 	unsigned int alloc_size;
3263 	int err;
3264 
3265 	/* Round up the requested group size to the next size supported
3266 	 * by the device and make sure the request can be satisfied.
3267 	 */
3268 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3269 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3270 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3271 					      *p_adj_grp_size, &alloc_size);
3272 	if (err)
3273 		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as many of them as
	 * possible.
	 */
3278 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3279 
3280 	return 0;
3281 }
3282 
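/* Normalize the nexthop weights by their greatest common divisor. For
 * example, weights of 2 and 4 become 1 and 2, giving a sum_norm_weight of
 * 3, the minimal number of adjacency entries that preserves the 1:2 ratio.
 */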
3283 static void
3284 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3285 {
3286 	int i, g = 0, sum_norm_weight = 0;
3287 	struct mlxsw_sp_nexthop *nh;
3288 
3289 	for (i = 0; i < nh_grp->count; i++) {
3290 		nh = &nh_grp->nexthops[i];
3291 
3292 		if (!nh->should_offload)
3293 			continue;
3294 		if (g > 0)
3295 			g = gcd(nh->nh_weight, g);
3296 		else
3297 			g = nh->nh_weight;
3298 	}
3299 
3300 	for (i = 0; i < nh_grp->count; i++) {
3301 		nh = &nh_grp->nexthops[i];
3302 
3303 		if (!nh->should_offload)
3304 			continue;
3305 		nh->norm_nh_weight = nh->nh_weight / g;
3306 		sum_norm_weight += nh->norm_nh_weight;
3307 	}
3308 
3309 	nh_grp->sum_norm_weight = sum_norm_weight;
3310 }
3311 
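/* Distribute ecmp_size adjacency entries among the offloaded nexthops in
 * proportion to their normalized weights. The cumulative rounding
 * guarantees the per-nexthop counts sum to exactly ecmp_size. Continuing
 * the example above with an ecmp_size of 512, the weight-1 nexthop
 * receives DIV_ROUND_CLOSEST(512 * 1, 3) = 171 entries and the weight-2
 * nexthop the remaining 341.
 */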
3312 static void
3313 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3314 {
3315 	int total = nh_grp->sum_norm_weight;
3316 	u16 ecmp_size = nh_grp->ecmp_size;
3317 	int i, weight = 0, lower_bound = 0;
3318 
3319 	for (i = 0; i < nh_grp->count; i++) {
3320 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3321 		int upper_bound;
3322 
3323 		if (!nh->should_offload)
3324 			continue;
3325 		weight += nh->norm_nh_weight;
3326 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3327 		nh->num_adj_entries = upper_bound - lower_bound;
3328 		lower_bound = upper_bound;
3329 	}
3330 }
3331 
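/* Re-evaluate a nexthop group after one of its nexthops changed. If the
 * set of offloaded nexthops is unchanged, only the adjacency entries are
 * rewritten in place. Otherwise a new adjacency range is allocated,
 * populated and switched to via a mass-update, and the old range is freed.
 * If nothing is offloadable, the routes using the group are programmed to
 * trap to the kernel instead.
 */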
3332 static void
3333 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3334 			       struct mlxsw_sp_nexthop_group *nh_grp)
3335 {
3336 	u16 ecmp_size, old_ecmp_size;
3337 	struct mlxsw_sp_nexthop *nh;
3338 	bool offload_change = false;
3339 	u32 adj_index;
3340 	bool old_adj_index_valid;
3341 	u32 old_adj_index;
3342 	int i;
3343 	int err;
3344 
3345 	if (!nh_grp->gateway) {
3346 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3347 		return;
3348 	}
3349 
3350 	for (i = 0; i < nh_grp->count; i++) {
3351 		nh = &nh_grp->nexthops[i];
3352 
3353 		if (nh->should_offload != nh->offloaded) {
3354 			offload_change = true;
3355 			if (nh->should_offload)
3356 				nh->update = 1;
3357 		}
3358 	}
3359 	if (!offload_change) {
3360 		/* Nothing was added or removed, so no need to reallocate. Just
3361 		 * update MAC on existing adjacency indexes.
3362 		 */
3363 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3364 		if (err) {
3365 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3366 			goto set_trap;
3367 		}
3368 		return;
3369 	}
3370 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3371 	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through the kernel.
		 */
3375 		goto set_trap;
3376 
3377 	ecmp_size = nh_grp->sum_norm_weight;
3378 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3379 	if (err)
3380 		/* No valid allocation size available. */
3381 		goto set_trap;
3382 
3383 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3384 				  ecmp_size, &adj_index);
3385 	if (err) {
		/* We ran out of KVD linear space; just set the
		 * trap and let everything flow through the kernel.
		 */
3389 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3390 		goto set_trap;
3391 	}
3392 	old_adj_index_valid = nh_grp->adj_index_valid;
3393 	old_adj_index = nh_grp->adj_index;
3394 	old_ecmp_size = nh_grp->ecmp_size;
3395 	nh_grp->adj_index_valid = 1;
3396 	nh_grp->adj_index = adj_index;
3397 	nh_grp->ecmp_size = ecmp_size;
3398 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3399 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3400 	if (err) {
3401 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3402 		goto set_trap;
3403 	}
3404 
3405 	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use the adjacency
		 * index.
		 */
3409 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3410 		if (err) {
3411 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3412 			goto set_trap;
3413 		}
3414 		return;
3415 	}
3416 
3417 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3418 					     old_adj_index, old_ecmp_size);
3419 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3420 			   old_ecmp_size, old_adj_index);
3421 	if (err) {
3422 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3423 		goto set_trap;
3424 	}
3425 
3426 	/* Offload state within the group changed, so update the flags. */
3427 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3428 
3429 	return;
3430 
3431 set_trap:
3432 	old_adj_index_valid = nh_grp->adj_index_valid;
3433 	nh_grp->adj_index_valid = 0;
3434 	for (i = 0; i < nh_grp->count; i++) {
3435 		nh = &nh_grp->nexthops[i];
3436 		nh->offloaded = 0;
3437 	}
3438 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3439 	if (err)
3440 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3441 	if (old_adj_index_valid)
3442 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3443 				   nh_grp->ecmp_size, nh_grp->adj_index);
3444 }
3445 
3446 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3447 					    bool removing)
3448 {
3449 	if (!removing)
3450 		nh->should_offload = 1;
3451 	else
3452 		nh->should_offload = 0;
3453 	nh->update = 1;
3454 }
3455 
3456 static void
3457 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3458 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3459 			      bool removing)
3460 {
3461 	struct mlxsw_sp_nexthop *nh;
3462 
3463 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3464 			    neigh_list_node) {
3465 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3466 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3467 	}
3468 }
3469 
3470 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3471 				      struct mlxsw_sp_rif *rif)
3472 {
3473 	if (nh->rif)
3474 		return;
3475 
3476 	nh->rif = rif;
3477 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3478 }
3479 
3480 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3481 {
3482 	if (!nh->rif)
3483 		return;
3484 
3485 	list_del(&nh->rif_list_node);
3486 	nh->rif = NULL;
3487 }
3488 
3489 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3490 				       struct mlxsw_sp_nexthop *nh)
3491 {
3492 	struct mlxsw_sp_neigh_entry *neigh_entry;
3493 	struct neighbour *n;
3494 	u8 nud_state, dead;
3495 	int err;
3496 
3497 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3498 		return 0;
3499 
	/* Take a reference on the neighbour here to ensure it is not
	 * destroyed before the nexthop entry is done with it. The
	 * reference is taken either by neigh_lookup() or, in case the
	 * neighbour is not found, by neigh_create().
	 */
3505 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3506 	if (!n) {
3507 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3508 				 nh->rif->dev);
3509 		if (IS_ERR(n))
3510 			return PTR_ERR(n);
3511 		neigh_event_send(n, NULL);
3512 	}
3513 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3514 	if (!neigh_entry) {
3515 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3516 		if (IS_ERR(neigh_entry)) {
3517 			err = -EINVAL;
3518 			goto err_neigh_entry_create;
3519 		}
3520 	}
3521 
	/* If this is the first nexthop connected to this neigh, add it to
	 * the nexthop_neighs_list.
	 */
3525 	if (list_empty(&neigh_entry->nexthop_list))
3526 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3527 			      &mlxsw_sp->router->nexthop_neighs_list);
3528 
3529 	nh->neigh_entry = neigh_entry;
3530 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3531 	read_lock_bh(&n->lock);
3532 	nud_state = n->nud_state;
3533 	dead = n->dead;
3534 	read_unlock_bh(&n->lock);
3535 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3536 
3537 	return 0;
3538 
3539 err_neigh_entry_create:
3540 	neigh_release(n);
3541 	return err;
3542 }
3543 
3544 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3545 					struct mlxsw_sp_nexthop *nh)
3546 {
3547 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3548 	struct neighbour *n;
3549 
3550 	if (!neigh_entry)
3551 		return;
3552 	n = neigh_entry->key.n;
3553 
3554 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3555 	list_del(&nh->neigh_list_node);
3556 	nh->neigh_entry = NULL;
3557 
	/* If this is the last nexthop connected to this neigh, remove it
	 * from the nexthop_neighs_list.
	 */
3561 	if (list_empty(&neigh_entry->nexthop_list))
3562 		list_del(&neigh_entry->nexthop_neighs_list_node);
3563 
3564 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3565 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3566 
3567 	neigh_release(n);
3568 }
3569 
3570 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3571 {
3572 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3573 
3574 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3575 }
3576 
3577 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3578 				       struct mlxsw_sp_nexthop *nh,
3579 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3580 {
3581 	bool removing;
3582 
3583 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3584 		return;
3585 
3586 	nh->ipip_entry = ipip_entry;
3587 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3588 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3589 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3590 }
3591 
3592 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3593 				       struct mlxsw_sp_nexthop *nh)
3594 {
3595 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3596 
3597 	if (!ipip_entry)
3598 		return;
3599 
3600 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3601 	nh->ipip_entry = NULL;
3602 }
3603 
3604 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3605 					const struct fib_nh *fib_nh,
3606 					enum mlxsw_sp_ipip_type *p_ipipt)
3607 {
3608 	struct net_device *dev = fib_nh->nh_dev;
3609 
3610 	return dev &&
3611 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3612 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3613 }
3614 
3615 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3616 				       struct mlxsw_sp_nexthop *nh)
3617 {
3618 	switch (nh->type) {
3619 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3620 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3621 		mlxsw_sp_nexthop_rif_fini(nh);
3622 		break;
3623 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3624 		mlxsw_sp_nexthop_rif_fini(nh);
3625 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3626 		break;
3627 	}
3628 }
3629 
3630 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3631 				       struct mlxsw_sp_nexthop *nh,
3632 				       struct fib_nh *fib_nh)
3633 {
3634 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3635 	struct net_device *dev = fib_nh->nh_dev;
3636 	struct mlxsw_sp_ipip_entry *ipip_entry;
3637 	struct mlxsw_sp_rif *rif;
3638 	int err;
3639 
3640 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3641 	if (ipip_entry) {
3642 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3643 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3644 					  MLXSW_SP_L3_PROTO_IPV4)) {
3645 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3646 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3647 			return 0;
3648 		}
3649 	}
3650 
3651 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3652 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3653 	if (!rif)
3654 		return 0;
3655 
3656 	mlxsw_sp_nexthop_rif_init(nh, rif);
3657 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3658 	if (err)
3659 		goto err_neigh_init;
3660 
3661 	return 0;
3662 
3663 err_neigh_init:
3664 	mlxsw_sp_nexthop_rif_fini(nh);
3665 	return err;
3666 }
3667 
3668 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3669 					struct mlxsw_sp_nexthop *nh)
3670 {
3671 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3672 }
3673 
3674 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3675 				  struct mlxsw_sp_nexthop_group *nh_grp,
3676 				  struct mlxsw_sp_nexthop *nh,
3677 				  struct fib_nh *fib_nh)
3678 {
3679 	struct net_device *dev = fib_nh->nh_dev;
3680 	struct in_device *in_dev;
3681 	int err;
3682 
3683 	nh->nh_grp = nh_grp;
3684 	nh->key.fib_nh = fib_nh;
3685 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3686 	nh->nh_weight = fib_nh->nh_weight;
3687 #else
3688 	nh->nh_weight = 1;
3689 #endif
3690 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3691 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3692 	if (err)
3693 		return err;
3694 
3695 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3696 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3697 
3698 	if (!dev)
3699 		return 0;
3700 
3701 	in_dev = __in_dev_get_rtnl(dev);
3702 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3703 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3704 		return 0;
3705 
3706 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3707 	if (err)
		goto err_nexthop4_type_init;
3709 
3710 	return 0;
3711 
err_nexthop4_type_init:
3713 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3714 	return err;
3715 }
3716 
3717 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3718 				   struct mlxsw_sp_nexthop *nh)
3719 {
3720 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3721 	list_del(&nh->router_list_node);
3722 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3723 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3724 }
3725 
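/* Handle FIB_EVENT_NH_ADD / FIB_EVENT_NH_DEL for an already inserted
 * nexthop: (de)initialize its type-specific state and refresh the
 * nexthop group in the device.
 */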
3726 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3727 				    unsigned long event, struct fib_nh *fib_nh)
3728 {
3729 	struct mlxsw_sp_nexthop_key key;
3730 	struct mlxsw_sp_nexthop *nh;
3731 
3732 	if (mlxsw_sp->router->aborted)
3733 		return;
3734 
3735 	key.fib_nh = fib_nh;
3736 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3737 	if (WARN_ON_ONCE(!nh))
3738 		return;
3739 
3740 	switch (event) {
3741 	case FIB_EVENT_NH_ADD:
3742 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3743 		break;
3744 	case FIB_EVENT_NH_DEL:
3745 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3746 		break;
3747 	}
3748 
3749 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3750 }
3751 
3752 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3753 					struct mlxsw_sp_rif *rif)
3754 {
3755 	struct mlxsw_sp_nexthop *nh;
3756 	bool removing;
3757 
3758 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3759 		switch (nh->type) {
3760 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3761 			removing = false;
3762 			break;
3763 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3764 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3765 			break;
3766 		default:
3767 			WARN_ON(1);
3768 			continue;
3769 		}
3770 
3771 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3772 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3773 	}
3774 }
3775 
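/* Move all nexthops from one RIF to another and re-evaluate their
 * usability under the new RIF.
 */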
3776 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3777 					 struct mlxsw_sp_rif *old_rif,
3778 					 struct mlxsw_sp_rif *new_rif)
3779 {
3780 	struct mlxsw_sp_nexthop *nh;
3781 
3782 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3783 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3784 		nh->rif = new_rif;
3785 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3786 }
3787 
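/* Called when a RIF is gone. Tear down the state of all nexthops that
 * used it and refresh their groups, so affected routes stop being
 * forwarded through the removed interface.
 */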
3788 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3789 					   struct mlxsw_sp_rif *rif)
3790 {
3791 	struct mlxsw_sp_nexthop *nh, *tmp;
3792 
3793 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3794 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3795 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3796 	}
3797 }
3798 
3799 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3800 				   const struct fib_info *fi)
3801 {
3802 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3803 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3804 }
3805 
3806 static struct mlxsw_sp_nexthop_group *
3807 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3808 {
3809 	struct mlxsw_sp_nexthop_group *nh_grp;
3810 	struct mlxsw_sp_nexthop *nh;
3811 	struct fib_nh *fib_nh;
3812 	size_t alloc_size;
3813 	int i;
3814 	int err;
3815 
3816 	alloc_size = sizeof(*nh_grp) +
3817 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3818 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3819 	if (!nh_grp)
3820 		return ERR_PTR(-ENOMEM);
3821 	nh_grp->priv = fi;
3822 	INIT_LIST_HEAD(&nh_grp->fib_list);
3823 	nh_grp->neigh_tbl = &arp_tbl;
3824 
3825 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3826 	nh_grp->count = fi->fib_nhs;
3827 	fib_info_hold(fi);
3828 	for (i = 0; i < nh_grp->count; i++) {
3829 		nh = &nh_grp->nexthops[i];
3830 		fib_nh = &fi->fib_nh[i];
3831 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3832 		if (err)
3833 			goto err_nexthop4_init;
3834 	}
3835 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3836 	if (err)
3837 		goto err_nexthop_group_insert;
3838 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3839 	return nh_grp;
3840 
3841 err_nexthop_group_insert:
3842 err_nexthop4_init:
3843 	for (i--; i >= 0; i--) {
3844 		nh = &nh_grp->nexthops[i];
3845 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3846 	}
3847 	fib_info_put(fi);
3848 	kfree(nh_grp);
3849 	return ERR_PTR(err);
3850 }
3851 
3852 static void
3853 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3854 				struct mlxsw_sp_nexthop_group *nh_grp)
3855 {
3856 	struct mlxsw_sp_nexthop *nh;
3857 	int i;
3858 
3859 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3860 	for (i = 0; i < nh_grp->count; i++) {
3861 		nh = &nh_grp->nexthops[i];
3862 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3863 	}
3864 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3865 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3866 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3867 	kfree(nh_grp);
3868 }
3869 
3870 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3871 				       struct mlxsw_sp_fib_entry *fib_entry,
3872 				       struct fib_info *fi)
3873 {
3874 	struct mlxsw_sp_nexthop_group *nh_grp;
3875 
3876 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3877 	if (!nh_grp) {
3878 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3879 		if (IS_ERR(nh_grp))
3880 			return PTR_ERR(nh_grp);
3881 	}
3882 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3883 	fib_entry->nh_group = nh_grp;
3884 	return 0;
3885 }
3886 
3887 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3888 					struct mlxsw_sp_fib_entry *fib_entry)
3889 {
3890 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3891 
3892 	list_del(&fib_entry->nexthop_group_node);
3893 	if (!list_empty(&nh_grp->fib_list))
3894 		return;
3895 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3896 }
3897 
3898 static bool
3899 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3900 {
3901 	struct mlxsw_sp_fib4_entry *fib4_entry;
3902 
3903 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3904 				  common);
3905 	return !fib4_entry->tos;
3906 }
3907 
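/* Decide whether the entry can actually forward in HW. IPv4 entries
 * with a non-zero TOS are never offloaded. Remote entries require a
 * valid adjacency index, local entries require a RIF, and decap
 * entries are always offloadable.
 */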
3908 static bool
3909 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3910 {
3911 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3912 
3913 	switch (fib_entry->fib_node->fib->proto) {
3914 	case MLXSW_SP_L3_PROTO_IPV4:
3915 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3916 			return false;
3917 		break;
3918 	case MLXSW_SP_L3_PROTO_IPV6:
3919 		break;
3920 	}
3921 
3922 	switch (fib_entry->type) {
3923 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3924 		return !!nh_group->adj_index_valid;
3925 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3926 		return !!nh_group->nh_rif;
3927 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3928 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3929 		return true;
3930 	default:
3931 		return false;
3932 	}
3933 }
3934 
3935 static struct mlxsw_sp_nexthop *
3936 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3937 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3938 {
3939 	int i;
3940 
3941 	for (i = 0; i < nh_grp->count; i++) {
3942 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3943 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3944 
3945 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3946 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3947 				    &rt->fib6_nh.nh_gw))
3948 			return nh;
3950 	}
3951 
3952 	return NULL;
3953 }
3954 
3955 static void
3956 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3957 {
3958 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3959 	int i;
3960 
3961 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3962 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3963 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3964 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3965 		return;
3966 	}
3967 
3968 	for (i = 0; i < nh_grp->count; i++) {
3969 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3970 
3971 		if (nh->offloaded)
3972 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3973 		else
3974 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3975 	}
3976 }
3977 
3978 static void
3979 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3980 {
3981 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3982 	int i;
3983 
3984 	if (!list_is_singular(&nh_grp->fib_list))
3985 		return;
3986 
3987 	for (i = 0; i < nh_grp->count; i++) {
3988 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3989 
3990 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3991 	}
3992 }
3993 
3994 static void
3995 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3996 {
3997 	struct mlxsw_sp_fib6_entry *fib6_entry;
3998 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3999 
4000 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4001 				  common);
4002 
4003 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
4004 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4005 				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4006 		return;
4007 	}
4008 
4009 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4010 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4011 		struct mlxsw_sp_nexthop *nh;
4012 
4013 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4014 		if (nh && nh->offloaded)
4015 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4016 		else
4017 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4018 	}
4019 }
4020 
4021 static void
4022 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4023 {
4024 	struct mlxsw_sp_fib6_entry *fib6_entry;
4025 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4026 
4027 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4028 				  common);
4029 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4030 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4031 
4032 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4033 	}
4034 }
4035 
4036 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4037 {
4038 	switch (fib_entry->fib_node->fib->proto) {
4039 	case MLXSW_SP_L3_PROTO_IPV4:
4040 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
4041 		break;
4042 	case MLXSW_SP_L3_PROTO_IPV6:
4043 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
4044 		break;
4045 	}
4046 }
4047 
4048 static void
4049 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4050 {
4051 	switch (fib_entry->fib_node->fib->proto) {
4052 	case MLXSW_SP_L3_PROTO_IPV4:
4053 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4054 		break;
4055 	case MLXSW_SP_L3_PROTO_IPV6:
4056 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4057 		break;
4058 	}
4059 }
4060 
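/* Reflect the result of a RALUE operation in the kernel's
 * RTNH_F_OFFLOAD indication: clear it on delete, and on a successful
 * write set or clear it according to whether the entry actually
 * forwards in HW.
 */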
4061 static void
4062 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4063 				   enum mlxsw_reg_ralue_op op, int err)
4064 {
4065 	switch (op) {
4066 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4067 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4068 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4069 		if (err)
4070 			return;
4071 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4072 			mlxsw_sp_fib_entry_offload_set(fib_entry);
4073 		else
4074 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
4075 		return;
4076 	default:
4077 		return;
4078 	}
4079 }
4080 
4081 static void
4082 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4083 			      const struct mlxsw_sp_fib_entry *fib_entry,
4084 			      enum mlxsw_reg_ralue_op op)
4085 {
4086 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4087 	enum mlxsw_reg_ralxx_protocol proto;
4088 	u32 *p_dip;
4089 
4090 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4091 
4092 	switch (fib->proto) {
4093 	case MLXSW_SP_L3_PROTO_IPV4:
4094 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4095 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4096 				      fib_entry->fib_node->key.prefix_len,
4097 				      *p_dip);
4098 		break;
4099 	case MLXSW_SP_L3_PROTO_IPV6:
4100 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4101 				      fib_entry->fib_node->key.prefix_len,
4102 				      fib_entry->fib_node->key.addr);
4103 		break;
4104 	}
4105 }
4106 
4107 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4108 					struct mlxsw_sp_fib_entry *fib_entry,
4109 					enum mlxsw_reg_ralue_op op)
4110 {
4111 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4112 	enum mlxsw_reg_ralue_trap_action trap_action;
4113 	u16 trap_id = 0;
4114 	u32 adjacency_index = 0;
4115 	u16 ecmp_size = 0;
4116 
	/* In case the nexthop group adjacency index is valid, use it
	 * with the provided ECMP size. Otherwise, set up a trap and
	 * pass traffic to the kernel.
	 */
4121 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4122 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4123 		adjacency_index = fib_entry->nh_group->adj_index;
4124 		ecmp_size = fib_entry->nh_group->ecmp_size;
4125 	} else {
4126 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4127 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4128 	}
4129 
4130 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4131 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4132 					adjacency_index, ecmp_size);
4133 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4134 }
4135 
4136 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4137 				       struct mlxsw_sp_fib_entry *fib_entry,
4138 				       enum mlxsw_reg_ralue_op op)
4139 {
4140 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4141 	enum mlxsw_reg_ralue_trap_action trap_action;
4142 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4143 	u16 trap_id = 0;
4144 	u16 rif_index = 0;
4145 
4146 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4147 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4148 		rif_index = rif->rif_index;
4149 	} else {
4150 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4151 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4152 	}
4153 
4154 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4155 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4156 				       rif_index);
4157 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4158 }
4159 
4160 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4161 				      struct mlxsw_sp_fib_entry *fib_entry,
4162 				      enum mlxsw_reg_ralue_op op)
4163 {
4164 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4165 
4166 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4167 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4168 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4169 }
4170 
4171 static int
4172 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4173 				 struct mlxsw_sp_fib_entry *fib_entry,
4174 				 enum mlxsw_reg_ralue_op op)
4175 {
4176 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4177 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4178 
4179 	if (WARN_ON(!ipip_entry))
4180 		return -EINVAL;
4181 
4182 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4183 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4184 				      fib_entry->decap.tunnel_index);
4185 }
4186 
4187 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4188 					   struct mlxsw_sp_fib_entry *fib_entry,
4189 					   enum mlxsw_reg_ralue_op op)
4190 {
4191 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4192 
4193 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4194 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4195 					   fib_entry->decap.tunnel_index);
4196 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4197 }
4198 
4199 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4200 				   struct mlxsw_sp_fib_entry *fib_entry,
4201 				   enum mlxsw_reg_ralue_op op)
4202 {
4203 	switch (fib_entry->type) {
4204 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4205 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4206 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4207 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4208 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4209 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4210 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4211 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4212 							fib_entry, op);
4213 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4214 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4215 	}
4216 	return -EINVAL;
4217 }
4218 
4219 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4220 				 struct mlxsw_sp_fib_entry *fib_entry,
4221 				 enum mlxsw_reg_ralue_op op)
4222 {
4223 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4224 
4225 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4226 
4227 	return err;
4228 }
4229 
4230 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4231 				     struct mlxsw_sp_fib_entry *fib_entry)
4232 {
4233 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4234 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4235 }
4236 
4237 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4238 				  struct mlxsw_sp_fib_entry *fib_entry)
4239 {
4240 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4241 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4242 }
4243 
4244 static int
4245 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4246 			     const struct fib_entry_notifier_info *fen_info,
4247 			     struct mlxsw_sp_fib_entry *fib_entry)
4248 {
4249 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4250 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4251 	struct net_device *dev = fen_info->fi->fib_dev;
4252 	struct mlxsw_sp_ipip_entry *ipip_entry;
4253 	struct fib_info *fi = fen_info->fi;
4254 
4255 	switch (fen_info->type) {
4256 	case RTN_LOCAL:
4257 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4258 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4259 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4260 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4261 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4262 							     fib_entry,
4263 							     ipip_entry);
4264 		}
4265 		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4266 						     dip.addr4)) {
4267 			u32 t_index;
4268 
4269 			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4270 			fib_entry->decap.tunnel_index = t_index;
4271 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4272 			return 0;
4273 		}
4274 		/* fall through */
4275 	case RTN_BROADCAST:
4276 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4277 		return 0;
4278 	case RTN_UNREACHABLE: /* fall through */
4279 	case RTN_BLACKHOLE: /* fall through */
4280 	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * with a lower priority than packets directed at the
		 * host, so use action type local instead of trap.
		 */
4285 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4286 		return 0;
4287 	case RTN_UNICAST:
4288 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4289 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4290 		else
4291 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4292 		return 0;
4293 	default:
4294 		return -EINVAL;
4295 	}
4296 }
4297 
4298 static struct mlxsw_sp_fib4_entry *
4299 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4300 			   struct mlxsw_sp_fib_node *fib_node,
4301 			   const struct fib_entry_notifier_info *fen_info)
4302 {
4303 	struct mlxsw_sp_fib4_entry *fib4_entry;
4304 	struct mlxsw_sp_fib_entry *fib_entry;
4305 	int err;
4306 
4307 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4308 	if (!fib4_entry)
4309 		return ERR_PTR(-ENOMEM);
4310 	fib_entry = &fib4_entry->common;
4311 
4312 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4313 	if (err)
4314 		goto err_fib4_entry_type_set;
4315 
4316 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4317 	if (err)
4318 		goto err_nexthop4_group_get;
4319 
4320 	fib4_entry->prio = fen_info->fi->fib_priority;
4321 	fib4_entry->tb_id = fen_info->tb_id;
4322 	fib4_entry->type = fen_info->type;
4323 	fib4_entry->tos = fen_info->tos;
4324 
4325 	fib_entry->fib_node = fib_node;
4326 
4327 	return fib4_entry;
4328 
err_nexthop4_group_get:
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
err_fib4_entry_type_set:
4331 	kfree(fib4_entry);
4332 	return ERR_PTR(err);
4333 }
4334 
4335 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4336 					struct mlxsw_sp_fib4_entry *fib4_entry)
4337 {
4338 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4339 	kfree(fib4_entry);
4340 }
4341 
4342 static struct mlxsw_sp_fib4_entry *
4343 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4344 			   const struct fib_entry_notifier_info *fen_info)
4345 {
4346 	struct mlxsw_sp_fib4_entry *fib4_entry;
4347 	struct mlxsw_sp_fib_node *fib_node;
4348 	struct mlxsw_sp_fib *fib;
4349 	struct mlxsw_sp_vr *vr;
4350 
4351 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4352 	if (!vr)
4353 		return NULL;
4354 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4355 
4356 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4357 					    sizeof(fen_info->dst),
4358 					    fen_info->dst_len);
4359 	if (!fib_node)
4360 		return NULL;
4361 
4362 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4363 		if (fib4_entry->tb_id == fen_info->tb_id &&
4364 		    fib4_entry->tos == fen_info->tos &&
4365 		    fib4_entry->type == fen_info->type &&
4366 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4367 		    fen_info->fi) {
4368 			return fib4_entry;
4369 		}
4370 	}
4371 
4372 	return NULL;
4373 }
4374 
4375 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4376 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4377 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4378 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4379 	.automatic_shrinking = true,
4380 };
4381 
4382 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4383 				    struct mlxsw_sp_fib_node *fib_node)
4384 {
4385 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4386 				      mlxsw_sp_fib_ht_params);
4387 }
4388 
4389 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4390 				     struct mlxsw_sp_fib_node *fib_node)
4391 {
4392 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4393 			       mlxsw_sp_fib_ht_params);
4394 }
4395 
4396 static struct mlxsw_sp_fib_node *
4397 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4398 			 size_t addr_len, unsigned char prefix_len)
4399 {
4400 	struct mlxsw_sp_fib_key key;
4401 
4402 	memset(&key, 0, sizeof(key));
4403 	memcpy(key.addr, addr, addr_len);
4404 	key.prefix_len = prefix_len;
4405 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4406 }
4407 
4408 static struct mlxsw_sp_fib_node *
4409 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4410 			 size_t addr_len, unsigned char prefix_len)
4411 {
4412 	struct mlxsw_sp_fib_node *fib_node;
4413 
4414 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4415 	if (!fib_node)
4416 		return NULL;
4417 
4418 	INIT_LIST_HEAD(&fib_node->entry_list);
4419 	list_add(&fib_node->list, &fib->node_list);
4420 	memcpy(fib_node->key.addr, addr, addr_len);
4421 	fib_node->key.prefix_len = prefix_len;
4422 
4423 	return fib_node;
4424 }
4425 
4426 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4427 {
4428 	list_del(&fib_node->list);
4429 	WARN_ON(!list_empty(&fib_node->entry_list));
4430 	kfree(fib_node);
4431 }
4432 
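/* Only the first (best) entry of a FIB node is reflected to the
 * device. The remaining entries are kept in the ordered entry list
 * as backups.
 */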
4433 static bool
4434 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4435 				 const struct mlxsw_sp_fib_entry *fib_entry)
4436 {
4437 	return list_first_entry(&fib_node->entry_list,
4438 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4439 }
4440 
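/* Account for the node's prefix length in the LPM tree bound to its
 * protocol. If the prefix length is not used yet, a tree that also
 * includes it is fetched and bound to all relevant virtual routers.
 */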
4441 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4442 				      struct mlxsw_sp_fib_node *fib_node)
4443 {
4444 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4445 	struct mlxsw_sp_fib *fib = fib_node->fib;
4446 	struct mlxsw_sp_lpm_tree *lpm_tree;
4447 	int err;
4448 
4449 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4450 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4451 		goto out;
4452 
4453 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4454 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4455 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4456 					 fib->proto);
4457 	if (IS_ERR(lpm_tree))
4458 		return PTR_ERR(lpm_tree);
4459 
4460 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4461 	if (err)
4462 		goto err_lpm_tree_replace;
4463 
4464 out:
4465 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4466 	return 0;
4467 
4468 err_lpm_tree_replace:
4469 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4470 	return err;
4471 }
4472 
4473 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4474 					 struct mlxsw_sp_fib_node *fib_node)
4475 {
4476 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4477 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4478 	struct mlxsw_sp_fib *fib = fib_node->fib;
4479 	int err;
4480 
4481 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4482 		return;
4483 	/* Try to construct a new LPM tree from the current prefix usage
4484 	 * minus the unused one. If we fail, continue using the old one.
4485 	 */
4486 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4487 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4488 				    fib_node->key.prefix_len);
4489 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4490 					 fib->proto);
4491 	if (IS_ERR(lpm_tree))
4492 		return;
4493 
4494 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4495 	if (err)
4496 		goto err_lpm_tree_replace;
4497 
4498 	return;
4499 
4500 err_lpm_tree_replace:
4501 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4502 }
4503 
4504 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4505 				  struct mlxsw_sp_fib_node *fib_node,
4506 				  struct mlxsw_sp_fib *fib)
4507 {
4508 	int err;
4509 
4510 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4511 	if (err)
4512 		return err;
4513 	fib_node->fib = fib;
4514 
4515 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4516 	if (err)
4517 		goto err_fib_lpm_tree_link;
4518 
4519 	return 0;
4520 
4521 err_fib_lpm_tree_link:
4522 	fib_node->fib = NULL;
4523 	mlxsw_sp_fib_node_remove(fib, fib_node);
4524 	return err;
4525 }
4526 
4527 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4528 				   struct mlxsw_sp_fib_node *fib_node)
4529 {
4530 	struct mlxsw_sp_fib *fib = fib_node->fib;
4531 
4532 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4533 	fib_node->fib = NULL;
4534 	mlxsw_sp_fib_node_remove(fib, fib_node);
4535 }
4536 
4537 static struct mlxsw_sp_fib_node *
4538 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4539 		      size_t addr_len, unsigned char prefix_len,
4540 		      enum mlxsw_sp_l3proto proto)
4541 {
4542 	struct mlxsw_sp_fib_node *fib_node;
4543 	struct mlxsw_sp_fib *fib;
4544 	struct mlxsw_sp_vr *vr;
4545 	int err;
4546 
4547 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4548 	if (IS_ERR(vr))
4549 		return ERR_CAST(vr);
4550 	fib = mlxsw_sp_vr_fib(vr, proto);
4551 
4552 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4553 	if (fib_node)
4554 		return fib_node;
4555 
4556 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4557 	if (!fib_node) {
4558 		err = -ENOMEM;
4559 		goto err_fib_node_create;
4560 	}
4561 
4562 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4563 	if (err)
4564 		goto err_fib_node_init;
4565 
4566 	return fib_node;
4567 
4568 err_fib_node_init:
4569 	mlxsw_sp_fib_node_destroy(fib_node);
4570 err_fib_node_create:
4571 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4572 	return ERR_PTR(err);
4573 }
4574 
4575 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4576 				  struct mlxsw_sp_fib_node *fib_node)
4577 {
4578 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4579 
4580 	if (!list_empty(&fib_node->entry_list))
4581 		return;
4582 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4583 	mlxsw_sp_fib_node_destroy(fib_node);
4584 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4585 }
4586 
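/* Return the first entry before which the new entry should be
 * inserted. The list is kept sorted by table ID and TOS in descending
 * order and by priority in ascending order, so the best route of a
 * node is always first.
 */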
4587 static struct mlxsw_sp_fib4_entry *
4588 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4589 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4590 {
4591 	struct mlxsw_sp_fib4_entry *fib4_entry;
4592 
4593 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4594 		if (fib4_entry->tb_id > new4_entry->tb_id)
4595 			continue;
4596 		if (fib4_entry->tb_id != new4_entry->tb_id)
4597 			break;
4598 		if (fib4_entry->tos > new4_entry->tos)
4599 			continue;
4600 		if (fib4_entry->prio >= new4_entry->prio ||
4601 		    fib4_entry->tos < new4_entry->tos)
4602 			return fib4_entry;
4603 	}
4604 
4605 	return NULL;
4606 }
4607 
4608 static int
4609 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4610 			       struct mlxsw_sp_fib4_entry *new4_entry)
4611 {
4612 	struct mlxsw_sp_fib_node *fib_node;
4613 
4614 	if (WARN_ON(!fib4_entry))
4615 		return -EINVAL;
4616 
4617 	fib_node = fib4_entry->common.fib_node;
4618 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4619 				 common.list) {
4620 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4621 		    fib4_entry->tos != new4_entry->tos ||
4622 		    fib4_entry->prio != new4_entry->prio)
4623 			break;
4624 	}
4625 
4626 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4627 	return 0;
4628 }
4629 
4630 static int
4631 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4632 			       bool replace, bool append)
4633 {
4634 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4635 	struct mlxsw_sp_fib4_entry *fib4_entry;
4636 
4637 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4638 
4639 	if (append)
4640 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4641 	if (replace && WARN_ON(!fib4_entry))
4642 		return -EINVAL;
4643 
	/* Insert the new entry before the replaced one, so that we can
	 * later remove the latter.
	 */
4647 	if (fib4_entry) {
4648 		list_add_tail(&new4_entry->common.list,
4649 			      &fib4_entry->common.list);
4650 	} else {
4651 		struct mlxsw_sp_fib4_entry *last;
4652 
4653 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4654 			if (new4_entry->tb_id > last->tb_id)
4655 				break;
4656 			fib4_entry = last;
4657 		}
4658 
4659 		if (fib4_entry)
4660 			list_add(&new4_entry->common.list,
4661 				 &fib4_entry->common.list);
4662 		else
4663 			list_add(&new4_entry->common.list,
4664 				 &fib_node->entry_list);
4665 	}
4666 
4667 	return 0;
4668 }
4669 
4670 static void
4671 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4672 {
4673 	list_del(&fib4_entry->common.list);
4674 }
4675 
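/* Write the entry to the device only if it became the first (best)
 * entry of its node. The previously best entry is then overwritten in
 * place to prevent packet loss, and only its offload indication is
 * cleared.
 */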
4676 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4677 				       struct mlxsw_sp_fib_entry *fib_entry)
4678 {
4679 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4680 
4681 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4682 		return 0;
4683 
4684 	/* To prevent packet loss, overwrite the previously offloaded
4685 	 * entry.
4686 	 */
4687 	if (!list_is_singular(&fib_node->entry_list)) {
4688 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4689 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4690 
4691 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4692 	}
4693 
4694 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4695 }
4696 
4697 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4698 					struct mlxsw_sp_fib_entry *fib_entry)
4699 {
4700 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4701 
4702 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4703 		return;
4704 
4705 	/* Promote the next entry by overwriting the deleted entry */
4706 	if (!list_is_singular(&fib_node->entry_list)) {
4707 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4708 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4709 
4710 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4711 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4712 		return;
4713 	}
4714 
4715 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4716 }
4717 
4718 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4719 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4720 					 bool replace, bool append)
4721 {
4722 	int err;
4723 
4724 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4725 	if (err)
4726 		return err;
4727 
4728 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4729 	if (err)
4730 		goto err_fib_node_entry_add;
4731 
4732 	return 0;
4733 
4734 err_fib_node_entry_add:
4735 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4736 	return err;
4737 }
4738 
4739 static void
4740 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4741 				struct mlxsw_sp_fib4_entry *fib4_entry)
4742 {
4743 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4744 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4745 
4746 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4747 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4748 }
4749 
4750 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4751 					struct mlxsw_sp_fib4_entry *fib4_entry,
4752 					bool replace)
4753 {
4754 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4755 	struct mlxsw_sp_fib4_entry *replaced;
4756 
4757 	if (!replace)
4758 		return;
4759 
	/* We inserted the new entry before the replaced one */
4761 	replaced = list_next_entry(fib4_entry, common.list);
4762 
4763 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4764 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4765 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4766 }
4767 
4768 static int
4769 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4770 			 const struct fib_entry_notifier_info *fen_info,
4771 			 bool replace, bool append)
4772 {
4773 	struct mlxsw_sp_fib4_entry *fib4_entry;
4774 	struct mlxsw_sp_fib_node *fib_node;
4775 	int err;
4776 
4777 	if (mlxsw_sp->router->aborted)
4778 		return 0;
4779 
4780 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4781 					 &fen_info->dst, sizeof(fen_info->dst),
4782 					 fen_info->dst_len,
4783 					 MLXSW_SP_L3_PROTO_IPV4);
4784 	if (IS_ERR(fib_node)) {
4785 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4786 		return PTR_ERR(fib_node);
4787 	}
4788 
4789 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4790 	if (IS_ERR(fib4_entry)) {
4791 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4792 		err = PTR_ERR(fib4_entry);
4793 		goto err_fib4_entry_create;
4794 	}
4795 
4796 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4797 					    append);
4798 	if (err) {
4799 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4800 		goto err_fib4_node_entry_link;
4801 	}
4802 
4803 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4804 
4805 	return 0;
4806 
4807 err_fib4_node_entry_link:
4808 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4809 err_fib4_entry_create:
4810 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4811 	return err;
4812 }
4813 
4814 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4815 				     struct fib_entry_notifier_info *fen_info)
4816 {
4817 	struct mlxsw_sp_fib4_entry *fib4_entry;
4818 	struct mlxsw_sp_fib_node *fib_node;
4819 
4820 	if (mlxsw_sp->router->aborted)
4821 		return;
4822 
4823 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4824 	if (WARN_ON(!fib4_entry))
4825 		return;
4826 	fib_node = fib4_entry->common.fib_node;
4827 
4828 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4829 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4830 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4831 }
4832 
4833 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4834 {
	/* Packets with a link-local destination IP arriving at the
	 * router are trapped to the CPU, so there is no need to
	 * program specific routes for them.
	 */
4839 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4840 		return true;
4841 
4842 	/* Multicast routes aren't supported, so ignore them. Neighbour
4843 	 * Discovery packets are specifically trapped.
4844 	 */
4845 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4846 		return true;
4847 
4848 	/* Cloned routes are irrelevant in the forwarding path. */
4849 	if (rt->fib6_flags & RTF_CACHE)
4850 		return true;
4851 
4852 	return false;
4853 }
4854 
4855 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4856 {
4857 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4858 
4859 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4860 	if (!mlxsw_sp_rt6)
4861 		return ERR_PTR(-ENOMEM);
4862 
	/* In case of route replace, the replaced route is deleted with
	 * no notification. Take a reference to prevent accessing freed
	 * memory.
	 */
4867 	mlxsw_sp_rt6->rt = rt;
4868 	fib6_info_hold(rt);
4869 
4870 	return mlxsw_sp_rt6;
4871 }
4872 
4873 #if IS_ENABLED(CONFIG_IPV6)
4874 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4875 {
4876 	fib6_info_release(rt);
4877 }
4878 #else
4879 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4880 {
4881 }
4882 #endif
4883 
4884 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4885 {
4886 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4887 	kfree(mlxsw_sp_rt6);
4888 }
4889 
4890 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4891 {
4892 	/* RTF_CACHE routes are ignored */
4893 	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4894 }
4895 
4896 static struct fib6_info *
4897 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4898 {
4899 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4900 				list)->rt;
4901 }
4902 
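/* Look for a multipath-capable entry with the same table ID and
 * metric that the new route can be appended to as another nexthop.
 * NULL is returned when the new route cannot be part of a multipath
 * entry or when it is replacing an existing route.
 */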
4903 static struct mlxsw_sp_fib6_entry *
4904 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4905 				 const struct fib6_info *nrt, bool replace)
4906 {
4907 	struct mlxsw_sp_fib6_entry *fib6_entry;
4908 
4909 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4910 		return NULL;
4911 
4912 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4913 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4914 
4915 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4916 		 * virtual router.
4917 		 */
4918 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4919 			continue;
4920 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4921 			break;
4922 		if (rt->fib6_metric < nrt->fib6_metric)
4923 			continue;
4924 		if (rt->fib6_metric == nrt->fib6_metric &&
4925 		    mlxsw_sp_fib6_rt_can_mp(rt))
4926 			return fib6_entry;
4927 		if (rt->fib6_metric > nrt->fib6_metric)
4928 			break;
4929 	}
4930 
4931 	return NULL;
4932 }
4933 
4934 static struct mlxsw_sp_rt6 *
4935 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4936 			    const struct fib6_info *rt)
4937 {
4938 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4939 
4940 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4941 		if (mlxsw_sp_rt6->rt == rt)
4942 			return mlxsw_sp_rt6;
4943 	}
4944 
4945 	return NULL;
4946 }
4947 
4948 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4949 					const struct fib6_info *rt,
4950 					enum mlxsw_sp_ipip_type *ret)
4951 {
4952 	return rt->fib6_nh.nh_dev &&
4953 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4954 }
4955 
4956 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4957 				       struct mlxsw_sp_nexthop_group *nh_grp,
4958 				       struct mlxsw_sp_nexthop *nh,
4959 				       const struct fib6_info *rt)
4960 {
4961 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4962 	struct mlxsw_sp_ipip_entry *ipip_entry;
4963 	struct net_device *dev = rt->fib6_nh.nh_dev;
4964 	struct mlxsw_sp_rif *rif;
4965 	int err;
4966 
4967 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4968 	if (ipip_entry) {
4969 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4970 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4971 					  MLXSW_SP_L3_PROTO_IPV6)) {
4972 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4973 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4974 			return 0;
4975 		}
4976 	}
4977 
4978 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4979 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4980 	if (!rif)
4981 		return 0;
4982 	mlxsw_sp_nexthop_rif_init(nh, rif);
4983 
4984 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4985 	if (err)
4986 		goto err_nexthop_neigh_init;
4987 
4988 	return 0;
4989 
4990 err_nexthop_neigh_init:
4991 	mlxsw_sp_nexthop_rif_fini(nh);
4992 	return err;
4993 }
4994 
4995 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4996 					struct mlxsw_sp_nexthop *nh)
4997 {
4998 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4999 }
5000 
5001 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5002 				  struct mlxsw_sp_nexthop_group *nh_grp,
5003 				  struct mlxsw_sp_nexthop *nh,
5004 				  const struct fib6_info *rt)
5005 {
5006 	struct net_device *dev = rt->fib6_nh.nh_dev;
5007 
5008 	nh->nh_grp = nh_grp;
5009 	nh->nh_weight = rt->fib6_nh.nh_weight;
5010 	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
5011 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5012 
5013 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5014 
5015 	if (!dev)
5016 		return 0;
5017 	nh->ifindex = dev->ifindex;
5018 
5019 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5020 }
5021 
5022 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5023 				   struct mlxsw_sp_nexthop *nh)
5024 {
5025 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5026 	list_del(&nh->router_list_node);
5027 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5028 }
5029 
5030 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5031 				    const struct fib6_info *rt)
5032 {
5033 	return rt->fib6_flags & RTF_GATEWAY ||
5034 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5035 }
5036 
5037 static struct mlxsw_sp_nexthop_group *
5038 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5039 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5040 {
5041 	struct mlxsw_sp_nexthop_group *nh_grp;
5042 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5043 	struct mlxsw_sp_nexthop *nh;
5044 	size_t alloc_size;
	int i;
5046 	int err;
5047 
5048 	alloc_size = sizeof(*nh_grp) +
5049 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
5050 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
5051 	if (!nh_grp)
5052 		return ERR_PTR(-ENOMEM);
5053 	INIT_LIST_HEAD(&nh_grp->fib_list);
5054 #if IS_ENABLED(CONFIG_IPV6)
5055 	nh_grp->neigh_tbl = &nd_tbl;
5056 #endif
5057 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5058 					struct mlxsw_sp_rt6, list);
5059 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5060 	nh_grp->count = fib6_entry->nrt6;
5061 	for (i = 0; i < nh_grp->count; i++) {
5062 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5063 
5064 		nh = &nh_grp->nexthops[i];
5065 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5066 		if (err)
5067 			goto err_nexthop6_init;
5068 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5069 	}
5070 
5071 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5072 	if (err)
5073 		goto err_nexthop_group_insert;
5074 
5075 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5076 	return nh_grp;
5077 
5078 err_nexthop_group_insert:
5079 err_nexthop6_init:
5080 	for (i--; i >= 0; i--) {
5081 		nh = &nh_grp->nexthops[i];
5082 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5083 	}
5084 	kfree(nh_grp);
5085 	return ERR_PTR(err);
5086 }
5087 
5088 static void
5089 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5090 				struct mlxsw_sp_nexthop_group *nh_grp)
5091 {
5092 	struct mlxsw_sp_nexthop *nh;
5093 	int i = nh_grp->count;
5094 
5095 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5096 	for (i--; i >= 0; i--) {
5097 		nh = &nh_grp->nexthops[i];
5098 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5099 	}
5100 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5101 	WARN_ON(nh_grp->adj_index_valid);
5102 	kfree(nh_grp);
5103 }
5104 
5105 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5106 				       struct mlxsw_sp_fib6_entry *fib6_entry)
5107 {
5108 	struct mlxsw_sp_nexthop_group *nh_grp;
5109 
5110 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5111 	if (!nh_grp) {
5112 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5113 		if (IS_ERR(nh_grp))
5114 			return PTR_ERR(nh_grp);
5115 	}
5116 
5117 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5118 		      &nh_grp->fib_list);
5119 	fib6_entry->common.nh_group = nh_grp;
5120 
5121 	return 0;
5122 }
5123 
5124 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5125 					struct mlxsw_sp_fib_entry *fib_entry)
5126 {
5127 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5128 
5129 	list_del(&fib_entry->nexthop_group_node);
5130 	if (!list_empty(&nh_grp->fib_list))
5131 		return;
5132 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5133 }
5134 
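/* Migrate the entry to a nexthop group matching its current set of
 * routes. The old group is destroyed only after the entry was updated
 * in the device, so forwarding is not disrupted.
 */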
5135 static int
5136 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5137 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5138 {
5139 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5140 	int err;
5141 
5142 	fib6_entry->common.nh_group = NULL;
5143 	list_del(&fib6_entry->common.nexthop_group_node);
5144 
5145 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5146 	if (err)
5147 		goto err_nexthop6_group_get;
5148 
	/* In case this entry is offloaded, the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
5153 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5154 	if (err)
5155 		goto err_fib_node_entry_add;
5156 
5157 	if (list_empty(&old_nh_grp->fib_list))
5158 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5159 
5160 	return 0;
5161 
5162 err_fib_node_entry_add:
5163 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5164 err_nexthop6_group_get:
5165 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5166 		      &old_nh_grp->fib_list);
5167 	fib6_entry->common.nh_group = old_nh_grp;
5168 	return err;
5169 }
5170 
5171 static int
5172 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5173 				struct mlxsw_sp_fib6_entry *fib6_entry,
5174 				struct fib6_info *rt)
5175 {
5176 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5177 	int err;
5178 
5179 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5180 	if (IS_ERR(mlxsw_sp_rt6))
5181 		return PTR_ERR(mlxsw_sp_rt6);
5182 
5183 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5184 	fib6_entry->nrt6++;
5185 
5186 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5187 	if (err)
5188 		goto err_nexthop6_group_update;
5189 
5190 	return 0;
5191 
5192 err_nexthop6_group_update:
5193 	fib6_entry->nrt6--;
5194 	list_del(&mlxsw_sp_rt6->list);
5195 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5196 	return err;
5197 }
5198 
5199 static void
5200 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5201 				struct mlxsw_sp_fib6_entry *fib6_entry,
5202 				struct fib6_info *rt)
5203 {
5204 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5205 
5206 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5207 	if (WARN_ON(!mlxsw_sp_rt6))
5208 		return;
5209 
5210 	fib6_entry->nrt6--;
5211 	list_del(&mlxsw_sp_rt6->list);
5212 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5213 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5214 }
5215 
5216 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5217 					 struct mlxsw_sp_fib_entry *fib_entry,
5218 					 const struct fib6_info *rt)
5219 {
5220 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5221 	 * stack. We can rely on their destination device not having a
5222 	 * RIF (it's the loopback device) and can thus use action type
5223 	 * local, which will cause them to be trapped with a lower
5224 	 * priority than packets that need to be locally received.
5225 	 */
5226 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5227 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5228 	else if (rt->fib6_flags & RTF_REJECT)
5229 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5230 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5231 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5232 	else
5233 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5234 }
5235 
5236 static void
5237 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5238 {
5239 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5240 
5241 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5242 				 list) {
5243 		fib6_entry->nrt6--;
5244 		list_del(&mlxsw_sp_rt6->list);
5245 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5246 	}
5247 }
5248 
5249 static struct mlxsw_sp_fib6_entry *
5250 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5251 			   struct mlxsw_sp_fib_node *fib_node,
5252 			   struct fib6_info *rt)
5253 {
5254 	struct mlxsw_sp_fib6_entry *fib6_entry;
5255 	struct mlxsw_sp_fib_entry *fib_entry;
5256 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5257 	int err;
5258 
5259 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5260 	if (!fib6_entry)
5261 		return ERR_PTR(-ENOMEM);
5262 	fib_entry = &fib6_entry->common;
5263 
5264 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5265 	if (IS_ERR(mlxsw_sp_rt6)) {
5266 		err = PTR_ERR(mlxsw_sp_rt6);
5267 		goto err_rt6_create;
5268 	}
5269 
5270 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5271 
5272 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5273 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5274 	fib6_entry->nrt6 = 1;
5275 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5276 	if (err)
5277 		goto err_nexthop6_group_get;
5278 
5279 	fib_entry->fib_node = fib_node;
5280 
5281 	return fib6_entry;
5282 
5283 err_nexthop6_group_get:
5284 	list_del(&mlxsw_sp_rt6->list);
5285 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5286 err_rt6_create:
5287 	kfree(fib6_entry);
5288 	return ERR_PTR(err);
5289 }
5290 
5291 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5292 					struct mlxsw_sp_fib6_entry *fib6_entry)
5293 {
5294 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5295 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5296 	WARN_ON(fib6_entry->nrt6);
5297 	kfree(fib6_entry);
5298 }
5299 
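/* Find the entry before which the new route should be inserted. The
 * list is kept sorted by table ID in descending order and by metric
 * in ascending order. In case of replace, an entry with the same
 * multipath capability as the new route is preferred.
 */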
5300 static struct mlxsw_sp_fib6_entry *
5301 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5302 			      const struct fib6_info *nrt, bool replace)
5303 {
5304 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5305 
5306 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5307 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5308 
5309 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5310 			continue;
5311 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5312 			break;
5313 		if (replace && rt->fib6_metric == nrt->fib6_metric) {
5314 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5315 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5316 				return fib6_entry;
5317 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5318 				fallback = fallback ?: fib6_entry;
5319 		}
5320 		if (rt->fib6_metric > nrt->fib6_metric)
5321 			return fallback ?: fib6_entry;
5322 	}
5323 
5324 	return fallback;
5325 }
5326 
5327 static int
5328 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5329 			       bool replace)
5330 {
5331 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5332 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5333 	struct mlxsw_sp_fib6_entry *fib6_entry;
5334 
5335 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5336 
5337 	if (replace && WARN_ON(!fib6_entry))
5338 		return -EINVAL;
5339 
5340 	if (fib6_entry) {
5341 		list_add_tail(&new6_entry->common.list,
5342 			      &fib6_entry->common.list);
5343 	} else {
5344 		struct mlxsw_sp_fib6_entry *last;
5345 
5346 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5347 			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5348 
5349 			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5350 				break;
5351 			fib6_entry = last;
5352 		}
5353 
5354 		if (fib6_entry)
5355 			list_add(&new6_entry->common.list,
5356 				 &fib6_entry->common.list);
5357 		else
5358 			list_add(&new6_entry->common.list,
5359 				 &fib_node->entry_list);
5360 	}
5361 
5362 	return 0;
5363 }
5364 
5365 static void
5366 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5367 {
5368 	list_del(&fib6_entry->common.list);
5369 }
5370 
5371 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5372 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5373 					 bool replace)
5374 {
5375 	int err;
5376 
5377 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5378 	if (err)
5379 		return err;
5380 
5381 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5382 	if (err)
5383 		goto err_fib_node_entry_add;
5384 
5385 	return 0;
5386 
5387 err_fib_node_entry_add:
5388 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5389 	return err;
5390 }
5391 
5392 static void
5393 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5394 				struct mlxsw_sp_fib6_entry *fib6_entry)
5395 {
5396 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5397 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5398 }
5399 
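/* Find the driver's FIB6 entry corresponding to a kernel route: resolve
 * the virtual router and prefix, then match the entry on table ID and
 * metric and make sure the route is one of the entry's siblings.
 */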
5400 static struct mlxsw_sp_fib6_entry *
5401 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5402 			   const struct fib6_info *rt)
5403 {
5404 	struct mlxsw_sp_fib6_entry *fib6_entry;
5405 	struct mlxsw_sp_fib_node *fib_node;
5406 	struct mlxsw_sp_fib *fib;
5407 	struct mlxsw_sp_vr *vr;
5408 
5409 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5410 	if (!vr)
5411 		return NULL;
5412 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5413 
5414 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5415 					    sizeof(rt->fib6_dst.addr),
5416 					    rt->fib6_dst.plen);
5417 	if (!fib_node)
5418 		return NULL;
5419 
5420 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5421 		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5422 
5423 		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5424 		    rt->fib6_metric == iter_rt->fib6_metric &&
5425 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5426 			return fib6_entry;
5427 	}
5428 
5429 	return NULL;
5430 }
5431 
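/* In replace mode, the new entry was inserted immediately before the one
 * it is replacing, so the replaced entry is simply the next one in the
 * list. Unlink and destroy it and drop the reference it held on the FIB
 * node.
 */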
5432 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5433 					struct mlxsw_sp_fib6_entry *fib6_entry,
5434 					bool replace)
5435 {
5436 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5437 	struct mlxsw_sp_fib6_entry *replaced;
5438 
5439 	if (!replace)
5440 		return;
5441 
5442 	replaced = list_next_entry(fib6_entry, common.list);
5443 
5444 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5445 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5446 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5447 }
5448 
5449 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5450 				    struct fib6_info *rt, bool replace)
5451 {
5452 	struct mlxsw_sp_fib6_entry *fib6_entry;
5453 	struct mlxsw_sp_fib_node *fib_node;
5454 	int err;
5455 
5456 	if (mlxsw_sp->router->aborted)
5457 		return 0;
5458 
5459 	if (rt->fib6_src.plen)
5460 		return -EINVAL;
5461 
5462 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5463 		return 0;
5464 
5465 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5466 					 &rt->fib6_dst.addr,
5467 					 sizeof(rt->fib6_dst.addr),
5468 					 rt->fib6_dst.plen,
5469 					 MLXSW_SP_L3_PROTO_IPV6);
5470 	if (IS_ERR(fib_node))
5471 		return PTR_ERR(fib_node);
5472 
5473 	/* Before creating a new entry, try to append the route to an
5474 	 * existing multipath entry.
5475 	 */
5476 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5477 	if (fib6_entry) {
5478 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5479 		if (err)
5480 			goto err_fib6_entry_nexthop_add;
5481 		return 0;
5482 	}
5483 
5484 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5485 	if (IS_ERR(fib6_entry)) {
5486 		err = PTR_ERR(fib6_entry);
5487 		goto err_fib6_entry_create;
5488 	}
5489 
5490 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5491 	if (err)
5492 		goto err_fib6_node_entry_link;
5493 
5494 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5495 
5496 	return 0;
5497 
5498 err_fib6_node_entry_link:
5499 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5500 err_fib6_entry_create:
5501 err_fib6_entry_nexthop_add:
5502 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5503 	return err;
5504 }
5505 
5506 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5507 				     struct fib6_info *rt)
5508 {
5509 	struct mlxsw_sp_fib6_entry *fib6_entry;
5510 	struct mlxsw_sp_fib_node *fib_node;
5511 
5512 	if (mlxsw_sp->router->aborted)
5513 		return;
5514 
5515 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5516 		return;
5517 
5518 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5519 	if (WARN_ON(!fib6_entry))
5520 		return;
5521 
5522 	/* If the route is part of a multipath entry and is not the last
5523 	 * one to be removed, then only shrink its nexthop group.
5524 	 */
5525 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5526 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5527 		return;
5528 	}
5529 
5530 	fib_node = fib6_entry->common.fib_node;
5531 
5532 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5533 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5534 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5535 }
5536 
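/* Bind all virtual routers to a minimal LPM tree whose only entry is a
 * default route that traps packets to the CPU, handing routing back to
 * the kernel after FIB offload was aborted.
 */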
5537 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5538 					    enum mlxsw_reg_ralxx_protocol proto,
5539 					    u8 tree_id)
5540 {
5541 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5542 	char ralst_pl[MLXSW_REG_RALST_LEN];
5543 	int i, err;
5544 
5545 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5546 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5547 	if (err)
5548 		return err;
5549 
5550 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5551 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5552 	if (err)
5553 		return err;
5554 
5555 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5556 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5557 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5558 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5559 
5560 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5561 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5562 				      raltb_pl);
5563 		if (err)
5564 			return err;
5565 
5566 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5567 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5568 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5569 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5570 				      ralue_pl);
5571 		if (err)
5572 			return err;
5573 	}
5574 
5575 	return 0;
5576 }
5577 
5578 static struct mlxsw_sp_mr_table *
5579 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5580 {
5581 	if (family == RTNL_FAMILY_IPMR)
5582 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5583 	else
5584 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5585 }
5586 
5587 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5588 				     struct mfc_entry_notifier_info *men_info,
5589 				     bool replace)
5590 {
5591 	struct mlxsw_sp_mr_table *mrt;
5592 	struct mlxsw_sp_vr *vr;
5593 
5594 	if (mlxsw_sp->router->aborted)
5595 		return 0;
5596 
5597 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5598 	if (IS_ERR(vr))
5599 		return PTR_ERR(vr);
5600 
5601 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5602 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5603 }
5604 
5605 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5606 				      struct mfc_entry_notifier_info *men_info)
5607 {
5608 	struct mlxsw_sp_mr_table *mrt;
5609 	struct mlxsw_sp_vr *vr;
5610 
5611 	if (mlxsw_sp->router->aborted)
5612 		return;
5613 
5614 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5615 	if (WARN_ON(!vr))
5616 		return;
5617 
5618 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5619 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5620 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5621 }
5622 
5623 static int
5624 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5625 			      struct vif_entry_notifier_info *ven_info)
5626 {
5627 	struct mlxsw_sp_mr_table *mrt;
5628 	struct mlxsw_sp_rif *rif;
5629 	struct mlxsw_sp_vr *vr;
5630 
5631 	if (mlxsw_sp->router->aborted)
5632 		return 0;
5633 
5634 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5635 	if (IS_ERR(vr))
5636 		return PTR_ERR(vr);
5637 
5638 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5639 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5640 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5641 				   ven_info->vif_index,
5642 				   ven_info->vif_flags, rif);
5643 }
5644 
5645 static void
5646 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5647 			      struct vif_entry_notifier_info *ven_info)
5648 {
5649 	struct mlxsw_sp_mr_table *mrt;
5650 	struct mlxsw_sp_vr *vr;
5651 
5652 	if (mlxsw_sp->router->aborted)
5653 		return;
5654 
5655 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5656 	if (WARN_ON(!vr))
5657 		return;
5658 
5659 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5660 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5661 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5662 }
5663 
5664 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5665 {
5666 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5667 	int err;
5668 
5669 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5670 					       MLXSW_SP_LPM_TREE_MIN);
5671 	if (err)
5672 		return err;
5673 
5674 	/* The multicast router code does not need an abort trap: by default,
5675 	 * packets that do not match any route are trapped to the CPU.
5676 	 */
5677 
5678 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5679 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5680 						MLXSW_SP_LPM_TREE_MIN + 1);
5681 }
5682 
5683 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5684 				     struct mlxsw_sp_fib_node *fib_node)
5685 {
5686 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5687 
5688 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5689 				 common.list) {
5690 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5691 
5692 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5693 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5694 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5695 		/* Break when entry list is empty and node was freed.
5696 		 * Otherwise, we'll access freed memory in the next
5697 		 * iteration.
5698 		 */
5699 		if (do_break)
5700 			break;
5701 	}
5702 }
5703 
5704 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5705 				     struct mlxsw_sp_fib_node *fib_node)
5706 {
5707 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5708 
5709 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5710 				 common.list) {
5711 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5712 
5713 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5714 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5715 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5716 		if (do_break)
5717 			break;
5718 	}
5719 }
5720 
5721 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5722 				    struct mlxsw_sp_fib_node *fib_node)
5723 {
5724 	switch (fib_node->fib->proto) {
5725 	case MLXSW_SP_L3_PROTO_IPV4:
5726 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5727 		break;
5728 	case MLXSW_SP_L3_PROTO_IPV6:
5729 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5730 		break;
5731 	}
5732 }
5733 
5734 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5735 				  struct mlxsw_sp_vr *vr,
5736 				  enum mlxsw_sp_l3proto proto)
5737 {
5738 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5739 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5740 
5741 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5742 		bool do_break = &tmp->list == &fib->node_list;
5743 
5744 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5745 		if (do_break)
5746 			break;
5747 	}
5748 }
5749 
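/* Flush all offloaded unicast and multicast routes from every virtual
 * router that is currently in use.
 */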
5750 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5751 {
5752 	int i, j;
5753 
5754 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5755 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5756 
5757 		if (!mlxsw_sp_vr_is_used(vr))
5758 			continue;
5759 
5760 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5761 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5762 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5763 
5764 		/* If the virtual router was only used for IPv4, then it is
5765 		 * no longer in use after the flush above.
5766 		 */
5767 		if (!mlxsw_sp_vr_is_used(vr))
5768 			continue;
5769 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5770 	}
5771 }
5772 
5773 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5774 {
5775 	int err;
5776 
5777 	if (mlxsw_sp->router->aborted)
5778 		return;
5779 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5780 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5781 	mlxsw_sp->router->aborted = true;
5782 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5783 	if (err)
5784 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5785 }
5786 
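/* FIB notifications are delivered in atomic context, so the relevant
 * notifier info is copied into a work item and processed later in
 * process context, under RTNL.
 */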
5787 struct mlxsw_sp_fib_event_work {
5788 	struct work_struct work;
5789 	union {
5790 		struct fib6_entry_notifier_info fen6_info;
5791 		struct fib_entry_notifier_info fen_info;
5792 		struct fib_rule_notifier_info fr_info;
5793 		struct fib_nh_notifier_info fnh_info;
5794 		struct mfc_entry_notifier_info men_info;
5795 		struct vif_entry_notifier_info ven_info;
5796 	};
5797 	struct mlxsw_sp *mlxsw_sp;
5798 	unsigned long event;
5799 };
5800 
5801 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5802 {
5803 	struct mlxsw_sp_fib_event_work *fib_work =
5804 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5805 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5806 	bool replace, append;
5807 	int err;
5808 
5809 	/* Protect internal structures from changes */
5810 	rtnl_lock();
5811 	mlxsw_sp_span_respin(mlxsw_sp);
5812 
5813 	switch (fib_work->event) {
5814 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5815 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5816 	case FIB_EVENT_ENTRY_ADD:
5817 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5818 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5819 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5820 					       replace, append);
5821 		if (err)
5822 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5823 		fib_info_put(fib_work->fen_info.fi);
5824 		break;
5825 	case FIB_EVENT_ENTRY_DEL:
5826 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5827 		fib_info_put(fib_work->fen_info.fi);
5828 		break;
5829 	case FIB_EVENT_RULE_ADD:
5830 		/* If we get here, a rule was added that we do not support,
5831 		 * so abort FIB offloading.
5832 		 */
5833 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5834 		break;
5835 	case FIB_EVENT_NH_ADD: /* fall through */
5836 	case FIB_EVENT_NH_DEL:
5837 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5838 					fib_work->fnh_info.fib_nh);
5839 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5840 		break;
5841 	}
5842 	rtnl_unlock();
5843 	kfree(fib_work);
5844 }
5845 
5846 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5847 {
5848 	struct mlxsw_sp_fib_event_work *fib_work =
5849 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5850 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5851 	bool replace;
5852 	int err;
5853 
5854 	rtnl_lock();
5855 	mlxsw_sp_span_respin(mlxsw_sp);
5856 
5857 	switch (fib_work->event) {
5858 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5859 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5860 	case FIB_EVENT_ENTRY_ADD:
5861 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5862 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5863 					       fib_work->fen6_info.rt, replace);
5864 		if (err)
5865 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5866 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5867 		break;
5868 	case FIB_EVENT_ENTRY_DEL:
5869 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5870 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5871 		break;
5872 	case FIB_EVENT_RULE_ADD:
5873 		/* If we get here, a rule was added that we do not support,
5874 		 * so abort FIB offloading.
5875 		 */
5876 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5877 		break;
5878 	}
5879 	rtnl_unlock();
5880 	kfree(fib_work);
5881 }
5882 
5883 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5884 {
5885 	struct mlxsw_sp_fib_event_work *fib_work =
5886 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5887 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5888 	bool replace;
5889 	int err;
5890 
5891 	rtnl_lock();
5892 	switch (fib_work->event) {
5893 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5894 	case FIB_EVENT_ENTRY_ADD:
5895 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5896 
5897 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5898 						replace);
5899 		if (err)
5900 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5901 		mr_cache_put(fib_work->men_info.mfc);
5902 		break;
5903 	case FIB_EVENT_ENTRY_DEL:
5904 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5905 		mr_cache_put(fib_work->men_info.mfc);
5906 		break;
5907 	case FIB_EVENT_VIF_ADD:
5908 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5909 						    &fib_work->ven_info);
5910 		if (err)
5911 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5912 		dev_put(fib_work->ven_info.dev);
5913 		break;
5914 	case FIB_EVENT_VIF_DEL:
5915 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5916 					      &fib_work->ven_info);
5917 		dev_put(fib_work->ven_info.dev);
5918 		break;
5919 	case FIB_EVENT_RULE_ADD:
5920 		/* If we get here, a rule was added that we do not support,
5921 		 * so abort FIB offloading.
5922 		 */
5923 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5924 		break;
5925 	}
5926 	rtnl_unlock();
5927 	kfree(fib_work);
5928 }
5929 
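/* Copy the IPv4 notifier info into the work item and take a reference on
 * the FIB info it points to, so that it remains valid until the work item
 * is processed.
 */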
5930 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5931 				       struct fib_notifier_info *info)
5932 {
5933 	struct fib_entry_notifier_info *fen_info;
5934 	struct fib_nh_notifier_info *fnh_info;
5935 
5936 	switch (fib_work->event) {
5937 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5938 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5939 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5940 	case FIB_EVENT_ENTRY_DEL:
5941 		fen_info = container_of(info, struct fib_entry_notifier_info,
5942 					info);
5943 		fib_work->fen_info = *fen_info;
5944 		/* Take a reference on the fib_info to prevent it from being
5945 		 * freed while the work is queued. Release it afterwards.
5946 		 */
5947 		fib_info_hold(fib_work->fen_info.fi);
5948 		break;
5949 	case FIB_EVENT_NH_ADD: /* fall through */
5950 	case FIB_EVENT_NH_DEL:
5951 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5952 					info);
5953 		fib_work->fnh_info = *fnh_info;
5954 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5955 		break;
5956 	}
5957 }
5958 
5959 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5960 				       struct fib_notifier_info *info)
5961 {
5962 	struct fib6_entry_notifier_info *fen6_info;
5963 
5964 	switch (fib_work->event) {
5965 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5966 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5967 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5968 	case FIB_EVENT_ENTRY_DEL:
5969 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5970 					 info);
5971 		fib_work->fen6_info = *fen6_info;
5972 		fib6_info_hold(fib_work->fen6_info.rt);
5973 		break;
5974 	}
5975 }
5976 
5977 static void
5978 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5979 			    struct fib_notifier_info *info)
5980 {
5981 	switch (fib_work->event) {
5982 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5983 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5984 	case FIB_EVENT_ENTRY_DEL:
5985 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5986 		mr_cache_hold(fib_work->men_info.mfc);
5987 		break;
5988 	case FIB_EVENT_VIF_ADD: /* fall through */
5989 	case FIB_EVENT_VIF_DEL:
5990 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5991 		dev_hold(fib_work->ven_info.dev);
5992 		break;
5993 	}
5994 }
5995 
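/* FIB rules themselves are not offloaded. Only the default rules and
 * l3mdev (VRF) rules match the device's fixed behavior; any other rule
 * would make the hardware FIB diverge from the kernel, so it is rejected.
 */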
5996 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5997 					  struct fib_notifier_info *info,
5998 					  struct mlxsw_sp *mlxsw_sp)
5999 {
6000 	struct netlink_ext_ack *extack = info->extack;
6001 	struct fib_rule_notifier_info *fr_info;
6002 	struct fib_rule *rule;
6003 	int err = 0;
6004 
6005 	/* Nothing to do for rule deletions at the moment */
6006 	if (event == FIB_EVENT_RULE_DEL)
6007 		return 0;
6008 
6009 	if (mlxsw_sp->router->aborted)
6010 		return 0;
6011 
6012 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6013 	rule = fr_info->rule;
6014 
6015 	switch (info->family) {
6016 	case AF_INET:
6017 		if (!fib4_rule_default(rule) && !rule->l3mdev)
6018 			err = -EOPNOTSUPP;
6019 		break;
6020 	case AF_INET6:
6021 		if (!fib6_rule_default(rule) && !rule->l3mdev)
6022 			err = -EOPNOTSUPP;
6023 		break;
6024 	case RTNL_FAMILY_IPMR:
6025 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6026 			err = -EOPNOTSUPP;
6027 		break;
6028 	case RTNL_FAMILY_IP6MR:
6029 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6030 			err = -EOPNOTSUPP;
6031 		break;
6032 	}
6033 
6034 	if (err < 0)
6035 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6036 
6037 	return err;
6038 }
6039 
6040 /* Called with rcu_read_lock() */
6041 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6042 				     unsigned long event, void *ptr)
6043 {
6044 	struct mlxsw_sp_fib_event_work *fib_work;
6045 	struct fib_notifier_info *info = ptr;
6046 	struct mlxsw_sp_router *router;
6047 	int err;
6048 
6049 	if (!net_eq(info->net, &init_net) ||
6050 	    (info->family != AF_INET && info->family != AF_INET6 &&
6051 	     info->family != RTNL_FAMILY_IPMR &&
6052 	     info->family != RTNL_FAMILY_IP6MR))
6053 		return NOTIFY_DONE;
6054 
6055 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6056 
6057 	switch (event) {
6058 	case FIB_EVENT_RULE_ADD: /* fall through */
6059 	case FIB_EVENT_RULE_DEL:
6060 		err = mlxsw_sp_router_fib_rule_event(event, info,
6061 						     router->mlxsw_sp);
6062 		if (!err || info->extack)
6063 			return notifier_from_errno(err);
6064 		break;
6065 	case FIB_EVENT_ENTRY_ADD:
6066 		if (router->aborted) {
6067 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6068 			return notifier_from_errno(-EINVAL);
6069 		}
6070 		break;
6071 	}
6072 
6073 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6074 	if (WARN_ON(!fib_work))
6075 		return NOTIFY_BAD;
6076 
6077 	fib_work->mlxsw_sp = router->mlxsw_sp;
6078 	fib_work->event = event;
6079 
6080 	switch (info->family) {
6081 	case AF_INET:
6082 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6083 		mlxsw_sp_router_fib4_event(fib_work, info);
6084 		break;
6085 	case AF_INET6:
6086 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6087 		mlxsw_sp_router_fib6_event(fib_work, info);
6088 		break;
6089 	case RTNL_FAMILY_IP6MR:
6090 	case RTNL_FAMILY_IPMR:
6091 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6092 		mlxsw_sp_router_fibmr_event(fib_work, info);
6093 		break;
6094 	}
6095 
6096 	mlxsw_core_schedule_work(&fib_work->work);
6097 
6098 	return NOTIFY_DONE;
6099 }
6100 
6101 struct mlxsw_sp_rif *
6102 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6103 			 const struct net_device *dev)
6104 {
6105 	int i;
6106 
6107 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6108 		if (mlxsw_sp->router->rifs[i] &&
6109 		    mlxsw_sp->router->rifs[i]->dev == dev)
6110 			return mlxsw_sp->router->rifs[i];
6111 
6112 	return NULL;
6113 }
6114 
6115 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6116 {
6117 	char ritr_pl[MLXSW_REG_RITR_LEN];
6118 	int err;
6119 
6120 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6121 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6122 	if (WARN_ON_ONCE(err))
6123 		return err;
6124 
6125 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6126 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6127 }
6128 
6129 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6130 					  struct mlxsw_sp_rif *rif)
6131 {
6132 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6133 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6134 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6135 }
6136 
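/* Decide whether an address event should change the RIF configuration:
 * a RIF is created when the first IP address is added to a netdev that
 * does not have one, and destroyed when the last IPv4 / IPv6 address is
 * removed, unless the netdev is an L3 (VRF) slave, whose RIF is managed
 * via CHANGEUPPER events instead.
 */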
6137 static bool
6138 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6139 			   unsigned long event)
6140 {
6141 	struct inet6_dev *inet6_dev;
6142 	bool addr_list_empty = true;
6143 	struct in_device *idev;
6144 
6145 	switch (event) {
6146 	case NETDEV_UP:
6147 		return rif == NULL;
6148 	case NETDEV_DOWN:
6149 		idev = __in_dev_get_rtnl(dev);
6150 		if (idev && idev->ifa_list)
6151 			addr_list_empty = false;
6152 
6153 		inet6_dev = __in6_dev_get(dev);
6154 		if (addr_list_empty && inet6_dev &&
6155 		    !list_empty(&inet6_dev->addr_list))
6156 			addr_list_empty = false;
6157 
6158 		/* macvlans do not have a RIF, but rather piggyback on the
6159 		 * RIF of their lower device.
6160 		 */
6161 		if (netif_is_macvlan(dev) && addr_list_empty)
6162 			return true;
6163 
6164 		if (rif && addr_list_empty &&
6165 		    !netif_is_l3_slave(rif->dev))
6166 			return true;
6167 		/* It is possible we already removed the RIF ourselves
6168 		 * if it was assigned to a netdev that is now a bridge
6169 		 * or LAG slave.
6170 		 */
6171 		return false;
6172 	}
6173 
6174 	return false;
6175 }
6176 
6177 static enum mlxsw_sp_rif_type
6178 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6179 		      const struct net_device *dev)
6180 {
6181 	enum mlxsw_sp_fid_type type;
6182 
6183 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6184 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6185 
6186 	/* Otherwise, derive the RIF type from the type of the underlying FID. */
6187 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6188 		type = MLXSW_SP_FID_TYPE_8021Q;
6189 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6190 		type = MLXSW_SP_FID_TYPE_8021Q;
6191 	else if (netif_is_bridge_master(dev))
6192 		type = MLXSW_SP_FID_TYPE_8021D;
6193 	else
6194 		type = MLXSW_SP_FID_TYPE_RFID;
6195 
6196 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6197 }
6198 
6199 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6200 {
6201 	int i;
6202 
6203 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6204 		if (!mlxsw_sp->router->rifs[i]) {
6205 			*p_rif_index = i;
6206 			return 0;
6207 		}
6208 	}
6209 
6210 	return -ENOBUFS;
6211 }
6212 
6213 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6214 					       u16 vr_id,
6215 					       struct net_device *l3_dev)
6216 {
6217 	struct mlxsw_sp_rif *rif;
6218 
6219 	rif = kzalloc(rif_size, GFP_KERNEL);
6220 	if (!rif)
6221 		return NULL;
6222 
6223 	INIT_LIST_HEAD(&rif->nexthop_list);
6224 	INIT_LIST_HEAD(&rif->neigh_list);
6225 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6226 	rif->mtu = l3_dev->mtu;
6227 	rif->vr_id = vr_id;
6228 	rif->dev = l3_dev;
6229 	rif->rif_index = rif_index;
6230 
6231 	return rif;
6232 }
6233 
6234 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6235 					   u16 rif_index)
6236 {
6237 	return mlxsw_sp->router->rifs[rif_index];
6238 }
6239 
6240 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6241 {
6242 	return rif->rif_index;
6243 }
6244 
6245 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6246 {
6247 	return lb_rif->common.rif_index;
6248 }
6249 
6250 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6251 {
6252 	return lb_rif->ul_vr_id;
6253 }
6254 
6255 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6256 {
6257 	return rif->dev->ifindex;
6258 }
6259 
6260 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6261 {
6262 	return rif->dev;
6263 }
6264 
6265 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6266 {
6267 	return rif->fid;
6268 }
6269 
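/* Create a router interface (RIF) for a netdev: derive the RIF type and
 * operations from the netdev, bind the RIF to a virtual router according
 * to the netdev's FIB table, allocate a free RIF index, take a reference
 * on the backing FID (if any), configure the RIF in the device and
 * register it with the multicast routing tables.
 */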
6270 static struct mlxsw_sp_rif *
6271 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6272 		    const struct mlxsw_sp_rif_params *params,
6273 		    struct netlink_ext_ack *extack)
6274 {
6275 	u32 tb_id = l3mdev_fib_table(params->dev);
6276 	const struct mlxsw_sp_rif_ops *ops;
6277 	struct mlxsw_sp_fid *fid = NULL;
6278 	enum mlxsw_sp_rif_type type;
6279 	struct mlxsw_sp_rif *rif;
6280 	struct mlxsw_sp_vr *vr;
6281 	u16 rif_index;
6282 	int i, err;
6283 
6284 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6285 	ops = mlxsw_sp->router->rif_ops_arr[type];
6286 
6287 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6288 	if (IS_ERR(vr))
6289 		return ERR_CAST(vr);
6290 	vr->rif_count++;
6291 
6292 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6293 	if (err) {
6294 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6295 		goto err_rif_index_alloc;
6296 	}
6297 
6298 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6299 	if (!rif) {
6300 		err = -ENOMEM;
6301 		goto err_rif_alloc;
6302 	}
6303 	rif->mlxsw_sp = mlxsw_sp;
6304 	rif->ops = ops;
6305 
6306 	if (ops->fid_get) {
6307 		fid = ops->fid_get(rif, extack);
6308 		if (IS_ERR(fid)) {
6309 			err = PTR_ERR(fid);
6310 			goto err_fid_get;
6311 		}
6312 		rif->fid = fid;
6313 	}
6314 
6315 	if (ops->setup)
6316 		ops->setup(rif, params);
6317 
6318 	err = ops->configure(rif);
6319 	if (err)
6320 		goto err_configure;
6321 
6322 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6323 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6324 		if (err)
6325 			goto err_mr_rif_add;
6326 	}
6327 
6328 	mlxsw_sp_rif_counters_alloc(rif);
6329 	mlxsw_sp->router->rifs[rif_index] = rif;
6330 
6331 	return rif;
6332 
6333 err_mr_rif_add:
6334 	for (i--; i >= 0; i--)
6335 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6336 	ops->deconfigure(rif);
6337 err_configure:
6338 	if (fid)
6339 		mlxsw_sp_fid_put(fid);
6340 err_fid_get:
6341 	kfree(rif);
6342 err_rif_alloc:
6343 err_rif_index_alloc:
6344 	vr->rif_count--;
6345 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6346 	return ERR_PTR(err);
6347 }
6348 
6349 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6350 {
6351 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6352 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6353 	struct mlxsw_sp_fid *fid = rif->fid;
6354 	struct mlxsw_sp_vr *vr;
6355 	int i;
6356 
6357 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6358 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6359 
6360 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6361 	mlxsw_sp_rif_counters_free(rif);
6362 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6363 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6364 	ops->deconfigure(rif);
6365 	if (fid)
6366 		/* Loopback RIFs are not associated with a FID. */
6367 		mlxsw_sp_fid_put(fid);
6368 	kfree(rif);
6369 	vr->rif_count--;
6370 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6371 }
6372 
6373 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6374 				 struct net_device *dev)
6375 {
6376 	struct mlxsw_sp_rif *rif;
6377 
6378 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6379 	if (!rif)
6380 		return;
6381 	mlxsw_sp_rif_destroy(rif);
6382 }
6383 
6384 static void
6385 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6386 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6387 {
6388 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6389 
6390 	params->vid = mlxsw_sp_port_vlan->vid;
6391 	params->lag = mlxsw_sp_port->lagged;
6392 	if (params->lag)
6393 		params->lag_id = mlxsw_sp_port->lag_id;
6394 	else
6395 		params->system_port = mlxsw_sp_port->local_port;
6396 }
6397 
6398 static int
6399 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6400 			       struct net_device *l3_dev,
6401 			       struct netlink_ext_ack *extack)
6402 {
6403 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6404 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6405 	u16 vid = mlxsw_sp_port_vlan->vid;
6406 	struct mlxsw_sp_rif *rif;
6407 	struct mlxsw_sp_fid *fid;
6408 	int err;
6409 
6410 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6411 	if (!rif) {
6412 		struct mlxsw_sp_rif_params params = {
6413 			.dev = l3_dev,
6414 		};
6415 
6416 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6417 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6418 		if (IS_ERR(rif))
6419 			return PTR_ERR(rif);
6420 	}
6421 
6422 	/* The FID was already created, just take a reference */
6423 	fid = rif->ops->fid_get(rif, extack);
6424 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6425 	if (err)
6426 		goto err_fid_port_vid_map;
6427 
6428 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6429 	if (err)
6430 		goto err_port_vid_learning_set;
6431 
6432 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6433 					BR_STATE_FORWARDING);
6434 	if (err)
6435 		goto err_port_vid_stp_set;
6436 
6437 	mlxsw_sp_port_vlan->fid = fid;
6438 
6439 	return 0;
6440 
6441 err_port_vid_stp_set:
6442 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6443 err_port_vid_learning_set:
6444 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6445 err_fid_port_vid_map:
6446 	mlxsw_sp_fid_put(fid);
6447 	return err;
6448 }
6449 
6450 void
6451 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6452 {
6453 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6454 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6455 	u16 vid = mlxsw_sp_port_vlan->vid;
6456 
6457 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6458 		return;
6459 
6460 	mlxsw_sp_port_vlan->fid = NULL;
6461 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6462 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6463 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6464 	/* If the router port holds the last reference on the rFID, then the
6465 	 * associated sub-port RIF will be destroyed.
6466 	 */
6467 	mlxsw_sp_fid_put(fid);
6468 }
6469 
6470 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6471 					     struct net_device *port_dev,
6472 					     unsigned long event, u16 vid,
6473 					     struct netlink_ext_ack *extack)
6474 {
6475 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6476 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6477 
6478 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6479 	if (WARN_ON(!mlxsw_sp_port_vlan))
6480 		return -EINVAL;
6481 
6482 	switch (event) {
6483 	case NETDEV_UP:
6484 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6485 						      l3_dev, extack);
6486 	case NETDEV_DOWN:
6487 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6488 		break;
6489 	}
6490 
6491 	return 0;
6492 }
6493 
6494 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6495 					unsigned long event,
6496 					struct netlink_ext_ack *extack)
6497 {
6498 	if (netif_is_bridge_port(port_dev) ||
6499 	    netif_is_lag_port(port_dev) ||
6500 	    netif_is_ovs_port(port_dev))
6501 		return 0;
6502 
6503 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6504 						 extack);
6505 }
6506 
6507 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6508 					 struct net_device *lag_dev,
6509 					 unsigned long event, u16 vid,
6510 					 struct netlink_ext_ack *extack)
6511 {
6512 	struct net_device *port_dev;
6513 	struct list_head *iter;
6514 	int err;
6515 
6516 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6517 		if (mlxsw_sp_port_dev_check(port_dev)) {
6518 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6519 								port_dev,
6520 								event, vid,
6521 								extack);
6522 			if (err)
6523 				return err;
6524 		}
6525 	}
6526 
6527 	return 0;
6528 }
6529 
6530 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6531 				       unsigned long event,
6532 				       struct netlink_ext_ack *extack)
6533 {
6534 	if (netif_is_bridge_port(lag_dev))
6535 		return 0;
6536 
6537 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6538 					     extack);
6539 }
6540 
6541 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6542 					  unsigned long event,
6543 					  struct netlink_ext_ack *extack)
6544 {
6545 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6546 	struct mlxsw_sp_rif_params params = {
6547 		.dev = l3_dev,
6548 	};
6549 	struct mlxsw_sp_rif *rif;
6550 
6551 	switch (event) {
6552 	case NETDEV_UP:
6553 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6554 		if (IS_ERR(rif))
6555 			return PTR_ERR(rif);
6556 		break;
6557 	case NETDEV_DOWN:
6558 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6559 		mlxsw_sp_rif_destroy(rif);
6560 		break;
6561 	}
6562 
6563 	return 0;
6564 }
6565 
6566 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6567 					unsigned long event,
6568 					struct netlink_ext_ack *extack)
6569 {
6570 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6571 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6572 
6573 	if (netif_is_bridge_port(vlan_dev))
6574 		return 0;
6575 
6576 	if (mlxsw_sp_port_dev_check(real_dev))
6577 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6578 							 event, vid, extack);
6579 	else if (netif_is_lag_master(real_dev))
6580 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6581 						     vid, extack);
6582 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6583 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6584 
6585 	return 0;
6586 }
6587 
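/* VRRP uses well-known virtual router MAC addresses (RFC 5798):
 * 00:00:5e:00:01:{VRID} for IPv4 and 00:00:5e:00:02:{VRID} for IPv6,
 * where the last byte is the virtual router identifier.
 */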
6588 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6589 {
6590 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6591 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6592 
6593 	return ether_addr_equal_masked(mac, vrrp4, mask);
6594 }
6595 
6596 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6597 {
6598 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6599 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6600 
6601 	return ether_addr_equal_masked(mac, vrrp6, mask);
6602 }
6603 
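/* If the MAC is a VRRP virtual router MAC, also program the VRID into
 * the RIF, so that packets sent to the virtual MAC are routed by the
 * device.
 */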
6604 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6605 				const u8 *mac, bool adding)
6606 {
6607 	char ritr_pl[MLXSW_REG_RITR_LEN];
6608 	u8 vrrp_id = adding ? mac[5] : 0;
6609 	int err;
6610 
6611 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6612 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6613 		return 0;
6614 
6615 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6616 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6617 	if (err)
6618 		return err;
6619 
6620 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6621 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6622 	else
6623 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6624 
6625 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6626 }
6627 
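/* macvlan netdevs do not get a RIF of their own. Instead, the macvlan's
 * MAC address is directed to the router via the RIF of the lower device,
 * with special handling for VRRP virtual MACs.
 */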
6628 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6629 				    const struct net_device *macvlan_dev,
6630 				    struct netlink_ext_ack *extack)
6631 {
6632 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6633 	struct mlxsw_sp_rif *rif;
6634 	int err;
6635 
6636 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6637 	if (!rif) {
6638 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6639 		return -EOPNOTSUPP;
6640 	}
6641 
6642 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6643 				  mlxsw_sp_fid_index(rif->fid), true);
6644 	if (err)
6645 		return err;
6646 
6647 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6648 				   macvlan_dev->dev_addr, true);
6649 	if (err)
6650 		goto err_rif_vrrp_add;
6651 
6652 	/* Make sure the bridge driver does not have this MAC pointing at
6653 	 * some other port.
6654 	 */
6655 	if (rif->ops->fdb_del)
6656 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6657 
6658 	return 0;
6659 
6660 err_rif_vrrp_add:
6661 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6662 			    mlxsw_sp_fid_index(rif->fid), false);
6663 	return err;
6664 }
6665 
6666 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6667 			      const struct net_device *macvlan_dev)
6668 {
6669 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6670 	struct mlxsw_sp_rif *rif;
6671 
6672 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6673 	/* If we do not have a RIF, then we already took care of
6674 	 * removing the macvlan's MAC during RIF deletion.
6675 	 */
6676 	if (!rif)
6677 		return;
6678 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6679 			     false);
6680 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6681 			    mlxsw_sp_fid_index(rif->fid), false);
6682 }
6683 
6684 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6685 					   unsigned long event,
6686 					   struct netlink_ext_ack *extack)
6687 {
6688 	struct mlxsw_sp *mlxsw_sp;
6689 
6690 	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6691 	if (!mlxsw_sp)
6692 		return 0;
6693 
6694 	switch (event) {
6695 	case NETDEV_UP:
6696 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6697 	case NETDEV_DOWN:
6698 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6699 		break;
6700 	}
6701 
6702 	return 0;
6703 }
6704 
6705 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6706 				     unsigned long event,
6707 				     struct netlink_ext_ack *extack)
6708 {
6709 	if (mlxsw_sp_port_dev_check(dev))
6710 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6711 	else if (netif_is_lag_master(dev))
6712 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6713 	else if (netif_is_bridge_master(dev))
6714 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6715 	else if (is_vlan_dev(dev))
6716 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6717 	else if (netif_is_macvlan(dev))
6718 		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
6719 	else
6720 		return 0;
6721 }
6722 
6723 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6724 			    unsigned long event, void *ptr)
6725 {
6726 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6727 	struct net_device *dev = ifa->ifa_dev->dev;
6728 	struct mlxsw_sp *mlxsw_sp;
6729 	struct mlxsw_sp_rif *rif;
6730 	int err = 0;
6731 
6732 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6733 	if (event == NETDEV_UP)
6734 		goto out;
6735 
6736 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6737 	if (!mlxsw_sp)
6738 		goto out;
6739 
6740 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6741 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6742 		goto out;
6743 
6744 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6745 out:
6746 	return notifier_from_errno(err);
6747 }
6748 
6749 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6750 				  unsigned long event, void *ptr)
6751 {
6752 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6753 	struct net_device *dev = ivi->ivi_dev->dev;
6754 	struct mlxsw_sp *mlxsw_sp;
6755 	struct mlxsw_sp_rif *rif;
6756 	int err = 0;
6757 
6758 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6759 	if (!mlxsw_sp)
6760 		goto out;
6761 
6762 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6763 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6764 		goto out;
6765 
6766 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6767 out:
6768 	return notifier_from_errno(err);
6769 }
6770 
6771 struct mlxsw_sp_inet6addr_event_work {
6772 	struct work_struct work;
6773 	struct net_device *dev;
6774 	unsigned long event;
6775 };
6776 
6777 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6778 {
6779 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6780 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6781 	struct net_device *dev = inet6addr_work->dev;
6782 	unsigned long event = inet6addr_work->event;
6783 	struct mlxsw_sp *mlxsw_sp;
6784 	struct mlxsw_sp_rif *rif;
6785 
6786 	rtnl_lock();
6787 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6788 	if (!mlxsw_sp)
6789 		goto out;
6790 
6791 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6792 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6793 		goto out;
6794 
6795 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6796 out:
6797 	rtnl_unlock();
6798 	dev_put(dev);
6799 	kfree(inet6addr_work);
6800 }
6801 
6802 /* Called with rcu_read_lock() */
6803 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6804 			     unsigned long event, void *ptr)
6805 {
6806 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6807 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6808 	struct net_device *dev = if6->idev->dev;
6809 
6810 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6811 	if (event == NETDEV_UP)
6812 		return NOTIFY_DONE;
6813 
6814 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6815 		return NOTIFY_DONE;
6816 
6817 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6818 	if (!inet6addr_work)
6819 		return NOTIFY_BAD;
6820 
6821 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6822 	inet6addr_work->dev = dev;
6823 	inet6addr_work->event = event;
6824 	dev_hold(dev);
6825 	mlxsw_core_schedule_work(&inet6addr_work->work);
6826 
6827 	return NOTIFY_DONE;
6828 }
6829 
6830 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6831 				   unsigned long event, void *ptr)
6832 {
6833 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6834 	struct net_device *dev = i6vi->i6vi_dev->dev;
6835 	struct mlxsw_sp *mlxsw_sp;
6836 	struct mlxsw_sp_rif *rif;
6837 	int err = 0;
6838 
6839 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6840 	if (!mlxsw_sp)
6841 		goto out;
6842 
6843 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6844 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6845 		goto out;
6846 
6847 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6848 out:
6849 	return notifier_from_errno(err);
6850 }
6851 
6852 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6853 			     const char *mac, int mtu)
6854 {
6855 	char ritr_pl[MLXSW_REG_RITR_LEN];
6856 	int err;
6857 
6858 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6859 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6860 	if (err)
6861 		return err;
6862 
6863 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6864 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6865 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6866 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6867 }
6868 
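/* React to changes in the MAC address or MTU of a netdev that has a RIF:
 * edit the RIF in the device, migrate the FDB entry that directs the
 * RIF's MAC to the router and update the multicast routing tables with
 * the new MTU.
 */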
6869 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6870 {
6871 	struct mlxsw_sp *mlxsw_sp;
6872 	struct mlxsw_sp_rif *rif;
6873 	u16 fid_index;
6874 	int err;
6875 
6876 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6877 	if (!mlxsw_sp)
6878 		return 0;
6879 
6880 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6881 	if (!rif)
6882 		return 0;
6883 	fid_index = mlxsw_sp_fid_index(rif->fid);
6884 
6885 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6886 	if (err)
6887 		return err;
6888 
6889 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6890 				dev->mtu);
6891 	if (err)
6892 		goto err_rif_edit;
6893 
6894 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6895 	if (err)
6896 		goto err_rif_fdb_op;
6897 
6898 	if (rif->mtu != dev->mtu) {
6899 		struct mlxsw_sp_vr *vr;
6900 		int i;
6901 
6902 		/* The RIF is relevant only to its mr_table instance since,
6903 		 * unlike in unicast routing, a RIF cannot be shared between
6904 		 * several multicast routing tables.
6905 		 */
6906 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6907 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6908 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
6909 						   rif, dev->mtu);
6910 	}
6911 
6912 	ether_addr_copy(rif->addr, dev->dev_addr);
6913 	rif->mtu = dev->mtu;
6914 
6915 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6916 
6917 	return 0;
6918 
6919 err_rif_fdb_op:
6920 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6921 err_rif_edit:
6922 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6923 	return err;
6924 }
6925 
6926 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6927 				  struct net_device *l3_dev,
6928 				  struct netlink_ext_ack *extack)
6929 {
6930 	struct mlxsw_sp_rif *rif;
6931 
6932 	/* If the netdev is already associated with a RIF, then we need to
6933 	 * destroy it and create a new one with the new virtual router ID.
6934 	 */
6935 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6936 	if (rif)
6937 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6938 
6939 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6940 }
6941 
6942 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6943 				    struct net_device *l3_dev)
6944 {
6945 	struct mlxsw_sp_rif *rif;
6946 
6947 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6948 	if (!rif)
6949 		return;
6950 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6951 }
6952 
6953 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6954 				 struct netdev_notifier_changeupper_info *info)
6955 {
6956 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6957 	int err = 0;
6958 
6959 	/* We do not create a RIF for a macvlan, but only use it to
6960 	 * direct more MAC addresses to the router.
6961 	 */
6962 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6963 		return 0;
6964 
6965 	switch (event) {
6966 	case NETDEV_PRECHANGEUPPER:
6967 		return 0;
6968 	case NETDEV_CHANGEUPPER:
6969 		if (info->linking) {
6970 			struct netlink_ext_ack *extack;
6971 
6972 			extack = netdev_notifier_info_to_extack(&info->info);
6973 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6974 		} else {
6975 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6976 		}
6977 		break;
6978 	}
6979 
6980 	return err;
6981 }
6982 
6983 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6984 {
6985 	struct mlxsw_sp_rif *rif = data;
6986 
6987 	if (!netif_is_macvlan(dev))
6988 		return 0;
6989 
6990 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6991 				   mlxsw_sp_fid_index(rif->fid), false);
6992 }
6993 
6994 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6995 {
6996 	if (!netif_is_macvlan_port(rif->dev))
6997 		return 0;
6998 
6999 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7000 	return netdev_walk_all_upper_dev_rcu(rif->dev,
7001 					     __mlxsw_sp_rif_macvlan_flush, rif);
7002 }
7003 
7004 static struct mlxsw_sp_rif_subport *
7005 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
7006 {
7007 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
7008 }
7009 
7010 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7011 				       const struct mlxsw_sp_rif_params *params)
7012 {
7013 	struct mlxsw_sp_rif_subport *rif_subport;
7014 
7015 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7016 	rif_subport->vid = params->vid;
7017 	rif_subport->lag = params->lag;
7018 	if (params->lag)
7019 		rif_subport->lag_id = params->lag_id;
7020 	else
7021 		rif_subport->system_port = params->system_port;
7022 }
7023 
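/* Program a sub-port RIF, which is bound directly to a {system port, VID}
 * or {LAG, VID} pair.
 */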
7024 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7025 {
7026 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7027 	struct mlxsw_sp_rif_subport *rif_subport;
7028 	char ritr_pl[MLXSW_REG_RITR_LEN];
7029 
7030 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7031 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7032 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7033 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7034 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7035 				  rif_subport->lag ? rif_subport->lag_id :
7036 						     rif_subport->system_port,
7037 				  rif_subport->vid);
7038 
7039 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7040 }
7041 
7042 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7043 {
7044 	int err;
7045 
7046 	err = mlxsw_sp_rif_subport_op(rif, true);
7047 	if (err)
7048 		return err;
7049 
7050 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7051 				  mlxsw_sp_fid_index(rif->fid), true);
7052 	if (err)
7053 		goto err_rif_fdb_op;
7054 
7055 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7056 	return 0;
7057 
7058 err_rif_fdb_op:
7059 	mlxsw_sp_rif_subport_op(rif, false);
7060 	return err;
7061 }
7062 
7063 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7064 {
7065 	struct mlxsw_sp_fid *fid = rif->fid;
7066 
7067 	mlxsw_sp_fid_rif_set(fid, NULL);
7068 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7069 			    mlxsw_sp_fid_index(fid), false);
7070 	mlxsw_sp_rif_macvlan_flush(rif);
7071 	mlxsw_sp_rif_subport_op(rif, false);
7072 }
7073 
7074 static struct mlxsw_sp_fid *
7075 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7076 			     struct netlink_ext_ack *extack)
7077 {
7078 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7079 }
7080 
7081 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7082 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7083 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7084 	.setup			= mlxsw_sp_rif_subport_setup,
7085 	.configure		= mlxsw_sp_rif_subport_configure,
7086 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7087 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7088 };
7089 
7090 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7091 				    enum mlxsw_reg_ritr_if_type type,
7092 				    u16 vid_fid, bool enable)
7093 {
7094 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7095 	char ritr_pl[MLXSW_REG_RITR_LEN];
7096 
7097 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7098 			    rif->dev->mtu);
7099 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7100 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7101 
7102 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7103 }
7104 
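/* The "router port" is a virtual port one above the maximal physical
 * local port. It is used as the flood destination when directing
 * broadcast and multicast traffic on a FID towards the router.
 */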
7105 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7106 {
7107 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7108 }
7109 
7110 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7111 {
7112 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7113 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7114 	int err;
7115 
7116 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7117 	if (err)
7118 		return err;
7119 
7120 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7121 				     mlxsw_sp_router_port(mlxsw_sp), true);
7122 	if (err)
7123 		goto err_fid_mc_flood_set;
7124 
7125 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7126 				     mlxsw_sp_router_port(mlxsw_sp), true);
7127 	if (err)
7128 		goto err_fid_bc_flood_set;
7129 
7130 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7131 				  mlxsw_sp_fid_index(rif->fid), true);
7132 	if (err)
7133 		goto err_rif_fdb_op;
7134 
7135 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7136 	return 0;
7137 
7138 err_rif_fdb_op:
7139 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7140 			       mlxsw_sp_router_port(mlxsw_sp), false);
7141 err_fid_bc_flood_set:
7142 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7143 			       mlxsw_sp_router_port(mlxsw_sp), false);
7144 err_fid_mc_flood_set:
7145 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7146 	return err;
7147 }
7148 
7149 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7150 {
7151 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7152 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7153 	struct mlxsw_sp_fid *fid = rif->fid;
7154 
	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
7163 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7164 }
7165 
7166 static struct mlxsw_sp_fid *
7167 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7168 			  struct netlink_ext_ack *extack)
7169 {
7170 	u16 vid;
7171 	int err;
7172 
7173 	if (is_vlan_dev(rif->dev)) {
7174 		vid = vlan_dev_vlan_id(rif->dev);
7175 	} else {
7176 		err = br_vlan_get_pvid(rif->dev, &vid);
7177 		if (err < 0 || !vid) {
7178 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7179 			return ERR_PTR(-EINVAL);
7180 		}
7181 	}
7182 
7183 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7184 }
7185 
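/* Have the bridge delete its {MAC, VID} FDB entry: find the bridge port
 * that owns the entry and notify the bridge via
 * SWITCHDEV_FDB_DEL_TO_BRIDGE.
 */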
7186 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7187 {
7188 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7189 	struct switchdev_notifier_fdb_info info;
7190 	struct net_device *br_dev;
7191 	struct net_device *dev;
7192 
7193 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7194 	dev = br_fdb_find_port(br_dev, mac, vid);
7195 	if (!dev)
7196 		return;
7197 
7198 	info.addr = mac;
7199 	info.vid = vid;
7200 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7201 }
7202 
7203 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7204 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7205 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7206 	.configure		= mlxsw_sp_rif_vlan_configure,
7207 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7208 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7209 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7210 };
7211 
7212 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7213 {
7214 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7215 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7216 	int err;
7217 
7218 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7219 				       true);
7220 	if (err)
7221 		return err;
7222 
7223 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7224 				     mlxsw_sp_router_port(mlxsw_sp), true);
7225 	if (err)
7226 		goto err_fid_mc_flood_set;
7227 
7228 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7229 				     mlxsw_sp_router_port(mlxsw_sp), true);
7230 	if (err)
7231 		goto err_fid_bc_flood_set;
7232 
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
7235 	if (err)
7236 		goto err_rif_fdb_op;
7237 
7238 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7239 	return 0;
7240 
7241 err_rif_fdb_op:
7242 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7243 			       mlxsw_sp_router_port(mlxsw_sp), false);
7244 err_fid_bc_flood_set:
7245 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7246 			       mlxsw_sp_router_port(mlxsw_sp), false);
7247 err_fid_mc_flood_set:
7248 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7249 	return err;
7250 }
7251 
7252 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7253 {
7254 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7255 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7256 	struct mlxsw_sp_fid *fid = rif->fid;
7257 
	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
7266 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7267 }
7268 
7269 static struct mlxsw_sp_fid *
7270 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7271 			 struct netlink_ext_ack *extack)
7272 {
7273 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7274 }
7275 
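/* Same as the VLAN variant above, but 802.1D bridges are VLAN-unaware,
 * so the FDB entry is looked up with VID 0.
 */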
7276 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7277 {
7278 	struct switchdev_notifier_fdb_info info;
7279 	struct net_device *dev;
7280 
7281 	dev = br_fdb_find_port(rif->dev, mac, 0);
7282 	if (!dev)
7283 		return;
7284 
7285 	info.addr = mac;
7286 	info.vid = 0;
7287 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7288 }
7289 
7290 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7291 	.type			= MLXSW_SP_RIF_TYPE_FID,
7292 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7293 	.configure		= mlxsw_sp_rif_fid_configure,
7294 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7295 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7296 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7297 };
7298 
7299 static struct mlxsw_sp_rif_ipip_lb *
7300 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7301 {
7302 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7303 }
7304 
7305 static void
7306 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7307 			   const struct mlxsw_sp_rif_params *params)
7308 {
7309 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7310 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7311 
7312 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7313 				 common);
7314 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7315 	rif_lb->lb_config = params_lb->lb_config;
7316 }
7317 
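/* An IP-in-IP loopback RIF is not bound to a FID. Instead, it is bound
 * to the virtual router of the tunnel's underlay table, and holds a
 * reference on that VR for as long as the RIF exists.
 */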
7318 static int
7319 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7320 {
7321 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7322 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7323 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7324 	struct mlxsw_sp_vr *ul_vr;
7325 	int err;
7326 
7327 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7328 	if (IS_ERR(ul_vr))
7329 		return PTR_ERR(ul_vr);
7330 
7331 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7332 	if (err)
7333 		goto err_loopback_op;
7334 
7335 	lb_rif->ul_vr_id = ul_vr->id;
7336 	++ul_vr->rif_count;
7337 	return 0;
7338 
7339 err_loopback_op:
7340 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7341 	return err;
7342 }
7343 
7344 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7345 {
7346 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7347 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7348 	struct mlxsw_sp_vr *ul_vr;
7349 
7350 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7351 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7352 
7353 	--ul_vr->rif_count;
7354 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7355 }
7356 
7357 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7358 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7359 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup			= mlxsw_sp_rif_ipip_lb_setup,
7361 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
7362 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
7363 };
7364 
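/* Dispatch table for the RIF operations above, indexed by RIF type. The
 * type is derived from the kind of netdevice the RIF is created on top
 * of.
 */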
7365 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7366 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7367 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
7368 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7369 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
7370 };
7371 
7372 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7373 {
7374 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7375 
7376 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7377 					 sizeof(struct mlxsw_sp_rif *),
7378 					 GFP_KERNEL);
7379 	if (!mlxsw_sp->router->rifs)
7380 		return -ENOMEM;
7381 
7382 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7383 
7384 	return 0;
7385 }
7386 
7387 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7388 {
7389 	int i;
7390 
7391 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7392 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7393 
7394 	kfree(mlxsw_sp->router->rifs);
7395 }
7396 
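/* Tunnel TTL handling: have IP-in-IP encapsulation copy the TTL from
 * the overlay packet (ttlc set) rather than use a fixed TTL value.
 */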
7397 static int
7398 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7399 {
7400 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7401 
7402 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7403 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7404 }
7405 
7406 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7407 {
7408 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7409 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7410 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7411 }
7412 
7413 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7414 {
7415 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7416 }
7417 
7418 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7419 {
7420 	struct mlxsw_sp_router *router;
7421 
7422 	/* Flush pending FIB notifications and then flush the device's
7423 	 * table before requesting another dump. The FIB notification
7424 	 * block is unregistered, so no need to take RTNL.
7425 	 */
7426 	mlxsw_core_flush_owq();
7427 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7428 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7429 }
7430 
7431 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7432 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7433 {
7434 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7435 }
7436 
7437 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7438 {
7439 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7440 }
7441 
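/* Program the fields feeding the device's ECMP hash (RECR2) to match
 * the kernel's multipath hash policy: addresses only under the L3
 * policy, the full five-tuple under the L4 policy, so that the device
 * hashes on the same fields as the kernel.
 */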
7442 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7443 {
7444 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7445 
7446 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7447 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7448 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7449 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7450 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7451 	if (only_l3)
7452 		return;
7453 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7454 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7455 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7456 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7457 }
7458 
7459 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7460 {
7461 	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7462 
7463 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7464 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7465 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7466 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7467 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7468 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7469 	if (only_l3) {
7470 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7471 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7472 	} else {
7473 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7474 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7475 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7476 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7477 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7478 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7479 	}
7480 }
7481 
7482 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7483 {
7484 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7485 	u32 seed;
7486 
7487 	get_random_bytes(&seed, sizeof(seed));
7488 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7489 	mlxsw_sp_mp4_hash_init(recr2_pl);
7490 	mlxsw_sp_mp6_hash_init(recr2_pl);
7491 
7492 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7493 }
7494 #else
7495 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7496 {
7497 	return 0;
7498 }
7499 #endif
7500 
7501 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7502 {
7503 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7504 	unsigned int i;
7505 
7506 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7507 
	/* The HW determines switch priority from the DSCP bits alone, while
	 * the kernel derives it from the full ToS byte. To bridge this
	 * mismatch, program each DSCP entry with the priority the kernel
	 * would compute for the corresponding ToS value, i.e. with the DSCP
	 * shifted past the two least-significant ECN bits.
	 */
7513 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7514 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7515 
7516 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7517 }
7518 
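/* Enable the router in the device: RGCR activates IPv4 and IPv6 routing
 * and sets the maximum number of RIFs, as well as whether packet
 * priority is updated during forwarding, mirroring the
 * net.ipv4.ip_forward_update_priority sysctl.
 */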
7519 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7520 {
7521 	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7522 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7523 	u64 max_rifs;
7525 
7526 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7527 		return -EIO;
7528 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7529 
7530 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7531 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7532 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7537 }
7538 
7539 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7540 {
7541 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7542 
7543 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7544 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7545 }
7546 
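/* Router initialization. The order of the steps below matters, as later
 * steps depend on earlier ones (e.g., virtual routers bind LPM trees),
 * and mlxsw_sp_router_fini() tears everything down in reverse order.
 */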
7547 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7548 {
7549 	struct mlxsw_sp_router *router;
7550 	int err;
7551 
	router = kzalloc(sizeof(*router), GFP_KERNEL);
7553 	if (!router)
7554 		return -ENOMEM;
7555 	mlxsw_sp->router = router;
7556 	router->mlxsw_sp = mlxsw_sp;
7557 
7558 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7559 	err = __mlxsw_sp_router_init(mlxsw_sp);
7560 	if (err)
7561 		goto err_router_init;
7562 
7563 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7564 	if (err)
7565 		goto err_rifs_init;
7566 
7567 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7568 	if (err)
7569 		goto err_ipips_init;
7570 
7571 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7572 			      &mlxsw_sp_nexthop_ht_params);
7573 	if (err)
7574 		goto err_nexthop_ht_init;
7575 
7576 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7577 			      &mlxsw_sp_nexthop_group_ht_params);
7578 	if (err)
7579 		goto err_nexthop_group_ht_init;
7580 
7581 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7582 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7583 	if (err)
7584 		goto err_lpm_init;
7585 
7586 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7587 	if (err)
7588 		goto err_mr_init;
7589 
7590 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7591 	if (err)
7592 		goto err_vrs_init;
7593 
7594 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7595 	if (err)
7596 		goto err_neigh_init;
7597 
7598 	mlxsw_sp->router->netevent_nb.notifier_call =
7599 		mlxsw_sp_router_netevent_event;
7600 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7601 	if (err)
7602 		goto err_register_netevent_notifier;
7603 
7604 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7605 	if (err)
7606 		goto err_mp_hash_init;
7607 
7608 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7609 	if (err)
7610 		goto err_dscp_init;
7611 
7612 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7613 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7614 				    mlxsw_sp_router_fib_dump_flush);
7615 	if (err)
7616 		goto err_register_fib_notifier;
7617 
7618 	return 0;
7619 
7620 err_register_fib_notifier:
7621 err_dscp_init:
7622 err_mp_hash_init:
7623 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7624 err_register_netevent_notifier:
7625 	mlxsw_sp_neigh_fini(mlxsw_sp);
7626 err_neigh_init:
7627 	mlxsw_sp_vrs_fini(mlxsw_sp);
7628 err_vrs_init:
7629 	mlxsw_sp_mr_fini(mlxsw_sp);
7630 err_mr_init:
7631 	mlxsw_sp_lpm_fini(mlxsw_sp);
7632 err_lpm_init:
7633 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7634 err_nexthop_group_ht_init:
7635 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7636 err_nexthop_ht_init:
7637 	mlxsw_sp_ipips_fini(mlxsw_sp);
7638 err_ipips_init:
7639 	mlxsw_sp_rifs_fini(mlxsw_sp);
7640 err_rifs_init:
7641 	__mlxsw_sp_router_fini(mlxsw_sp);
7642 err_router_init:
7643 	kfree(mlxsw_sp->router);
7644 	return err;
7645 }
7646 
7647 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7648 {
7649 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7650 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7651 	mlxsw_sp_neigh_fini(mlxsw_sp);
7652 	mlxsw_sp_vrs_fini(mlxsw_sp);
7653 	mlxsw_sp_mr_fini(mlxsw_sp);
7654 	mlxsw_sp_lpm_fini(mlxsw_sp);
7655 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7656 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7657 	mlxsw_sp_ipips_fini(mlxsw_sp);
7658 	mlxsw_sp_rifs_fini(mlxsw_sp);
7659 	__mlxsw_sp_router_fini(mlxsw_sp);
7660 	kfree(mlxsw_sp->router);
7661 }
7662