1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/refcount.h>
19 #include <net/netevent.h>
20 #include <net/neighbour.h>
21 #include <net/arp.h>
22 #include <net/ip_fib.h>
23 #include <net/ip6_fib.h>
24 #include <net/fib_rules.h>
25 #include <net/ip_tunnels.h>
26 #include <net/l3mdev.h>
27 #include <net/addrconf.h>
28 #include <net/ndisc.h>
29 #include <net/ipv6.h>
30 #include <net/fib_notifier.h>
31 #include <net/switchdev.h>
32 
33 #include "spectrum.h"
34 #include "core.h"
35 #include "reg.h"
36 #include "spectrum_cnt.h"
37 #include "spectrum_dpipe.h"
38 #include "spectrum_ipip.h"
39 #include "spectrum_mr.h"
40 #include "spectrum_mr_tcam.h"
41 #include "spectrum_router.h"
42 #include "spectrum_span.h"
43 
44 struct mlxsw_sp_fib;
45 struct mlxsw_sp_vr;
46 struct mlxsw_sp_lpm_tree;
47 struct mlxsw_sp_rif_ops;
48 
/* Global router state, one instance per mlxsw_sp (per ASIC): RIF and VR
 * arrays, neighbour/nexthop tracking tables, LPM tree bookkeeping and the
 * notifier blocks through which the router reacts to kernel events.
 */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;	/* all usable trees */
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;	/* NOTE(review): presumably set when FIB offload
			 * is aborted — confirm against setter.
			 */
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	struct notifier_block inetaddr_nb;
	struct notifier_block inet6addr_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
79 
/* Router interface (RIF): the L3 hardware representation of a netdevice.
 * Carries lists of nexthops and neighbours associated with it and the
 * optional per-direction hardware counters.
 */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	/* Counter indices are only meaningful while the matching *_valid
	 * flag is set; see mlxsw_sp_rif_counter_alloc()/free().
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
97 
/* Parameters for RIF creation; the union holds either a physical port
 * or a LAG identifier, selected by @lag.
 */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* Sub-port RIF: a RIF bound to a {port or LAG, VID} pair. Reference
 * counted via @ref_count.
 */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	refcount_t ref_count;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* Loopback RIF used for IP-in-IP tunnel offload. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

/* Creation parameters for an IPIP loopback RIF. */
struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
129 
/* Per-RIF-type operations: type-specific setup, hardware (de)configuration,
 * FID resolution and FDB cleanup.
 */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;	/* size of the type-specific RIF structure */

	/* Initialize type-specific fields from the creation parameters. */
	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	/* Program the RIF to / remove it from hardware. */
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	/* Resolve the FID this RIF should be associated with. */
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
142 
143 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
144 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
145 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
146 				  struct mlxsw_sp_lpm_tree *lpm_tree);
147 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
148 				     const struct mlxsw_sp_fib *fib,
149 				     u8 tree_id);
150 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
151 				       const struct mlxsw_sp_fib *fib);
152 
153 static unsigned int *
154 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
155 			   enum mlxsw_sp_rif_counter_dir dir)
156 {
157 	switch (dir) {
158 	case MLXSW_SP_RIF_COUNTER_EGRESS:
159 		return &rif->counter_egress;
160 	case MLXSW_SP_RIF_COUNTER_INGRESS:
161 		return &rif->counter_ingress;
162 	}
163 	return NULL;
164 }
165 
166 static bool
167 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
168 			       enum mlxsw_sp_rif_counter_dir dir)
169 {
170 	switch (dir) {
171 	case MLXSW_SP_RIF_COUNTER_EGRESS:
172 		return rif->counter_egress_valid;
173 	case MLXSW_SP_RIF_COUNTER_INGRESS:
174 		return rif->counter_ingress_valid;
175 	}
176 	return false;
177 }
178 
179 static void
180 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
181 			       enum mlxsw_sp_rif_counter_dir dir,
182 			       bool valid)
183 {
184 	switch (dir) {
185 	case MLXSW_SP_RIF_COUNTER_EGRESS:
186 		rif->counter_egress_valid = valid;
187 		break;
188 	case MLXSW_SP_RIF_COUNTER_INGRESS:
189 		rif->counter_ingress_valid = valid;
190 		break;
191 	}
192 }
193 
194 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
195 				     unsigned int counter_index, bool enable,
196 				     enum mlxsw_sp_rif_counter_dir dir)
197 {
198 	char ritr_pl[MLXSW_REG_RITR_LEN];
199 	bool is_egress = false;
200 	int err;
201 
202 	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
203 		is_egress = true;
204 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
205 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
206 	if (err)
207 		return err;
208 
209 	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
210 				    is_egress);
211 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
212 }
213 
214 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
215 				   struct mlxsw_sp_rif *rif,
216 				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
217 {
218 	char ricnt_pl[MLXSW_REG_RICNT_LEN];
219 	unsigned int *p_counter_index;
220 	bool valid;
221 	int err;
222 
223 	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
224 	if (!valid)
225 		return -EINVAL;
226 
227 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
228 	if (!p_counter_index)
229 		return -EINVAL;
230 	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
231 			     MLXSW_REG_RICNT_OPCODE_NOP);
232 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
233 	if (err)
234 		return err;
235 	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
236 	return 0;
237 }
238 
/* Zero a hardware counter with a RICNT clear operation. */
static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
248 
249 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
250 			       struct mlxsw_sp_rif *rif,
251 			       enum mlxsw_sp_rif_counter_dir dir)
252 {
253 	unsigned int *p_counter_index;
254 	int err;
255 
256 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
257 	if (!p_counter_index)
258 		return -EINVAL;
259 	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
260 				     p_counter_index);
261 	if (err)
262 		return err;
263 
264 	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
265 	if (err)
266 		goto err_counter_clear;
267 
268 	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
269 					*p_counter_index, true, dir);
270 	if (err)
271 		goto err_counter_edit;
272 	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
273 	return 0;
274 
275 err_counter_edit:
276 err_counter_clear:
277 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
278 			      *p_counter_index);
279 	return err;
280 }
281 
282 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
283 			       struct mlxsw_sp_rif *rif,
284 			       enum mlxsw_sp_rif_counter_dir dir)
285 {
286 	unsigned int *p_counter_index;
287 
288 	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
289 		return;
290 
291 	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
292 	if (WARN_ON(!p_counter_index))
293 		return;
294 	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
295 				  *p_counter_index, false, dir);
296 	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
297 			      *p_counter_index);
298 	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
299 }
300 
301 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
302 {
303 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
304 	struct devlink *devlink;
305 
306 	devlink = priv_to_devlink(mlxsw_sp->core);
307 	if (!devlink_dpipe_table_counter_enabled(devlink,
308 						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
309 		return;
310 	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
311 }
312 
313 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
314 {
315 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
316 
317 	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
318 }
319 
/* Number of distinct prefix lengths: 0..128 (IPv6-sized, covers IPv4). */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per possible prefix length. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate over every prefix length set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
328 
329 static bool
330 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
331 			 struct mlxsw_sp_prefix_usage *prefix_usage2)
332 {
333 	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
334 }
335 
336 static void
337 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
338 			  struct mlxsw_sp_prefix_usage *prefix_usage2)
339 {
340 	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
341 }
342 
/* Mark @prefix_len as in use in the prefix-usage bitmap. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
349 
/* Mark @prefix_len as no longer in use in the prefix-usage bitmap. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
356 
/* Key of a FIB node: the prefix (buffer sized for IPv6, also used for
 * IPv4) plus its length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
376 
struct mlxsw_sp_nexthop_group;

/* A node in a FIB: one {prefix, prefix_len} key with its entries. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;	/* member of fib->node_list */
	struct rhash_head ht_node;	/* member of fib->ht */
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

/* Decap state of an IPIP-decap FIB entry. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;	/* KVDL adjacency slot; see
				 * mlxsw_sp_fib_entry_decap_init().
				 */
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

/* IPv4-specific FIB entry; carries kernel route attributes. */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

/* IPv6-specific FIB entry with its list of kernel fib6_info routes. */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;	/* number of entries on rt6_list */
};

/* Binds one kernel fib6_info to a mlxsw_sp_fib6_entry's rt6_list. */
struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;	/* number of users; 0 means free slot */
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

/* One routing table: per virtual router and L3 protocol. */
struct mlxsw_sp_fib {
	struct rhashtable ht;	/* fib nodes, keyed by mlxsw_sp_fib_key */
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

/* Virtual router: the hardware counterpart of a kernel FIB table. */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
};
444 
445 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
446 
/* Allocate a FIB for @vr and @proto, bind it to the protocol's current
 * default LPM tree and take a reference on that tree.
 *
 * Returns the new FIB or an ERR_PTR(); on failure all intermediate
 * state is unwound in reverse order.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	/* Hold the tree before binding so the reference exists even if
	 * the bind below fails and is rolled back.
	 */
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
478 
479 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
480 				 struct mlxsw_sp_fib *fib)
481 {
482 	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
483 	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
484 	WARN_ON(!list_empty(&fib->node_list));
485 	rhashtable_destroy(&fib->ht);
486 	kfree(fib);
487 }
488 
489 static struct mlxsw_sp_lpm_tree *
490 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
491 {
492 	static struct mlxsw_sp_lpm_tree *lpm_tree;
493 	int i;
494 
495 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
496 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
497 		if (lpm_tree->ref_count == 0)
498 			return lpm_tree;
499 	}
500 	return NULL;
501 }
502 
503 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
504 				   struct mlxsw_sp_lpm_tree *lpm_tree)
505 {
506 	char ralta_pl[MLXSW_REG_RALTA_LEN];
507 
508 	mlxsw_reg_ralta_pack(ralta_pl, true,
509 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
510 			     lpm_tree->id);
511 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
512 }
513 
514 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
515 				   struct mlxsw_sp_lpm_tree *lpm_tree)
516 {
517 	char ralta_pl[MLXSW_REG_RALTA_LEN];
518 
519 	mlxsw_reg_ralta_pack(ralta_pl, false,
520 			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
521 			     lpm_tree->id);
522 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
523 }
524 
525 static int
526 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
527 				  struct mlxsw_sp_prefix_usage *prefix_usage,
528 				  struct mlxsw_sp_lpm_tree *lpm_tree)
529 {
530 	char ralst_pl[MLXSW_REG_RALST_LEN];
531 	u8 root_bin = 0;
532 	u8 prefix;
533 	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
534 
535 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
536 		root_bin = prefix;
537 
538 	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
539 	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
540 		if (prefix == 0)
541 			continue;
542 		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
543 					 MLXSW_REG_RALST_BIN_NO_CHILD);
544 		last_prefix = prefix;
545 	}
546 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
547 }
548 
/* Take a free tree slot, allocate the tree in hardware and program its
 * bin structure from @prefix_usage. The returned tree starts with a
 * single reference and zeroed per-prefix reference counts.
 *
 * Returns -EBUSY when no free slot exists, or an ERR_PTR() from the
 * hardware calls.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
580 
/* Counterpart of mlxsw_sp_lpm_tree_create(); the slot becomes reusable
 * once its ref_count is zero (see mlxsw_sp_lpm_tree_find_unused()).
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
586 
587 static struct mlxsw_sp_lpm_tree *
588 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
589 		      struct mlxsw_sp_prefix_usage *prefix_usage,
590 		      enum mlxsw_sp_l3proto proto)
591 {
592 	struct mlxsw_sp_lpm_tree *lpm_tree;
593 	int i;
594 
595 	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
596 		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
597 		if (lpm_tree->ref_count != 0 &&
598 		    lpm_tree->proto == proto &&
599 		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
600 					     prefix_usage)) {
601 			mlxsw_sp_lpm_tree_hold(lpm_tree);
602 			return lpm_tree;
603 		}
604 	}
605 	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
606 }
607 
/* Take a reference on an LPM tree. */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}
612 
613 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
614 				  struct mlxsw_sp_lpm_tree *lpm_tree)
615 {
616 	if (--lpm_tree->ref_count == 0)
617 		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
618 }
619 
620 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
621 
/* Initialize LPM state: allocate the tree array (tree 0 is reserved,
 * see MLXSW_SP_LPM_TREE_MIN), assign hardware tree IDs and create one
 * default (empty prefix usage) tree per protocol.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	/* Default IPv4 tree. */
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	/* Default IPv6 tree. */
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
670 
671 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
672 {
673 	struct mlxsw_sp_lpm_tree *lpm_tree;
674 
675 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
676 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
677 
678 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
679 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
680 
681 	kfree(mlxsw_sp->router->lpm.trees);
682 }
683 
684 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
685 {
686 	return !!vr->fib4 || !!vr->fib6 ||
687 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
688 	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
689 }
690 
691 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
692 {
693 	struct mlxsw_sp_vr *vr;
694 	int i;
695 
696 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
697 		vr = &mlxsw_sp->router->vrs[i];
698 		if (!mlxsw_sp_vr_is_used(vr))
699 			return vr;
700 	}
701 	return NULL;
702 }
703 
704 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
705 				     const struct mlxsw_sp_fib *fib, u8 tree_id)
706 {
707 	char raltb_pl[MLXSW_REG_RALTB_LEN];
708 
709 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
710 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
711 			     tree_id);
712 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
713 }
714 
715 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
716 				       const struct mlxsw_sp_fib *fib)
717 {
718 	char raltb_pl[MLXSW_REG_RALTB_LEN];
719 
720 	/* Bind to tree 0 which is default */
721 	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
722 			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
723 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
724 }
725 
726 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
727 {
728 	/* For our purpose, squash main, default and local tables into one */
729 	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
730 		tb_id = RT_TABLE_MAIN;
731 	return tb_id;
732 }
733 
734 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
735 					    u32 tb_id)
736 {
737 	struct mlxsw_sp_vr *vr;
738 	int i;
739 
740 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
741 
742 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
743 		vr = &mlxsw_sp->router->vrs[i];
744 		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
745 			return vr;
746 	}
747 	return NULL;
748 }
749 
750 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
751 				u16 *vr_id)
752 {
753 	struct mlxsw_sp_vr *vr;
754 
755 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
756 	if (!vr)
757 		return -ESRCH;
758 	*vr_id = vr->id;
759 
760 	return 0;
761 }
762 
763 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
764 					    enum mlxsw_sp_l3proto proto)
765 {
766 	switch (proto) {
767 	case MLXSW_SP_L3_PROTO_IPV4:
768 		return vr->fib4;
769 	case MLXSW_SP_L3_PROTO_IPV6:
770 		return vr->fib6;
771 	}
772 	return NULL;
773 }
774 
/* Instantiate a VR for kernel table @tb_id: IPv4 and IPv6 unicast FIBs
 * plus IPv4 and IPv6 multicast tables. The VR is only published (tb_id
 * and table pointers set) once all four exist; on failure everything
 * created so far is destroyed in reverse order.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}
826 
/* Tear down a VR's tables in reverse order of their creation in
 * mlxsw_sp_vr_create(); NULLing each pointer marks the VR unused again
 * (see mlxsw_sp_vr_is_used()).
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
839 
840 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
841 					   struct netlink_ext_ack *extack)
842 {
843 	struct mlxsw_sp_vr *vr;
844 
845 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
846 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
847 	if (!vr)
848 		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
849 	return vr;
850 }
851 
852 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
853 {
854 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
855 	    list_empty(&vr->fib6->node_list) &&
856 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
857 	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
858 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
859 }
860 
861 static bool
862 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
863 				    enum mlxsw_sp_l3proto proto, u8 tree_id)
864 {
865 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
866 
867 	if (!mlxsw_sp_vr_is_used(vr))
868 		return false;
869 	if (fib->lpm_tree->id == tree_id)
870 		return true;
871 	return false;
872 }
873 
874 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
875 					struct mlxsw_sp_fib *fib,
876 					struct mlxsw_sp_lpm_tree *new_tree)
877 {
878 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
879 	int err;
880 
881 	fib->lpm_tree = new_tree;
882 	mlxsw_sp_lpm_tree_hold(new_tree);
883 	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
884 	if (err)
885 		goto err_tree_bind;
886 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
887 	return 0;
888 
889 err_tree_bind:
890 	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
891 	fib->lpm_tree = old_tree;
892 	return err;
893 }
894 
895 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
896 					 struct mlxsw_sp_fib *fib,
897 					 struct mlxsw_sp_lpm_tree *new_tree)
898 {
899 	enum mlxsw_sp_l3proto proto = fib->proto;
900 	struct mlxsw_sp_lpm_tree *old_tree;
901 	u8 old_id, new_id = new_tree->id;
902 	struct mlxsw_sp_vr *vr;
903 	int i, err;
904 
905 	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
906 	old_id = old_tree->id;
907 
908 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
909 		vr = &mlxsw_sp->router->vrs[i];
910 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
911 			continue;
912 		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
913 						   mlxsw_sp_vr_fib(vr, proto),
914 						   new_tree);
915 		if (err)
916 			goto err_tree_replace;
917 	}
918 
919 	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
920 	       sizeof(new_tree->prefix_ref_count));
921 	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
922 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
923 
924 	return 0;
925 
926 err_tree_replace:
927 	for (i--; i >= 0; i--) {
928 		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
929 			continue;
930 		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
931 					     mlxsw_sp_vr_fib(vr, proto),
932 					     old_tree);
933 	}
934 	return err;
935 }
936 
937 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
938 {
939 	struct mlxsw_sp_vr *vr;
940 	u64 max_vrs;
941 	int i;
942 
943 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
944 		return -EIO;
945 
946 	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
947 	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
948 					GFP_KERNEL);
949 	if (!mlxsw_sp->router->vrs)
950 		return -ENOMEM;
951 
952 	for (i = 0; i < max_vrs; i++) {
953 		vr = &mlxsw_sp->router->vrs[i];
954 		vr->id = i;
955 	}
956 
957 	return 0;
958 }
959 
960 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
961 
/* Flush pending FIB work, remove remaining routes from the device and
 * free the VR array.
 */
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}
975 
976 static struct net_device *
977 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
978 {
979 	struct ip_tunnel *tun = netdev_priv(ol_dev);
980 	struct net *net = dev_net(ol_dev);
981 
982 	return __dev_get_by_index(net, tun->parms.link);
983 }
984 
985 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
986 {
987 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
988 
989 	if (d)
990 		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
991 	else
992 		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
993 }
994 
995 static struct mlxsw_sp_rif *
996 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
997 		    const struct mlxsw_sp_rif_params *params,
998 		    struct netlink_ext_ack *extack);
999 
1000 static struct mlxsw_sp_rif_ipip_lb *
1001 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1002 				enum mlxsw_sp_ipip_type ipipt,
1003 				struct net_device *ol_dev,
1004 				struct netlink_ext_ack *extack)
1005 {
1006 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
1007 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1008 	struct mlxsw_sp_rif *rif;
1009 
1010 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1011 	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1012 		.common.dev = ol_dev,
1013 		.common.lag = false,
1014 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1015 	};
1016 
1017 	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1018 	if (IS_ERR(rif))
1019 		return ERR_CAST(rif);
1020 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1021 }
1022 
/* Allocate an IPIP entry for overlay device @ol_dev: create its
 * loopback RIF and snapshot the tunnel parameters (IPv4 underlay only;
 * IPv6 underlay is not expected here).
 *
 * Returns the entry or an ERR_PTR().
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		/* No IPv6-underlay tunnel type is supported here. */
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}
1062 
/* Release the loopback RIF, then free the entry itself. */
static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}
1069 
1070 static bool
1071 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1072 				  const enum mlxsw_sp_l3proto ul_proto,
1073 				  union mlxsw_sp_l3addr saddr,
1074 				  u32 ul_tb_id,
1075 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1076 {
1077 	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1078 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1079 	union mlxsw_sp_l3addr tun_saddr;
1080 
1081 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1082 		return false;
1083 
1084 	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1085 	return tun_ul_tb_id == ul_tb_id &&
1086 	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1087 }
1088 
1089 static int
1090 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1091 			      struct mlxsw_sp_fib_entry *fib_entry,
1092 			      struct mlxsw_sp_ipip_entry *ipip_entry)
1093 {
1094 	u32 tunnel_index;
1095 	int err;
1096 
1097 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1098 				  1, &tunnel_index);
1099 	if (err)
1100 		return err;
1101 
1102 	ipip_entry->decap_fib_entry = fib_entry;
1103 	fib_entry->decap.ipip_entry = ipip_entry;
1104 	fib_entry->decap.tunnel_index = tunnel_index;
1105 	return 0;
1106 }
1107 
1108 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1109 					  struct mlxsw_sp_fib_entry *fib_entry)
1110 {
1111 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1112 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1113 	fib_entry->decap.ipip_entry = NULL;
1114 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1115 			   1, fib_entry->decap.tunnel_index);
1116 }
1117 
1118 static struct mlxsw_sp_fib_node *
1119 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1120 			 size_t addr_len, unsigned char prefix_len);
1121 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1122 				     struct mlxsw_sp_fib_entry *fib_entry);
1123 
1124 static void
1125 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1126 				 struct mlxsw_sp_ipip_entry *ipip_entry)
1127 {
1128 	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1129 
1130 	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1131 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1132 
1133 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1134 }
1135 
1136 static void
1137 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1138 				  struct mlxsw_sp_ipip_entry *ipip_entry,
1139 				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1140 {
1141 	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1142 					  ipip_entry))
1143 		return;
1144 	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1145 
1146 	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1147 		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1148 }
1149 
/* Look up the FIB entry routing packets addressed to @addr in table @tb_id,
 * and return it only if it exists and is of the requested @type. Only IPv4
 * is supported; other protocols WARN and return NULL.
 */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* Look for an exact-match (/32) host route. */
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	/* Only the first entry of the node is considered. */
	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != type)
		return NULL;

	return fib_entry;
}
1195 
1196 /* Given an IPIP entry, find the corresponding decap route. */
1197 static struct mlxsw_sp_fib_entry *
1198 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1199 			       struct mlxsw_sp_ipip_entry *ipip_entry)
1200 {
1201 	static struct mlxsw_sp_fib_node *fib_node;
1202 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1203 	struct mlxsw_sp_fib_entry *fib_entry;
1204 	unsigned char saddr_prefix_len;
1205 	union mlxsw_sp_l3addr saddr;
1206 	struct mlxsw_sp_fib *ul_fib;
1207 	struct mlxsw_sp_vr *ul_vr;
1208 	const void *saddrp;
1209 	size_t saddr_len;
1210 	u32 ul_tb_id;
1211 	u32 saddr4;
1212 
1213 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1214 
1215 	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1216 	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1217 	if (!ul_vr)
1218 		return NULL;
1219 
1220 	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1221 	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1222 					   ipip_entry->ol_dev);
1223 
1224 	switch (ipip_ops->ul_proto) {
1225 	case MLXSW_SP_L3_PROTO_IPV4:
1226 		saddr4 = be32_to_cpu(saddr.addr4);
1227 		saddrp = &saddr4;
1228 		saddr_len = 4;
1229 		saddr_prefix_len = 32;
1230 		break;
1231 	case MLXSW_SP_L3_PROTO_IPV6:
1232 		WARN_ON(1);
1233 		return NULL;
1234 	}
1235 
1236 	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1237 					    saddr_prefix_len);
1238 	if (!fib_node || list_empty(&fib_node->entry_list))
1239 		return NULL;
1240 
1241 	fib_entry = list_first_entry(&fib_node->entry_list,
1242 				     struct mlxsw_sp_fib_entry, list);
1243 	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1244 		return NULL;
1245 
1246 	return fib_entry;
1247 }
1248 
1249 static struct mlxsw_sp_ipip_entry *
1250 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1251 			   enum mlxsw_sp_ipip_type ipipt,
1252 			   struct net_device *ol_dev)
1253 {
1254 	struct mlxsw_sp_ipip_entry *ipip_entry;
1255 
1256 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1257 	if (IS_ERR(ipip_entry))
1258 		return ipip_entry;
1259 
1260 	list_add_tail(&ipip_entry->ipip_list_node,
1261 		      &mlxsw_sp->router->ipip_list);
1262 
1263 	return ipip_entry;
1264 }
1265 
/* Stop tracking the tunnel, then release its resources. */
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
1273 
1274 static bool
1275 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1276 				  const struct net_device *ul_dev,
1277 				  enum mlxsw_sp_l3proto ul_proto,
1278 				  union mlxsw_sp_l3addr ul_dip,
1279 				  struct mlxsw_sp_ipip_entry *ipip_entry)
1280 {
1281 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1282 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1283 
1284 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1285 		return false;
1286 
1287 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1288 						 ul_tb_id, ipip_entry);
1289 }
1290 
1291 /* Given decap parameters, find the corresponding IPIP entry. */
1292 static struct mlxsw_sp_ipip_entry *
1293 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1294 				  const struct net_device *ul_dev,
1295 				  enum mlxsw_sp_l3proto ul_proto,
1296 				  union mlxsw_sp_l3addr ul_dip)
1297 {
1298 	struct mlxsw_sp_ipip_entry *ipip_entry;
1299 
1300 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1301 			    ipip_list_node)
1302 		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1303 						      ul_proto, ul_dip,
1304 						      ipip_entry))
1305 			return ipip_entry;
1306 
1307 	return NULL;
1308 }
1309 
1310 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1311 				      const struct net_device *dev,
1312 				      enum mlxsw_sp_ipip_type *p_type)
1313 {
1314 	struct mlxsw_sp_router *router = mlxsw_sp->router;
1315 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1316 	enum mlxsw_sp_ipip_type ipipt;
1317 
1318 	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1319 		ipip_ops = router->ipip_ops_arr[ipipt];
1320 		if (dev->type == ipip_ops->dev_type) {
1321 			if (p_type)
1322 				*p_type = ipipt;
1323 			return true;
1324 		}
1325 	}
1326 	return false;
1327 }
1328 
/* Return true if @dev is a netdevice of a tunnel type this driver can
 * offload (i.e. a potential tunnel overlay device).
 */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1334 
1335 static struct mlxsw_sp_ipip_entry *
1336 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1337 				   const struct net_device *ol_dev)
1338 {
1339 	struct mlxsw_sp_ipip_entry *ipip_entry;
1340 
1341 	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1342 			    ipip_list_node)
1343 		if (ipip_entry->ol_dev == ol_dev)
1344 			return ipip_entry;
1345 
1346 	return NULL;
1347 }
1348 
/* Find the next IPIP entry whose underlay device is @ul_dev, resuming the
 * scan after @start, or from the list head when @start is NULL.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() makes the _continue loop below start at the
	 * list head when @start is NULL.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1369 
/* Return true if @dev is the underlay device of at least one tracked IPIP
 * tunnel.
 */
bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}
1375 
1376 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1377 						const struct net_device *ol_dev,
1378 						enum mlxsw_sp_ipip_type ipipt)
1379 {
1380 	const struct mlxsw_sp_ipip_ops *ops
1381 		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1382 
1383 	/* For deciding whether decap should be offloaded, we don't care about
1384 	 * overlay protocol, so ask whether either one is supported.
1385 	 */
1386 	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1387 	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1388 }
1389 
1390 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1391 						struct net_device *ol_dev)
1392 {
1393 	struct mlxsw_sp_ipip_entry *ipip_entry;
1394 	enum mlxsw_sp_l3proto ul_proto;
1395 	enum mlxsw_sp_ipip_type ipipt;
1396 	union mlxsw_sp_l3addr saddr;
1397 	u32 ul_tb_id;
1398 
1399 	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1400 	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1401 		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1402 		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1403 		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1404 		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1405 							  saddr, ul_tb_id,
1406 							  NULL)) {
1407 			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1408 								ol_dev);
1409 			if (IS_ERR(ipip_entry))
1410 				return PTR_ERR(ipip_entry);
1411 		}
1412 	}
1413 
1414 	return 0;
1415 }
1416 
/* NETDEV_UNREGISTER handler: tear down the tunnel's offload state, if any. */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1426 
/* The overlay device came up: if a matching local route exists in the
 * underlay VRF, promote it to a decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry =
		mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);

	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}
1438 
/* Configure (or, per @enable, deconfigure) the loopback RIF backing an IPIP
 * tunnel via the RITR register. Returns 0 or a negative errno; an IPv6
 * underlay is not supported and yields -EAFNOSUPPORT.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		/* The general RIF part carries the device MTU... */
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		/* ...the loopback part the underlay VR, local address and
		 * GRE key.
		 */
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
1465 
1466 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1467 						 struct net_device *ol_dev)
1468 {
1469 	struct mlxsw_sp_ipip_entry *ipip_entry;
1470 	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1471 	struct mlxsw_sp_vr *ul_vr;
1472 	int err = 0;
1473 
1474 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1475 	if (ipip_entry) {
1476 		lb_rif = ipip_entry->ol_lb;
1477 		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1478 		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1479 		if (err)
1480 			goto out;
1481 		lb_rif->common.mtu = ol_dev->mtu;
1482 	}
1483 
1484 out:
1485 	return err;
1486 }
1487 
/* NETDEV_UP handler for an overlay device: only offloaded tunnels react. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1497 
/* The overlay device went down: retract the decap offload, if any. */
static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
1505 
/* NETDEV_DOWN handler for an overlay device: only offloaded tunnels react. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);

	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1515 
1516 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1517 					 struct mlxsw_sp_rif *old_rif,
1518 					 struct mlxsw_sp_rif *new_rif);
/* Replace the loopback RIF of @ipip_entry with a freshly created one.
 *
 * RIFs cannot be edited in place, so a new RIF is created first and the old
 * one destroyed afterwards; with @keep_encap, next hops using the old RIF
 * are migrated to the new one before the old RIF goes away.
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1544 
1545 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1546 					struct mlxsw_sp_rif *rif);
1547 
1548 /**
1549  * Update the offload related to an IPIP entry. This always updates decap, and
1550  * in addition to that it also:
1551  * @recreate_loopback: recreates the associated loopback RIF
1552  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1553  *              relevant when recreate_loopback is true.
1554  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1555  *                   is only relevant when recreate_loopback is false.
1556  */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	/* Re-promote the decap route if the tunnel is administratively up. */
	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}
1590 
/* Handle enslavement of an offloaded tunnel's overlay device to a VRF: the
 * underlay table may have changed, so the loopback RIF must be recreated;
 * tunnels that now conflict on the local address are demoted instead.
 */
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}
1621 
/* The underlay device moved to a different VRF: recreate the loopback RIF
 * and migrate the encap next hops to it.
 */
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}
1631 
/* The underlay device came up: refresh the next hops that use the tunnel,
 * keeping the current loopback RIF.
 */
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1640 
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1653 
1654 static int
1655 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1656 					struct net_device *ol_dev,
1657 					struct netlink_ext_ack *extack)
1658 {
1659 	const struct mlxsw_sp_ipip_ops *ipip_ops;
1660 	struct mlxsw_sp_ipip_entry *ipip_entry;
1661 	int err;
1662 
1663 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1664 	if (!ipip_entry)
1665 		/* A change might make a tunnel eligible for offloading, but
1666 		 * that is currently not implemented. What falls to slow path
1667 		 * stays there.
1668 		 */
1669 		return 0;
1670 
1671 	/* A change might make a tunnel not eligible for offloading. */
1672 	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1673 						 ipip_entry->ipipt)) {
1674 		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1675 		return 0;
1676 	}
1677 
1678 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1679 	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1680 	return err;
1681 }
1682 
1683 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1684 				       struct mlxsw_sp_ipip_entry *ipip_entry)
1685 {
1686 	struct net_device *ol_dev = ipip_entry->ol_dev;
1687 
1688 	if (ol_dev->flags & IFF_UP)
1689 		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1690 	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1691 }
1692 
/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in the argument
 * `except'.
 */
1699 bool
1700 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1701 				     enum mlxsw_sp_l3proto ul_proto,
1702 				     union mlxsw_sp_l3addr saddr,
1703 				     u32 ul_tb_id,
1704 				     const struct mlxsw_sp_ipip_entry *except)
1705 {
1706 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1707 
1708 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1709 				 ipip_list_node) {
1710 		if (ipip_entry != except &&
1711 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1712 						      ul_tb_id, ipip_entry)) {
1713 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1714 			return true;
1715 		}
1716 	}
1717 
1718 	return false;
1719 }
1720 
1721 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1722 						     struct net_device *ul_dev)
1723 {
1724 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1725 
1726 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1727 				 ipip_list_node) {
1728 		struct net_device *ipip_ul_dev =
1729 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1730 
1731 		if (ipip_ul_dev == ul_dev)
1732 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1733 	}
1734 }
1735 
/* Dispatch a netdevice notifier event on a tunnel overlay device to the
 * matching handler. Returns 0, or a negative errno from the handler.
 */
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		/* Only enslavement to a VRF master is of interest. */
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	case NETDEV_CHANGEMTU:
		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
	}
	return 0;
}
1773 
/* Handle a netdevice event on underlay device @ul_dev for one specific IPIP
 * entry. Returns 0, or a negative errno from the handler.
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		/* Only enslavement to a VRF master is of interest. */
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}
1805 
/* Dispatch a netdevice event on @ul_dev to every IPIP entry for which it is
 * the underlay device. If handling fails, all tunnels using @ul_dev are
 * demoted to the slow path and the error is returned.
 */
int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	/* NULL starts the scan; each iteration resumes after the previous
	 * match.
	 */
	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}
1829 
/* Promote the local route matching @ul_sip in table @ul_tb_id from a plain
 * trap entry to an NVE decap entry using @tunnel_index. On failure, the
 * entry is reverted to a trap entry and the error is returned.
 */
int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
				      enum mlxsw_sp_l3proto ul_proto,
				      const union mlxsw_sp_l3addr *ul_sip,
				      u32 tunnel_index)
{
	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	/* It is valid to create a tunnel with a local IP and only later
	 * assign this IP address to a local interface
	 */
	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
							 ul_proto, ul_sip,
							 type);
	if (!fib_entry)
		return 0;

	fib_entry->decap.tunnel_index = tunnel_index;
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;

	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	if (err)
		goto err_fib_entry_update;

	return 0;

err_fib_entry_update:
	/* Roll the entry back to trapping to the CPU. */
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
	return err;
}
1862 
1863 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1864 				      enum mlxsw_sp_l3proto ul_proto,
1865 				      const union mlxsw_sp_l3addr *ul_sip)
1866 {
1867 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1868 	struct mlxsw_sp_fib_entry *fib_entry;
1869 
1870 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1871 							 ul_proto, ul_sip,
1872 							 type);
1873 	if (!fib_entry)
1874 		return;
1875 
1876 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1877 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1878 }
1879 
/* Hash table key: the kernel neighbour this entry mirrors. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1883 
/* Driver-side state for one kernel neighbour. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;	/* member of the RIF's neigh_list */
	struct rhash_head ht_node;	/* member of router->neigh_ht */
	struct mlxsw_sp_neigh_key key;
	u16 rif;			/* index of the RIF behind which the
					 * neighbour lives
					 */
	bool connected;
	unsigned char ha[ETH_ALEN];	/* hardware address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;	/* flow counter; only meaningful when
					 * counter_valid is set
					 */
	bool counter_valid;
};
1898 
/* Neighbour hash table parameters: entries are keyed by the kernel
 * neighbour pointer (struct mlxsw_sp_neigh_key).
 */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1904 
1905 struct mlxsw_sp_neigh_entry *
1906 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1907 			struct mlxsw_sp_neigh_entry *neigh_entry)
1908 {
1909 	if (!neigh_entry) {
1910 		if (list_empty(&rif->neigh_list))
1911 			return NULL;
1912 		else
1913 			return list_first_entry(&rif->neigh_list,
1914 						typeof(*neigh_entry),
1915 						rif_list_node);
1916 	}
1917 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1918 		return NULL;
1919 	return list_next_entry(neigh_entry, rif_list_node);
1920 }
1921 
/* Return the address family of the underlying neighbour table. */
int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}
1926 
/* Return the hardware address stored for this neighbour entry. */
unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}
1932 
/* Return the neighbour's IPv4 destination address in host byte order. */
u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	/* primary_key holds the address in network byte order. */
	return ntohl(*((__be32 *) n->primary_key));
}
1940 
/* Return a pointer to the neighbour's IPv6 destination address. */
struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}
1949 
/* Read the flow counter bound to the neighbour entry into @p_counter.
 * Returns -EINVAL when no counter was allocated for the entry.
 */
int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
1960 
1961 static struct mlxsw_sp_neigh_entry *
1962 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1963 			   u16 rif)
1964 {
1965 	struct mlxsw_sp_neigh_entry *neigh_entry;
1966 
1967 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1968 	if (!neigh_entry)
1969 		return NULL;
1970 
1971 	neigh_entry->key.n = n;
1972 	neigh_entry->rif = rif;
1973 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1974 
1975 	return neigh_entry;
1976 }
1977 
/* Free a neighbour entry obtained from mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1982 
/* Make the entry discoverable through the router's neighbour hash table. */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
1991 
/* Remove the entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
2000 
2001 static bool
2002 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2003 				    struct mlxsw_sp_neigh_entry *neigh_entry)
2004 {
2005 	struct devlink *devlink;
2006 	const char *table_name;
2007 
2008 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2009 	case AF_INET:
2010 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2011 		break;
2012 	case AF_INET6:
2013 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2014 		break;
2015 	default:
2016 		WARN_ON(1);
2017 		return false;
2018 	}
2019 
2020 	devlink = priv_to_devlink(mlxsw_sp->core);
2021 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2022 }
2023 
2024 static void
2025 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2026 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2027 {
2028 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2029 		return;
2030 
2031 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2032 		return;
2033 
2034 	neigh_entry->counter_valid = true;
2035 }
2036 
2037 static void
2038 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2039 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2040 {
2041 	if (!neigh_entry->counter_valid)
2042 		return;
2043 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2044 				   neigh_entry->counter_index);
2045 	neigh_entry->counter_valid = false;
2046 }
2047 
/* Create a neighbour entry for 'n' and register it with the router:
 * resolve the RIF from the neighbour's netdev, insert the entry into the
 * neighbour hash table, bind an activity counter (best effort) and link
 * the entry on the RIF's neighbour list.
 * Returns ERR_PTR(-EINVAL) when the netdev has no RIF, ERR_PTR(-ENOMEM)
 * on allocation failure.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	/* Counter allocation is best effort and cannot fail the create. */
	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
2076 
/* Tear down a neighbour entry in the reverse order of
 * mlxsw_sp_neigh_entry_create(): unlink from the RIF list, release its
 * counter (if any), remove from the hash table and free it.
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
2086 
2087 static struct mlxsw_sp_neigh_entry *
2088 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2089 {
2090 	struct mlxsw_sp_neigh_key key;
2091 
2092 	key.n = n;
2093 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2094 				      &key, mlxsw_sp_neigh_ht_params);
2095 }
2096 
/* Initialize the neighbour activity polling interval from the default
 * table parameters. With IPv6 enabled, the shorter of the ARP and NDISC
 * DELAY_PROBE_TIME values is used.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
2111 
/* Handle one IPv4 entry from a RAUHTD activity dump: look up the
 * corresponding kernel neighbour and feed it an event so the kernel
 * considers the neighbour active.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* neigh_lookup() expects the key in network byte order. */
	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	/* Drop the reference taken by neigh_lookup(). */
	neigh_release(n);
}
2139 
#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of mlxsw_sp_router_neigh_ent_ipv4_process(): report
 * device-observed activity for one neighbour to the kernel ND table.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	/* Drop the reference taken by neigh_lookup(). */
	neigh_release(n);
}
#else
/* Stub for kernels built without IPv6; IPv6 records are ignored. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
2174 
2175 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2176 						   char *rauhtd_pl,
2177 						   int rec_index)
2178 {
2179 	u8 num_entries;
2180 	int i;
2181 
2182 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2183 								rec_index);
2184 	/* Hardware starts counting at 0, so add 1. */
2185 	num_entries++;
2186 
2187 	/* Each record consists of several neighbour entries. */
2188 	for (i = 0; i < num_entries; i++) {
2189 		int ent_index;
2190 
2191 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2192 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2193 						       ent_index);
2194 	}
2195 
2196 }
2197 
/* Unlike IPv4 records, an IPv6 RAUHTD record holds a single entry. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}
2206 
/* Dispatch one RAUHTD record to the protocol-specific handler. Records
 * of any other type are skipped.
 */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	}
}
2221 
2222 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2223 {
2224 	u8 num_rec, last_rec_index, num_entries;
2225 
2226 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2227 	last_rec_index = num_rec - 1;
2228 
2229 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2230 		return false;
2231 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2232 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2233 		return true;
2234 
2235 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2236 								last_rec_index);
2237 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2238 		return true;
2239 	return false;
2240 }
2241 
/* Dump the device's active neighbour entries of the given type via the
 * RAUHTD register and report each entry to the kernel. The query is
 * repeated while the previous response came back full, since more
 * records may still be pending in the device.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
2271 
2272 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2273 {
2274 	enum mlxsw_reg_rauhtd_type type;
2275 	char *rauhtd_pl;
2276 	int err;
2277 
2278 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2279 	if (!rauhtd_pl)
2280 		return -ENOMEM;
2281 
2282 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2283 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2284 	if (err)
2285 		goto out;
2286 
2287 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2288 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2289 out:
2290 	kfree(rauhtd_pl);
2291 	return err;
2292 }
2293 
/* Keep neighbours that are used as nexthops alive by kicking the kernel
 * neighbour state machine for each of them, independent of traffic.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
2308 
2309 static void
2310 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2311 {
2312 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2313 
2314 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2315 			       msecs_to_jiffies(interval));
2316 }
2317 
/* Periodic work: sync neighbour activity from the device to the kernel,
 * refresh nexthop neighbours and re-arm itself.
 */
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp_router *router;
	int err;

	router = container_of(work, struct mlxsw_sp_router,
			      neighs_update.dw.work);
	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);

	/* Re-arm; the work runs for the lifetime of the router. */
	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
}
2333 
/* Periodic work probing nexthop neighbours that are not yet resolved;
 * re-arms itself every MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL ms.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2359 
2360 static void
2361 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2362 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2363 			      bool removing);
2364 
2365 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2366 {
2367 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2368 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2369 }
2370 
/* Write or remove an IPv4 neighbour host entry in the device via the
 * RAUHT register, attaching the activity counter when one is bound.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2387 
/* IPv6 counterpart of mlxsw_sp_router_neigh_entry_op4(): write or remove
 * an IPv6 neighbour host entry via the RAUHT register.
 */
static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2404 
2405 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2406 {
2407 	struct neighbour *n = neigh_entry->key.n;
2408 
2409 	/* Packets with a link-local destination address are trapped
2410 	 * after LPM lookup and never reach the neighbour table, so
2411 	 * there is no need to program such neighbours to the device.
2412 	 */
2413 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2414 	    IPV6_ADDR_LINKLOCAL)
2415 		return true;
2416 	return false;
2417 }
2418 
/* Reflect the neighbour's resolved state to the device: program the host
 * entry when 'adding', remove it otherwise. Removing an entry that was
 * never connected is a no-op. IPv6 link-local neighbours are never
 * programmed (see mlxsw_sp_neigh_ipv6_ignore()).
 */
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl->family == AF_INET) {
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
			return;
		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else {
		WARN_ON_ONCE(1);
	}
}
2439 
/* Bind or unbind an activity counter on the neighbour entry and rewrite
 * the host entry so the device picks up the new counter association.
 * NOTE(review): the final update always uses adding=true — it re-adds
 * the entry rather than toggling its state; presumably callers only
 * invoke this on entries that are (or should be) programmed.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2451 
/* Deferred-work context for netevent notifications. 'n' is used only by
 * the neighbour update work and carries a neighbour reference taken at
 * schedule time (dropped by the work item).
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;
};
2457 
/* Process a NETEVENT_NEIGH_UPDATE in process context: create, update or
 * destroy the driver's neighbour entry according to the kernel
 * neighbour's NUD state, and propagate the change to dependent nexthops.
 * Drops the neighbour reference taken when the work was scheduled.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	/* Nothing to do for an unknown neighbour that is not resolved. */
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	/* Disconnected entries with no nexthop users can go away. */
	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);
	kfree(net_work);
}
2504 
2505 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2506 
2507 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2508 {
2509 	struct mlxsw_sp_netevent_work *net_work =
2510 		container_of(work, struct mlxsw_sp_netevent_work, work);
2511 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2512 
2513 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2514 	kfree(net_work);
2515 }
2516 
2517 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2518 
2519 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2520 {
2521 	struct mlxsw_sp_netevent_work *net_work =
2522 		container_of(work, struct mlxsw_sp_netevent_work, work);
2523 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2524 
2525 	__mlxsw_sp_router_init(mlxsw_sp);
2526 	kfree(net_work);
2527 }
2528 
/* Schedule 'cb' in process context for a netevent received in atomic
 * context. Only events from the initial network namespace are handled.
 * Returns a NOTIFY_* code for the notifier chain.
 */
static int mlxsw_sp_router_schedule_work(struct net *net,
					 struct notifier_block *nb,
					 void (*cb)(struct work_struct *))
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_router *router;

	if (!net_eq(net, &init_net))
		return NOTIFY_DONE;

	/* GFP_ATOMIC: called from the netevent notifier in atomic context. */
	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
	if (!net_work)
		return NOTIFY_BAD;

	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
	INIT_WORK(&net_work->work, cb);
	net_work->mlxsw_sp = router->mlxsw_sp;
	mlxsw_core_schedule_work(&net_work->work);
	return NOTIFY_DONE;
}
2549 
/* Netevent notifier callback. Runs in atomic context, so any real work
 * is deferred to the process-context handlers above; only events for
 * netdevs backed by this driver are acted upon.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
					  unsigned long event, void *ptr)
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		/* Adopt the new probe time as the polling interval. */
		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		net_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
		return mlxsw_sp_router_schedule_work(ptr, nb,
				mlxsw_sp_router_mp_hash_event_work);

	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
		return mlxsw_sp_router_schedule_work(ptr, nb,
				mlxsw_sp_router_update_priority_work);
	}

	return NOTIFY_DONE;
}
2622 
/* Set up neighbour tracking: the neighbour hash table, the polling
 * interval and the two periodic works (activity update and unresolved
 * nexthop probing), both kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}
2646 
/* Tear down neighbour tracking: stop both periodic works before
 * destroying the hash table they iterate over.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
2653 
/* Called when a RIF goes away: unprogram and destroy every neighbour
 * entry that was bound to it.
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
2665 
/* A nexthop either egresses through an Ethernet RIF or an IP-in-IP
 * tunnel; the type selects which member of the union below is valid.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,
	MLXSW_SP_NEXTHOP_TYPE_IPIP,
};

/* Hash-table key for nexthops: the kernel IPv4 nexthop they mirror. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	/* Gateway address; sized for IPv6, also holds IPv4 addresses. */
	unsigned char gw_addr[sizeof(struct in6_addr)];
	int ifindex;
	int nh_weight;
	int norm_nh_weight;
	int num_adj_entries;
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index;
	bool counter_valid;
};
2707 
2708 struct mlxsw_sp_nexthop_group {
2709 	void *priv;
2710 	struct rhash_head ht_node;
2711 	struct list_head fib_list; /* list of fib entries that use this group */
2712 	struct neigh_table *neigh_tbl;
2713 	u8 adj_index_valid:1,
2714 	   gateway:1; /* routes using the group use a gateway */
2715 	u32 adj_index;
2716 	u16 ecmp_size;
2717 	u16 count;
2718 	int sum_norm_weight;
2719 	struct mlxsw_sp_nexthop nexthops[0];
2720 #define nh_rif	nexthops[0].rif
2721 };
2722 
2723 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2724 				    struct mlxsw_sp_nexthop *nh)
2725 {
2726 	struct devlink *devlink;
2727 
2728 	devlink = priv_to_devlink(mlxsw_sp->core);
2729 	if (!devlink_dpipe_table_counter_enabled(devlink,
2730 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2731 		return;
2732 
2733 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2734 		return;
2735 
2736 	nh->counter_valid = true;
2737 }
2738 
2739 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2740 				   struct mlxsw_sp_nexthop *nh)
2741 {
2742 	if (!nh->counter_valid)
2743 		return;
2744 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2745 	nh->counter_valid = false;
2746 }
2747 
2748 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2749 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2750 {
2751 	if (!nh->counter_valid)
2752 		return -EINVAL;
2753 
2754 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2755 					 p_counter, NULL);
2756 }
2757 
2758 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2759 					       struct mlxsw_sp_nexthop *nh)
2760 {
2761 	if (!nh) {
2762 		if (list_empty(&router->nexthop_list))
2763 			return NULL;
2764 		else
2765 			return list_first_entry(&router->nexthop_list,
2766 						typeof(*nh), router_list_node);
2767 	}
2768 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2769 		return NULL;
2770 	return list_next_entry(nh, router_list_node);
2771 }
2772 
2773 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2774 {
2775 	return nh->offloaded;
2776 }
2777 
2778 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2779 {
2780 	if (!nh->offloaded)
2781 		return NULL;
2782 	return nh->neigh_entry->ha;
2783 }
2784 
/* Report where the nexthop lives in the adjacency table: the group's
 * base index and size, plus this nexthop's offset within the group
 * (the sum of adjacency entries of offloaded nexthops preceding it).
 * Returns -EINVAL when the nexthop or its group is not offloaded.
 */
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
			     u32 *p_adj_size, u32 *p_adj_hash_index)
{
	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
	u32 adj_hash_index = 0;
	int i;

	if (!nh->offloaded || !nh_grp->adj_index_valid)
		return -EINVAL;

	*p_adj_index = nh_grp->adj_index;
	*p_adj_size = nh_grp->ecmp_size;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];

		if (nh_iter == nh)
			break;
		if (nh_iter->offloaded)
			adj_hash_index += nh_iter->num_adj_entries;
	}

	*p_adj_hash_index = adj_hash_index;
	return 0;
}
2810 
/* Return the router interface the nexthop egresses through. */
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
2815 
2816 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2817 {
2818 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2819 	int i;
2820 
2821 	for (i = 0; i < nh_grp->count; i++) {
2822 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2823 
2824 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2825 			return true;
2826 	}
2827 	return false;
2828 }
2829 
/* For an IPv4 group, 'priv' holds the fib_info the group was created
 * for.
 */
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}

/* Lookup key for the nexthop group hash table: the protocol plus the
 * protocol-specific handle the group is keyed on.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};
2843 
2844 static bool
2845 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2846 				    const struct in6_addr *gw, int ifindex,
2847 				    int weight)
2848 {
2849 	int i;
2850 
2851 	for (i = 0; i < nh_grp->count; i++) {
2852 		const struct mlxsw_sp_nexthop *nh;
2853 
2854 		nh = &nh_grp->nexthops[i];
2855 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2856 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2857 			return true;
2858 	}
2859 
2860 	return false;
2861 }
2862 
/* An IPv6 group matches a fib6 entry iff their sizes agree and every
 * rt6 nexthop (device, weight, gateway) is present in the group.
 */
static bool
mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
			    const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	if (nh_grp->count != fib6_entry->nrt6)
		return false;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct in6_addr *gw;
		int ifindex, weight;

		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
							 weight))
			return false;
	}

	return true;
}
2886 
/* rhashtable compare callback for the nexthop group table; per the
 * rhashtable contract it returns zero on a match and non-zero otherwise.
 * IPv4 groups compare by fib_info pointer, IPv6 groups by membership.
 */
static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}
2904 
/* Return the group's address family (AF_INET/AF_INET6), derived from the
 * neighbour table it was created against.
 */
static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
2910 
/* rhashtable object hash: IPv4 groups hash the fib_info pointer, IPv6
 * groups hash the nexthop count XORed with the member ifindexes. Must
 * stay consistent with mlxsw_sp_nexthop_group_hash() on the key side.
 */
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2935 
/* Hash an IPv6 fib entry's nexthop set the same way
 * mlxsw_sp_nexthop_group_hash_obj() hashes a group: entry count XORed
 * with the nexthop device ifindexes.
 */
static u32
mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
{
	unsigned int val = fib6_entry->nrt6;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct net_device *dev;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
		val ^= dev->ifindex;
	}

	return jhash(&val, sizeof(val), seed);
}
2950 
/* rhashtable key hash, matching mlxsw_sp_nexthop_group_hash_obj() for
 * the same logical group.
 */
static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2966 
/* The group table is looked up by mlxsw_sp_nexthop_group_cmp_arg rather
 * than by raw object bytes, hence separate key/object hash and compare
 * callbacks.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2973 
2974 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2975 					 struct mlxsw_sp_nexthop_group *nh_grp)
2976 {
2977 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2978 	    !nh_grp->gateway)
2979 		return 0;
2980 
2981 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2982 				      &nh_grp->ht_node,
2983 				      mlxsw_sp_nexthop_group_ht_params);
2984 }
2985 
2986 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2987 					  struct mlxsw_sp_nexthop_group *nh_grp)
2988 {
2989 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2990 	    !nh_grp->gateway)
2991 		return;
2992 
2993 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2994 			       &nh_grp->ht_node,
2995 			       mlxsw_sp_nexthop_group_ht_params);
2996 }
2997 
2998 static struct mlxsw_sp_nexthop_group *
2999 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3000 			       struct fib_info *fi)
3001 {
3002 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3003 
3004 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3005 	cmp_arg.fi = fi;
3006 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3007 				      &cmp_arg,
3008 				      mlxsw_sp_nexthop_group_ht_params);
3009 }
3010 
3011 static struct mlxsw_sp_nexthop_group *
3012 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3013 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3014 {
3015 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3016 
3017 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3018 	cmp_arg.fib6_entry = fib6_entry;
3019 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3020 				      &cmp_arg,
3021 				      mlxsw_sp_nexthop_group_ht_params);
3022 }
3023 
/* Individual nexthop hashtable parameters. Nexthops are keyed by their
 * kernel fib_nh pointer (struct mlxsw_sp_nexthop_key) using the default
 * hash function.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
3029 
3030 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3031 				   struct mlxsw_sp_nexthop *nh)
3032 {
3033 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3034 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3035 }
3036 
3037 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3038 				    struct mlxsw_sp_nexthop *nh)
3039 {
3040 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3041 			       mlxsw_sp_nexthop_ht_params);
3042 }
3043 
3044 static struct mlxsw_sp_nexthop *
3045 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3046 			struct mlxsw_sp_nexthop_key key)
3047 {
3048 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3049 				      mlxsw_sp_nexthop_ht_params);
3050 }
3051 
/* Issue a single RALEU register write that re-points every route in the
 * given virtual router from the old adjacency group (adj_index/ecmp_size)
 * to the new one (new_adj_index/new_ecmp_size).
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
3066 
/* Re-point all routes using this nexthop group from the old adjacency
 * group to the group's current adj_index/ecmp_size, one RALEU write per
 * FIB (virtual router).
 *
 * NOTE(review): only *consecutive* duplicates are skipped, so this
 * presumably relies on fib_list keeping entries of the same FIB adjacent;
 * entries of one FIB interleaved with another would trigger redundant
 * (but harmless-looking) RALEU writes - confirm against list insertion
 * order.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		/* Skip a FIB already updated by the previous iteration. */
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}
3089 
/* Program a single Ethernet adjacency entry (RATR register) for the
 * nexthop: destination MAC taken from its neighbour entry and egress RIF.
 * A flow counter is attached if one was allocated for this nexthop.
 */
static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				     struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	if (nh->counter_valid)
		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
	else
		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
3107 
3108 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3109 			    struct mlxsw_sp_nexthop *nh)
3110 {
3111 	int i;
3112 
3113 	for (i = 0; i < nh->num_adj_entries; i++) {
3114 		int err;
3115 
3116 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3117 		if (err)
3118 			return err;
3119 	}
3120 
3121 	return 0;
3122 }
3123 
/* Program a single adjacency entry for a tunnel nexthop by delegating to
 * the per-tunnel-type nexthop_update() callback.
 */
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					  u32 adj_index,
					  struct mlxsw_sp_nexthop *nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}
3133 
3134 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3135 					u32 adj_index,
3136 					struct mlxsw_sp_nexthop *nh)
3137 {
3138 	int i;
3139 
3140 	for (i = 0; i < nh->num_adj_entries; i++) {
3141 		int err;
3142 
3143 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3144 						     nh);
3145 		if (err)
3146 			return err;
3147 	}
3148 
3149 	return 0;
3150 }
3151 
3152 static int
3153 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3154 			      struct mlxsw_sp_nexthop_group *nh_grp,
3155 			      bool reallocate)
3156 {
3157 	u32 adj_index = nh_grp->adj_index; /* base */
3158 	struct mlxsw_sp_nexthop *nh;
3159 	int i;
3160 	int err;
3161 
3162 	for (i = 0; i < nh_grp->count; i++) {
3163 		nh = &nh_grp->nexthops[i];
3164 
3165 		if (!nh->should_offload) {
3166 			nh->offloaded = 0;
3167 			continue;
3168 		}
3169 
3170 		if (nh->update || reallocate) {
3171 			switch (nh->type) {
3172 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3173 				err = mlxsw_sp_nexthop_update
3174 					    (mlxsw_sp, adj_index, nh);
3175 				break;
3176 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3177 				err = mlxsw_sp_nexthop_ipip_update
3178 					    (mlxsw_sp, adj_index, nh);
3179 				break;
3180 			}
3181 			if (err)
3182 				return err;
3183 			nh->update = 0;
3184 			nh->offloaded = 1;
3185 		}
3186 		adj_index += nh->num_adj_entries;
3187 	}
3188 	return 0;
3189 }
3190 
3191 static bool
3192 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3193 				 const struct mlxsw_sp_fib_entry *fib_entry);
3194 
/* Rewrite the hardware route of every fib entry using this nexthop group.
 * Only the first entry of each fib node is programmed in hardware, so the
 * others are skipped.
 */
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}
3212 
3213 static void
3214 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3215 				   enum mlxsw_reg_ralue_op op, int err);
3216 
/* Refresh the offload indication of every fib entry using this nexthop
 * group, without touching the hardware (err passed as 0). Like the update
 * path, only the first entry of each fib node is relevant.
 */
static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
3230 
/* Round an adjacency group size up to the next size the device supports.
 * Valid sizes are 1-64 (kept as-is), 512, 1024, 2048 and 4096; anything
 * larger than 2048 is clamped to 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	static const u16 valid_sizes[] = { 512, 1024, 2048 };
	int i;

	if (*p_adj_grp_size <= 64)
		return;

	for (i = 0; i < 3; i++) {
		if (*p_adj_grp_size <= valid_sizes[i]) {
			*p_adj_grp_size = valid_sizes[i];
			return;
		}
	}
	*p_adj_grp_size = 4096;
}
3247 
/* Round the group size down to the largest device-supported size that
 * fits inside the actual KVDL allocation. Allocations below 512 entries
 * leave the requested size untouched (it is already a valid 1-64 size).
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	static const u16 valid_sizes[] = { 4096, 2048, 1024, 512 };
	int i;

	for (i = 0; i < 4; i++) {
		if (alloc_size >= valid_sizes[i]) {
			*p_adj_grp_size = valid_sizes[i];
			return;
		}
	}
}
3260 
/* Compute a usable adjacency group size: round the requested size up to a
 * device-supported size, ask KVDL how many entries such an allocation
 * would actually yield, then round back down to the largest supported
 * size that fits. Returns 0 on success or the KVDL query error.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
					      *p_adj_grp_size, &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
3284 
/* Normalize nexthop weights by dividing each by the GCD of all
 * offloadable weights, minimizing the number of adjacency entries needed.
 * If no nexthop is offloadable, g stays 0, the second loop body never
 * executes (so no division by zero) and sum_norm_weight ends up 0 -
 * callers treat that as "nothing to offload".
 */
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int i, g = 0, sum_norm_weight = 0;
	struct mlxsw_sp_nexthop *nh;

	/* First pass: GCD of the weights of all offloadable nexthops. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		if (g > 0)
			g = gcd(nh->nh_weight, g);
		else
			g = nh->nh_weight;
	}

	/* Second pass: store normalized weights and their sum. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum_norm_weight += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum_norm_weight;
}
3313 
/* Distribute the group's ecmp_size adjacency entries among its
 * offloadable nexthops in proportion to their normalized weights. Using
 * the rounded cumulative share as each nexthop's upper bound guarantees
 * the per-nexthop counts sum exactly to ecmp_size.
 */
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		weight += nh->norm_nh_weight;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
3333 
/* Re-synchronize a nexthop group's hardware state with its current
 * software state. Depending on what changed this either: does nothing but
 * refresh fib entries (non-gateway group), rewrites MACs of existing
 * adjacency entries in place (no offload-set change), or allocates a new
 * adjacency group, programs it, migrates routes to it and frees the old
 * one. On any failure the group falls back to trapping packets to the
 * CPU (set_trap label).
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect whether the set of offloaded nexthops changed; nexthops
	 * becoming offloadable are flagged for (re)programming.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everthing flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Switch the group over to the newly allocated adjacency block,
	 * remembering the old one for migration and freeing below.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Migrate existing routes to the new adjacency group, then free
	 * the old group regardless of the migration outcome.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   old_ecmp_size, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fallback: invalidate the adjacency index, clear per-nexthop
	 * offload marks, point all fib entries at the CPU trap and free
	 * the adjacency block if one was held.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				   nh_grp->ecmp_size, nh_grp->adj_index);
}
3447 
3448 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3449 					    bool removing)
3450 {
3451 	if (!removing)
3452 		nh->should_offload = 1;
3453 	else
3454 		nh->should_offload = 0;
3455 	nh->update = 1;
3456 }
3457 
/* Propagate a neighbour state change to every nexthop using it: update
 * the should_offload flag and refresh each affected nexthop group.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3471 
3472 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3473 				      struct mlxsw_sp_rif *rif)
3474 {
3475 	if (nh->rif)
3476 		return;
3477 
3478 	nh->rif = rif;
3479 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3480 }
3481 
3482 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3483 {
3484 	if (!nh->rif)
3485 		return;
3486 
3487 	list_del(&nh->rif_list_node);
3488 	nh->rif = NULL;
3489 }
3490 
3491 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3492 				       struct mlxsw_sp_nexthop *nh)
3493 {
3494 	struct mlxsw_sp_neigh_entry *neigh_entry;
3495 	struct neighbour *n;
3496 	u8 nud_state, dead;
3497 	int err;
3498 
3499 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3500 		return 0;
3501 
3502 	/* Take a reference of neigh here ensuring that neigh would
3503 	 * not be destructed before the nexthop entry is finished.
3504 	 * The reference is taken either in neigh_lookup() or
3505 	 * in neigh_create() in case n is not found.
3506 	 */
3507 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3508 	if (!n) {
3509 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3510 				 nh->rif->dev);
3511 		if (IS_ERR(n))
3512 			return PTR_ERR(n);
3513 		neigh_event_send(n, NULL);
3514 	}
3515 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3516 	if (!neigh_entry) {
3517 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3518 		if (IS_ERR(neigh_entry)) {
3519 			err = -EINVAL;
3520 			goto err_neigh_entry_create;
3521 		}
3522 	}
3523 
3524 	/* If that is the first nexthop connected to that neigh, add to
3525 	 * nexthop_neighs_list
3526 	 */
3527 	if (list_empty(&neigh_entry->nexthop_list))
3528 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3529 			      &mlxsw_sp->router->nexthop_neighs_list);
3530 
3531 	nh->neigh_entry = neigh_entry;
3532 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3533 	read_lock_bh(&n->lock);
3534 	nud_state = n->nud_state;
3535 	dead = n->dead;
3536 	read_unlock_bh(&n->lock);
3537 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3538 
3539 	return 0;
3540 
3541 err_neigh_entry_create:
3542 	neigh_release(n);
3543 	return err;
3544 }
3545 
/* Detach a nexthop from its neighbour entry, destroying the neigh entry
 * when it is both disconnected and no longer used by any nexthop, and
 * dropping the neighbour reference taken in mlxsw_sp_nexthop_neigh_init().
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
3571 
3572 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3573 {
3574 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3575 
3576 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3577 }
3578 
/* Attach a gateway nexthop to an IP-in-IP tunnel entry: its offload state
 * follows the tunnel underlay's link state, and it is bound to the
 * tunnel's loopback RIF. No-op for non-gateway groups or nexthops already
 * attached to a tunnel.
 */
static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	bool removing;

	if (!nh->nh_grp->gateway || nh->ipip_entry)
		return;

	nh->ipip_entry = ipip_entry;
	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
	__mlxsw_sp_nexthop_neigh_update(nh, removing);
	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
}
3593 
3594 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3595 				       struct mlxsw_sp_nexthop *nh)
3596 {
3597 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3598 
3599 	if (!ipip_entry)
3600 		return;
3601 
3602 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3603 	nh->ipip_entry = NULL;
3604 }
3605 
3606 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3607 					const struct fib_nh *fib_nh,
3608 					enum mlxsw_sp_ipip_type *p_ipipt)
3609 {
3610 	struct net_device *dev = fib_nh->nh_dev;
3611 
3612 	return dev &&
3613 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3614 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3615 }
3616 
/* Tear down a nexthop's type-specific state. Note the ordering differs by
 * type: Ethernet nexthops release the neighbour before the RIF, while
 * tunnel nexthops release the RIF before detaching from the tunnel.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	switch (nh->type) {
	case MLXSW_SP_NEXTHOP_TYPE_ETH:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
		break;
	}
}
3631 
/* Initialize the type-specific state of an IPv4 nexthop. If its device is
 * an offloadable IP-in-IP tunnel, bind it to the tunnel entry; otherwise
 * treat it as an Ethernet nexthop and bind it to the device's RIF and
 * neighbour. A nexthop whose device has no RIF is left uninitialized
 * (returns 0) - it simply will not be offloaded.
 */
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
3669 
/* IPv4 wrapper around the protocol-agnostic type teardown. */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3675 
/* Initialize a driver nexthop from a kernel IPv4 fib_nh: record group
 * membership, weight and gateway address, insert it into the nexthop
 * hashtable, allocate an optional flow counter and register it on the
 * router's nexthop list. Type-specific init is skipped for device-less
 * nexthops and for link-down nexthops the kernel would ignore.
 *
 * NOTE(review): the error path only undoes the hashtable insertion - the
 * router_list_node linkage and the counter allocated above appear to be
 * left in place; confirm whether the caller's unwind covers them.
 */
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->nh_weight = fib_nh->nh_weight;
#else
	nh->nh_weight = 1;
#endif
	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;

	/* Match the kernel: ignore link-down nexthops when the device is
	 * configured to ignore routes with link-down nexthops.
	 */
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}
3718 
/* Tear down an IPv4 nexthop in reverse order of mlxsw_sp_nexthop4_init():
 * type-specific state, router list membership, counter, hashtable entry.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3727 
/* Handle a kernel FIB nexthop add/del notification: (re)initialize or
 * tear down the matching driver nexthop's type-specific state and refresh
 * its group. Ignored entirely when the router is in aborted state.
 */
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;

	if (mlxsw_sp->router->aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	/* The nexthop was registered at route insertion time, so a miss
	 * here indicates an internal inconsistency.
	 */
	if (WARN_ON_ONCE(!nh))
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
3753 
/* Re-evaluate the offload state of every nexthop egressing through the
 * given RIF. Ethernet nexthops stay offloadable; tunnel nexthops follow
 * the underlay device's link state.
 */
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh;
	bool removing;

	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
		switch (nh->type) {
		case MLXSW_SP_NEXTHOP_TYPE_ETH:
			removing = false;
			break;
		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
			break;
		default:
			WARN_ON(1);
			continue;
		}

		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3777 
/* Move all nexthops from one RIF to another (e.g. when a RIF is replaced)
 * and refresh them against the new RIF's state. The splice must happen
 * before re-pointing nh->rif so the list and the pointers stay in sync.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
3789 
/* A RIF is going away: tear down the type-specific state of every nexthop
 * using it and refresh their groups (which will fall back to trapping).
 * The _safe iterator is required because type_fini unlinks nexthops from
 * rif->nexthop_list.
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3800 
3801 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3802 				   const struct fib_info *fi)
3803 {
3804 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3805 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3806 }
3807 
/* Create a nexthop group mirroring the given kernel fib_info: allocate
 * the group with one embedded nexthop per fib_nh, initialize each
 * nexthop, insert the group into the group hashtable and program it in
 * hardware. Holds a reference on the fib_info for the group's lifetime.
 * Returns the group or an ERR_PTR.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	/* Nexthops are allocated inline, trailing the group struct. */
	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3853 
/* Destroy an IPv4 nexthop group: unhash it, tear down its nexthops,
 * release the hardware adjacency state via a final refresh (which must
 * leave adj_index invalid - warn otherwise), drop the fib_info reference
 * and free the group.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3871 
/* Attach a fib entry to the nexthop group matching its fib_info, creating
 * the group on first use. Group sharing is what lets routes with the same
 * nexthop configuration reuse one adjacency group.
 */
static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}
3888 
3889 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3890 					struct mlxsw_sp_fib_entry *fib_entry)
3891 {
3892 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3893 
3894 	list_del(&fib_entry->nexthop_group_node);
3895 	if (!list_empty(&nh_grp->fib_list))
3896 		return;
3897 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3898 }
3899 
3900 static bool
3901 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3902 {
3903 	struct mlxsw_sp_fib4_entry *fib4_entry;
3904 
3905 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3906 				  common);
3907 	return !fib4_entry->tos;
3908 }
3909 
/* Decide whether a fib entry is actually offloaded in hardware: IPv4
 * entries must pass the TOS check, remote entries need a valid adjacency
 * index, local entries need a RIF, and decap entries are always
 * considered offloaded.
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
		return true;
	default:
		return false;
	}
}
3936 
3937 static struct mlxsw_sp_nexthop *
3938 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3939 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3940 {
3941 	int i;
3942 
3943 	for (i = 0; i < nh_grp->count; i++) {
3944 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3945 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3946 
3947 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3948 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3949 				    &rt->fib6_nh.nh_gw))
3950 			return nh;
3951 		continue;
3952 	}
3953 
3954 	return NULL;
3955 }
3956 
/* Set the RTNH_F_OFFLOAD indication on the kernel nexthops of an IPv4
 * entry. Local/decap entries mark only the first nexthop; remote entries
 * mark each nexthop according to its own offload state.
 */
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
		/* nh_grp->nexthops is the first element of the array. */
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3979 
/* Clear the RTNH_F_OFFLOAD indication from an IPv4 entry's kernel
 * nexthops - but only when this is the last fib entry using the group;
 * otherwise another route still legitimately owns the indication.
 */
static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (!list_is_singular(&nh_grp->fib_list))
		return;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3995 
/* Set the RTNH_F_OFFLOAD indication on the kernel routes of an IPv6
 * entry. Local entries mark only the first route; otherwise each route is
 * marked according to the offload state of its matching group nexthop.
 */
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
4022 
4023 static void
4024 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4025 {
4026 	struct mlxsw_sp_fib6_entry *fib6_entry;
4027 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4028 
4029 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4030 				  common);
4031 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4032 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4033 
4034 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4035 	}
4036 }
4037 
4038 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4039 {
4040 	switch (fib_entry->fib_node->fib->proto) {
4041 	case MLXSW_SP_L3_PROTO_IPV4:
4042 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
4043 		break;
4044 	case MLXSW_SP_L3_PROTO_IPV6:
4045 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
4046 		break;
4047 	}
4048 }
4049 
4050 static void
4051 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4052 {
4053 	switch (fib_entry->fib_node->fib->proto) {
4054 	case MLXSW_SP_L3_PROTO_IPV4:
4055 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4056 		break;
4057 	case MLXSW_SP_L3_PROTO_IPV6:
4058 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4059 		break;
4060 	}
4061 }
4062 
/* Synchronize the kernel's RTNH_F_OFFLOAD indication with the outcome of
 * the last device operation performed on the entry.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		/* Entry was removed from the device. */
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		/* Leave the indication untouched if the write failed. */
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
4082 
/* Pack the common part of the RALUE register: protocol, operation,
 * virtual router ID and the destination prefix.
 */
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	/* fib->proto is cast directly to the register's protocol encoding. */
	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* IPv4 destination is passed as a single 32-bit word. */
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
4108 
/* Program a remote (gateway) FIB entry: point it at the nexthop group's
 * adjacency entries, or trap to the CPU if the group is not offloadable.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4137 
/* Program a local (directly connected) FIB entry: forward via the
 * nexthop RIF when offloadable, otherwise trap to the CPU.
 */
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	/* rif is only dereferenced when the entry should be offloaded. */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4161 
/* Program a trap FIB entry: matching packets are sent to the CPU (ip2me). */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4172 
/* Program an IP-in-IP decap FIB entry by delegating to the tunnel-type
 * specific operation with the entry's pre-allocated tunnel index.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	/* A decap entry without a tunnel binding indicates a driver bug. */
	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}
4188 
/* Program an NVE decap FIB entry: send matching packets to the CPU via
 * the NVE tunnel identified by the entry's tunnel index.
 */
static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_fib_entry *fib_entry,
					   enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
					   fib_entry->decap.tunnel_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4200 
/* Perform a device operation on a FIB entry, dispatched by entry type.
 * Returns 0 on success or a negative errno; -EINVAL for unknown types.
 */
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
							fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
	}
	return -EINVAL;
}
4220 
4221 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4222 				 struct mlxsw_sp_fib_entry *fib_entry,
4223 				 enum mlxsw_reg_ralue_op op)
4224 {
4225 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4226 
4227 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4228 
4229 	return err;
4230 }
4231 
4232 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4233 				     struct mlxsw_sp_fib_entry *fib_entry)
4234 {
4235 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4236 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4237 }
4238 
4239 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4240 				  struct mlxsw_sp_fib_entry *fib_entry)
4241 {
4242 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4243 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4244 }
4245 
/* Classify an IPv4 route into a device FIB entry type based on the
 * kernel route type. Returns 0 on success or -EINVAL for unhandled
 * route types.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		/* A local route matching a tunnel underlay address becomes
		 * a decap entry; otherwise fall through to a plain trap.
		 */
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
						     dip.addr4)) {
			u32 t_index;

			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
			fib_entry->decap.tunnel_index = t_index;
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
			return 0;
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes are forwarded in hardware; directly
		 * connected ones use the local (RIF) action.
		 */
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4299 
/* Allocate and initialize an IPv4 FIB entry for the given node from a
 * kernel FIB notification. Returns the entry or an ERR_PTR; the caller
 * owns the entry and releases it with mlxsw_sp_fib4_entry_destroy().
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	/* Cache the route attributes used for ordering and lookup. */
	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}
4336 
/* Release the entry's nexthop group reference and free the entry. */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
4343 
/* Find the IPv4 FIB entry matching a kernel FIB notification, or NULL
 * if the virtual router, node or a fully matching entry does not exist.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	/* Match on table, TOS, type and the backing fib_info. */
	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi) {
			return fib4_entry;
		}
	}

	return NULL;
}
4376 
/* rhashtable keyed by struct mlxsw_sp_fib_key (prefix + length). */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
4383 
/* Insert a FIB node into the FIB's hash table. */
static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}
4390 
/* Remove a FIB node from the FIB's hash table. */
static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
4397 
/* Look up a FIB node by prefix. The key is zeroed first so that unused
 * address bytes compare equal regardless of addr_len.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
4409 
4410 static struct mlxsw_sp_fib_node *
4411 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4412 			 size_t addr_len, unsigned char prefix_len)
4413 {
4414 	struct mlxsw_sp_fib_node *fib_node;
4415 
4416 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4417 	if (!fib_node)
4418 		return NULL;
4419 
4420 	INIT_LIST_HEAD(&fib_node->entry_list);
4421 	list_add(&fib_node->list, &fib->node_list);
4422 	memcpy(fib_node->key.addr, addr, addr_len);
4423 	fib_node->key.prefix_len = prefix_len;
4424 
4425 	return fib_node;
4426 }
4427 
/* Unlink and free a FIB node; it must no longer hold any entries. */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}
4434 
4435 static bool
4436 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4437 				 const struct mlxsw_sp_fib_entry *fib_entry)
4438 {
4439 	return list_first_entry(&fib_node->entry_list,
4440 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4441 }
4442 
/* Account the node's prefix length in the protocol's LPM tree. If the
 * prefix length is not yet used, a tree covering it is obtained and all
 * virtual routers are migrated to it.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	/* Fast path: the current tree already covers this prefix length. */
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	/* Request a tree for the current usage plus the new prefix length. */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4474 
/* Drop the node's prefix-length reference on the FIB's LPM tree and, if
 * it was the last user of that length, opportunistically shrink the tree.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4505 
/* Bind a newly created node to its FIB: insert it into the hash table
 * and account its prefix length in the LPM tree.
 */
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}
4528 
/* Reverse of mlxsw_sp_fib_node_init(): release the LPM tree reference
 * and remove the node from the FIB's hash table.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
4538 
/* Look up or create the FIB node for a prefix, taking a reference on
 * the virtual router. Paired with mlxsw_sp_fib_node_put().
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	/* The VR reference taken above is kept for the returned node. */
	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
4576 
/* Release a reference on a FIB node; the node and its virtual router
 * reference are only torn down once the node holds no more entries.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
4588 
/* Find the insertion point for a new IPv4 entry in the node's list,
 * which is kept sorted by descending table ID, ascending TOS and
 * ascending priority. Returns the entry the new one should precede,
 * or NULL if it belongs at the position found by the caller.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4609 
/* Append the new entry after the last existing entry that shares its
 * table ID, TOS and priority, preserving the node list's sort order.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	/* Appending requires an existing entry to append relative to. */
	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	/* fib4_entry is now the first non-matching entry (or the head),
	 * so adding before it places the new entry at the group's tail.
	 */
	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4631 
/* Insert a new IPv4 entry into its node's sorted list, honoring the
 * append/replace semantics requested by the FIB notification.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No candidate found; insert after the last entry whose
		 * table ID keeps the list sorted in descending order.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4671 
/* Remove the entry from its node's list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4677 
/* Program the entry in the device if it became the node's first (and
 * thus active) entry; non-first entries are kept in software only.
 */
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* Only the offload indication is refreshed; the actual
		 * device write below replaces the old entry in one shot.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
4698 
/* Withdraw the entry from the device. If it was the node's active
 * (first) entry, promote the next entry in its place; otherwise the
 * entry was never programmed and nothing is done.
 */
static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
4719 
4720 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4721 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4722 					 bool replace, bool append)
4723 {
4724 	int err;
4725 
4726 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4727 	if (err)
4728 		return err;
4729 
4730 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4731 	if (err)
4732 		goto err_fib_node_entry_add;
4733 
4734 	return 0;
4735 
4736 err_fib_node_entry_add:
4737 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4738 	return err;
4739 }
4740 
/* Withdraw an IPv4 entry from the device, drop it from its node's list
 * and release decap resources if the entry was a tunnel decap route.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
4751 
/* Complete a route replace: tear down the entry that the new one was
 * inserted in front of. No-op unless a replace was requested.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4769 
/* Handle an IPv4 route add/replace/append notification: get (or create)
 * the FIB node, create the entry, link it and finish any replace.
 * Silently succeeds when the router is in aborted state.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
4815 
/* Handle an IPv4 route delete notification: unlink and destroy the
 * matching entry and drop its node reference. No-op when the router is
 * in aborted state.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	/* Every offloaded route should have a matching entry. */
	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4834 
4835 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4836 {
4837 	/* Packets with link-local destination IP arriving to the router
4838 	 * are trapped to the CPU, so no need to program specific routes
4839 	 * for them.
4840 	 */
4841 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4842 		return true;
4843 
4844 	/* Multicast routes aren't supported, so ignore them. Neighbour
4845 	 * Discovery packets are specifically trapped.
4846 	 */
4847 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4848 		return true;
4849 
4850 	/* Cloned routes are irrelevant in the forwarding path. */
4851 	if (rt->fib6_flags & RTF_CACHE)
4852 		return true;
4853 
4854 	return false;
4855 }
4856 
/* Wrap a kernel fib6_info in a driver rt6 structure, taking a reference
 * on the route. Returns the wrapper or an ERR_PTR.
 */
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	fib6_info_hold(rt);

	return mlxsw_sp_rt6;
}
4874 
#if IS_ENABLED(CONFIG_IPV6)
/* Drop the reference taken in mlxsw_sp_rt6_create(). */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
	fib6_info_release(rt);
}
#else
/* Stub: without IPv6 there is no route reference to release. */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
}
#endif
4885 
/* Release the wrapped route's reference and free the wrapper. */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
4891 
/* A route can be part of a multipath entry if it is a gateway route
 * that was not autoconfigured (RTF_ADDRCONF).
 */
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}
4897 
/* Return the first kernel route backing an IPv6 FIB entry. */
static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}
4904 
/* Find an existing multipath-capable entry the new route can join:
 * same table, same metric, and both routes multipath-capable. Returns
 * NULL when the route must get an entry of its own (or on replace).
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
			continue;
		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
			break;
		if (rt->fib6_metric < nrt->fib6_metric)
			continue;
		if (rt->fib6_metric == nrt->fib6_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		if (rt->fib6_metric > nrt->fib6_metric)
			break;
	}

	return NULL;
}
4935 
/* Find the wrapper holding a specific kernel route within an IPv6 FIB
 * entry, or NULL if the route is not part of the entry.
 */
static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
			    const struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		if (mlxsw_sp_rt6->rt == rt)
			return mlxsw_sp_rt6;
	}

	return NULL;
}
4949 
4950 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4951 					const struct fib6_info *rt,
4952 					enum mlxsw_sp_ipip_type *ret)
4953 {
4954 	return rt->fib6_nh.nh_dev &&
4955 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4956 }
4957 
/* Determine the nexthop's type (IP-in-IP tunnel vs. Ethernet) from the
 * route's device and initialize the corresponding state (tunnel binding
 * or RIF + neighbour tracking).
 */
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp,
				       struct mlxsw_sp_nexthop *nh,
				       const struct fib6_info *rt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct net_device *dev = rt->fib6_nh.nh_dev;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV6)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	/* Without a RIF the nexthop cannot be resolved yet; leave it
	 * uninitialized rather than failing.
	 */
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
4996 
/* Tear down the type-specific state set up by mlxsw_sp_nexthop6_type_init(). */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5002 
/* Initialize an IPv6 nexthop from its kernel route: copy the weight and
 * gateway, allocate a flow counter and register the nexthop with the
 * router. Device-less nexthops are left untyped.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh.nh_dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->fib6_nh.nh_weight;
	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}
5023 
/* Tear down an IPv6 nexthop in reverse order of mlxsw_sp_nexthop6_init():
 * release type-specific state, unlink from the router's nexthop list and
 * free the flow counter.
 */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
5031 
5032 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5033 				    const struct fib6_info *rt)
5034 {
5035 	return rt->fib6_flags & RTF_GATEWAY ||
5036 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5037 }
5038 
/* Allocate and populate a nexthop group covering all routes of an IPv6
 * multipath FIB entry, insert it into the nexthop-group hash table and
 * program it to the device via a refresh.
 *
 * Returns the new group or an ERR_PTR(). On failure, already-initialized
 * nexthops are torn down in reverse order.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* The nexthop array is allocated inline after the group struct. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	/* The first route determines whether the whole group is a
	 * gateway group; one nexthop is created per rt6 on the entry.
	 */
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	for (i = 0; i < nh_grp->count; i++) {
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	/* Unwind only the nexthops that were successfully initialized. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
5089 
5090 static void
5091 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5092 				struct mlxsw_sp_nexthop_group *nh_grp)
5093 {
5094 	struct mlxsw_sp_nexthop *nh;
5095 	int i = nh_grp->count;
5096 
5097 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5098 	for (i--; i >= 0; i--) {
5099 		nh = &nh_grp->nexthops[i];
5100 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5101 	}
5102 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5103 	WARN_ON(nh_grp->adj_index_valid);
5104 	kfree(nh_grp);
5105 }
5106 
/* Attach a FIB6 entry to a nexthop group, reusing an existing group with
 * identical nexthops when one is found in the hash table and creating a
 * new one otherwise. The entry is linked on the group's fib_list, which
 * also acts as the group's reference count.
 *
 * Returns 0 on success or a negative errno from group creation.
 */
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}
5125 
5126 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5127 					struct mlxsw_sp_fib_entry *fib_entry)
5128 {
5129 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5130 
5131 	list_del(&fib_entry->nexthop_group_node);
5132 	if (!list_empty(&nh_grp->fib_list))
5133 		return;
5134 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5135 }
5136 
/* Move a FIB6 entry to a nexthop group matching its current set of
 * routes (e.g. after a route was appended or deleted), then re-program
 * the entry so the device points at the new group's adjacency index.
 * On any failure the entry is re-attached to its old group.
 *
 * Returns 0 on success or a negative errno.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	/* Detach from the old group first so the lookup/create below
	 * sees the entry's current route list.
	 */
	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	/* The old group is only destroyed once this entry - possibly its
	 * last user - has been successfully moved off of it.
	 */
	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}
5172 
/* Append an IPv6 route to an existing multipath FIB entry and switch the
 * entry to a nexthop group that includes the new route. The rt6 is added
 * to the list before the group update so the new group is built from the
 * complete route set; it is removed again if the update fails.
 *
 * Returns 0 on success or a negative errno.
 */
static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}
5200 
/* Remove one IPv6 route from a multipath FIB entry and shrink the
 * entry's nexthop group accordingly. The rt6 is unlinked before the
 * group update so the replacement group is built without it; the update
 * is best-effort and its return value is intentionally ignored.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
5217 
5218 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5219 					 struct mlxsw_sp_fib_entry *fib_entry,
5220 					 const struct fib6_info *rt)
5221 {
5222 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5223 	 * stack. We can rely on their destination device not having a
5224 	 * RIF (it's the loopback device) and can thus use action type
5225 	 * local, which will cause them to be trapped with a lower
5226 	 * priority than packets that need to be locally received.
5227 	 */
5228 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5229 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5230 	else if (rt->fib6_flags & RTF_REJECT)
5231 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5232 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5233 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5234 	else
5235 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5236 }
5237 
5238 static void
5239 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5240 {
5241 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5242 
5243 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5244 				 list) {
5245 		fib6_entry->nrt6--;
5246 		list_del(&mlxsw_sp_rt6->list);
5247 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5248 	}
5249 }
5250 
/* Create a FIB6 entry for a route under the given FIB node: wrap the
 * route in an rt6, derive the entry's type from it, and attach the entry
 * to a (possibly shared) nexthop group.
 *
 * Returns the new entry or an ERR_PTR(); partially constructed state is
 * unwound on failure.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	/* The rt6 list must hold the route before the group lookup, which
	 * is keyed on the entry's current set of routes.
	 */
	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}
5292 
/* Destroy a FIB6 entry: release its nexthop group reference, free all
 * attached rt6 routes (nrt6 must reach zero) and free the entry itself.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
5301 
/* Find the position for a new route in a FIB node's entry list, which is
 * kept sorted by decreasing table ID and then increasing metric. Returns
 * the entry the new route should be inserted before, an exact candidate
 * when replacing, or NULL if it belongs at the current end of its
 * section.
 *
 * When replacing, an entry with the same metric and the same
 * multipath-capability as the new route is preferred; a same-metric
 * entry with differing capability is kept as a fallback only if the new
 * route is multipath-capable.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* Skip entries from higher-priority (larger ID) tables;
		 * stop once a lower-priority table begins.
		 */
		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
			continue;
		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
			break;
		if (replace && rt->fib6_metric == nrt->fib6_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->fib6_metric > nrt->fib6_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5328 
/* Insert a new FIB6 entry into its node's list at the position dictated
 * by table ID and metric ordering. When a successor entry is found the
 * new entry is placed before it; otherwise it is placed after the last
 * entry of a higher-priority table, or at the list head.
 *
 * Returns 0 on success, or -EINVAL when a replace was requested but no
 * entry to replace exists (which should not happen - hence the WARN_ON).
 */
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		/* Insert just before the found successor. */
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		/* No successor: find the last entry belonging to a table
		 * with a higher ID and insert right after it.
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
5366 
/* Unlink a FIB6 entry from its node's sorted entry list. */
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}
5372 
5373 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5374 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5375 					 bool replace)
5376 {
5377 	int err;
5378 
5379 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5380 	if (err)
5381 		return err;
5382 
5383 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5384 	if (err)
5385 		goto err_fib_node_entry_add;
5386 
5387 	return 0;
5388 
5389 err_fib_node_entry_add:
5390 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5391 	return err;
5392 }
5393 
/* Remove a FIB6 entry from the device and unlink it from its node's
 * entry list - the inverse of mlxsw_sp_fib6_node_entry_link().
 */
static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
5401 
/* Look up the FIB6 entry corresponding to a kernel route: resolve the
 * virtual router from the route's table ID, find the FIB node for the
 * route's prefix, then scan the node's entries for one with matching
 * table, metric and an attached rt6 for this exact route.
 *
 * Returns the entry or NULL if any step fails to match.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
					    sizeof(rt->fib6_dst.addr),
					    rt->fib6_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
		    rt->fib6_metric == iter_rt->fib6_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}
5433 
/* Complete a route replace: after the new entry has been linked, the
 * entry it replaces sits immediately after it in the node's list. Unlink
 * and destroy that replaced entry and drop its reference on the node.
 * No-op unless a replace was actually requested.
 */
static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	/* mlxsw_sp_fib6_node_list_insert() placed the new entry right
	 * before the one being replaced.
	 */
	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5450 
/* Offload an IPv6 route add/append/replace. Source-specific routes are
 * rejected (-EINVAL) and routes the driver ignores (per
 * mlxsw_sp_fib6_rt_should_ignore()) are silently accepted. The route is
 * first offered to an existing multipath entry; otherwise a new entry is
 * created, linked, and - on replace - the superseded entry is removed.
 *
 * Returns 0 on success or a negative errno; aborted routers accept
 * everything as a no-op.
 */
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	/* Get (or create) the FIB node for the destination prefix; it is
	 * reference counted and must be put on every failure path.
	 */
	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
5507 
/* Stop offloading an IPv6 route. If the route is part of a multipath
 * entry with other routes remaining, only that route's nexthop is
 * removed; otherwise the whole entry is unlinked and destroyed and the
 * FIB node reference is dropped. No-op for ignored routes or when the
 * router is aborted.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5538 
/* Program the abort trap for one protocol: allocate an LPM tree
 * (RALTA), give it a single root bin (RALST), then bind every virtual
 * router to the tree (RALTB) and install a default catch-all route
 * (RALUE) with an ip2me action so all packets trap to the CPU.
 *
 * Returns 0 on success or the first register-write errno.
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* Default route (prefix length 0) trapping to the CPU. */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5579 
5580 static struct mlxsw_sp_mr_table *
5581 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5582 {
5583 	if (family == RTNL_FAMILY_IPMR)
5584 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5585 	else
5586 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5587 }
5588 
/* Offload a multicast route add/replace: take a reference on the
 * route's virtual router (held for the route's lifetime) and install the
 * route in the family's multicast table. No-op when aborted.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
}
5606 
/* Stop offloading a multicast route and drop the virtual-router
 * reference taken when the route was added. No-op when aborted.
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5624 
/* Offload a multicast VIF add: take a reference on the virtual router
 * (held until the VIF is deleted), resolve the device's RIF if one
 * exists, and register the VIF with the family's multicast table.
 *
 * Returns 0 on success or a negative errno.
 */
static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	/* A NULL RIF is acceptable here - the VIF is added unresolved. */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}
5646 
/* Offload a multicast VIF delete: remove the VIF from the family's
 * multicast table and drop the virtual-router reference taken when the
 * VIF was added. No-op when aborted.
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5665 
/* Install abort traps for both unicast protocols, using two dedicated
 * LPM trees (MIN for IPv4, MIN+1 for IPv6), so every packet traps to the
 * CPU after the router has aborted.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
	int err;

	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
					       MLXSW_SP_LPM_TREE_MIN);
	if (err)
		return err;

	/* The multicast router code does not need an abort trap as by default,
	 * packets that don't match any routes are trapped to the CPU.
	 */

	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
						MLXSW_SP_LPM_TREE_MIN + 1);
}
5684 
/* Destroy every IPv4 entry on a FIB node. Each destroyed entry drops a
 * reference on the node, so the node itself may be freed during the
 * loop - the do_break flag detects the last iteration before the
 * iterator would touch freed memory.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
5705 
/* Destroy every IPv6 entry on a FIB node. Same do_break pattern as
 * mlxsw_sp_fib4_node_flush(): the node may be freed when its last
 * entry's reference is dropped, so the loop must stop before the
 * iterator reads the freed list head.
 */
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5722 
5723 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5724 				    struct mlxsw_sp_fib_node *fib_node)
5725 {
5726 	switch (fib_node->fib->proto) {
5727 	case MLXSW_SP_L3_PROTO_IPV4:
5728 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5729 		break;
5730 	case MLXSW_SP_L3_PROTO_IPV6:
5731 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5732 		break;
5733 	}
5734 }
5735 
/* Flush every FIB node of one protocol in a virtual router. Uses the
 * same do_break pattern as the node-flush helpers, since flushing a
 * node's last entry frees the node and removes it from the list.
 */
static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}
5751 
/* Flush all offloaded routes: for every in-use virtual router, flush its
 * multicast tables and its IPv4 FIB, then - if the VR is still in use -
 * its IPv6 FIB. The re-check is needed because flushing IPv4 can release
 * the VR's last reference.
 */
static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i, j;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;

		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}
5774 
/* Abort FIB offload after an unrecoverable error: flush all offloaded
 * routes, mark the router aborted (subsequent FIB events become no-ops)
 * and install catch-all traps so routing continues in software.
 * Idempotent - a second call returns immediately.
 */
static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}
5788 
/* Deferred-work context for a FIB notifier event. The notifier runs in
 * atomic context, so the event payload is copied into this struct and
 * processed later from process context. The union holds exactly one
 * notifier-info type, selected by 'event'.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};
5802 
/* Process-context handler for deferred IPv4 FIB events. Runs under RTNL,
 * releases the references taken by mlxsw_sp_router_fib4_event() and
 * frees the work item. Any offload failure triggers a FIB abort.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5847 
/* Process-context handler for deferred IPv6 FIB events. Runs under RTNL,
 * drops the fib6_info references taken at enqueue time and frees the
 * work item. Any offload failure triggers a FIB abort.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5884 
/* Process-context handler for deferred multicast-routing FIB events
 * (MFC routes and VIFs). Runs under RTNL, drops the mfc_cache/netdev
 * references taken at enqueue time and frees the work item. Any offload
 * failure triggers a FIB abort.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5931 
/* Copy an IPv4 FIB notifier payload into the deferred-work struct.
 * Called from the (atomic) notifier path, so it takes references on the
 * embedded fib_info to keep it alive until the work item runs; the work
 * handler releases them.
 */
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}
5960 
/* Copy an IPv6 FIB notifier payload into the deferred-work struct and
 * take a reference on the fib6_info so it survives until the work
 * handler runs and releases it.
 */
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		fib6_info_hold(fib_work->fen6_info.rt);
		break;
	}
}
5978 
/* Copy a multicast-routing notifier payload into the deferred-work
 * struct, taking a reference on the mfc_cache (route events) or the
 * netdev (VIF events) so they stay valid until the work handler runs
 * and releases them.
 */
static void
mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
			    struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
		mr_cache_hold(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD: /* fall through */
	case FIB_EVENT_VIF_DEL:
		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
		dev_hold(fib_work->ven_info.dev);
		break;
	}
}
5997 
/* Validate a FIB rule notification. Rule deletions and rules on an
 * already-aborted router are accepted. An added rule is only supported
 * if it is the family's default rule or an l3mdev (VRF) rule; anything
 * else returns -EOPNOTSUPP with an extack message, which makes the
 * caller abort FIB offload.
 *
 * Returns 0 when the rule is acceptable, negative errno otherwise.
 */
static int mlxsw_sp_router_fib_rule_event(unsigned long event,
					  struct fib_notifier_info *info,
					  struct mlxsw_sp *mlxsw_sp)
{
	struct netlink_ext_ack *extack = info->extack;
	struct fib_rule_notifier_info *fr_info;
	struct fib_rule *rule;
	int err = 0;

	/* nothing to do at the moment */
	if (event == FIB_EVENT_RULE_DEL)
		return 0;

	if (mlxsw_sp->router->aborted)
		return 0;

	fr_info = container_of(info, struct fib_rule_notifier_info, info);
	rule = fr_info->rule;

	switch (info->family) {
	case AF_INET:
		if (!fib4_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
		break;
	case AF_INET6:
		if (!fib6_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
		break;
	case RTNL_FAMILY_IPMR:
		if (!ipmr_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
		break;
	case RTNL_FAMILY_IP6MR:
		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
			err = -EOPNOTSUPP;
		break;
	}

	if (err < 0)
		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");

	return err;
}
6041 
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	/* Only events from the init namespace and from the address
	 * families the driver offloads are of interest.
	 */
	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR &&
	     info->family != RTNL_FAMILY_IP6MR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		/* If the rule is supported, or the error can be reported
		 * to user space via extack, we are done. Otherwise fall
		 * through and defer handling to the work item below.
		 */
		if (!err || info->extack)
			return notifier_from_errno(err);
		break;
	case FIB_EVENT_ENTRY_ADD:
		if (router->aborted) {
			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
			return notifier_from_errno(-EINVAL);
		}
		break;
	}

	/* Atomic context - defer the actual processing to process context. */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	/* The per-family helpers copy the notifier payload and take the
	 * references needed to keep it alive until the work item runs.
	 */
	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IP6MR:
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
6102 
6103 struct mlxsw_sp_rif *
6104 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6105 			 const struct net_device *dev)
6106 {
6107 	int i;
6108 
6109 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6110 		if (mlxsw_sp->router->rifs[i] &&
6111 		    mlxsw_sp->router->rifs[i]->dev == dev)
6112 			return mlxsw_sp->router->rifs[i];
6113 
6114 	return NULL;
6115 }
6116 
/* Disable a RIF in hardware: read back its current RITR register
 * configuration, clear the enable bit and write it back.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6130 
/* Tear down everything that depends on a RIF: disable it in hardware
 * first so no traffic is routed through it, then flush the nexthops
 * and neighbour entries that reference it.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
6138 
/* Decide whether an inetaddr / inet6addr event should result in RIF
 * configuration changes. On NETDEV_UP a RIF should be created only if
 * one does not already exist. On NETDEV_DOWN a RIF should be destroyed
 * only once the netdev has no remaining IPv4 or IPv6 addresses.
 */
static bool
mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
			   unsigned long event)
{
	struct inet6_dev *inet6_dev;
	bool addr_list_empty = true;
	struct in_device *idev;

	switch (event) {
	case NETDEV_UP:
		return rif == NULL;
	case NETDEV_DOWN:
		/* Check for remaining IPv4 addresses... */
		idev = __in_dev_get_rtnl(dev);
		if (idev && idev->ifa_list)
			addr_list_empty = false;

		/* ...and remaining IPv6 addresses. */
		inet6_dev = __in6_dev_get(dev);
		if (addr_list_empty && inet6_dev &&
		    !list_empty(&inet6_dev->addr_list))
			addr_list_empty = false;

		/* macvlans do not have a RIF, but rather piggy back on the
		 * RIF of their lower device.
		 */
		if (netif_is_macvlan(dev) && addr_list_empty)
			return true;

		if (rif && addr_list_empty &&
		    !netif_is_l3_slave(rif->dev))
			return true;
		/* It is possible we already removed the RIF ourselves
		 * if it was assigned to a netdev that is now a bridge
		 * or LAG slave.
		 */
		return false;
	}

	return false;
}
6178 
6179 static enum mlxsw_sp_rif_type
6180 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6181 		      const struct net_device *dev)
6182 {
6183 	enum mlxsw_sp_fid_type type;
6184 
6185 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6186 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6187 
6188 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6189 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6190 		type = MLXSW_SP_FID_TYPE_8021Q;
6191 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6192 		type = MLXSW_SP_FID_TYPE_8021Q;
6193 	else if (netif_is_bridge_master(dev))
6194 		type = MLXSW_SP_FID_TYPE_8021D;
6195 	else
6196 		type = MLXSW_SP_FID_TYPE_RFID;
6197 
6198 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6199 }
6200 
6201 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6202 {
6203 	int i;
6204 
6205 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6206 		if (!mlxsw_sp->router->rifs[i]) {
6207 			*p_rif_index = i;
6208 			return 0;
6209 		}
6210 	}
6211 
6212 	return -ENOBUFS;
6213 }
6214 
6215 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6216 					       u16 vr_id,
6217 					       struct net_device *l3_dev)
6218 {
6219 	struct mlxsw_sp_rif *rif;
6220 
6221 	rif = kzalloc(rif_size, GFP_KERNEL);
6222 	if (!rif)
6223 		return NULL;
6224 
6225 	INIT_LIST_HEAD(&rif->nexthop_list);
6226 	INIT_LIST_HEAD(&rif->neigh_list);
6227 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6228 	rif->mtu = l3_dev->mtu;
6229 	rif->vr_id = vr_id;
6230 	rif->dev = l3_dev;
6231 	rif->rif_index = rif_index;
6232 
6233 	return rif;
6234 }
6235 
/* Return the RIF at @rif_index, or NULL if the slot is unused. */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
6241 
/* Return the hardware index of a RIF. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
6246 
/* Return the RIF index of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
6251 
/* Return the underlay virtual router ID of an IP-in-IP loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
6256 
/* Return the ifindex of the netdev backing a RIF. */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
6261 
/* Return the netdev backing a RIF. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
6266 
/* Return the FID associated with a RIF; NULL for loopback RIFs. */
struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
{
	return rif->fid;
}
6271 
/* Create a RIF for @params->dev: bind it to the netdev's virtual
 * router (VRF table or main table), allocate a hardware index, attach
 * a FID when the RIF type requires one, configure it in hardware and
 * register it with the multicast routing tables. On failure the error
 * path unwinds in the exact reverse order of the setup steps.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int i, err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	/* Fall back to the main table when the netdev is not a VRF slave. */
	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	/* Hold the netdev for as long as the RIF references it; released
	 * in mlxsw_sp_rif_destroy() / the error path below.
	 */
	dev_hold(rif->dev);
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* Loopback RIFs have no fid_get and thus no FID. */
	if (ops->fid_get) {
		fid = ops->fid_get(rif, extack);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	if (ops->setup)
		ops->setup(rif, params);

	err = ops->configure(rif);
	if (err)
		goto err_configure;

	/* Register the RIF with both the IPv4 and IPv6 MR tables. */
	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
		if (err)
			goto err_mr_rif_add;
	}

	mlxsw_sp_rif_counters_alloc(rif);
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	for (i--; i >= 0; i--)
		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	dev_put(rif->dev);
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6352 
/* Destroy a RIF, undoing mlxsw_sp_rif_create() in reverse order:
 * flush dependent nexthops/neighbours, unpublish from the RIF table,
 * free counters, unregister from the MR tables, deconfigure hardware
 * and release the FID, netdev and virtual router references.
 */
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;
	int i;

	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_rif_counters_free(rif);
	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
	ops->deconfigure(rif);
	if (fid)
		/* Loopback RIFs are not associated with a FID. */
		mlxsw_sp_fid_put(fid);
	dev_put(rif->dev);
	kfree(rif);
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6377 
/* Destroy the RIF associated with @dev, if any. A no-op when the
 * netdev has no RIF.
 */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		mlxsw_sp_rif_destroy(rif);
}
6388 
/* Fill sub-port RIF parameters from a {port, VLAN} pair. For a LAG
 * member the RIF is keyed by LAG ID, otherwise by local port.
 */
static void
mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;

	params->vid = mlxsw_sp_port_vlan->vid;
	params->lag = mlxsw_sp_port->lagged;
	if (params->lag)
		params->lag_id = mlxsw_sp_port->lag_id;
	else
		params->system_port = mlxsw_sp_port->local_port;
}
6402 
/* Convert a generic RIF to its enclosing sub-port RIF structure. */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
6408 
6409 static struct mlxsw_sp_rif *
6410 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6411 			 const struct mlxsw_sp_rif_params *params,
6412 			 struct netlink_ext_ack *extack)
6413 {
6414 	struct mlxsw_sp_rif_subport *rif_subport;
6415 	struct mlxsw_sp_rif *rif;
6416 
6417 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6418 	if (!rif)
6419 		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6420 
6421 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6422 	refcount_inc(&rif_subport->ref_count);
6423 	return rif;
6424 }
6425 
6426 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6427 {
6428 	struct mlxsw_sp_rif_subport *rif_subport;
6429 
6430 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6431 	if (!refcount_dec_and_test(&rif_subport->ref_count))
6432 		return;
6433 
6434 	mlxsw_sp_rif_destroy(rif);
6435 }
6436 
/* Make a {port, VLAN} a router port on behalf of @l3_dev: get (or
 * create) the sub-port RIF, map the port/VLAN to its rFID and disable
 * learning/STP, since the traffic is now routed rather than bridged.
 * Undone by mlxsw_sp_port_vlan_router_leave().
 */
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
			       struct net_device *l3_dev,
			       struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
	struct mlxsw_sp_rif_params params = {
		.dev = l3_dev,
	};
	u16 vid = mlxsw_sp_port_vlan->vid;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_fid *fid;
	int err;

	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
	if (IS_ERR(rif))
		return PTR_ERR(rif);

	/* FID was already created, just take a reference */
	fid = rif->ops->fid_get(rif, extack);
	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
	if (err)
		goto err_fid_port_vid_map;

	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
	if (err)
		goto err_port_vid_learning_set;

	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
					BR_STATE_FORWARDING);
	if (err)
		goto err_port_vid_stp_set;

	mlxsw_sp_port_vlan->fid = fid;

	return 0;

err_port_vid_stp_set:
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vid_learning_set:
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
err_fid_port_vid_map:
	mlxsw_sp_fid_put(fid);
	mlxsw_sp_rif_subport_put(rif);
	return err;
}
6485 
/* Undo mlxsw_sp_port_vlan_router_join(): restore bridging defaults for
 * the {port, VLAN} and release the FID and sub-port RIF references.
 */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
	u16 vid = mlxsw_sp_port_vlan->vid;

	/* Only router FIDs (rFIDs) are expected here. */
	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	mlxsw_sp_fid_put(fid);
	mlxsw_sp_rif_subport_put(rif);
}
6504 
/* Handle an address event for a {port, VLAN} that backs @l3_dev:
 * join the router on NETDEV_UP, leave it on NETDEV_DOWN.
 */
static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
					     struct net_device *port_dev,
					     unsigned long event, u16 vid,
					     struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;

	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
	if (WARN_ON(!mlxsw_sp_port_vlan))
		return -EINVAL;

	switch (event) {
	case NETDEV_UP:
		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
						      l3_dev, extack);
	case NETDEV_DOWN:
		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
		break;
	}

	return 0;
}
6528 
6529 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6530 					unsigned long event,
6531 					struct netlink_ext_ack *extack)
6532 {
6533 	if (netif_is_bridge_port(port_dev) ||
6534 	    netif_is_lag_port(port_dev) ||
6535 	    netif_is_ovs_port(port_dev))
6536 		return 0;
6537 
6538 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6539 						 MLXSW_SP_DEFAULT_VID, extack);
6540 }
6541 
6542 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6543 					 struct net_device *lag_dev,
6544 					 unsigned long event, u16 vid,
6545 					 struct netlink_ext_ack *extack)
6546 {
6547 	struct net_device *port_dev;
6548 	struct list_head *iter;
6549 	int err;
6550 
6551 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6552 		if (mlxsw_sp_port_dev_check(port_dev)) {
6553 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6554 								port_dev,
6555 								event, vid,
6556 								extack);
6557 			if (err)
6558 				return err;
6559 		}
6560 	}
6561 
6562 	return 0;
6563 }
6564 
/* Handle an address event on a LAG netdev. A LAG enslaved to a bridge
 * is handled via the bridge device instead.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
					     MLXSW_SP_DEFAULT_VID, extack);
}
6575 
/* Create or destroy the RIF of a bridge (or VLAN-over-bridge) netdev.
 * NOTE(review): the NETDEV_DOWN path passes the lookup result straight
 * to mlxsw_sp_rif_destroy(); callers appear to guarantee a RIF exists
 * via mlxsw_sp_rif_should_config() or their own check - confirm.
 */
static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
					  struct net_device *l3_dev,
					  unsigned long event,
					  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params params = {
		.dev = l3_dev,
	};
	struct mlxsw_sp_rif *rif;

	switch (event) {
	case NETDEV_UP:
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
		break;
	case NETDEV_DOWN:
		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
		mlxsw_sp_rif_destroy(rif);
		break;
	}

	return 0;
}
6600 
/* Handle an address event on a VLAN netdev by dispatching on the type
 * of the real device: mlxsw port, LAG or VLAN-aware bridge.
 */
static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *vlan_dev,
					unsigned long event,
					struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
	u16 vid = vlan_dev_vlan_id(vlan_dev);

	/* A VLAN device enslaved to a bridge is handled via the bridge. */
	if (netif_is_bridge_port(vlan_dev))
		return 0;

	if (mlxsw_sp_port_dev_check(real_dev))
		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
							 event, vid, extack);
	else if (netif_is_lag_master(real_dev))
		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
						     vid, extack);
	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
						      extack);

	return 0;
}
6624 
6625 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6626 {
6627 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6628 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6629 
6630 	return ether_addr_equal_masked(mac, vrrp4, mask);
6631 }
6632 
6633 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6634 {
6635 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6636 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6637 
6638 	return ether_addr_equal_masked(mac, vrrp6, mask);
6639 }
6640 
/* Program (or clear) the VRRP ID of a RIF in the RITR register. The
 * VRID is the last byte of the VRRP virtual MAC; non-VRRP MACs are
 * silently ignored.
 */
static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				const u8 *mac, bool adding)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u8 vrrp_id = adding ? mac[5] : 0;
	int err;

	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
		return 0;

	/* Read-modify-write the RIF's current RITR configuration. */
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
	else
		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6664 
/* Direct a macvlan's MAC to the router by installing it in the FDB of
 * the lower device's RIF, and program the VRRP ID when the MAC is a
 * VRRP virtual MAC. Undone by mlxsw_sp_rif_macvlan_del().
 */
static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
				    const struct net_device *macvlan_dev,
				    struct netlink_ext_ack *extack)
{
	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
	if (!rif) {
		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
		return -EOPNOTSUPP;
	}

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		return err;

	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
				   macvlan_dev->dev_addr, true);
	if (err)
		goto err_rif_vrrp_add;

	/* Make sure the bridge driver does not have this MAC pointing at
	 * some other port.
	 */
	if (rif->ops->fdb_del)
		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);

	return 0;

err_rif_vrrp_add:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
			    mlxsw_sp_fid_index(rif->fid), false);
	return err;
}
6702 
/* Undo mlxsw_sp_rif_macvlan_add(): clear the VRRP ID and remove the
 * macvlan's MAC from the lower device's RIF FDB.
 */
void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
			      const struct net_device *macvlan_dev)
{
	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
	/* If we do not have a RIF, then we already took care of
	 * removing the macvlan's MAC during RIF deletion.
	 */
	if (!rif)
		return;
	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
			     false);
	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
			    mlxsw_sp_fid_index(rif->fid), false);
}
6720 
6721 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6722 					   struct net_device *macvlan_dev,
6723 					   unsigned long event,
6724 					   struct netlink_ext_ack *extack)
6725 {
6726 	switch (event) {
6727 	case NETDEV_UP:
6728 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6729 	case NETDEV_DOWN:
6730 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6731 		break;
6732 	}
6733 
6734 	return 0;
6735 }
6736 
/* Verify that @dev_addr shares the device-wide MAC prefix (mac_mask)
 * with all existing RIFs; the hardware requires all router interface
 * MACs to have the same prefix. Returns -EINVAL on mismatch.
 */
static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
					       struct net_device *dev,
					       const unsigned char *dev_addr,
					       struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif *rif;
	int i;

	/* A RIF is not created for macvlan netdevs. Their MAC is used to
	 * populate the FDB
	 */
	if (netif_is_macvlan(dev))
		return 0;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
		rif = mlxsw_sp->router->rifs[i];
		if (rif && rif->dev != dev &&
		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
					     mlxsw_sp->mac_mask)) {
			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
			return -EINVAL;
		}
	}

	return 0;
}
6763 
/* Dispatch an address event to the handler matching the netdev kind.
 * Unrecognized netdevs are ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
6785 
/* IPv4 address notifier. Only NETDEV_DOWN is handled here; NETDEV_UP
 * is handled by the validator notifier below, which can veto the
 * address before it is installed.
 */
static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
				   unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp_router *router;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
	if (event == NETDEV_UP)
		goto out;

	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
out:
	return notifier_from_errno(err);
}
6808 
/* IPv4 address validator notifier: runs before an address is installed
 * and may veto it. Checks the MAC prefix constraint and then performs
 * the actual RIF configuration.
 */
int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
	struct net_device *dev = ivi->ivi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* Ignore netdevs that are not under this driver. */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
						  ivi->extack);
	if (err)
		goto out;

	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
out:
	return notifier_from_errno(err);
}
6835 
/* Deferred context for IPv6 address events, which are delivered in
 * atomic context and therefore processed from a work item.
 */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct net_device *dev;	/* held via dev_hold() until the work runs */
	unsigned long event;	/* NETDEV_* notifier event */
};
6842 
/* Process a deferred IPv6 address event under RTNL, then release the
 * netdev reference taken when the work was queued.
 */
static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp_rif *rif;

	rtnl_lock();

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
out:
	rtnl_unlock();
	dev_put(dev);
	kfree(inet6addr_work);
}
6864 
/* Called with rcu_read_lock() */
static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;
	struct mlxsw_sp_router *router;

	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
	if (event == NETDEV_UP)
		return NOTIFY_DONE;

	/* Atomic context - defer processing to a work item. */
	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	/* Keep the netdev alive until the work runs; released there. */
	dev_hold(dev);
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}
6892 
/* IPv6 address validator notifier: runs before an address is installed
 * and may veto it. Checks the MAC prefix constraint and then performs
 * the actual RIF configuration.
 */
int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
	struct net_device *dev = i6vi->i6vi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* Ignore netdevs that are not under this driver. */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
						  i6vi->extack);
	if (err)
		goto out;

	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
out:
	return notifier_from_errno(err);
}
6919 
/* Update the MAC address and MTU of a RIF in hardware via a
 * read-modify-write of its RITR register.
 * NOTE(review): the explicit op_set(RIF_CREATE) looks redundant after
 * the query - the sibling mlxsw_sp_router_rif_disable() does not set
 * the op field; confirm against the RITR register definition.
 */
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6936 
/* React to a MAC address or MTU change on a RIF's netdev: swap the FDB
 * entry for the router MAC, re-program the RIF, and propagate the new
 * MTU to the multicast routing tables. On failure the previous
 * MAC/MTU are restored in hardware.
 */
static int
mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_rif *rif)
{
	struct net_device *dev = rif->dev;
	u16 fid_index;
	int err;

	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* Remove the old router MAC from the FDB... */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	/* ...program the new MAC and MTU into the RIF... */
	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	/* ...and install the new router MAC in the FDB. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;
		int i;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
						   rif, dev->mtu);
	}

	/* Commit the new values to the software state only on success. */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6987 
6988 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
6989 			    struct netdev_notifier_pre_changeaddr_info *info)
6990 {
6991 	struct netlink_ext_ack *extack;
6992 
6993 	extack = netdev_notifier_info_to_extack(&info->info);
6994 	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
6995 						   info->dev_addr, extack);
6996 }
6997 
/* Handle netdev notifier events relevant to router ports. Only acts
 * when the netdev belongs to this driver and already has a RIF.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
					 unsigned long event, void *ptr)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	switch (event) {
	case NETDEV_CHANGEMTU: /* fall through */
	case NETDEV_CHANGEADDR:
		return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
	case NETDEV_PRE_CHANGEADDR:
		return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
	}

	return 0;
}
7022 
/* Handle a netdev being enslaved to a VRF: re-create its RIF so it is
 * bound to the VRF's virtual router instead of the previous one.
 */
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev,
				  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
					  extack);

	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
}
7039 
7040 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7041 				    struct net_device *l3_dev)
7042 {
7043 	struct mlxsw_sp_rif *rif;
7044 
7045 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7046 	if (!rif)
7047 		return;
7048 	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7049 }
7050 
7051 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7052 				 struct netdev_notifier_changeupper_info *info)
7053 {
7054 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7055 	int err = 0;
7056 
7057 	/* We do not create a RIF for a macvlan, but only use it to
7058 	 * direct more MAC addresses to the router.
7059 	 */
7060 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7061 		return 0;
7062 
7063 	switch (event) {
7064 	case NETDEV_PRECHANGEUPPER:
7065 		return 0;
7066 	case NETDEV_CHANGEUPPER:
7067 		if (info->linking) {
7068 			struct netlink_ext_ack *extack;
7069 
7070 			extack = netdev_notifier_info_to_extack(&info->info);
7071 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7072 		} else {
7073 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7074 		}
7075 		break;
7076 	}
7077 
7078 	return err;
7079 }
7080 
7081 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7082 {
7083 	struct mlxsw_sp_rif *rif = data;
7084 
7085 	if (!netif_is_macvlan(dev))
7086 		return 0;
7087 
7088 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7089 				   mlxsw_sp_fid_index(rif->fid), false);
7090 }
7091 
7092 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7093 {
7094 	if (!netif_is_macvlan_port(rif->dev))
7095 		return 0;
7096 
7097 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7098 	return netdev_walk_all_upper_dev_rcu(rif->dev,
7099 					     __mlxsw_sp_rif_macvlan_flush, rif);
7100 }
7101 
7102 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7103 				       const struct mlxsw_sp_rif_params *params)
7104 {
7105 	struct mlxsw_sp_rif_subport *rif_subport;
7106 
7107 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7108 	refcount_set(&rif_subport->ref_count, 1);
7109 	rif_subport->vid = params->vid;
7110 	rif_subport->lag = params->lag;
7111 	if (params->lag)
7112 		rif_subport->lag_id = params->lag_id;
7113 	else
7114 		rif_subport->system_port = params->system_port;
7115 }
7116 
7117 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7118 {
7119 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7120 	struct mlxsw_sp_rif_subport *rif_subport;
7121 	char ritr_pl[MLXSW_REG_RITR_LEN];
7122 
7123 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7124 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7125 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7126 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7127 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7128 				  rif_subport->lag ? rif_subport->lag_id :
7129 						     rif_subport->system_port,
7130 				  rif_subport->vid);
7131 
7132 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7133 }
7134 
7135 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7136 {
7137 	int err;
7138 
7139 	err = mlxsw_sp_rif_subport_op(rif, true);
7140 	if (err)
7141 		return err;
7142 
7143 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7144 				  mlxsw_sp_fid_index(rif->fid), true);
7145 	if (err)
7146 		goto err_rif_fdb_op;
7147 
7148 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7149 	return 0;
7150 
7151 err_rif_fdb_op:
7152 	mlxsw_sp_rif_subport_op(rif, false);
7153 	return err;
7154 }
7155 
/* Tear down a sub-port RIF: reverse order of configure — unbind the FID,
 * remove the router FDB entry, flush macvlan FDB entries and destroy the
 * RIF in hardware.
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_rif_subport_op(rif, false);
}
7166 
/* Sub-port RIFs use a router FID (rFID) derived from the RIF index. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
			     struct netlink_ext_ack *extack)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
7173 
/* RIF operations for {port, VID} / {LAG, VID} sub-port interfaces. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
7182 
7183 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7184 				    enum mlxsw_reg_ritr_if_type type,
7185 				    u16 vid_fid, bool enable)
7186 {
7187 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7188 	char ritr_pl[MLXSW_REG_RITR_LEN];
7189 
7190 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7191 			    rif->dev->mtu);
7192 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7193 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7194 
7195 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7196 }
7197 
/* The router port is an internal port one past the device's last
 * physical port; it is used as a flood destination towards the router.
 */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
7202 
/* Configure a VLAN RIF: instantiate it in hardware, enable MC/BC
 * flooding in the FID towards the router port, add the router FDB entry
 * for the netdev's MAC and bind the FID to the RIF. Unwinds in reverse
 * order on failure.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
7241 
/* Tear down a VLAN RIF; mirror image of mlxsw_sp_rif_vlan_configure(),
 * executed in reverse order, plus a flush of macvlan FDB entries.
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
7258 
/* Resolve the 802.1Q FID for a VLAN RIF. The netdev is either a VLAN
 * upper of a bridge (use its VLAN ID) or the bridge itself (use its
 * PVID).
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
			  struct netlink_ext_ack *extack)
{
	struct net_device *br_dev = rif->dev;
	u16 vid;
	int err;

	if (is_vlan_dev(rif->dev)) {
		vid = vlan_dev_vlan_id(rif->dev);
		br_dev = vlan_dev_real_dev(rif->dev);
		/* Only VLAN uppers of bridges are expected here. */
		if (WARN_ON(!netif_is_bridge_master(br_dev)))
			return ERR_PTR(-EINVAL);
	} else {
		err = br_vlan_get_pvid(rif->dev, &vid);
		/* A bridge without a PVID cannot be routed through. */
		if (err < 0 || !vid) {
			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
			return ERR_PTR(-EINVAL);
		}
	}

	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
}
7282 
7283 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7284 {
7285 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7286 	struct switchdev_notifier_fdb_info info;
7287 	struct net_device *br_dev;
7288 	struct net_device *dev;
7289 
7290 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7291 	dev = br_fdb_find_port(br_dev, mac, vid);
7292 	if (!dev)
7293 		return;
7294 
7295 	info.addr = mac;
7296 	info.vid = vid;
7297 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7298 }
7299 
/* RIF operations for VLAN-based router interfaces. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
};
7308 
/* Configure a FID RIF: instantiate it in hardware, enable MC/BC flooding
 * in the FID towards the router port, add the router FDB entry for the
 * netdev's MAC and bind the FID to the RIF. Unwinds in reverse order on
 * failure.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}
7348 
/* Tear down a FID RIF; mirror image of mlxsw_sp_rif_fid_configure(),
 * executed in reverse order, plus a flush of macvlan FDB entries.
 */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
7365 
/* FID RIFs resolve their FID from the bridge device with VID 0. */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
			 struct netlink_ext_ack *extack)
{
	return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
}
7372 
7373 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7374 {
7375 	struct switchdev_notifier_fdb_info info;
7376 	struct net_device *dev;
7377 
7378 	dev = br_fdb_find_port(rif->dev, mac, 0);
7379 	if (!dev)
7380 		return;
7381 
7382 	info.addr = mac;
7383 	info.vid = 0;
7384 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7385 }
7386 
/* RIF operations for FID-based router interfaces. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
};
7395 
/* Emulated VLAN RIFs: configured like FID RIFs, but the FID lookup and
 * FDB deletion use the VLAN-aware helpers.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
};
7404 
/* Upcast from the common RIF struct to the enclosing IP-in-IP loopback
 * RIF.
 */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
7410 
7411 static void
7412 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7413 			   const struct mlxsw_sp_rif_params *params)
7414 {
7415 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7416 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7417 
7418 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7419 				 common);
7420 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7421 	rif_lb->lb_config = params_lb->lb_config;
7422 }
7423 
/* Configure an IP-in-IP loopback RIF: bind it to the underlay virtual
 * router, holding a reference on the VR for the lifetime of the RIF.
 */
static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	/* Remember the underlay VR so deconfigure can find it again. */
	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}
7449 
/* Tear down an IP-in-IP loopback RIF and drop the underlay VR reference
 * taken at configure time.
 */
static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
7462 
/* RIF operations for IP-in-IP loopback interfaces. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
7470 
/* Per-type RIF operations; indexed by enum mlxsw_sp_rif_type. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
7477 
7478 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7479 {
7480 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7481 
7482 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7483 					 sizeof(struct mlxsw_sp_rif *),
7484 					 GFP_KERNEL);
7485 	if (!mlxsw_sp->router->rifs)
7486 		return -ENOMEM;
7487 
7488 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7489 
7490 	return 0;
7491 }
7492 
7493 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7494 {
7495 	int i;
7496 
7497 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7498 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7499 
7500 	kfree(mlxsw_sp->router->rifs);
7501 }
7502 
/* Configure global tunneling parameters via the TIGCR register. */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
7511 
/* Initialize IP-in-IP support: ops table, tunnel list and global
 * tunneling configuration.
 */
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}
7518 
/* All IP-in-IP tunnel entries should be gone by teardown time. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
7523 
/* Callback handed to register_fib_notifier(); invoked when the FIB dump
 * needs to be restarted from scratch.
 */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
7536 
7537 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable a header type in the RECR2 ECMP hash configuration. */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
7542 
/* Enable a header field in the RECR2 ECMP hash configuration. */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
7547 
7548 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7549 {
7550 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7551 
7552 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7553 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7554 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7555 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7556 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7557 	if (only_l3)
7558 		return;
7559 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7560 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7561 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7562 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7563 }
7564 
7565 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7566 {
7567 	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7568 
7569 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7570 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7571 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7572 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7573 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7574 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7575 	if (only_l3) {
7576 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7577 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7578 	} else {
7579 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7580 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7581 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7582 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7583 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7584 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7585 	}
7586 }
7587 
7588 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7589 {
7590 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7591 	u32 seed;
7592 
7593 	get_random_bytes(&seed, sizeof(seed));
7594 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7595 	mlxsw_sp_mp4_hash_init(recr2_pl);
7596 	mlxsw_sp_mp6_hash_init(recr2_pl);
7597 
7598 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7599 }
7600 #else
/* Without CONFIG_IP_ROUTE_MULTIPATH there is no ECMP hash to program. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7605 #endif
7606 
/* Program the DSCP-to-priority mapping via the RDPM register. */
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
7624 
7625 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7626 {
7627 	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7628 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7629 	u64 max_rifs;
7630 	int err;
7631 
7632 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7633 		return -EIO;
7634 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7635 
7636 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7637 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7638 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7639 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7640 	if (err)
7641 		return err;
7642 	return 0;
7643 }
7644 
/* Disable the hardware router; counterpart of __mlxsw_sp_router_init(). */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
7652 
/* Bring up the router subsystem: notifiers, hardware router, RIFs,
 * tunnels, nexthop tables, LPM trees, multicast routing, virtual
 * routers, neighbour handling, ECMP hash and DSCP mapping. The FIB
 * notifier is registered last so events only arrive once everything is
 * ready. On failure, everything is unwound in strict reverse order.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
	err = register_inetaddr_notifier(&router->inetaddr_nb);
	if (err)
		goto err_register_inetaddr_notifier;

	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
	err = register_inet6addr_notifier(&router->inet6addr_nb);
	if (err)
		goto err_register_inet6addr_notifier;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	/* Registered last: FIB events must find the router fully set up. */
	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	unregister_inet6addr_notifier(&router->inet6addr_nb);
err_register_inet6addr_notifier:
	unregister_inetaddr_notifier(&router->inetaddr_nb);
err_register_inetaddr_notifier:
	kfree(mlxsw_sp->router);
	return err;
}
7766 
/* Tear down the router subsystem in strict reverse order of
 * mlxsw_sp_router_init().
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
	kfree(mlxsw_sp->router);
}
7784