// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/random.h>
#include <linux/if_macvlan.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>
#include <net/switchdev.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
#include "spectrum_span.h"

struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
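
/* Each RIF type (e.g. the subport and IPIP loopback RIFs defined above)
 * supplies its own mlxsw_sp_rif_ops; the common RIF code allocates rif_size
 * bytes and dispatches through these callbacks: setup() fills in
 * type-specific fields from the parameters, fid_get() binds the RIF to a
 * FID, configure() / deconfigure() program and unprogram the device, and
 * fdb_del() removes an FDB entry pointing at the RIF's MAC address.
 */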

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
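
/* Example usage of the RIF counter API above (a sketch; assumes 'rif' was
 * created elsewhere and that the counter pool has a free entry):
 *
 *	u64 packets;
 *	int err;
 *
 *	err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
 *					 MLXSW_SP_RIF_COUNTER_EGRESS);
 *	if (!err) {
 *		err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *						     MLXSW_SP_RIF_COUNTER_EGRESS,
 *						     &packets);
 *		...
 *		mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
 *					  MLXSW_SP_RIF_COUNTER_EGRESS);
 *	}
 */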

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
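
/* MLXSW_SP_PREFIX_COUNT is 129: one bit per possible prefix length of an
 * IPv6 address (/0 through /128), which also covers every IPv4 prefix
 * length. A prefix_usage bitmap therefore records which prefix lengths a
 * FIB currently uses, and is what the LPM tree code below matches on when
 * deciding whether an existing tree can be shared.
 */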

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
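
/* Each virtual router owns one mlxsw_sp_fib per protocol. On creation the
 * FIB takes a reference on the current per-protocol LPM tree and binds its
 * virtual router to that tree; on destruction the binding is reverted to
 * the default tree 0 and the reference is dropped. The FIB must be empty by
 * then, which the WARN_ON above asserts.
 */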

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
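
/* The RALST register describes an LPM tree as a set of linked bins, one per
 * used prefix length. The first loop above finds the longest used prefix,
 * which becomes the root bin; the second loop then points each used prefix
 * at the next shorter one, so the resulting structure is a simple linear
 * chain from the longest used prefix length down to the shortest.
 */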

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
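
/* LPM trees live in a fixed array sized by the MAX_LPM_TREES resource; a
 * tree whose ref_count is zero is free for reuse. mlxsw_sp_lpm_tree_get()
 * first tries to share an existing tree with an identical protocol and
 * prefix usage, and only carves out a new tree when no such match exists.
 */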

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}
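
/* With the standard kernel table IDs this maps both RT_TABLE_LOCAL (255)
 * and RT_TABLE_DEFAULT (253) onto RT_TABLE_MAIN (254), so routes from all
 * three tables end up sharing a single virtual router in the device.
 */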

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				u16 *vr_id)
{
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return -ESRCH;
	*vr_id = vr->id;

	return 0;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
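
/* Virtual routers carry no explicit reference count. A VR is considered in
 * use for as long as it has a RIF, a unicast FIB node or a multicast route,
 * so mlxsw_sp_vr_put() tears the VR down only once all of those are gone.
 */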

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	/* Roll the already-replaced virtual routers back to the old tree. */
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
}
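
/* The underlay (ul) of a tunnel is the routing domain its encapsulated
 * packets travel in, and it is keyed by table ID: the table of the tunnel's
 * bound link device if one is configured, otherwise the table of the tunnel
 * device itself, falling back to the main table when the device in question
 * is not enslaved to an L3 master (VRF) device.
 */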

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		break;
	}

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   1, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
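
/* A local route that matches a tunnel's underlay source address is
 * "promoted" to an IPIP decap entry: packets hitting it are decapsulated in
 * hardware instead of being trapped to the CPU. If updating the entry in
 * the device fails, or the tunnel later goes away, the route is demoted
 * back to a plain trap entry.
 */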

static struct mlxsw_sp_fib_entry *
mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
				     enum mlxsw_sp_l3proto proto,
				     const union mlxsw_sp_l3addr *addr,
				     enum mlxsw_sp_fib_entry_type type)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib_node *fib_node;
	unsigned char addr_prefix_len;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	const void *addrp;
	size_t addr_len;
	u32 addr4;

	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, proto);

	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		addr4 = be32_to_cpu(addr->addr4);
		addrp = &addr4;
		addr_len = 4;
		addr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
	default:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
					    addr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != type)
		return NULL;

	return fib_entry;
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_fib_entry *fib_entry;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* For deciding whether decap should be offloaded, we don't care about
	 * overlay protocol, so ask whether either one is supported.
	 */
	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
						 struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif_ipip_lb *lb_rif;
	struct mlxsw_sp_vr *ul_vr;
	int err = 0;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry) {
		lb_rif = ipip_entry->ol_lb;
		ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
		if (err)
			goto out;
		lb_rif->common.mtu = ol_dev->mtu;
	}

out:
	return err;
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}

static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload related to an
 *                                       IPIP entry.
 * @mlxsw_sp: mlxsw_sp.
 * @ipip_entry: IPIP entry being updated.
 * @recreate_loopback: recreates the associated loopback RIF.
 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 * @extack: extack.
 *
 * This always updates decap, and in addition performs the updates selected
 * by the flags above.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}

static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently
 * not implemented in the driver. This function finds and demotes the first
 * tunnel with a given source address, except the one passed in via the
 * argument 'except'.
 */
1694 bool
1695 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1696 				     enum mlxsw_sp_l3proto ul_proto,
1697 				     union mlxsw_sp_l3addr saddr,
1698 				     u32 ul_tb_id,
1699 				     const struct mlxsw_sp_ipip_entry *except)
1700 {
1701 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1702 
1703 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1704 				 ipip_list_node) {
1705 		if (ipip_entry != except &&
1706 		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1707 						      ul_tb_id, ipip_entry)) {
1708 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1709 			return true;
1710 		}
1711 	}
1712 
1713 	return false;
1714 }
1715 
1716 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1717 						     struct net_device *ul_dev)
1718 {
1719 	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1720 
1721 	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1722 				 ipip_list_node) {
1723 		struct net_device *ipip_ul_dev =
1724 			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1725 
1726 		if (ipip_ul_dev == ul_dev)
1727 			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1728 	}
1729 }
1730 
1731 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1732 				     struct net_device *ol_dev,
1733 				     unsigned long event,
1734 				     struct netdev_notifier_info *info)
1735 {
1736 	struct netdev_notifier_changeupper_info *chup;
1737 	struct netlink_ext_ack *extack;
1738 
1739 	switch (event) {
1740 	case NETDEV_REGISTER:
1741 		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1742 	case NETDEV_UNREGISTER:
1743 		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1744 		return 0;
1745 	case NETDEV_UP:
1746 		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1747 		return 0;
1748 	case NETDEV_DOWN:
1749 		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1750 		return 0;
1751 	case NETDEV_CHANGEUPPER:
1752 		chup = container_of(info, typeof(*chup), info);
1753 		extack = info->extack;
1754 		if (netif_is_l3_master(chup->upper_dev))
1755 			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1756 								    ol_dev,
1757 								    extack);
1758 		return 0;
1759 	case NETDEV_CHANGE:
1760 		extack = info->extack;
1761 		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1762 							       ol_dev, extack);
1763 	case NETDEV_CHANGEMTU:
1764 		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1765 	}
1766 	return 0;
1767 }
1768 
1769 static int
1770 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1771 				   struct mlxsw_sp_ipip_entry *ipip_entry,
1772 				   struct net_device *ul_dev,
1773 				   unsigned long event,
1774 				   struct netdev_notifier_info *info)
1775 {
1776 	struct netdev_notifier_changeupper_info *chup;
1777 	struct netlink_ext_ack *extack;
1778 
1779 	switch (event) {
1780 	case NETDEV_CHANGEUPPER:
1781 		chup = container_of(info, typeof(*chup), info);
1782 		extack = info->extack;
1783 		if (netif_is_l3_master(chup->upper_dev))
1784 			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1785 								    ipip_entry,
1786 								    ul_dev,
1787 								    extack);
1788 		break;
1789 
1790 	case NETDEV_UP:
1791 		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1792 							   ul_dev);
1793 	case NETDEV_DOWN:
1794 		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1795 							     ipip_entry,
1796 							     ul_dev);
1797 	}
1798 	return 0;
1799 }
1800 
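/* Dispatch an event on an underlay device to all the tunnels riding on it.
 * If handling fails for one tunnel, demote every tunnel whose underlay is
 * this device, so that the driver is left in a consistent state.
 */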
1801 int
1802 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1803 				 struct net_device *ul_dev,
1804 				 unsigned long event,
1805 				 struct netdev_notifier_info *info)
1806 {
1807 	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1808 	int err;
1809 
1810 	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1811 								ul_dev,
1812 								ipip_entry))) {
1813 		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1814 							 ul_dev, event, info);
1815 		if (err) {
1816 			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1817 								 ul_dev);
1818 			return err;
1819 		}
1820 	}
1821 
1822 	return 0;
1823 }
1824 
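/* Promote the IP2ME trap entry for the given underlay source IP to an NVE
 * decapsulation entry pointing at tunnel_index. If the entry cannot be
 * updated in the device, it is restored to a trap entry.
 */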
1825 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1826 				      enum mlxsw_sp_l3proto ul_proto,
1827 				      const union mlxsw_sp_l3addr *ul_sip,
1828 				      u32 tunnel_index)
1829 {
1830 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1831 	struct mlxsw_sp_fib_entry *fib_entry;
1832 	int err;
1833 
1834 	/* It is valid to create a tunnel with a local IP and only later
1835 	 * assign this IP address to a local interface.
1836 	 */
1837 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1838 							 ul_proto, ul_sip,
1839 							 type);
1840 	if (!fib_entry)
1841 		return 0;
1842 
1843 	fib_entry->decap.tunnel_index = tunnel_index;
1844 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1845 
1846 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1847 	if (err)
1848 		goto err_fib_entry_update;
1849 
1850 	return 0;
1851 
1852 err_fib_entry_update:
1853 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1854 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855 	return err;
1856 }
1857 
1858 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1859 				      enum mlxsw_sp_l3proto ul_proto,
1860 				      const union mlxsw_sp_l3addr *ul_sip)
1861 {
1862 	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1863 	struct mlxsw_sp_fib_entry *fib_entry;
1864 
1865 	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1866 							 ul_proto, ul_sip,
1867 							 type);
1868 	if (!fib_entry)
1869 		return;
1870 
1871 	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1872 	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1873 }
1874 
1875 struct mlxsw_sp_neigh_key {
1876 	struct neighbour *n;
1877 };
1878 
1879 struct mlxsw_sp_neigh_entry {
1880 	struct list_head rif_list_node;
1881 	struct rhash_head ht_node;
1882 	struct mlxsw_sp_neigh_key key;
1883 	u16 rif;
1884 	bool connected;
1885 	unsigned char ha[ETH_ALEN];
1886 	struct list_head nexthop_list; /* list of nexthops using
1887 					* this neigh entry
1888 					*/
1889 	struct list_head nexthop_neighs_list_node;
1890 	unsigned int counter_index;
1891 	bool counter_valid;
1892 };
1893 
1894 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1895 	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1896 	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1897 	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1898 };
1899 
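/* Iterate over the neighbour entries of a RIF: pass NULL to get the first
 * entry, or the previous entry to get the next one. Returns NULL past the
 * last entry.
 */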
1900 struct mlxsw_sp_neigh_entry *
1901 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1902 			struct mlxsw_sp_neigh_entry *neigh_entry)
1903 {
1904 	if (!neigh_entry) {
1905 		if (list_empty(&rif->neigh_list))
1906 			return NULL;
1907 		else
1908 			return list_first_entry(&rif->neigh_list,
1909 						typeof(*neigh_entry),
1910 						rif_list_node);
1911 	}
1912 	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1913 		return NULL;
1914 	return list_next_entry(neigh_entry, rif_list_node);
1915 }
1916 
1917 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1918 {
1919 	return neigh_entry->key.n->tbl->family;
1920 }
1921 
1922 unsigned char *
1923 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1924 {
1925 	return neigh_entry->ha;
1926 }
1927 
1928 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1929 {
1930 	struct neighbour *n;
1931 
1932 	n = neigh_entry->key.n;
1933 	return ntohl(*((__be32 *) n->primary_key));
1934 }
1935 
1936 struct in6_addr *
1937 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1938 {
1939 	struct neighbour *n;
1940 
1941 	n = neigh_entry->key.n;
1942 	return (struct in6_addr *) &n->primary_key;
1943 }
1944 
1945 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1946 			       struct mlxsw_sp_neigh_entry *neigh_entry,
1947 			       u64 *p_counter)
1948 {
1949 	if (!neigh_entry->counter_valid)
1950 		return -EINVAL;
1951 
1952 	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1953 					 p_counter, NULL);
1954 }
1955 
1956 static struct mlxsw_sp_neigh_entry *
1957 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1958 			   u16 rif)
1959 {
1960 	struct mlxsw_sp_neigh_entry *neigh_entry;
1961 
1962 	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1963 	if (!neigh_entry)
1964 		return NULL;
1965 
1966 	neigh_entry->key.n = n;
1967 	neigh_entry->rif = rif;
1968 	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1969 
1970 	return neigh_entry;
1971 }
1972 
1973 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1974 {
1975 	kfree(neigh_entry);
1976 }
1977 
1978 static int
1979 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1980 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1981 {
1982 	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1983 				      &neigh_entry->ht_node,
1984 				      mlxsw_sp_neigh_ht_params);
1985 }
1986 
1987 static void
1988 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1989 			    struct mlxsw_sp_neigh_entry *neigh_entry)
1990 {
1991 	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1992 			       &neigh_entry->ht_node,
1993 			       mlxsw_sp_neigh_ht_params);
1994 }
1995 
1996 static bool
1997 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1998 				    struct mlxsw_sp_neigh_entry *neigh_entry)
1999 {
2000 	struct devlink *devlink;
2001 	const char *table_name;
2002 
2003 	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2004 	case AF_INET:
2005 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2006 		break;
2007 	case AF_INET6:
2008 		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2009 		break;
2010 	default:
2011 		WARN_ON(1);
2012 		return false;
2013 	}
2014 
2015 	devlink = priv_to_devlink(mlxsw_sp->core);
2016 	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2017 }
2018 
2019 static void
2020 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2021 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2022 {
2023 	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2024 		return;
2025 
2026 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2027 		return;
2028 
2029 	neigh_entry->counter_valid = true;
2030 }
2031 
2032 static void
2033 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2034 			    struct mlxsw_sp_neigh_entry *neigh_entry)
2035 {
2036 	if (!neigh_entry->counter_valid)
2037 		return;
2038 	mlxsw_sp_flow_counter_free(mlxsw_sp,
2039 				   neigh_entry->counter_index);
2040 	neigh_entry->counter_valid = false;
2041 }
2042 
2043 static struct mlxsw_sp_neigh_entry *
2044 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2045 {
2046 	struct mlxsw_sp_neigh_entry *neigh_entry;
2047 	struct mlxsw_sp_rif *rif;
2048 	int err;
2049 
2050 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2051 	if (!rif)
2052 		return ERR_PTR(-EINVAL);
2053 
2054 	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2055 	if (!neigh_entry)
2056 		return ERR_PTR(-ENOMEM);
2057 
2058 	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2059 	if (err)
2060 		goto err_neigh_entry_insert;
2061 
2062 	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2063 	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2064 
2065 	return neigh_entry;
2066 
2067 err_neigh_entry_insert:
2068 	mlxsw_sp_neigh_entry_free(neigh_entry);
2069 	return ERR_PTR(err);
2070 }
2071 
2072 static void
2073 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2074 			     struct mlxsw_sp_neigh_entry *neigh_entry)
2075 {
2076 	list_del(&neigh_entry->rif_list_node);
2077 	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2078 	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2079 	mlxsw_sp_neigh_entry_free(neigh_entry);
2080 }
2081 
2082 static struct mlxsw_sp_neigh_entry *
2083 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2084 {
2085 	struct mlxsw_sp_neigh_key key;
2086 
2087 	key.n = n;
2088 	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2089 				      &key, mlxsw_sp_neigh_ht_params);
2090 }
2091 
2092 static void
2093 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2094 {
2095 	unsigned long interval;
2096 
2097 #if IS_ENABLED(CONFIG_IPV6)
2098 	interval = min_t(unsigned long,
2099 			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2100 			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2101 #else
2102 	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2103 #endif
2104 	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2105 }
2106 
2107 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2108 						   char *rauhtd_pl,
2109 						   int ent_index)
2110 {
2111 	struct net_device *dev;
2112 	struct neighbour *n;
2113 	__be32 dipn;
2114 	u32 dip;
2115 	u16 rif;
2116 
2117 	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2118 
2119 	if (!mlxsw_sp->router->rifs[rif]) {
2120 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2121 		return;
2122 	}
2123 
2124 	dipn = htonl(dip);
2125 	dev = mlxsw_sp->router->rifs[rif]->dev;
2126 	n = neigh_lookup(&arp_tbl, &dipn, dev);
2127 	if (!n)
2128 		return;
2129 
2130 	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2131 	neigh_event_send(n, NULL);
2132 	neigh_release(n);
2133 }
2134 
2135 #if IS_ENABLED(CONFIG_IPV6)
2136 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2137 						   char *rauhtd_pl,
2138 						   int rec_index)
2139 {
2140 	struct net_device *dev;
2141 	struct neighbour *n;
2142 	struct in6_addr dip;
2143 	u16 rif;
2144 
2145 	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2146 					 (char *) &dip);
2147 
2148 	if (!mlxsw_sp->router->rifs[rif]) {
2149 		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2150 		return;
2151 	}
2152 
2153 	dev = mlxsw_sp->router->rifs[rif]->dev;
2154 	n = neigh_lookup(&nd_tbl, &dip, dev);
2155 	if (!n)
2156 		return;
2157 
2158 	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2159 	neigh_event_send(n, NULL);
2160 	neigh_release(n);
2161 }
2162 #else
2163 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2164 						   char *rauhtd_pl,
2165 						   int rec_index)
2166 {
2167 }
2168 #endif
2169 
2170 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2171 						   char *rauhtd_pl,
2172 						   int rec_index)
2173 {
2174 	u8 num_entries;
2175 	int i;
2176 
2177 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2178 								rec_index);
2179 	/* Hardware starts counting at 0, so add 1. */
2180 	num_entries++;
2181 
2182 	/* Each record consists of several neighbour entries. */
2183 	for (i = 0; i < num_entries; i++) {
2184 		int ent_index;
2185 
2186 		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2187 		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2188 						       ent_index);
2189 	}
2190 
2191 }
2192 
2193 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2194 						   char *rauhtd_pl,
2195 						   int rec_index)
2196 {
2197 	/* One record contains one entry. */
2198 	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2199 					       rec_index);
2200 }
2201 
2202 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2203 					      char *rauhtd_pl, int rec_index)
2204 {
2205 	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2206 	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2207 		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2208 						       rec_index);
2209 		break;
2210 	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2211 		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2212 						       rec_index);
2213 		break;
2214 	}
2215 }
2216 
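/* A response is considered full when the maximum number of records was
 * returned and the last record is completely populated. An IPv6 record
 * always holds one entry, whereas an IPv4 record is full only when all of
 * its MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC entries are used.
 */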
2217 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2218 {
2219 	u8 num_rec, last_rec_index, num_entries;
2220 
2221 	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2222 	last_rec_index = num_rec - 1;
2223 
2224 	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2225 		return false;
2226 	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2227 	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2228 		return true;
2229 
2230 	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2231 								last_rec_index);
2232 	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2233 		return true;
2234 	return false;
2235 }
2236 
2237 static int
2238 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2239 				       char *rauhtd_pl,
2240 				       enum mlxsw_reg_rauhtd_type type)
2241 {
2242 	int i, num_rec;
2243 	int err;
2244 
2245 	/* Make sure the neighbour's netdev isn't removed in the
2246 	 * process.
2247 	 */
2248 	rtnl_lock();
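	/* Re-query while the response is full, as that may mean more
	 * activity records are pending in the device.
	 */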
2249 	do {
2250 		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2251 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2252 				      rauhtd_pl);
2253 		if (err) {
2254 			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2255 			break;
2256 		}
2257 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2258 		for (i = 0; i < num_rec; i++)
2259 			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2260 							  i);
2261 	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2262 	rtnl_unlock();
2263 
2264 	return err;
2265 }
2266 
2267 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2268 {
2269 	enum mlxsw_reg_rauhtd_type type;
2270 	char *rauhtd_pl;
2271 	int err;
2272 
2273 	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2274 	if (!rauhtd_pl)
2275 		return -ENOMEM;
2276 
2277 	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2278 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2279 	if (err)
2280 		goto out;
2281 
2282 	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2283 	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2284 out:
2285 	kfree(rauhtd_pl);
2286 	return err;
2287 }
2288 
2289 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2290 {
2291 	struct mlxsw_sp_neigh_entry *neigh_entry;
2292 
2293 	/* Take the RTNL mutex here to prevent the lists from changing. */
2294 	rtnl_lock();
2295 	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2296 			    nexthop_neighs_list_node)
2297 		/* If this neigh has nexthops, make the kernel think this neigh
2298 		 * is active regardless of the traffic.
2299 		 */
2300 		neigh_event_send(neigh_entry->key.n, NULL);
2301 	rtnl_unlock();
2302 }
2303 
2304 static void
2305 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2306 {
2307 	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2308 
2309 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2310 			       msecs_to_jiffies(interval));
2311 }
2312 
2313 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2314 {
2315 	struct mlxsw_sp_router *router;
2316 	int err;
2317 
2318 	router = container_of(work, struct mlxsw_sp_router,
2319 			      neighs_update.dw.work);
2320 	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2321 	if (err)
2322 		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2323 
2324 	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2325 
2326 	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2327 }
2328 
2329 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2330 {
2331 	struct mlxsw_sp_neigh_entry *neigh_entry;
2332 	struct mlxsw_sp_router *router;
2333 
2334 	router = container_of(work, struct mlxsw_sp_router,
2335 			      nexthop_probe_dw.work);
2336 	/* Iterate over the nexthop neighbours and send an ARP probe to the
2337 	 * unresolved ones. This solves a chicken-and-egg problem: a nexthop
2338 	 * is not offloaded until its neighbour is resolved, but the neighbour
2339 	 * is never resolved if traffic is already flowing in HW via a
2340 	 * different nexthop.
2341 	 *
2342 	 * Take the RTNL mutex here to prevent the lists from changing.
2343 	 */
2344 	rtnl_lock();
2345 	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2346 			    nexthop_neighs_list_node)
2347 		if (!neigh_entry->connected)
2348 			neigh_event_send(neigh_entry->key.n, NULL);
2349 	rtnl_unlock();
2350 
2351 	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2352 			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2353 }
2354 
2355 static void
2356 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2357 			      struct mlxsw_sp_neigh_entry *neigh_entry,
2358 			      bool removing);
2359 
2360 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2361 {
2362 	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2363 			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2364 }
2365 
2366 static void
2367 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2368 				struct mlxsw_sp_neigh_entry *neigh_entry,
2369 				enum mlxsw_reg_rauht_op op)
2370 {
2371 	struct neighbour *n = neigh_entry->key.n;
2372 	u32 dip = ntohl(*((__be32 *) n->primary_key));
2373 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2374 
2375 	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2376 			      dip);
2377 	if (neigh_entry->counter_valid)
2378 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2379 					     neigh_entry->counter_index);
2380 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2381 }
2382 
2383 static void
2384 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2385 				struct mlxsw_sp_neigh_entry *neigh_entry,
2386 				enum mlxsw_reg_rauht_op op)
2387 {
2388 	struct neighbour *n = neigh_entry->key.n;
2389 	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2390 	const char *dip = n->primary_key;
2391 
2392 	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2393 			      dip);
2394 	if (neigh_entry->counter_valid)
2395 		mlxsw_reg_rauht_pack_counter(rauht_pl,
2396 					     neigh_entry->counter_index);
2397 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2398 }
2399 
2400 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2401 {
2402 	struct neighbour *n = neigh_entry->key.n;
2403 
2404 	/* Packets with a link-local destination address are trapped
2405 	 * after LPM lookup and never reach the neighbour table, so
2406 	 * there is no need to program such neighbours to the device.
2407 	 */
2408 	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2409 	    IPV6_ADDR_LINKLOCAL)
2410 		return true;
2411 	return false;
2412 }
2413 
2414 static void
2415 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2416 			    struct mlxsw_sp_neigh_entry *neigh_entry,
2417 			    bool adding)
2418 {
2419 	if (!adding && !neigh_entry->connected)
2420 		return;
2421 	neigh_entry->connected = adding;
2422 	if (neigh_entry->key.n->tbl->family == AF_INET) {
2423 		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2424 						mlxsw_sp_rauht_op(adding));
2425 	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2426 		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2427 			return;
2428 		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2429 						mlxsw_sp_rauht_op(adding));
2430 	} else {
2431 		WARN_ON_ONCE(1);
2432 	}
2433 }
2434 
2435 void
2436 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2437 				    struct mlxsw_sp_neigh_entry *neigh_entry,
2438 				    bool adding)
2439 {
2440 	if (adding)
2441 		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2442 	else
2443 		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
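	/* Re-write the entry so that the counter change takes effect. */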
2444 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2445 }
2446 
2447 struct mlxsw_sp_netevent_work {
2448 	struct work_struct work;
2449 	struct mlxsw_sp *mlxsw_sp;
2450 	struct neighbour *n;
2451 };
2452 
2453 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2454 {
2455 	struct mlxsw_sp_netevent_work *net_work =
2456 		container_of(work, struct mlxsw_sp_netevent_work, work);
2457 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2458 	struct mlxsw_sp_neigh_entry *neigh_entry;
2459 	struct neighbour *n = net_work->n;
2460 	unsigned char ha[ETH_ALEN];
2461 	bool entry_connected;
2462 	u8 nud_state, dead;
2463 
2464 	/* If these parameters are changed after we release the lock,
2465 	 * then we are guaranteed to receive another event letting us
2466 	 * know about it.
2467 	 */
2468 	read_lock_bh(&n->lock);
2469 	memcpy(ha, n->ha, ETH_ALEN);
2470 	nud_state = n->nud_state;
2471 	dead = n->dead;
2472 	read_unlock_bh(&n->lock);
2473 
2474 	rtnl_lock();
2475 	mlxsw_sp_span_respin(mlxsw_sp);
2476 
2477 	entry_connected = nud_state & NUD_VALID && !dead;
2478 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2479 	if (!entry_connected && !neigh_entry)
2480 		goto out;
2481 	if (!neigh_entry) {
2482 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2483 		if (IS_ERR(neigh_entry))
2484 			goto out;
2485 	}
2486 
2487 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2488 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2489 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2490 
2491 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2492 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2493 
2494 out:
2495 	rtnl_unlock();
2496 	neigh_release(n);
2497 	kfree(net_work);
2498 }
2499 
2500 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2501 
2502 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2503 {
2504 	struct mlxsw_sp_netevent_work *net_work =
2505 		container_of(work, struct mlxsw_sp_netevent_work, work);
2506 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2507 
2508 	mlxsw_sp_mp_hash_init(mlxsw_sp);
2509 	kfree(net_work);
2510 }
2511 
2512 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2513 
2514 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2515 {
2516 	struct mlxsw_sp_netevent_work *net_work =
2517 		container_of(work, struct mlxsw_sp_netevent_work, work);
2518 	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2519 
2520 	__mlxsw_sp_router_init(mlxsw_sp);
2521 	kfree(net_work);
2522 }
2523 
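/* Called from an atomic (notifier) context, hence the GFP_ATOMIC allocation
 * and the deferral of the actual processing to a work item.
 */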
2524 static int mlxsw_sp_router_schedule_work(struct net *net,
2525 					 struct notifier_block *nb,
2526 					 void (*cb)(struct work_struct *))
2527 {
2528 	struct mlxsw_sp_netevent_work *net_work;
2529 	struct mlxsw_sp_router *router;
2530 
2531 	if (!net_eq(net, &init_net))
2532 		return NOTIFY_DONE;
2533 
2534 	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2535 	if (!net_work)
2536 		return NOTIFY_BAD;
2537 
2538 	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2539 	INIT_WORK(&net_work->work, cb);
2540 	net_work->mlxsw_sp = router->mlxsw_sp;
2541 	mlxsw_core_schedule_work(&net_work->work);
2542 	return NOTIFY_DONE;
2543 }
2544 
2545 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2546 					  unsigned long event, void *ptr)
2547 {
2548 	struct mlxsw_sp_netevent_work *net_work;
2549 	struct mlxsw_sp_port *mlxsw_sp_port;
2550 	struct mlxsw_sp *mlxsw_sp;
2551 	unsigned long interval;
2552 	struct neigh_parms *p;
2553 	struct neighbour *n;
2554 
2555 	switch (event) {
2556 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2557 		p = ptr;
2558 
2559 		/* We don't care about changes in the default table. */
2560 		if (!p->dev || (p->tbl->family != AF_INET &&
2561 				p->tbl->family != AF_INET6))
2562 			return NOTIFY_DONE;
2563 
2564 		/* We are in atomic context and can't take RTNL mutex,
2565 		 * so use RCU variant to walk the device chain.
2566 		 */
2567 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2568 		if (!mlxsw_sp_port)
2569 			return NOTIFY_DONE;
2570 
2571 		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2572 		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2573 		mlxsw_sp->router->neighs_update.interval = interval;
2574 
2575 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2576 		break;
2577 	case NETEVENT_NEIGH_UPDATE:
2578 		n = ptr;
2579 
2580 		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2581 			return NOTIFY_DONE;
2582 
2583 		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2584 		if (!mlxsw_sp_port)
2585 			return NOTIFY_DONE;
2586 
2587 		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2588 		if (!net_work) {
2589 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2590 			return NOTIFY_BAD;
2591 		}
2592 
2593 		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2594 		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2595 		net_work->n = n;
2596 
2597 		/* Take a reference to ensure the neighbour won't be
2598 		 * destroyed until we drop the reference in the delayed
2599 		 * work.
2600 		 */
2601 		neigh_clone(n);
2602 		mlxsw_core_schedule_work(&net_work->work);
2603 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2604 		break;
2605 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2606 	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2607 		return mlxsw_sp_router_schedule_work(ptr, nb,
2608 				mlxsw_sp_router_mp_hash_event_work);
2609 
2610 	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2611 		return mlxsw_sp_router_schedule_work(ptr, nb,
2612 				mlxsw_sp_router_update_priority_work);
2613 	}
2614 
2615 	return NOTIFY_DONE;
2616 }
2617 
2618 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2619 {
2620 	int err;
2621 
2622 	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2623 			      &mlxsw_sp_neigh_ht_params);
2624 	if (err)
2625 		return err;
2626 
2627 	/* Initialize the polling interval according to the default
2628 	 * table.
2629 	 */
2630 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2631 
2632 	/* Create the delayed works for activity update and nexthop probe. */
2633 	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2634 			  mlxsw_sp_router_neighs_update_work);
2635 	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2636 			  mlxsw_sp_router_probe_unresolved_nexthops);
2637 	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2638 	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2639 	return 0;
2640 }
2641 
2642 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2643 {
2644 	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2645 	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2646 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2647 }
2648 
2649 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2650 					 struct mlxsw_sp_rif *rif)
2651 {
2652 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2653 
2654 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2655 				 rif_list_node) {
2656 		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2657 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2658 	}
2659 }
2660 
2661 enum mlxsw_sp_nexthop_type {
2662 	MLXSW_SP_NEXTHOP_TYPE_ETH,
2663 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2664 };
2665 
2666 struct mlxsw_sp_nexthop_key {
2667 	struct fib_nh *fib_nh;
2668 };
2669 
2670 struct mlxsw_sp_nexthop {
2671 	struct list_head neigh_list_node; /* member of neigh entry list */
2672 	struct list_head rif_list_node;
2673 	struct list_head router_list_node;
2674 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2675 						* this belongs to
2676 						*/
2677 	struct rhash_head ht_node;
2678 	struct mlxsw_sp_nexthop_key key;
2679 	unsigned char gw_addr[sizeof(struct in6_addr)];
2680 	int ifindex;
2681 	int nh_weight;
2682 	int norm_nh_weight;
2683 	int num_adj_entries;
2684 	struct mlxsw_sp_rif *rif;
2685 	u8 should_offload:1, /* set when this neigh is connected and should
2686 			      * be put to the KVD linear area of this group.
2687 			      */
2688 	   offloaded:1, /* set when the neigh is actually put into the
2689 			 * KVD linear area of this group.
2690 			 */
2691 	   update:1; /* set when the MAC of this neigh should be
2692 		      * updated in HW.
2693 		      */
2694 	enum mlxsw_sp_nexthop_type type;
2695 	union {
2696 		struct mlxsw_sp_neigh_entry *neigh_entry;
2697 		struct mlxsw_sp_ipip_entry *ipip_entry;
2698 	};
2699 	unsigned int counter_index;
2700 	bool counter_valid;
2701 };
2702 
2703 struct mlxsw_sp_nexthop_group {
2704 	void *priv;
2705 	struct rhash_head ht_node;
2706 	struct list_head fib_list; /* list of fib entries that use this group */
2707 	struct neigh_table *neigh_tbl;
2708 	u8 adj_index_valid:1,
2709 	   gateway:1; /* routes using the group use a gateway */
2710 	u32 adj_index;
2711 	u16 ecmp_size;
2712 	u16 count;
2713 	int sum_norm_weight;
2714 	struct mlxsw_sp_nexthop nexthops[0];
2715 #define nh_rif	nexthops[0].rif
2716 };
2717 
2718 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2719 				    struct mlxsw_sp_nexthop *nh)
2720 {
2721 	struct devlink *devlink;
2722 
2723 	devlink = priv_to_devlink(mlxsw_sp->core);
2724 	if (!devlink_dpipe_table_counter_enabled(devlink,
2725 						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2726 		return;
2727 
2728 	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2729 		return;
2730 
2731 	nh->counter_valid = true;
2732 }
2733 
2734 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2735 				   struct mlxsw_sp_nexthop *nh)
2736 {
2737 	if (!nh->counter_valid)
2738 		return;
2739 	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2740 	nh->counter_valid = false;
2741 }
2742 
2743 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2744 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2745 {
2746 	if (!nh->counter_valid)
2747 		return -EINVAL;
2748 
2749 	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2750 					 p_counter, NULL);
2751 }
2752 
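/* Iterate over the router's nexthops: pass NULL to get the first nexthop,
 * or the previous nexthop to get the next one. Returns NULL past the last
 * one.
 */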
2753 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2754 					       struct mlxsw_sp_nexthop *nh)
2755 {
2756 	if (!nh) {
2757 		if (list_empty(&router->nexthop_list))
2758 			return NULL;
2759 		else
2760 			return list_first_entry(&router->nexthop_list,
2761 						typeof(*nh), router_list_node);
2762 	}
2763 	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2764 		return NULL;
2765 	return list_next_entry(nh, router_list_node);
2766 }
2767 
2768 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2769 {
2770 	return nh->offloaded;
2771 }
2772 
2773 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2774 {
2775 	if (!nh->offloaded)
2776 		return NULL;
2777 	return nh->neigh_entry->ha;
2778 }
2779 
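/* Compute the location of the nexthop within its adjacency group: the base
 * index and size of the group, and the offset of this nexthop within it,
 * which is the sum of the adjacency entries of the offloaded nexthops that
 * precede it in the group.
 */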
2780 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2781 			     u32 *p_adj_size, u32 *p_adj_hash_index)
2782 {
2783 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2784 	u32 adj_hash_index = 0;
2785 	int i;
2786 
2787 	if (!nh->offloaded || !nh_grp->adj_index_valid)
2788 		return -EINVAL;
2789 
2790 	*p_adj_index = nh_grp->adj_index;
2791 	*p_adj_size = nh_grp->ecmp_size;
2792 
2793 	for (i = 0; i < nh_grp->count; i++) {
2794 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2795 
2796 		if (nh_iter == nh)
2797 			break;
2798 		if (nh_iter->offloaded)
2799 			adj_hash_index += nh_iter->num_adj_entries;
2800 	}
2801 
2802 	*p_adj_hash_index = adj_hash_index;
2803 	return 0;
2804 }
2805 
2806 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2807 {
2808 	return nh->rif;
2809 }
2810 
2811 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2812 {
2813 	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2814 	int i;
2815 
2816 	for (i = 0; i < nh_grp->count; i++) {
2817 		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2818 
2819 		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2820 			return true;
2821 	}
2822 	return false;
2823 }
2824 
2825 static struct fib_info *
2826 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2827 {
2828 	return nh_grp->priv;
2829 }
2830 
2831 struct mlxsw_sp_nexthop_group_cmp_arg {
2832 	enum mlxsw_sp_l3proto proto;
2833 	union {
2834 		struct fib_info *fi;
2835 		struct mlxsw_sp_fib6_entry *fib6_entry;
2836 	};
2837 };
2838 
2839 static bool
2840 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2841 				    const struct in6_addr *gw, int ifindex,
2842 				    int weight)
2843 {
2844 	int i;
2845 
2846 	for (i = 0; i < nh_grp->count; i++) {
2847 		const struct mlxsw_sp_nexthop *nh;
2848 
2849 		nh = &nh_grp->nexthops[i];
2850 		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2851 		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2852 			return true;
2853 	}
2854 
2855 	return false;
2856 }
2857 
2858 static bool
2859 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2860 			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2861 {
2862 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2863 
2864 	if (nh_grp->count != fib6_entry->nrt6)
2865 		return false;
2866 
2867 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2868 		struct in6_addr *gw;
2869 		int ifindex, weight;
2870 
2871 		ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2872 		weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2873 		gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2874 		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2875 							 weight))
2876 			return false;
2877 	}
2878 
2879 	return true;
2880 }
2881 
2882 static int
2883 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2884 {
2885 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2886 	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2887 
2888 	switch (cmp_arg->proto) {
2889 	case MLXSW_SP_L3_PROTO_IPV4:
2890 		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2891 	case MLXSW_SP_L3_PROTO_IPV6:
2892 		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2893 						    cmp_arg->fib6_entry);
2894 	default:
2895 		WARN_ON(1);
2896 		return 1;
2897 	}
2898 }
2899 
2900 static int
2901 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2902 {
2903 	return nh_grp->neigh_tbl->family;
2904 }
2905 
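/* Hash a nexthop group consistently with mlxsw_sp_nexthop_group_hash():
 * IPv4 groups hash the fib_info pointer they were created from, while IPv6
 * groups hash the number of nexthops and the XOR of their ifindexes.
 */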
2906 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2907 {
2908 	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2909 	const struct mlxsw_sp_nexthop *nh;
2910 	struct fib_info *fi;
2911 	unsigned int val;
2912 	int i;
2913 
2914 	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2915 	case AF_INET:
2916 		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2917 		return jhash(&fi, sizeof(fi), seed);
2918 	case AF_INET6:
2919 		val = nh_grp->count;
2920 		for (i = 0; i < nh_grp->count; i++) {
2921 			nh = &nh_grp->nexthops[i];
2922 			val ^= nh->ifindex;
2923 		}
2924 		return jhash(&val, sizeof(val), seed);
2925 	default:
2926 		WARN_ON(1);
2927 		return 0;
2928 	}
2929 }
2930 
2931 static u32
2932 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2933 {
2934 	unsigned int val = fib6_entry->nrt6;
2935 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2936 	struct net_device *dev;
2937 
2938 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2939 		dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2940 		val ^= dev->ifindex;
2941 	}
2942 
2943 	return jhash(&val, sizeof(val), seed);
2944 }
2945 
2946 static u32
2947 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2948 {
2949 	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2950 
2951 	switch (cmp_arg->proto) {
2952 	case MLXSW_SP_L3_PROTO_IPV4:
2953 		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2954 	case MLXSW_SP_L3_PROTO_IPV6:
2955 		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2956 	default:
2957 		WARN_ON(1);
2958 		return 0;
2959 	}
2960 }
2961 
2962 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2963 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2964 	.hashfn	     = mlxsw_sp_nexthop_group_hash,
2965 	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2966 	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2967 };
2968 
2969 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2970 					 struct mlxsw_sp_nexthop_group *nh_grp)
2971 {
2972 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2973 	    !nh_grp->gateway)
2974 		return 0;
2975 
2976 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2977 				      &nh_grp->ht_node,
2978 				      mlxsw_sp_nexthop_group_ht_params);
2979 }
2980 
2981 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2982 					  struct mlxsw_sp_nexthop_group *nh_grp)
2983 {
2984 	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2985 	    !nh_grp->gateway)
2986 		return;
2987 
2988 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2989 			       &nh_grp->ht_node,
2990 			       mlxsw_sp_nexthop_group_ht_params);
2991 }
2992 
2993 static struct mlxsw_sp_nexthop_group *
2994 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2995 			       struct fib_info *fi)
2996 {
2997 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2998 
2999 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3000 	cmp_arg.fi = fi;
3001 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3002 				      &cmp_arg,
3003 				      mlxsw_sp_nexthop_group_ht_params);
3004 }
3005 
3006 static struct mlxsw_sp_nexthop_group *
3007 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3008 			       struct mlxsw_sp_fib6_entry *fib6_entry)
3009 {
3010 	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3011 
3012 	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3013 	cmp_arg.fib6_entry = fib6_entry;
3014 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3015 				      &cmp_arg,
3016 				      mlxsw_sp_nexthop_group_ht_params);
3017 }
3018 
3019 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3020 	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3021 	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3022 	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3023 };
3024 
3025 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3026 				   struct mlxsw_sp_nexthop *nh)
3027 {
3028 	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3029 				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3030 }
3031 
3032 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3033 				    struct mlxsw_sp_nexthop *nh)
3034 {
3035 	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3036 			       mlxsw_sp_nexthop_ht_params);
3037 }
3038 
3039 static struct mlxsw_sp_nexthop *
3040 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3041 			struct mlxsw_sp_nexthop_key key)
3042 {
3043 	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3044 				      mlxsw_sp_nexthop_ht_params);
3045 }
3046 
3047 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3048 					     const struct mlxsw_sp_fib *fib,
3049 					     u32 adj_index, u16 ecmp_size,
3050 					     u32 new_adj_index,
3051 					     u16 new_ecmp_size)
3052 {
3053 	char raleu_pl[MLXSW_REG_RALEU_LEN];
3054 
3055 	mlxsw_reg_raleu_pack(raleu_pl,
3056 			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3057 			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3058 			     new_ecmp_size);
3059 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3060 }
3061 
3062 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3063 					  struct mlxsw_sp_nexthop_group *nh_grp,
3064 					  u32 old_adj_index, u16 old_ecmp_size)
3065 {
3066 	struct mlxsw_sp_fib_entry *fib_entry;
3067 	struct mlxsw_sp_fib *fib = NULL;
3068 	int err;
3069 
3070 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3071 		if (fib == fib_entry->fib_node->fib)
3072 			continue;
3073 		fib = fib_entry->fib_node->fib;
3074 		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3075 							old_adj_index,
3076 							old_ecmp_size,
3077 							nh_grp->adj_index,
3078 							nh_grp->ecmp_size);
3079 		if (err)
3080 			return err;
3081 	}
3082 	return 0;
3083 }
3084 
3085 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3086 				     struct mlxsw_sp_nexthop *nh)
3087 {
3088 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3089 	char ratr_pl[MLXSW_REG_RATR_LEN];
3090 
3091 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3092 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3093 			    adj_index, neigh_entry->rif);
3094 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3095 	if (nh->counter_valid)
3096 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3097 	else
3098 		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3099 
3100 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3101 }
3102 
3103 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3104 			    struct mlxsw_sp_nexthop *nh)
3105 {
3106 	int i;
3107 
3108 	for (i = 0; i < nh->num_adj_entries; i++) {
3109 		int err;
3110 
3111 		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3112 		if (err)
3113 			return err;
3114 	}
3115 
3116 	return 0;
3117 }
3118 
3119 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3120 					  u32 adj_index,
3121 					  struct mlxsw_sp_nexthop *nh)
3122 {
3123 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3124 
3125 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3126 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3127 }
3128 
3129 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3130 					u32 adj_index,
3131 					struct mlxsw_sp_nexthop *nh)
3132 {
3133 	int i;
3134 
3135 	for (i = 0; i < nh->num_adj_entries; i++) {
3136 		int err;
3137 
3138 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3139 						     nh);
3140 		if (err)
3141 			return err;
3142 	}
3143 
3144 	return 0;
3145 }
3146 
3147 static int
3148 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3149 			      struct mlxsw_sp_nexthop_group *nh_grp,
3150 			      bool reallocate)
3151 {
3152 	u32 adj_index = nh_grp->adj_index; /* base */
3153 	struct mlxsw_sp_nexthop *nh;
3154 	int i;
3155 	int err;
3156 
3157 	for (i = 0; i < nh_grp->count; i++) {
3158 		nh = &nh_grp->nexthops[i];
3159 
3160 		if (!nh->should_offload) {
3161 			nh->offloaded = 0;
3162 			continue;
3163 		}
3164 
3165 		if (nh->update || reallocate) {
3166 			switch (nh->type) {
3167 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3168 				err = mlxsw_sp_nexthop_update
3169 					    (mlxsw_sp, adj_index, nh);
3170 				break;
3171 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3172 				err = mlxsw_sp_nexthop_ipip_update
3173 					    (mlxsw_sp, adj_index, nh);
3174 				break;
3175 			}
3176 			if (err)
3177 				return err;
3178 			nh->update = 0;
3179 			nh->offloaded = 1;
3180 		}
3181 		adj_index += nh->num_adj_entries;
3182 	}
3183 	return 0;
3184 }
3185 
3186 static bool
3187 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3188 				 const struct mlxsw_sp_fib_entry *fib_entry);
3189 
3190 static int
3191 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3192 				    struct mlxsw_sp_nexthop_group *nh_grp)
3193 {
3194 	struct mlxsw_sp_fib_entry *fib_entry;
3195 	int err;
3196 
3197 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3198 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3199 						      fib_entry))
3200 			continue;
3201 		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3202 		if (err)
3203 			return err;
3204 	}
3205 	return 0;
3206 }
3207 
3208 static void
3209 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3210 				   enum mlxsw_reg_ralue_op op, int err);
3211 
3212 static void
3213 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3214 {
3215 	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3216 	struct mlxsw_sp_fib_entry *fib_entry;
3217 
3218 	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3219 		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3220 						      fib_entry))
3221 			continue;
3222 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3223 	}
3224 }
3225 
3226 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3227 {
3228 	/* Valid sizes for an adjacency group are:
3229 	 * 1-64, 512, 1024, 2048 and 4096.
3230 	 */
3231 	if (*p_adj_grp_size <= 64)
3232 		return;
3233 	else if (*p_adj_grp_size <= 512)
3234 		*p_adj_grp_size = 512;
3235 	else if (*p_adj_grp_size <= 1024)
3236 		*p_adj_grp_size = 1024;
3237 	else if (*p_adj_grp_size <= 2048)
3238 		*p_adj_grp_size = 2048;
3239 	else
3240 		*p_adj_grp_size = 4096;
3241 }
3242 
3243 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3244 					     unsigned int alloc_size)
3245 {
3246 	if (alloc_size >= 4096)
3247 		*p_adj_grp_size = 4096;
3248 	else if (alloc_size >= 2048)
3249 		*p_adj_grp_size = 2048;
3250 	else if (alloc_size >= 1024)
3251 		*p_adj_grp_size = 1024;
3252 	else if (alloc_size >= 512)
3253 		*p_adj_grp_size = 512;
3254 }
3255 
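/* For example, a request for 97 entries is first rounded up to 512. Should
 * the device actually allocate, say, 2048 entries for such a request, the
 * group size is then adjusted to 2048 so the extra entries are not wasted.
 */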
3256 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3257 				     u16 *p_adj_grp_size)
3258 {
3259 	unsigned int alloc_size;
3260 	int err;
3261 
3262 	/* Round up the requested group size to the next size supported
3263 	 * by the device and make sure the request can be satisfied.
3264 	 */
3265 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3266 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3267 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3268 					      *p_adj_grp_size, &alloc_size);
3269 	if (err)
3270 		return err;
3271 	/* It is possible the allocation results in more allocated
3272 	 * entries than requested. Try to use as many of them as
3273 	 * possible.
3274 	 */
3275 	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3276 
3277 	return 0;
3278 }
3279 
3280 static void
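/* Normalize the weights by the greatest common divisor of the offloaded
 * nexthops. For example, weights of 50 and 25 (gcd 25) normalize to 2 and
 * 1, for a sum of 3.
 */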
3281 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3282 {
3283 	int i, g = 0, sum_norm_weight = 0;
3284 	struct mlxsw_sp_nexthop *nh;
3285 
3286 	for (i = 0; i < nh_grp->count; i++) {
3287 		nh = &nh_grp->nexthops[i];
3288 
3289 		if (!nh->should_offload)
3290 			continue;
3291 		if (g > 0)
3292 			g = gcd(nh->nh_weight, g);
3293 		else
3294 			g = nh->nh_weight;
3295 	}
3296 
3297 	for (i = 0; i < nh_grp->count; i++) {
3298 		nh = &nh_grp->nexthops[i];
3299 
3300 		if (!nh->should_offload)
3301 			continue;
3302 		nh->norm_nh_weight = nh->nh_weight / g;
3303 		sum_norm_weight += nh->norm_nh_weight;
3304 	}
3305 
3306 	nh_grp->sum_norm_weight = sum_norm_weight;
3307 }
3308 
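/* Distribute the adjacency entries among the offloaded nexthops in
 * proportion to their normalized weights. For example, with an ECMP size
 * of 512 and normalized weights of 2 and 1, the nexthops get 341 and 171
 * entries, respectively.
 */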
3309 static void
3310 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3311 {
3312 	int total = nh_grp->sum_norm_weight;
3313 	u16 ecmp_size = nh_grp->ecmp_size;
3314 	int i, weight = 0, lower_bound = 0;
3315 
3316 	for (i = 0; i < nh_grp->count; i++) {
3317 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3318 		int upper_bound;
3319 
3320 		if (!nh->should_offload)
3321 			continue;
3322 		weight += nh->norm_nh_weight;
3323 		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3324 		nh->num_adj_entries = upper_bound - lower_bound;
3325 		lower_bound = upper_bound;
3326 	}
3327 }
3328 
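/* Re-program the group after a change in the offload state of its nexthops:
 * allocate a new adjacency group of a suitable size, write the nexthops to
 * it, re-point the FIB entries using the group at it and only then free the
 * old entries. If anything fails, fall back to trapping the traffic to the
 * CPU.
 */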
3329 static void
3330 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3331 			       struct mlxsw_sp_nexthop_group *nh_grp)
3332 {
3333 	u16 ecmp_size, old_ecmp_size;
3334 	struct mlxsw_sp_nexthop *nh;
3335 	bool offload_change = false;
3336 	u32 adj_index;
3337 	bool old_adj_index_valid;
3338 	u32 old_adj_index;
3339 	int i;
3340 	int err;
3341 
3342 	if (!nh_grp->gateway) {
3343 		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3344 		return;
3345 	}
3346 
3347 	for (i = 0; i < nh_grp->count; i++) {
3348 		nh = &nh_grp->nexthops[i];
3349 
3350 		if (nh->should_offload != nh->offloaded) {
3351 			offload_change = true;
3352 			if (nh->should_offload)
3353 				nh->update = 1;
3354 		}
3355 	}
3356 	if (!offload_change) {
3357 		/* Nothing was added or removed, so no need to reallocate. Just
3358 		 * update the MACs on the existing adjacency indexes.
3359 		 */
3360 		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3361 		if (err) {
3362 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3363 			goto set_trap;
3364 		}
3365 		return;
3366 	}
3367 	mlxsw_sp_nexthop_group_normalize(nh_grp);
3368 	if (!nh_grp->sum_norm_weight)
3369 		/* No neigh of this group is connected so we just set
3370 		 * the trap and let everything flow through the kernel.
3371 		 */
3372 		goto set_trap;
3373 
3374 	ecmp_size = nh_grp->sum_norm_weight;
3375 	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3376 	if (err)
3377 		/* No valid allocation size available. */
3378 		goto set_trap;
3379 
3380 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3381 				  ecmp_size, &adj_index);
3382 	if (err) {
3383 		/* We ran out of KVD linear space, just set the
3384 		 * trap and let everything flow through the kernel.
3385 		 */
3386 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3387 		goto set_trap;
3388 	}
3389 	old_adj_index_valid = nh_grp->adj_index_valid;
3390 	old_adj_index = nh_grp->adj_index;
3391 	old_ecmp_size = nh_grp->ecmp_size;
3392 	nh_grp->adj_index_valid = 1;
3393 	nh_grp->adj_index = adj_index;
3394 	nh_grp->ecmp_size = ecmp_size;
3395 	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3396 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3397 	if (err) {
3398 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3399 		goto set_trap;
3400 	}
3401 
3402 	if (!old_adj_index_valid) {
3403 		/* The trap was set for fib entries, so we have to call
3404 		 * fib entry update to unset it and use the adjacency index.
3405 		 */
3406 		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3407 		if (err) {
3408 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3409 			goto set_trap;
3410 		}
3411 		return;
3412 	}
3413 
3414 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3415 					     old_adj_index, old_ecmp_size);
3416 	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3417 			   old_ecmp_size, old_adj_index);
3418 	if (err) {
3419 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3420 		goto set_trap;
3421 	}
3422 
3423 	/* Offload state within the group changed, so update the flags. */
3424 	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3425 
3426 	return;
3427 
3428 set_trap:
3429 	old_adj_index_valid = nh_grp->adj_index_valid;
3430 	nh_grp->adj_index_valid = 0;
3431 	for (i = 0; i < nh_grp->count; i++) {
3432 		nh = &nh_grp->nexthops[i];
3433 		nh->offloaded = 0;
3434 	}
3435 	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3436 	if (err)
3437 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3438 	if (old_adj_index_valid)
3439 		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3440 				   nh_grp->ecmp_size, nh_grp->adj_index);
3441 }
3442 
3443 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3444 					    bool removing)
3445 {
3446 	if (!removing)
3447 		nh->should_offload = 1;
3448 	else
3449 		nh->should_offload = 0;
3450 	nh->update = 1;
3451 }
3452 
3453 static void
3454 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3455 			      struct mlxsw_sp_neigh_entry *neigh_entry,
3456 			      bool removing)
3457 {
3458 	struct mlxsw_sp_nexthop *nh;
3459 
3460 	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3461 			    neigh_list_node) {
3462 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3463 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3464 	}
3465 }
3466 
3467 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3468 				      struct mlxsw_sp_rif *rif)
3469 {
3470 	if (nh->rif)
3471 		return;
3472 
3473 	nh->rif = rif;
3474 	list_add(&nh->rif_list_node, &rif->nexthop_list);
3475 }
3476 
3477 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3478 {
3479 	if (!nh->rif)
3480 		return;
3481 
3482 	list_del(&nh->rif_list_node);
3483 	nh->rif = NULL;
3484 }
3485 
3486 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3487 				       struct mlxsw_sp_nexthop *nh)
3488 {
3489 	struct mlxsw_sp_neigh_entry *neigh_entry;
3490 	struct neighbour *n;
3491 	u8 nud_state, dead;
3492 	int err;
3493 
3494 	if (!nh->nh_grp->gateway || nh->neigh_entry)
3495 		return 0;
3496 
3497 	/* Take a reference on the neighbour here to ensure that it is
3498 	 * not destroyed before the nexthop entry is done with it.
3499 	 * The reference is taken either in neigh_lookup() or, in case
3500 	 * n is not found, in neigh_create().
3501 	 */
3502 	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3503 	if (!n) {
3504 		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3505 				 nh->rif->dev);
3506 		if (IS_ERR(n))
3507 			return PTR_ERR(n);
3508 		neigh_event_send(n, NULL);
3509 	}
3510 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3511 	if (!neigh_entry) {
3512 		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3513 		if (IS_ERR(neigh_entry)) {
3514 			err = -EINVAL;
3515 			goto err_neigh_entry_create;
3516 		}
3517 	}
3518 
3519 	/* If that is the first nexthop connected to that neigh, add to
3520 	 * nexthop_neighs_list
3521 	 */
3522 	if (list_empty(&neigh_entry->nexthop_list))
3523 		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3524 			      &mlxsw_sp->router->nexthop_neighs_list);
3525 
3526 	nh->neigh_entry = neigh_entry;
3527 	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3528 	read_lock_bh(&n->lock);
3529 	nud_state = n->nud_state;
3530 	dead = n->dead;
3531 	read_unlock_bh(&n->lock);
3532 	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3533 
3534 	return 0;
3535 
3536 err_neigh_entry_create:
3537 	neigh_release(n);
3538 	return err;
3539 }
3540 
3541 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3542 					struct mlxsw_sp_nexthop *nh)
3543 {
3544 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3545 	struct neighbour *n;
3546 
3547 	if (!neigh_entry)
3548 		return;
3549 	n = neigh_entry->key.n;
3550 
3551 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3552 	list_del(&nh->neigh_list_node);
3553 	nh->neigh_entry = NULL;
3554 
3555 	/* If that is the last nexthop connected to that neigh, remove from
3556 	 * nexthop_neighs_list
3557 	 */
3558 	if (list_empty(&neigh_entry->nexthop_list))
3559 		list_del(&neigh_entry->nexthop_neighs_list_node);
3560 
3561 	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3562 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3563 
3564 	neigh_release(n);
3565 }
3566 
3567 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3568 {
3569 	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3570 
3571 	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3572 }
3573 
3574 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3575 				       struct mlxsw_sp_nexthop *nh,
3576 				       struct mlxsw_sp_ipip_entry *ipip_entry)
3577 {
3578 	bool removing;
3579 
3580 	if (!nh->nh_grp->gateway || nh->ipip_entry)
3581 		return;
3582 
3583 	nh->ipip_entry = ipip_entry;
3584 	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3585 	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3586 	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3587 }
3588 
3589 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3590 				       struct mlxsw_sp_nexthop *nh)
3591 {
3592 	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3593 
3594 	if (!ipip_entry)
3595 		return;
3596 
3597 	__mlxsw_sp_nexthop_neigh_update(nh, true);
3598 	nh->ipip_entry = NULL;
3599 }
3600 
3601 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3602 					const struct fib_nh *fib_nh,
3603 					enum mlxsw_sp_ipip_type *p_ipipt)
3604 {
3605 	struct net_device *dev = fib_nh->nh_dev;
3606 
3607 	return dev &&
3608 	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3609 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3610 }
3611 
3612 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3613 				       struct mlxsw_sp_nexthop *nh)
3614 {
3615 	switch (nh->type) {
3616 	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3617 		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3618 		mlxsw_sp_nexthop_rif_fini(nh);
3619 		break;
3620 	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3621 		mlxsw_sp_nexthop_rif_fini(nh);
3622 		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3623 		break;
3624 	}
3625 }
3626 
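/* Classify an IPv4 nexthop: if its device is an offloadable IP-in-IP
 * tunnel, back the nexthop by the tunnel's loopback RIF; otherwise
 * treat it as a regular Ethernet nexthop and resolve its RIF and
 * neighbour.
 */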
3627 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3628 				       struct mlxsw_sp_nexthop *nh,
3629 				       struct fib_nh *fib_nh)
3630 {
3631 	const struct mlxsw_sp_ipip_ops *ipip_ops;
3632 	struct net_device *dev = fib_nh->nh_dev;
3633 	struct mlxsw_sp_ipip_entry *ipip_entry;
3634 	struct mlxsw_sp_rif *rif;
3635 	int err;
3636 
3637 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3638 	if (ipip_entry) {
3639 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3640 		if (ipip_ops->can_offload(mlxsw_sp, dev,
3641 					  MLXSW_SP_L3_PROTO_IPV4)) {
3642 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3643 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3644 			return 0;
3645 		}
3646 	}
3647 
3648 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3649 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3650 	if (!rif)
3651 		return 0;
3652 
3653 	mlxsw_sp_nexthop_rif_init(nh, rif);
3654 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3655 	if (err)
3656 		goto err_neigh_init;
3657 
3658 	return 0;
3659 
3660 err_neigh_init:
3661 	mlxsw_sp_nexthop_rif_fini(nh);
3662 	return err;
3663 }
3664 
3665 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3666 					struct mlxsw_sp_nexthop *nh)
3667 {
3668 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3669 }
3670 
3671 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3672 				  struct mlxsw_sp_nexthop_group *nh_grp,
3673 				  struct mlxsw_sp_nexthop *nh,
3674 				  struct fib_nh *fib_nh)
3675 {
3676 	struct net_device *dev = fib_nh->nh_dev;
3677 	struct in_device *in_dev;
3678 	int err;
3679 
3680 	nh->nh_grp = nh_grp;
3681 	nh->key.fib_nh = fib_nh;
3682 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3683 	nh->nh_weight = fib_nh->nh_weight;
3684 #else
3685 	nh->nh_weight = 1;
3686 #endif
3687 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3688 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3689 	if (err)
3690 		return err;
3691 
3692 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3693 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3694 
3695 	if (!dev)
3696 		return 0;
3697 
3698 	in_dev = __in_dev_get_rtnl(dev);
3699 	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3700 	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
3701 		return 0;
3702 
3703 	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3704 	if (err)
3705 		goto err_nexthop4_type_init;
3706 
3707 	return 0;
3708 
3709 err_nexthop4_type_init:
3710 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3711 	return err;
3712 }
3713 
3714 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3715 				   struct mlxsw_sp_nexthop *nh)
3716 {
3717 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3718 	list_del(&nh->router_list_node);
3719 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3720 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3721 }
3722 
3723 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3724 				    unsigned long event, struct fib_nh *fib_nh)
3725 {
3726 	struct mlxsw_sp_nexthop_key key;
3727 	struct mlxsw_sp_nexthop *nh;
3728 
3729 	if (mlxsw_sp->router->aborted)
3730 		return;
3731 
3732 	key.fib_nh = fib_nh;
3733 	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3734 	if (WARN_ON_ONCE(!nh))
3735 		return;
3736 
3737 	switch (event) {
3738 	case FIB_EVENT_NH_ADD:
3739 		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3740 		break;
3741 	case FIB_EVENT_NH_DEL:
3742 		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3743 		break;
3744 	}
3745 
3746 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3747 }
3748 
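/* Re-evaluate the validity of all nexthops egressing through a RIF.
 * Ethernet nexthops remain valid as long as the RIF exists, while
 * IP-in-IP nexthops additionally require the tunnel's underlay device
 * to be up.
 */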
3749 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3750 					struct mlxsw_sp_rif *rif)
3751 {
3752 	struct mlxsw_sp_nexthop *nh;
3753 	bool removing;
3754 
3755 	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3756 		switch (nh->type) {
3757 		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3758 			removing = false;
3759 			break;
3760 		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3761 			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3762 			break;
3763 		default:
3764 			WARN_ON(1);
3765 			continue;
3766 		}
3767 
3768 		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3769 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3770 	}
3771 }
3772 
3773 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3774 					 struct mlxsw_sp_rif *old_rif,
3775 					 struct mlxsw_sp_rif *new_rif)
3776 {
3777 	struct mlxsw_sp_nexthop *nh;
3778 
3779 	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3780 	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3781 		nh->rif = new_rif;
3782 	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3783 }
3784 
3785 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3786 					   struct mlxsw_sp_rif *rif)
3787 {
3788 	struct mlxsw_sp_nexthop *nh, *tmp;
3789 
3790 	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3791 		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3792 		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3793 	}
3794 }
3795 
3796 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3797 				   const struct fib_info *fi)
3798 {
3799 	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3800 	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3801 }
3802 
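/* Create a nexthop group mirroring a kernel fib_info. The group and its
 * nexthop array are allocated as a single block, a reference is held on
 * the fib_info for the group's lifetime and the group is keyed by the
 * fib_info in the nexthop group hashtable.
 */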
3803 static struct mlxsw_sp_nexthop_group *
3804 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3805 {
3806 	struct mlxsw_sp_nexthop_group *nh_grp;
3807 	struct mlxsw_sp_nexthop *nh;
3808 	struct fib_nh *fib_nh;
3809 	size_t alloc_size;
3810 	int i;
3811 	int err;
3812 
3813 	alloc_size = sizeof(*nh_grp) +
3814 		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3815 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3816 	if (!nh_grp)
3817 		return ERR_PTR(-ENOMEM);
3818 	nh_grp->priv = fi;
3819 	INIT_LIST_HEAD(&nh_grp->fib_list);
3820 	nh_grp->neigh_tbl = &arp_tbl;
3821 
3822 	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3823 	nh_grp->count = fi->fib_nhs;
3824 	fib_info_hold(fi);
3825 	for (i = 0; i < nh_grp->count; i++) {
3826 		nh = &nh_grp->nexthops[i];
3827 		fib_nh = &fi->fib_nh[i];
3828 		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3829 		if (err)
3830 			goto err_nexthop4_init;
3831 	}
3832 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3833 	if (err)
3834 		goto err_nexthop_group_insert;
3835 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3836 	return nh_grp;
3837 
3838 err_nexthop_group_insert:
3839 err_nexthop4_init:
3840 	for (i--; i >= 0; i--) {
3841 		nh = &nh_grp->nexthops[i];
3842 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3843 	}
3844 	fib_info_put(fi);
3845 	kfree(nh_grp);
3846 	return ERR_PTR(err);
3847 }
3848 
3849 static void
3850 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3851 				struct mlxsw_sp_nexthop_group *nh_grp)
3852 {
3853 	struct mlxsw_sp_nexthop *nh;
3854 	int i;
3855 
3856 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3857 	for (i = 0; i < nh_grp->count; i++) {
3858 		nh = &nh_grp->nexthops[i];
3859 		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3860 	}
3861 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3862 	WARN_ON_ONCE(nh_grp->adj_index_valid);
3863 	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3864 	kfree(nh_grp);
3865 }
3866 
3867 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3868 				       struct mlxsw_sp_fib_entry *fib_entry,
3869 				       struct fib_info *fi)
3870 {
3871 	struct mlxsw_sp_nexthop_group *nh_grp;
3872 
3873 	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3874 	if (!nh_grp) {
3875 		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3876 		if (IS_ERR(nh_grp))
3877 			return PTR_ERR(nh_grp);
3878 	}
3879 	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3880 	fib_entry->nh_group = nh_grp;
3881 	return 0;
3882 }
3883 
3884 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3885 					struct mlxsw_sp_fib_entry *fib_entry)
3886 {
3887 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3888 
3889 	list_del(&fib_entry->nexthop_group_node);
3890 	if (!list_empty(&nh_grp->fib_list))
3891 		return;
3892 	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3893 }
3894 
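/* Offload eligibility: IPv4 entries with a non-zero TOS are never
 * offloaded. In addition, remote entries require a valid adjacency
 * index and local entries a RIF, while decap entries are always
 * offloadable.
 */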
3895 static bool
3896 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3897 {
3898 	struct mlxsw_sp_fib4_entry *fib4_entry;
3899 
3900 	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3901 				  common);
3902 	return !fib4_entry->tos;
3903 }
3904 
3905 static bool
3906 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3907 {
3908 	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3909 
3910 	switch (fib_entry->fib_node->fib->proto) {
3911 	case MLXSW_SP_L3_PROTO_IPV4:
3912 		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3913 			return false;
3914 		break;
3915 	case MLXSW_SP_L3_PROTO_IPV6:
3916 		break;
3917 	}
3918 
3919 	switch (fib_entry->type) {
3920 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3921 		return !!nh_group->adj_index_valid;
3922 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3923 		return !!nh_group->nh_rif;
3924 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3925 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3926 		return true;
3927 	default:
3928 		return false;
3929 	}
3930 }
3931 
3932 static struct mlxsw_sp_nexthop *
3933 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3934 		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3935 {
3936 	int i;
3937 
3938 	for (i = 0; i < nh_grp->count; i++) {
3939 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3940 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
3941 
3942 		if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3943 		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3944 				    &rt->fib6_nh.nh_gw))
3945 			return nh;
3947 	}
3948 
3949 	return NULL;
3950 }
3951 
3952 static void
3953 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3954 {
3955 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3956 	int i;
3957 
3958 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3959 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3960 	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3961 		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3962 		return;
3963 	}
3964 
3965 	for (i = 0; i < nh_grp->count; i++) {
3966 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3967 
3968 		if (nh->offloaded)
3969 			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3970 		else
3971 			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3972 	}
3973 }
3974 
3975 static void
3976 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3977 {
3978 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3979 	int i;
3980 
3981 	if (!list_is_singular(&nh_grp->fib_list))
3982 		return;
3983 
3984 	for (i = 0; i < nh_grp->count; i++) {
3985 		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3986 
3987 		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3988 	}
3989 }
3990 
3991 static void
3992 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3993 {
3994 	struct mlxsw_sp_fib6_entry *fib6_entry;
3995 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3996 
3997 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3998 				  common);
3999 
4000 	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
4001 		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4002 				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4003 		return;
4004 	}
4005 
4006 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4007 		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4008 		struct mlxsw_sp_nexthop *nh;
4009 
4010 		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4011 		if (nh && nh->offloaded)
4012 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4013 		else
4014 			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4015 	}
4016 }
4017 
4018 static void
4019 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4020 {
4021 	struct mlxsw_sp_fib6_entry *fib6_entry;
4022 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4023 
4024 	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4025 				  common);
4026 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4027 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4028 
4029 		rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4030 	}
4031 }
4032 
4033 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4034 {
4035 	switch (fib_entry->fib_node->fib->proto) {
4036 	case MLXSW_SP_L3_PROTO_IPV4:
4037 		mlxsw_sp_fib4_entry_offload_set(fib_entry);
4038 		break;
4039 	case MLXSW_SP_L3_PROTO_IPV6:
4040 		mlxsw_sp_fib6_entry_offload_set(fib_entry);
4041 		break;
4042 	}
4043 }
4044 
4045 static void
4046 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4047 {
4048 	switch (fib_entry->fib_node->fib->proto) {
4049 	case MLXSW_SP_L3_PROTO_IPV4:
4050 		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4051 		break;
4052 	case MLXSW_SP_L3_PROTO_IPV6:
4053 		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4054 		break;
4055 	}
4056 }
4057 
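/* Keep the kernel's RTNH_F_OFFLOAD flags in sync with the hardware
 * state after each RALUE operation: clear them on delete and, on a
 * successful write, set or clear them according to whether the entry is
 * actually offloaded.
 */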
4058 static void
4059 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4060 				   enum mlxsw_reg_ralue_op op, int err)
4061 {
4062 	switch (op) {
4063 	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4064 		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4065 	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4066 		if (err)
4067 			return;
4068 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4069 			mlxsw_sp_fib_entry_offload_set(fib_entry);
4070 		else
4071 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
4072 		return;
4073 	default:
4074 		return;
4075 	}
4076 }
4077 
4078 static void
4079 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4080 			      const struct mlxsw_sp_fib_entry *fib_entry,
4081 			      enum mlxsw_reg_ralue_op op)
4082 {
4083 	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4084 	enum mlxsw_reg_ralxx_protocol proto;
4085 	u32 *p_dip;
4086 
4087 	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4088 
4089 	switch (fib->proto) {
4090 	case MLXSW_SP_L3_PROTO_IPV4:
4091 		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4092 		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4093 				      fib_entry->fib_node->key.prefix_len,
4094 				      *p_dip);
4095 		break;
4096 	case MLXSW_SP_L3_PROTO_IPV6:
4097 		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4098 				      fib_entry->fib_node->key.prefix_len,
4099 				      fib_entry->fib_node->key.addr);
4100 		break;
4101 	}
4102 }
4103 
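/* Program a gateway route: when the entry can be offloaded, point it at
 * the nexthop group's adjacency entries; otherwise install it with a
 * trap action so that the kernel forwards the traffic instead.
 */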
4104 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4105 					struct mlxsw_sp_fib_entry *fib_entry,
4106 					enum mlxsw_reg_ralue_op op)
4107 {
4108 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4109 	enum mlxsw_reg_ralue_trap_action trap_action;
4110 	u16 trap_id = 0;
4111 	u32 adjacency_index = 0;
4112 	u16 ecmp_size = 0;
4113 
4114 	/* In case the nexthop group adjacency index is valid, use it
4115 	 * with the provided ECMP size. Otherwise, set up a trap and pass
4116 	 * the traffic to the kernel.
4117 	 */
4118 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4119 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4120 		adjacency_index = fib_entry->nh_group->adj_index;
4121 		ecmp_size = fib_entry->nh_group->ecmp_size;
4122 	} else {
4123 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4124 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4125 	}
4126 
4127 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4128 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4129 					adjacency_index, ecmp_size);
4130 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4131 }
4132 
4133 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4134 				       struct mlxsw_sp_fib_entry *fib_entry,
4135 				       enum mlxsw_reg_ralue_op op)
4136 {
4137 	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4138 	enum mlxsw_reg_ralue_trap_action trap_action;
4139 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4140 	u16 trap_id = 0;
4141 	u16 rif_index = 0;
4142 
4143 	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4144 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4145 		rif_index = rif->rif_index;
4146 	} else {
4147 		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4148 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4149 	}
4150 
4151 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4152 	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4153 				       rif_index);
4154 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4155 }
4156 
4157 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4158 				      struct mlxsw_sp_fib_entry *fib_entry,
4159 				      enum mlxsw_reg_ralue_op op)
4160 {
4161 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4162 
4163 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4164 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4165 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4166 }
4167 
4168 static int
4169 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4170 				 struct mlxsw_sp_fib_entry *fib_entry,
4171 				 enum mlxsw_reg_ralue_op op)
4172 {
4173 	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4174 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4175 
4176 	if (WARN_ON(!ipip_entry))
4177 		return -EINVAL;
4178 
4179 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4180 	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4181 				      fib_entry->decap.tunnel_index);
4182 }
4183 
4184 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4185 					   struct mlxsw_sp_fib_entry *fib_entry,
4186 					   enum mlxsw_reg_ralue_op op)
4187 {
4188 	char ralue_pl[MLXSW_REG_RALUE_LEN];
4189 
4190 	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4191 	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4192 					   fib_entry->decap.tunnel_index);
4193 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4194 }
4195 
4196 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4197 				   struct mlxsw_sp_fib_entry *fib_entry,
4198 				   enum mlxsw_reg_ralue_op op)
4199 {
4200 	switch (fib_entry->type) {
4201 	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4202 		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4203 	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4204 		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4205 	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4206 		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4207 	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4208 		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4209 							fib_entry, op);
4210 	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4211 		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4212 	}
4213 	return -EINVAL;
4214 }
4215 
4216 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4217 				 struct mlxsw_sp_fib_entry *fib_entry,
4218 				 enum mlxsw_reg_ralue_op op)
4219 {
4220 	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4221 
4222 	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4223 
4224 	return err;
4225 }
4226 
4227 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4228 				     struct mlxsw_sp_fib_entry *fib_entry)
4229 {
4230 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4231 				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4232 }
4233 
4234 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4235 				  struct mlxsw_sp_fib_entry *fib_entry)
4236 {
4237 	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4238 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4239 }
4240 
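/* Map a kernel route type to a driver entry type: local routes may
 * terminate IP-in-IP or NVE tunnels (decap), broadcast and host routes
 * trap to the CPU, blackhole-like routes use the lower priority local
 * action and unicast routes are either remote (gateway) or local
 * (directly connected).
 */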
4241 static int
4242 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4243 			     const struct fib_entry_notifier_info *fen_info,
4244 			     struct mlxsw_sp_fib_entry *fib_entry)
4245 {
4246 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4247 	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4248 	struct net_device *dev = fen_info->fi->fib_dev;
4249 	struct mlxsw_sp_ipip_entry *ipip_entry;
4250 	struct fib_info *fi = fen_info->fi;
4251 
4252 	switch (fen_info->type) {
4253 	case RTN_LOCAL:
4254 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4255 						 MLXSW_SP_L3_PROTO_IPV4, dip);
4256 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4257 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4258 			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4259 							     fib_entry,
4260 							     ipip_entry);
4261 		}
4262 		if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4263 						     dip.addr4)) {
4264 			u32 t_index;
4265 
4266 			t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4267 			fib_entry->decap.tunnel_index = t_index;
4268 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4269 			return 0;
4270 		}
4271 		/* fall through */
4272 	case RTN_BROADCAST:
4273 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4274 		return 0;
4275 	case RTN_UNREACHABLE: /* fall through */
4276 	case RTN_BLACKHOLE: /* fall through */
4277 	case RTN_PROHIBIT:
4278 		/* Packets hitting these routes need to be trapped, but can
4279 		 * be trapped with a lower priority than packets directed at
4280 		 * the host, so use action type local instead of trap.
4281 		 */
4282 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4283 		return 0;
4284 	case RTN_UNICAST:
4285 		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4286 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4287 		else
4288 			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4289 		return 0;
4290 	default:
4291 		return -EINVAL;
4292 	}
4293 }
4294 
4295 static struct mlxsw_sp_fib4_entry *
4296 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4297 			   struct mlxsw_sp_fib_node *fib_node,
4298 			   const struct fib_entry_notifier_info *fen_info)
4299 {
4300 	struct mlxsw_sp_fib4_entry *fib4_entry;
4301 	struct mlxsw_sp_fib_entry *fib_entry;
4302 	int err;
4303 
4304 	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4305 	if (!fib4_entry)
4306 		return ERR_PTR(-ENOMEM);
4307 	fib_entry = &fib4_entry->common;
4308 
4309 	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4310 	if (err)
4311 		goto err_fib4_entry_type_set;
4312 
4313 	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4314 	if (err)
4315 		goto err_nexthop4_group_get;
4316 
4317 	fib4_entry->prio = fen_info->fi->fib_priority;
4318 	fib4_entry->tb_id = fen_info->tb_id;
4319 	fib4_entry->type = fen_info->type;
4320 	fib4_entry->tos = fen_info->tos;
4321 
4322 	fib_entry->fib_node = fib_node;
4323 
4324 	return fib4_entry;
4325 
4326 err_nexthop4_group_get:
4327 err_fib4_entry_type_set:
4328 	kfree(fib4_entry);
4329 	return ERR_PTR(err);
4330 }
4331 
4332 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4333 					struct mlxsw_sp_fib4_entry *fib4_entry)
4334 {
4335 	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4336 	kfree(fib4_entry);
4337 }
4338 
4339 static struct mlxsw_sp_fib4_entry *
4340 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4341 			   const struct fib_entry_notifier_info *fen_info)
4342 {
4343 	struct mlxsw_sp_fib4_entry *fib4_entry;
4344 	struct mlxsw_sp_fib_node *fib_node;
4345 	struct mlxsw_sp_fib *fib;
4346 	struct mlxsw_sp_vr *vr;
4347 
4348 	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4349 	if (!vr)
4350 		return NULL;
4351 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4352 
4353 	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4354 					    sizeof(fen_info->dst),
4355 					    fen_info->dst_len);
4356 	if (!fib_node)
4357 		return NULL;
4358 
4359 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4360 		if (fib4_entry->tb_id == fen_info->tb_id &&
4361 		    fib4_entry->tos == fen_info->tos &&
4362 		    fib4_entry->type == fen_info->type &&
4363 		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4364 		    fen_info->fi) {
4365 			return fib4_entry;
4366 		}
4367 	}
4368 
4369 	return NULL;
4370 }
4371 
4372 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4373 	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4374 	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4375 	.key_len = sizeof(struct mlxsw_sp_fib_key),
4376 	.automatic_shrinking = true,
4377 };
4378 
4379 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4380 				    struct mlxsw_sp_fib_node *fib_node)
4381 {
4382 	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4383 				      mlxsw_sp_fib_ht_params);
4384 }
4385 
4386 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4387 				     struct mlxsw_sp_fib_node *fib_node)
4388 {
4389 	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4390 			       mlxsw_sp_fib_ht_params);
4391 }
4392 
4393 static struct mlxsw_sp_fib_node *
4394 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4395 			 size_t addr_len, unsigned char prefix_len)
4396 {
4397 	struct mlxsw_sp_fib_key key;
4398 
4399 	memset(&key, 0, sizeof(key));
4400 	memcpy(key.addr, addr, addr_len);
4401 	key.prefix_len = prefix_len;
4402 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4403 }
4404 
4405 static struct mlxsw_sp_fib_node *
4406 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4407 			 size_t addr_len, unsigned char prefix_len)
4408 {
4409 	struct mlxsw_sp_fib_node *fib_node;
4410 
4411 	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4412 	if (!fib_node)
4413 		return NULL;
4414 
4415 	INIT_LIST_HEAD(&fib_node->entry_list);
4416 	list_add(&fib_node->list, &fib->node_list);
4417 	memcpy(fib_node->key.addr, addr, addr_len);
4418 	fib_node->key.prefix_len = prefix_len;
4419 
4420 	return fib_node;
4421 }
4422 
4423 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4424 {
4425 	list_del(&fib_node->list);
4426 	WARN_ON(!list_empty(&fib_node->entry_list));
4427 	kfree(fib_node);
4428 }
4429 
4430 static bool
4431 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4432 				 const struct mlxsw_sp_fib_entry *fib_entry)
4433 {
4434 	return list_first_entry(&fib_node->entry_list,
4435 				struct mlxsw_sp_fib_entry, list) == fib_entry;
4436 }
4437 
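/* LPM trees are shared per protocol and reference-counted per prefix
 * length. Linking a node whose prefix length is not yet used by the
 * tree requires getting a tree that covers it and rebinding the virtual
 * routers to the new tree.
 */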
4438 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4439 				      struct mlxsw_sp_fib_node *fib_node)
4440 {
4441 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4442 	struct mlxsw_sp_fib *fib = fib_node->fib;
4443 	struct mlxsw_sp_lpm_tree *lpm_tree;
4444 	int err;
4445 
4446 	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4447 	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4448 		goto out;
4449 
4450 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4451 	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4452 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4453 					 fib->proto);
4454 	if (IS_ERR(lpm_tree))
4455 		return PTR_ERR(lpm_tree);
4456 
4457 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4458 	if (err)
4459 		goto err_lpm_tree_replace;
4460 
4461 out:
4462 	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4463 	return 0;
4464 
4465 err_lpm_tree_replace:
4466 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4467 	return err;
4468 }
4469 
4470 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4471 					 struct mlxsw_sp_fib_node *fib_node)
4472 {
4473 	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4474 	struct mlxsw_sp_prefix_usage req_prefix_usage;
4475 	struct mlxsw_sp_fib *fib = fib_node->fib;
4476 	int err;
4477 
4478 	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4479 		return;
4480 	/* Try to construct a new LPM tree from the current prefix usage
4481 	 * minus the now-unused prefix length. If we fail, keep the old one.
4482 	 */
4483 	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4484 	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4485 				    fib_node->key.prefix_len);
4486 	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4487 					 fib->proto);
4488 	if (IS_ERR(lpm_tree))
4489 		return;
4490 
4491 	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4492 	if (err)
4493 		goto err_lpm_tree_replace;
4494 
4495 	return;
4496 
4497 err_lpm_tree_replace:
4498 	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4499 }
4500 
4501 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4502 				  struct mlxsw_sp_fib_node *fib_node,
4503 				  struct mlxsw_sp_fib *fib)
4504 {
4505 	int err;
4506 
4507 	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4508 	if (err)
4509 		return err;
4510 	fib_node->fib = fib;
4511 
4512 	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4513 	if (err)
4514 		goto err_fib_lpm_tree_link;
4515 
4516 	return 0;
4517 
4518 err_fib_lpm_tree_link:
4519 	fib_node->fib = NULL;
4520 	mlxsw_sp_fib_node_remove(fib, fib_node);
4521 	return err;
4522 }
4523 
4524 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4525 				   struct mlxsw_sp_fib_node *fib_node)
4526 {
4527 	struct mlxsw_sp_fib *fib = fib_node->fib;
4528 
4529 	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4530 	fib_node->fib = NULL;
4531 	mlxsw_sp_fib_node_remove(fib, fib_node);
4532 }
4533 
4534 static struct mlxsw_sp_fib_node *
4535 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4536 		      size_t addr_len, unsigned char prefix_len,
4537 		      enum mlxsw_sp_l3proto proto)
4538 {
4539 	struct mlxsw_sp_fib_node *fib_node;
4540 	struct mlxsw_sp_fib *fib;
4541 	struct mlxsw_sp_vr *vr;
4542 	int err;
4543 
4544 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4545 	if (IS_ERR(vr))
4546 		return ERR_CAST(vr);
4547 	fib = mlxsw_sp_vr_fib(vr, proto);
4548 
4549 	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4550 	if (fib_node)
4551 		return fib_node;
4552 
4553 	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4554 	if (!fib_node) {
4555 		err = -ENOMEM;
4556 		goto err_fib_node_create;
4557 	}
4558 
4559 	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4560 	if (err)
4561 		goto err_fib_node_init;
4562 
4563 	return fib_node;
4564 
4565 err_fib_node_init:
4566 	mlxsw_sp_fib_node_destroy(fib_node);
4567 err_fib_node_create:
4568 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4569 	return ERR_PTR(err);
4570 }
4571 
4572 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4573 				  struct mlxsw_sp_fib_node *fib_node)
4574 {
4575 	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4576 
4577 	if (!list_empty(&fib_node->entry_list))
4578 		return;
4579 	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4580 	mlxsw_sp_fib_node_destroy(fib_node);
4581 	mlxsw_sp_vr_put(mlxsw_sp, vr);
4582 }
4583 
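/* Entries in a FIB node are kept sorted by table ID (descending), then
 * TOS (descending), then priority (ascending), so the first match found
 * below is the insertion point for a new entry.
 */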
4584 static struct mlxsw_sp_fib4_entry *
4585 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4586 			      const struct mlxsw_sp_fib4_entry *new4_entry)
4587 {
4588 	struct mlxsw_sp_fib4_entry *fib4_entry;
4589 
4590 	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4591 		if (fib4_entry->tb_id > new4_entry->tb_id)
4592 			continue;
4593 		if (fib4_entry->tb_id != new4_entry->tb_id)
4594 			break;
4595 		if (fib4_entry->tos > new4_entry->tos)
4596 			continue;
4597 		if (fib4_entry->prio >= new4_entry->prio ||
4598 		    fib4_entry->tos < new4_entry->tos)
4599 			return fib4_entry;
4600 	}
4601 
4602 	return NULL;
4603 }
4604 
4605 static int
4606 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4607 			       struct mlxsw_sp_fib4_entry *new4_entry)
4608 {
4609 	struct mlxsw_sp_fib_node *fib_node;
4610 
4611 	if (WARN_ON(!fib4_entry))
4612 		return -EINVAL;
4613 
4614 	fib_node = fib4_entry->common.fib_node;
4615 	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4616 				 common.list) {
4617 		if (fib4_entry->tb_id != new4_entry->tb_id ||
4618 		    fib4_entry->tos != new4_entry->tos ||
4619 		    fib4_entry->prio != new4_entry->prio)
4620 			break;
4621 	}
4622 
4623 	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4624 	return 0;
4625 }
4626 
4627 static int
4628 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4629 			       bool replace, bool append)
4630 {
4631 	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4632 	struct mlxsw_sp_fib4_entry *fib4_entry;
4633 
4634 	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4635 
4636 	if (append)
4637 		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4638 	if (replace && WARN_ON(!fib4_entry))
4639 		return -EINVAL;
4640 
4641 	/* Insert the new entry before the replaced one, so that the
4642 	 * latter can be removed later.
4643 	 */
4644 	if (fib4_entry) {
4645 		list_add_tail(&new4_entry->common.list,
4646 			      &fib4_entry->common.list);
4647 	} else {
4648 		struct mlxsw_sp_fib4_entry *last;
4649 
4650 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
4651 			if (new4_entry->tb_id > last->tb_id)
4652 				break;
4653 			fib4_entry = last;
4654 		}
4655 
4656 		if (fib4_entry)
4657 			list_add(&new4_entry->common.list,
4658 				 &fib4_entry->common.list);
4659 		else
4660 			list_add(&new4_entry->common.list,
4661 				 &fib_node->entry_list);
4662 	}
4663 
4664 	return 0;
4665 }
4666 
4667 static void
4668 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4669 {
4670 	list_del(&fib4_entry->common.list);
4671 }
4672 
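/* Only the first entry in a FIB node is programmed to the device; the
 * remaining entries are shadowed by it and promoted when it is removed.
 */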
4673 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4674 				       struct mlxsw_sp_fib_entry *fib_entry)
4675 {
4676 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4677 
4678 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4679 		return 0;
4680 
4681 	/* To prevent packet loss, overwrite the previously offloaded
4682 	 * entry.
4683 	 */
4684 	if (!list_is_singular(&fib_node->entry_list)) {
4685 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4686 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4687 
4688 		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4689 	}
4690 
4691 	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4692 }
4693 
4694 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4695 					struct mlxsw_sp_fib_entry *fib_entry)
4696 {
4697 	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4698 
4699 	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4700 		return;
4701 
4702 	/* Promote the next entry by overwriting the deleted entry */
4703 	if (!list_is_singular(&fib_node->entry_list)) {
4704 		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4705 		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4706 
4707 		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4708 		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4709 		return;
4710 	}
4711 
4712 	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4713 }
4714 
4715 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4716 					 struct mlxsw_sp_fib4_entry *fib4_entry,
4717 					 bool replace, bool append)
4718 {
4719 	int err;
4720 
4721 	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4722 	if (err)
4723 		return err;
4724 
4725 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4726 	if (err)
4727 		goto err_fib_node_entry_add;
4728 
4729 	return 0;
4730 
4731 err_fib_node_entry_add:
4732 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4733 	return err;
4734 }
4735 
4736 static void
4737 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4738 				struct mlxsw_sp_fib4_entry *fib4_entry)
4739 {
4740 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4741 	mlxsw_sp_fib4_node_list_remove(fib4_entry);
4742 
4743 	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4744 		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4745 }
4746 
4747 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4748 					struct mlxsw_sp_fib4_entry *fib4_entry,
4749 					bool replace)
4750 {
4751 	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4752 	struct mlxsw_sp_fib4_entry *replaced;
4753 
4754 	if (!replace)
4755 		return;
4756 
4757 	/* We inserted the new entry before replaced one */
4758 	replaced = list_next_entry(fib4_entry, common.list);
4759 
4760 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4761 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4762 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4763 }
4764 
4765 static int
4766 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4767 			 const struct fib_entry_notifier_info *fen_info,
4768 			 bool replace, bool append)
4769 {
4770 	struct mlxsw_sp_fib4_entry *fib4_entry;
4771 	struct mlxsw_sp_fib_node *fib_node;
4772 	int err;
4773 
4774 	if (mlxsw_sp->router->aborted)
4775 		return 0;
4776 
4777 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4778 					 &fen_info->dst, sizeof(fen_info->dst),
4779 					 fen_info->dst_len,
4780 					 MLXSW_SP_L3_PROTO_IPV4);
4781 	if (IS_ERR(fib_node)) {
4782 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4783 		return PTR_ERR(fib_node);
4784 	}
4785 
4786 	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4787 	if (IS_ERR(fib4_entry)) {
4788 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4789 		err = PTR_ERR(fib4_entry);
4790 		goto err_fib4_entry_create;
4791 	}
4792 
4793 	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4794 					    append);
4795 	if (err) {
4796 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4797 		goto err_fib4_node_entry_link;
4798 	}
4799 
4800 	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4801 
4802 	return 0;
4803 
4804 err_fib4_node_entry_link:
4805 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4806 err_fib4_entry_create:
4807 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4808 	return err;
4809 }
4810 
4811 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4812 				     struct fib_entry_notifier_info *fen_info)
4813 {
4814 	struct mlxsw_sp_fib4_entry *fib4_entry;
4815 	struct mlxsw_sp_fib_node *fib_node;
4816 
4817 	if (mlxsw_sp->router->aborted)
4818 		return;
4819 
4820 	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4821 	if (WARN_ON(!fib4_entry))
4822 		return;
4823 	fib_node = fib4_entry->common.fib_node;
4824 
4825 	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4826 	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4827 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4828 }
4829 
4830 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4831 {
4832 	/* Packets with a link-local destination IP arriving at the router
4833 	 * are trapped to the CPU, so there is no need to program specific
4834 	 * routes for them.
4835 	 */
4836 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4837 		return true;
4838 
4839 	/* Multicast routes aren't supported, so ignore them. Neighbour
4840 	 * Discovery packets are specifically trapped.
4841 	 */
4842 	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4843 		return true;
4844 
4845 	/* Cloned routes are irrelevant in the forwarding path. */
4846 	if (rt->fib6_flags & RTF_CACHE)
4847 		return true;
4848 
4849 	return false;
4850 }
4851 
4852 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4853 {
4854 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4855 
4856 	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4857 	if (!mlxsw_sp_rt6)
4858 		return ERR_PTR(-ENOMEM);
4859 
4860 	/* In case of route replacement, the replaced route is deleted
4861 	 * without notification. Take a reference to prevent accessing
4862 	 * freed memory.
4863 	 */
4864 	mlxsw_sp_rt6->rt = rt;
4865 	fib6_info_hold(rt);
4866 
4867 	return mlxsw_sp_rt6;
4868 }
4869 
4870 #if IS_ENABLED(CONFIG_IPV6)
4871 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4872 {
4873 	fib6_info_release(rt);
4874 }
4875 #else
4876 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4877 {
4878 }
4879 #endif
4880 
4881 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4882 {
4883 	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4884 	kfree(mlxsw_sp_rt6);
4885 }
4886 
4887 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4888 {
4889 	/* RTF_CACHE routes are ignored */
4890 	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4891 }
4892 
4893 static struct fib6_info *
4894 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4895 {
4896 	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4897 				list)->rt;
4898 }
4899 
4900 static struct mlxsw_sp_fib6_entry *
4901 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4902 				 const struct fib6_info *nrt, bool replace)
4903 {
4904 	struct mlxsw_sp_fib6_entry *fib6_entry;
4905 
4906 	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4907 		return NULL;
4908 
4909 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4910 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4911 
4912 		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4913 		 * virtual router.
4914 		 */
4915 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4916 			continue;
4917 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4918 			break;
4919 		if (rt->fib6_metric < nrt->fib6_metric)
4920 			continue;
4921 		if (rt->fib6_metric == nrt->fib6_metric &&
4922 		    mlxsw_sp_fib6_rt_can_mp(rt))
4923 			return fib6_entry;
4924 		if (rt->fib6_metric > nrt->fib6_metric)
4925 			break;
4926 	}
4927 
4928 	return NULL;
4929 }
4930 
4931 static struct mlxsw_sp_rt6 *
4932 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4933 			    const struct fib6_info *rt)
4934 {
4935 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4936 
4937 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4938 		if (mlxsw_sp_rt6->rt == rt)
4939 			return mlxsw_sp_rt6;
4940 	}
4941 
4942 	return NULL;
4943 }
4944 
4945 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4946 					const struct fib6_info *rt,
4947 					enum mlxsw_sp_ipip_type *ret)
4948 {
4949 	return rt->fib6_nh.nh_dev &&
4950 	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4951 }
4952 
4953 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4954 				       struct mlxsw_sp_nexthop_group *nh_grp,
4955 				       struct mlxsw_sp_nexthop *nh,
4956 				       const struct fib6_info *rt)
4957 {
4958 	const struct mlxsw_sp_ipip_ops *ipip_ops;
4959 	struct mlxsw_sp_ipip_entry *ipip_entry;
4960 	struct net_device *dev = rt->fib6_nh.nh_dev;
4961 	struct mlxsw_sp_rif *rif;
4962 	int err;
4963 
4964 	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4965 	if (ipip_entry) {
4966 		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4967 		if (ipip_ops->can_offload(mlxsw_sp, dev,
4968 					  MLXSW_SP_L3_PROTO_IPV6)) {
4969 			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4970 			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4971 			return 0;
4972 		}
4973 	}
4974 
4975 	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4976 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4977 	if (!rif)
4978 		return 0;
4979 	mlxsw_sp_nexthop_rif_init(nh, rif);
4980 
4981 	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4982 	if (err)
4983 		goto err_nexthop_neigh_init;
4984 
4985 	return 0;
4986 
4987 err_nexthop_neigh_init:
4988 	mlxsw_sp_nexthop_rif_fini(nh);
4989 	return err;
4990 }
4991 
4992 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4993 					struct mlxsw_sp_nexthop *nh)
4994 {
4995 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4996 }
4997 
4998 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4999 				  struct mlxsw_sp_nexthop_group *nh_grp,
5000 				  struct mlxsw_sp_nexthop *nh,
5001 				  const struct fib6_info *rt)
5002 {
5003 	struct net_device *dev = rt->fib6_nh.nh_dev;
5004 
5005 	nh->nh_grp = nh_grp;
5006 	nh->nh_weight = rt->fib6_nh.nh_weight;
5007 	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
5008 	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5009 
5010 	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5011 
5012 	if (!dev)
5013 		return 0;
5014 	nh->ifindex = dev->ifindex;
5015 
5016 	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5017 }
5018 
5019 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5020 				   struct mlxsw_sp_nexthop *nh)
5021 {
5022 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5023 	list_del(&nh->router_list_node);
5024 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5025 }
5026 
5027 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5028 				    const struct fib6_info *rt)
5029 {
5030 	return rt->fib6_flags & RTF_GATEWAY ||
5031 	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5032 }
5033 
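/* Unlike IPv4, an IPv6 entry aggregates multiple fib6_info routes, so
 * the nexthop group is sized by the entry's route count and neighbour
 * resolution uses the neighbour discovery table.
 */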
5034 static struct mlxsw_sp_nexthop_group *
5035 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5036 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5037 {
5038 	struct mlxsw_sp_nexthop_group *nh_grp;
5039 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5040 	struct mlxsw_sp_nexthop *nh;
5041 	size_t alloc_size;
5042 	int i;
5043 	int err;
5044 
5045 	alloc_size = sizeof(*nh_grp) +
5046 		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
5047 	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
5048 	if (!nh_grp)
5049 		return ERR_PTR(-ENOMEM);
5050 	INIT_LIST_HEAD(&nh_grp->fib_list);
5051 #if IS_ENABLED(CONFIG_IPV6)
5052 	nh_grp->neigh_tbl = &nd_tbl;
5053 #endif
5054 	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5055 					struct mlxsw_sp_rt6, list);
5056 	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5057 	nh_grp->count = fib6_entry->nrt6;
5058 	for (i = 0; i < nh_grp->count; i++) {
5059 		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5060 
5061 		nh = &nh_grp->nexthops[i];
5062 		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5063 		if (err)
5064 			goto err_nexthop6_init;
5065 		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5066 	}
5067 
5068 	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5069 	if (err)
5070 		goto err_nexthop_group_insert;
5071 
5072 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5073 	return nh_grp;
5074 
5075 err_nexthop_group_insert:
5076 err_nexthop6_init:
5077 	for (i--; i >= 0; i--) {
5078 		nh = &nh_grp->nexthops[i];
5079 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5080 	}
5081 	kfree(nh_grp);
5082 	return ERR_PTR(err);
5083 }
5084 
5085 static void
5086 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5087 				struct mlxsw_sp_nexthop_group *nh_grp)
5088 {
5089 	struct mlxsw_sp_nexthop *nh;
5090 	int i = nh_grp->count;
5091 
5092 	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5093 	for (i--; i >= 0; i--) {
5094 		nh = &nh_grp->nexthops[i];
5095 		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5096 	}
5097 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5098 	WARN_ON(nh_grp->adj_index_valid);
5099 	kfree(nh_grp);
5100 }
5101 
5102 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5103 				       struct mlxsw_sp_fib6_entry *fib6_entry)
5104 {
5105 	struct mlxsw_sp_nexthop_group *nh_grp;
5106 
5107 	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5108 	if (!nh_grp) {
5109 		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5110 		if (IS_ERR(nh_grp))
5111 			return PTR_ERR(nh_grp);
5112 	}
5113 
5114 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5115 		      &nh_grp->fib_list);
5116 	fib6_entry->common.nh_group = nh_grp;
5117 
5118 	return 0;
5119 }
5120 
5121 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5122 					struct mlxsw_sp_fib_entry *fib_entry)
5123 {
5124 	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5125 
5126 	list_del(&fib_entry->nexthop_group_node);
5127 	if (!list_empty(&nh_grp->fib_list))
5128 		return;
5129 	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5130 }
5131 
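/* Rebuild the nexthop group of an IPv6 entry after its route list
 * changed: get a (possibly new) group, repoint the hardware entry at it
 * and only then destroy the old group if it became unused, so that the
 * entry is never left without a valid group.
 */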
5132 static int
5133 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5134 			       struct mlxsw_sp_fib6_entry *fib6_entry)
5135 {
5136 	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5137 	int err;
5138 
5139 	fib6_entry->common.nh_group = NULL;
5140 	list_del(&fib6_entry->common.nexthop_group_node);
5141 
5142 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5143 	if (err)
5144 		goto err_nexthop6_group_get;
5145 
5146 	/* In case this entry is offloaded, the adjacency index currently
5147 	 * associated with it in the device's table is that of the old
5148 	 * group. Start using the new one instead.
5149 	 */
5150 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5151 	if (err)
5152 		goto err_fib_node_entry_add;
5153 
5154 	if (list_empty(&old_nh_grp->fib_list))
5155 		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5156 
5157 	return 0;
5158 
5159 err_fib_node_entry_add:
5160 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5161 err_nexthop6_group_get:
5162 	list_add_tail(&fib6_entry->common.nexthop_group_node,
5163 		      &old_nh_grp->fib_list);
5164 	fib6_entry->common.nh_group = old_nh_grp;
5165 	return err;
5166 }
5167 
5168 static int
5169 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5170 				struct mlxsw_sp_fib6_entry *fib6_entry,
5171 				struct fib6_info *rt)
5172 {
5173 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5174 	int err;
5175 
5176 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5177 	if (IS_ERR(mlxsw_sp_rt6))
5178 		return PTR_ERR(mlxsw_sp_rt6);
5179 
5180 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5181 	fib6_entry->nrt6++;
5182 
5183 	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5184 	if (err)
5185 		goto err_nexthop6_group_update;
5186 
5187 	return 0;
5188 
5189 err_nexthop6_group_update:
5190 	fib6_entry->nrt6--;
5191 	list_del(&mlxsw_sp_rt6->list);
5192 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5193 	return err;
5194 }
5195 
5196 static void
5197 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5198 				struct mlxsw_sp_fib6_entry *fib6_entry,
5199 				struct fib6_info *rt)
5200 {
5201 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5202 
5203 	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5204 	if (WARN_ON(!mlxsw_sp_rt6))
5205 		return;
5206 
5207 	fib6_entry->nrt6--;
5208 	list_del(&mlxsw_sp_rt6->list);
5209 	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5210 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5211 }
5212 
5213 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5214 					 struct mlxsw_sp_fib_entry *fib_entry,
5215 					 const struct fib6_info *rt)
5216 {
5217 	/* Packets hitting RTF_REJECT routes need to be discarded by the
5218 	 * stack. We can rely on their destination device not having a
5219 	 * RIF (it's the loopback device) and can thus use action type
5220 	 * local, which will cause them to be trapped with a lower
5221 	 * priority than packets that need to be locally received.
5222 	 */
5223 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5224 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5225 	else if (rt->fib6_flags & RTF_REJECT)
5226 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5227 	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5228 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5229 	else
5230 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5231 }
5232 
5233 static void
5234 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5235 {
5236 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5237 
5238 	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5239 				 list) {
5240 		fib6_entry->nrt6--;
5241 		list_del(&mlxsw_sp_rt6->list);
5242 		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5243 	}
5244 }
5245 
5246 static struct mlxsw_sp_fib6_entry *
5247 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5248 			   struct mlxsw_sp_fib_node *fib_node,
5249 			   struct fib6_info *rt)
5250 {
5251 	struct mlxsw_sp_fib6_entry *fib6_entry;
5252 	struct mlxsw_sp_fib_entry *fib_entry;
5253 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5254 	int err;
5255 
5256 	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5257 	if (!fib6_entry)
5258 		return ERR_PTR(-ENOMEM);
5259 	fib_entry = &fib6_entry->common;
5260 
5261 	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5262 	if (IS_ERR(mlxsw_sp_rt6)) {
5263 		err = PTR_ERR(mlxsw_sp_rt6);
5264 		goto err_rt6_create;
5265 	}
5266 
5267 	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5268 
5269 	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5270 	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5271 	fib6_entry->nrt6 = 1;
5272 	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5273 	if (err)
5274 		goto err_nexthop6_group_get;
5275 
5276 	fib_entry->fib_node = fib_node;
5277 
5278 	return fib6_entry;
5279 
5280 err_nexthop6_group_get:
5281 	list_del(&mlxsw_sp_rt6->list);
5282 	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5283 err_rt6_create:
5284 	kfree(fib6_entry);
5285 	return ERR_PTR(err);
5286 }
5287 
5288 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5289 					struct mlxsw_sp_fib6_entry *fib6_entry)
5290 {
5291 	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5292 	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5293 	WARN_ON(fib6_entry->nrt6);
5294 	kfree(fib6_entry);
5295 }
5296 
5297 static struct mlxsw_sp_fib6_entry *
5298 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5299 			      const struct fib6_info *nrt, bool replace)
5300 {
5301 	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5302 
5303 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5304 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5305 
5306 		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5307 			continue;
5308 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5309 			break;
5310 		if (replace && rt->fib6_metric == nrt->fib6_metric) {
5311 			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5312 			    mlxsw_sp_fib6_rt_can_mp(nrt))
5313 				return fib6_entry;
5314 			if (mlxsw_sp_fib6_rt_can_mp(nrt))
5315 				fallback = fallback ?: fib6_entry;
5316 		}
5317 		if (rt->fib6_metric > nrt->fib6_metric)
5318 			return fallback ?: fib6_entry;
5319 	}
5320 
5321 	return fallback;
5322 }
5323 
5324 static int
5325 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5326 			       bool replace)
5327 {
5328 	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5329 	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5330 	struct mlxsw_sp_fib6_entry *fib6_entry;
5331 
5332 	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5333 
5334 	if (replace && WARN_ON(!fib6_entry))
5335 		return -EINVAL;
5336 
5337 	if (fib6_entry) {
5338 		list_add_tail(&new6_entry->common.list,
5339 			      &fib6_entry->common.list);
5340 	} else {
5341 		struct mlxsw_sp_fib6_entry *last;
5342 
5343 		list_for_each_entry(last, &fib_node->entry_list, common.list) {
5344 			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5345 
5346 			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5347 				break;
5348 			fib6_entry = last;
5349 		}
5350 
5351 		if (fib6_entry)
5352 			list_add(&new6_entry->common.list,
5353 				 &fib6_entry->common.list);
5354 		else
5355 			list_add(&new6_entry->common.list,
5356 				 &fib_node->entry_list);
5357 	}
5358 
5359 	return 0;
5360 }
5361 
5362 static void
5363 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5364 {
5365 	list_del(&fib6_entry->common.list);
5366 }
5367 
5368 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5369 					 struct mlxsw_sp_fib6_entry *fib6_entry,
5370 					 bool replace)
5371 {
5372 	int err;
5373 
5374 	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5375 	if (err)
5376 		return err;
5377 
5378 	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5379 	if (err)
5380 		goto err_fib_node_entry_add;
5381 
5382 	return 0;
5383 
5384 err_fib_node_entry_add:
5385 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5386 	return err;
5387 }
5388 
5389 static void
5390 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5391 				struct mlxsw_sp_fib6_entry *fib6_entry)
5392 {
5393 	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5394 	mlxsw_sp_fib6_node_list_remove(fib6_entry);
5395 }
5396 
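/* Resolve the virtual router from the route's table ID and look up
 * the FIB node by prefix. A match requires the same table ID, the
 * same metric and that the route is linked to the entry.
 */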
5397 static struct mlxsw_sp_fib6_entry *
5398 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5399 			   const struct fib6_info *rt)
5400 {
5401 	struct mlxsw_sp_fib6_entry *fib6_entry;
5402 	struct mlxsw_sp_fib_node *fib_node;
5403 	struct mlxsw_sp_fib *fib;
5404 	struct mlxsw_sp_vr *vr;
5405 
5406 	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5407 	if (!vr)
5408 		return NULL;
5409 	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5410 
5411 	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5412 					    sizeof(rt->fib6_dst.addr),
5413 					    rt->fib6_dst.plen);
5414 	if (!fib_node)
5415 		return NULL;
5416 
5417 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5418 		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5419 
5420 		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5421 		    rt->fib6_metric == iter_rt->fib6_metric &&
5422 		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5423 			return fib6_entry;
5424 	}
5425 
5426 	return NULL;
5427 }
5428 
5429 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5430 					struct mlxsw_sp_fib6_entry *fib6_entry,
5431 					bool replace)
5432 {
5433 	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5434 	struct mlxsw_sp_fib6_entry *replaced;
5435 
5436 	if (!replace)
5437 		return;
5438 
5439 	replaced = list_next_entry(fib6_entry, common.list);
5440 
5441 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5442 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5443 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5444 }
5445 
5446 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5447 				    struct fib6_info *rt, bool replace)
5448 {
5449 	struct mlxsw_sp_fib6_entry *fib6_entry;
5450 	struct mlxsw_sp_fib_node *fib_node;
5451 	int err;
5452 
5453 	if (mlxsw_sp->router->aborted)
5454 		return 0;
5455 
5456 	if (rt->fib6_src.plen)
5457 		return -EINVAL;
5458 
5459 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5460 		return 0;
5461 
5462 	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5463 					 &rt->fib6_dst.addr,
5464 					 sizeof(rt->fib6_dst.addr),
5465 					 rt->fib6_dst.plen,
5466 					 MLXSW_SP_L3_PROTO_IPV6);
5467 	if (IS_ERR(fib_node))
5468 		return PTR_ERR(fib_node);
5469 
5470 	/* Before creating a new entry, try to append route to an existing
5471 	 * multipath entry.
5472 	 */
5473 	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5474 	if (fib6_entry) {
5475 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5476 		if (err)
5477 			goto err_fib6_entry_nexthop_add;
5478 		return 0;
5479 	}
5480 
5481 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5482 	if (IS_ERR(fib6_entry)) {
5483 		err = PTR_ERR(fib6_entry);
5484 		goto err_fib6_entry_create;
5485 	}
5486 
5487 	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5488 	if (err)
5489 		goto err_fib6_node_entry_link;
5490 
5491 	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5492 
5493 	return 0;
5494 
5495 err_fib6_node_entry_link:
5496 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5497 err_fib6_entry_create:
5498 err_fib6_entry_nexthop_add:
5499 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5500 	return err;
5501 }
5502 
5503 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5504 				     struct fib6_info *rt)
5505 {
5506 	struct mlxsw_sp_fib6_entry *fib6_entry;
5507 	struct mlxsw_sp_fib_node *fib_node;
5508 
5509 	if (mlxsw_sp->router->aborted)
5510 		return;
5511 
5512 	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5513 		return;
5514 
5515 	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5516 	if (WARN_ON(!fib6_entry))
5517 		return;
5518 
5519 	/* If the route is part of a multipath entry, but is not the last
5520 	 * one to be removed, then only remove it from the nexthop group.
5521 	 */
5522 	if (!list_is_singular(&fib6_entry->rt6_list)) {
5523 		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5524 		return;
5525 	}
5526 
5527 	fib_node = fib6_entry->common.fib_node;
5528 
5529 	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5530 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5531 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5532 }
5533 
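/* Bind all virtual routers to a minimal LPM tree whose default route
 * traps packets to the CPU, so that after a FIB abort traffic is
 * forwarded by the kernel instead of the ASIC.
 */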
5534 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5535 					    enum mlxsw_reg_ralxx_protocol proto,
5536 					    u8 tree_id)
5537 {
5538 	char ralta_pl[MLXSW_REG_RALTA_LEN];
5539 	char ralst_pl[MLXSW_REG_RALST_LEN];
5540 	int i, err;
5541 
5542 	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5543 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5544 	if (err)
5545 		return err;
5546 
5547 	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5548 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5549 	if (err)
5550 		return err;
5551 
5552 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5553 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5554 		char raltb_pl[MLXSW_REG_RALTB_LEN];
5555 		char ralue_pl[MLXSW_REG_RALUE_LEN];
5556 
5557 		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5558 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5559 				      raltb_pl);
5560 		if (err)
5561 			return err;
5562 
5563 		mlxsw_reg_ralue_pack(ralue_pl, proto,
5564 				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5565 		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5566 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5567 				      ralue_pl);
5568 		if (err)
5569 			return err;
5570 	}
5571 
5572 	return 0;
5573 }
5574 
5575 static struct mlxsw_sp_mr_table *
5576 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5577 {
5578 	if (family == RTNL_FAMILY_IPMR)
5579 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5580 	else
5581 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5582 }
5583 
5584 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5585 				     struct mfc_entry_notifier_info *men_info,
5586 				     bool replace)
5587 {
5588 	struct mlxsw_sp_mr_table *mrt;
5589 	struct mlxsw_sp_vr *vr;
5590 
5591 	if (mlxsw_sp->router->aborted)
5592 		return 0;
5593 
5594 	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5595 	if (IS_ERR(vr))
5596 		return PTR_ERR(vr);
5597 
5598 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5599 	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5600 }
5601 
5602 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5603 				      struct mfc_entry_notifier_info *men_info)
5604 {
5605 	struct mlxsw_sp_mr_table *mrt;
5606 	struct mlxsw_sp_vr *vr;
5607 
5608 	if (mlxsw_sp->router->aborted)
5609 		return;
5610 
5611 	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5612 	if (WARN_ON(!vr))
5613 		return;
5614 
5615 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5616 	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5617 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5618 }
5619 
5620 static int
5621 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5622 			      struct vif_entry_notifier_info *ven_info)
5623 {
5624 	struct mlxsw_sp_mr_table *mrt;
5625 	struct mlxsw_sp_rif *rif;
5626 	struct mlxsw_sp_vr *vr;
5627 
5628 	if (mlxsw_sp->router->aborted)
5629 		return 0;
5630 
5631 	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5632 	if (IS_ERR(vr))
5633 		return PTR_ERR(vr);
5634 
5635 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5636 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5637 	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5638 				   ven_info->vif_index,
5639 				   ven_info->vif_flags, rif);
5640 }
5641 
5642 static void
5643 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5644 			      struct vif_entry_notifier_info *ven_info)
5645 {
5646 	struct mlxsw_sp_mr_table *mrt;
5647 	struct mlxsw_sp_vr *vr;
5648 
5649 	if (mlxsw_sp->router->aborted)
5650 		return;
5651 
5652 	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5653 	if (WARN_ON(!vr))
5654 		return;
5655 
5656 	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5657 	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5658 	mlxsw_sp_vr_put(mlxsw_sp, vr);
5659 }
5660 
5661 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5662 {
5663 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5664 	int err;
5665 
5666 	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5667 					       MLXSW_SP_LPM_TREE_MIN);
5668 	if (err)
5669 		return err;
5670 
5671 	/* The multicast router code does not need an abort trap as, by default,
5672 	 * packets that don't match any routes are trapped to the CPU.
5673 	 */
5674 
5675 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5676 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5677 						MLXSW_SP_LPM_TREE_MIN + 1);
5678 }
5679 
5680 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5681 				     struct mlxsw_sp_fib_node *fib_node)
5682 {
5683 	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5684 
5685 	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5686 				 common.list) {
5687 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5688 
5689 		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5690 		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5691 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5692 		/* Break when entry list is empty and node was freed.
5693 		 * Otherwise, we'll access freed memory in the next
5694 		 * iteration.
5695 		 */
5696 		if (do_break)
5697 			break;
5698 	}
5699 }
5700 
5701 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5702 				     struct mlxsw_sp_fib_node *fib_node)
5703 {
5704 	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5705 
5706 	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5707 				 common.list) {
5708 		bool do_break = &tmp->common.list == &fib_node->entry_list;
5709 
5710 		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5711 		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5712 		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5713 		if (do_break)
5714 			break;
5715 	}
5716 }
5717 
5718 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5719 				    struct mlxsw_sp_fib_node *fib_node)
5720 {
5721 	switch (fib_node->fib->proto) {
5722 	case MLXSW_SP_L3_PROTO_IPV4:
5723 		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5724 		break;
5725 	case MLXSW_SP_L3_PROTO_IPV6:
5726 		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5727 		break;
5728 	}
5729 }
5730 
5731 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5732 				  struct mlxsw_sp_vr *vr,
5733 				  enum mlxsw_sp_l3proto proto)
5734 {
5735 	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5736 	struct mlxsw_sp_fib_node *fib_node, *tmp;
5737 
5738 	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5739 		bool do_break = &tmp->list == &fib->node_list;
5740 
5741 		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5742 		if (do_break)
5743 			break;
5744 	}
5745 }
5746 
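/* Flush all offloaded FIB entries from every virtual router that is
 * in use. IPv4 is flushed before IPv6, since the IPv4 flush may
 * release the last reference on the virtual router.
 */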
5747 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5748 {
5749 	int i, j;
5750 
5751 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5752 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5753 
5754 		if (!mlxsw_sp_vr_is_used(vr))
5755 			continue;
5756 
5757 		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5758 			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5759 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5760 
5761 		/* If the virtual router was only used for IPv4, then the
5762 		 * IPv4 flush above released it and it is no longer in use.
5763 		 */
5764 		if (!mlxsw_sp_vr_is_used(vr))
5765 			continue;
5766 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5767 	}
5768 }
5769 
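/* Stop offloading FIB entries: flush everything from the device and
 * install default routes that trap packets to the CPU. The transition
 * is one-way for the lifetime of the router instance.
 */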
5770 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5771 {
5772 	int err;
5773 
5774 	if (mlxsw_sp->router->aborted)
5775 		return;
5776 	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5777 	mlxsw_sp_router_fib_flush(mlxsw_sp);
5778 	mlxsw_sp->router->aborted = true;
5779 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5780 	if (err)
5781 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5782 }
5783 
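/* FIB notifications arrive in atomic context, so the notifier info is
 * copied into a work item, with references taken where needed, and
 * processed later in process context under RTNL.
 */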
5784 struct mlxsw_sp_fib_event_work {
5785 	struct work_struct work;
5786 	union {
5787 		struct fib6_entry_notifier_info fen6_info;
5788 		struct fib_entry_notifier_info fen_info;
5789 		struct fib_rule_notifier_info fr_info;
5790 		struct fib_nh_notifier_info fnh_info;
5791 		struct mfc_entry_notifier_info men_info;
5792 		struct vif_entry_notifier_info ven_info;
5793 	};
5794 	struct mlxsw_sp *mlxsw_sp;
5795 	unsigned long event;
5796 };
5797 
5798 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5799 {
5800 	struct mlxsw_sp_fib_event_work *fib_work =
5801 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5802 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5803 	bool replace, append;
5804 	int err;
5805 
5806 	/* Protect internal structures from changes */
5807 	rtnl_lock();
5808 	mlxsw_sp_span_respin(mlxsw_sp);
5809 
5810 	switch (fib_work->event) {
5811 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5812 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5813 	case FIB_EVENT_ENTRY_ADD:
5814 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5815 		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5816 		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5817 					       replace, append);
5818 		if (err)
5819 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5820 		fib_info_put(fib_work->fen_info.fi);
5821 		break;
5822 	case FIB_EVENT_ENTRY_DEL:
5823 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5824 		fib_info_put(fib_work->fen_info.fi);
5825 		break;
5826 	case FIB_EVENT_RULE_ADD:
5827 		/* If we get here, a rule was added that we do not support.
5828 		 * Just abort the FIB offload.
5829 		 */
5830 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5831 		break;
5832 	case FIB_EVENT_NH_ADD: /* fall through */
5833 	case FIB_EVENT_NH_DEL:
5834 		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5835 					fib_work->fnh_info.fib_nh);
5836 		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5837 		break;
5838 	}
5839 	rtnl_unlock();
5840 	kfree(fib_work);
5841 }
5842 
5843 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5844 {
5845 	struct mlxsw_sp_fib_event_work *fib_work =
5846 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5847 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5848 	bool replace;
5849 	int err;
5850 
5851 	rtnl_lock();
5852 	mlxsw_sp_span_respin(mlxsw_sp);
5853 
5854 	switch (fib_work->event) {
5855 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5856 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5857 	case FIB_EVENT_ENTRY_ADD:
5858 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5859 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5860 					       fib_work->fen6_info.rt, replace);
5861 		if (err)
5862 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5863 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5864 		break;
5865 	case FIB_EVENT_ENTRY_DEL:
5866 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5867 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5868 		break;
5869 	case FIB_EVENT_RULE_ADD:
5870 		/* If we get here, a rule was added that we do not support.
5871 		 * Just abort the FIB offload.
5872 		 */
5873 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5874 		break;
5875 	}
5876 	rtnl_unlock();
5877 	kfree(fib_work);
5878 }
5879 
5880 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5881 {
5882 	struct mlxsw_sp_fib_event_work *fib_work =
5883 		container_of(work, struct mlxsw_sp_fib_event_work, work);
5884 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5885 	bool replace;
5886 	int err;
5887 
5888 	rtnl_lock();
5889 	switch (fib_work->event) {
5890 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5891 	case FIB_EVENT_ENTRY_ADD:
5892 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5893 
5894 		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5895 						replace);
5896 		if (err)
5897 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5898 		mr_cache_put(fib_work->men_info.mfc);
5899 		break;
5900 	case FIB_EVENT_ENTRY_DEL:
5901 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5902 		mr_cache_put(fib_work->men_info.mfc);
5903 		break;
5904 	case FIB_EVENT_VIF_ADD:
5905 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5906 						    &fib_work->ven_info);
5907 		if (err)
5908 			mlxsw_sp_router_fib_abort(mlxsw_sp);
5909 		dev_put(fib_work->ven_info.dev);
5910 		break;
5911 	case FIB_EVENT_VIF_DEL:
5912 		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5913 					      &fib_work->ven_info);
5914 		dev_put(fib_work->ven_info.dev);
5915 		break;
5916 	case FIB_EVENT_RULE_ADD:
5917 		/* If we get here, a rule was added that we do not support.
5918 		 * Just abort the FIB offload.
5919 		 */
5920 		mlxsw_sp_router_fib_abort(mlxsw_sp);
5921 		break;
5922 	}
5923 	rtnl_unlock();
5924 	kfree(fib_work);
5925 }
5926 
5927 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5928 				       struct fib_notifier_info *info)
5929 {
5930 	struct fib_entry_notifier_info *fen_info;
5931 	struct fib_nh_notifier_info *fnh_info;
5932 
5933 	switch (fib_work->event) {
5934 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5935 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5936 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5937 	case FIB_EVENT_ENTRY_DEL:
5938 		fen_info = container_of(info, struct fib_entry_notifier_info,
5939 					info);
5940 		fib_work->fen_info = *fen_info;
5941 		/* Take reference on fib_info to prevent it from being
5942 		 * freed while work is queued. Release it afterwards.
5943 		 */
5944 		fib_info_hold(fib_work->fen_info.fi);
5945 		break;
5946 	case FIB_EVENT_NH_ADD: /* fall through */
5947 	case FIB_EVENT_NH_DEL:
5948 		fnh_info = container_of(info, struct fib_nh_notifier_info,
5949 					info);
5950 		fib_work->fnh_info = *fnh_info;
5951 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5952 		break;
5953 	}
5954 }
5955 
5956 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5957 				       struct fib_notifier_info *info)
5958 {
5959 	struct fib6_entry_notifier_info *fen6_info;
5960 
5961 	switch (fib_work->event) {
5962 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5963 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
5964 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5965 	case FIB_EVENT_ENTRY_DEL:
5966 		fen6_info = container_of(info, struct fib6_entry_notifier_info,
5967 					 info);
5968 		fib_work->fen6_info = *fen6_info;
5969 		fib6_info_hold(fib_work->fen6_info.rt);
5970 		break;
5971 	}
5972 }
5973 
5974 static void
5975 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5976 			    struct fib_notifier_info *info)
5977 {
5978 	switch (fib_work->event) {
5979 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5980 	case FIB_EVENT_ENTRY_ADD: /* fall through */
5981 	case FIB_EVENT_ENTRY_DEL:
5982 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5983 		mr_cache_hold(fib_work->men_info.mfc);
5984 		break;
5985 	case FIB_EVENT_VIF_ADD: /* fall through */
5986 	case FIB_EVENT_VIF_DEL:
5987 		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5988 		dev_hold(fib_work->ven_info.dev);
5989 		break;
5990 	}
5991 }
5992 
5993 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5994 					  struct fib_notifier_info *info,
5995 					  struct mlxsw_sp *mlxsw_sp)
5996 {
5997 	struct netlink_ext_ack *extack = info->extack;
5998 	struct fib_rule_notifier_info *fr_info;
5999 	struct fib_rule *rule;
6000 	int err = 0;
6001 
6002 	/* Rule deletions require no action at the moment. */
6003 	if (event == FIB_EVENT_RULE_DEL)
6004 		return 0;
6005 
6006 	if (mlxsw_sp->router->aborted)
6007 		return 0;
6008 
6009 	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6010 	rule = fr_info->rule;
6011 
6012 	switch (info->family) {
6013 	case AF_INET:
6014 		if (!fib4_rule_default(rule) && !rule->l3mdev)
6015 			err = -EOPNOTSUPP;
6016 		break;
6017 	case AF_INET6:
6018 		if (!fib6_rule_default(rule) && !rule->l3mdev)
6019 			err = -EOPNOTSUPP;
6020 		break;
6021 	case RTNL_FAMILY_IPMR:
6022 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6023 			err = -EOPNOTSUPP;
6024 		break;
6025 	case RTNL_FAMILY_IP6MR:
6026 		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6027 			err = -EOPNOTSUPP;
6028 		break;
6029 	}
6030 
6031 	if (err < 0)
6032 		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6033 
6034 	return err;
6035 }
6036 
6037 /* Called with rcu_read_lock() */
6038 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6039 				     unsigned long event, void *ptr)
6040 {
6041 	struct mlxsw_sp_fib_event_work *fib_work;
6042 	struct fib_notifier_info *info = ptr;
6043 	struct mlxsw_sp_router *router;
6044 	int err;
6045 
6046 	if (!net_eq(info->net, &init_net) ||
6047 	    (info->family != AF_INET && info->family != AF_INET6 &&
6048 	     info->family != RTNL_FAMILY_IPMR &&
6049 	     info->family != RTNL_FAMILY_IP6MR))
6050 		return NOTIFY_DONE;
6051 
6052 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6053 
6054 	switch (event) {
6055 	case FIB_EVENT_RULE_ADD: /* fall through */
6056 	case FIB_EVENT_RULE_DEL:
6057 		err = mlxsw_sp_router_fib_rule_event(event, info,
6058 						     router->mlxsw_sp);
6059 		if (!err || info->extack)
6060 			return notifier_from_errno(err);
6061 		break;
6062 	case FIB_EVENT_ENTRY_ADD:
6063 		if (router->aborted) {
6064 			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6065 			return notifier_from_errno(-EINVAL);
6066 		}
6067 		break;
6068 	}
6069 
6070 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6071 	if (WARN_ON(!fib_work))
6072 		return NOTIFY_BAD;
6073 
6074 	fib_work->mlxsw_sp = router->mlxsw_sp;
6075 	fib_work->event = event;
6076 
6077 	switch (info->family) {
6078 	case AF_INET:
6079 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6080 		mlxsw_sp_router_fib4_event(fib_work, info);
6081 		break;
6082 	case AF_INET6:
6083 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6084 		mlxsw_sp_router_fib6_event(fib_work, info);
6085 		break;
6086 	case RTNL_FAMILY_IP6MR:
6087 	case RTNL_FAMILY_IPMR:
6088 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6089 		mlxsw_sp_router_fibmr_event(fib_work, info);
6090 		break;
6091 	}
6092 
6093 	mlxsw_core_schedule_work(&fib_work->work);
6094 
6095 	return NOTIFY_DONE;
6096 }
6097 
6098 struct mlxsw_sp_rif *
6099 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6100 			 const struct net_device *dev)
6101 {
6102 	int i;
6103 
6104 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6105 		if (mlxsw_sp->router->rifs[i] &&
6106 		    mlxsw_sp->router->rifs[i]->dev == dev)
6107 			return mlxsw_sp->router->rifs[i];
6108 
6109 	return NULL;
6110 }
6111 
6112 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6113 {
6114 	char ritr_pl[MLXSW_REG_RITR_LEN];
6115 	int err;
6116 
6117 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6118 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6119 	if (WARN_ON_ONCE(err))
6120 		return err;
6121 
6122 	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6123 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6124 }
6125 
6126 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6127 					  struct mlxsw_sp_rif *rif)
6128 {
6129 	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6130 	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6131 	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6132 }
6133 
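/* Decide whether an address event should change the RIF: create one
 * on NETDEV_UP if none exists yet, and destroy it on NETDEV_DOWN only
 * once the last IPv4/IPv6 address of the netdevice is gone.
 */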
6134 static bool
6135 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6136 			   unsigned long event)
6137 {
6138 	struct inet6_dev *inet6_dev;
6139 	bool addr_list_empty = true;
6140 	struct in_device *idev;
6141 
6142 	switch (event) {
6143 	case NETDEV_UP:
6144 		return rif == NULL;
6145 	case NETDEV_DOWN:
6146 		idev = __in_dev_get_rtnl(dev);
6147 		if (idev && idev->ifa_list)
6148 			addr_list_empty = false;
6149 
6150 		inet6_dev = __in6_dev_get(dev);
6151 		if (addr_list_empty && inet6_dev &&
6152 		    !list_empty(&inet6_dev->addr_list))
6153 			addr_list_empty = false;
6154 
6155 		/* macvlans do not have a RIF, but rather piggyback on the
6156 		 * RIF of their lower device.
6157 		 */
6158 		if (netif_is_macvlan(dev) && addr_list_empty)
6159 			return true;
6160 
6161 		if (rif && addr_list_empty &&
6162 		    !netif_is_l3_slave(rif->dev))
6163 			return true;
6164 		/* It is possible we already removed the RIF ourselves
6165 		 * if it was assigned to a netdev that is now a bridge
6166 		 * or LAG slave.
6167 		 */
6168 		return false;
6169 	}
6170 
6171 	return false;
6172 }
6173 
6174 static enum mlxsw_sp_rif_type
6175 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6176 		      const struct net_device *dev)
6177 {
6178 	enum mlxsw_sp_fid_type type;
6179 
6180 	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6181 		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6182 
6183 	/* Otherwise RIF type is derived from the type of the underlying FID. */
6184 	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6185 		type = MLXSW_SP_FID_TYPE_8021Q;
6186 	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6187 		type = MLXSW_SP_FID_TYPE_8021Q;
6188 	else if (netif_is_bridge_master(dev))
6189 		type = MLXSW_SP_FID_TYPE_8021D;
6190 	else
6191 		type = MLXSW_SP_FID_TYPE_RFID;
6192 
6193 	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6194 }
6195 
6196 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6197 {
6198 	int i;
6199 
6200 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6201 		if (!mlxsw_sp->router->rifs[i]) {
6202 			*p_rif_index = i;
6203 			return 0;
6204 		}
6205 	}
6206 
6207 	return -ENOBUFS;
6208 }
6209 
6210 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6211 					       u16 vr_id,
6212 					       struct net_device *l3_dev)
6213 {
6214 	struct mlxsw_sp_rif *rif;
6215 
6216 	rif = kzalloc(rif_size, GFP_KERNEL);
6217 	if (!rif)
6218 		return NULL;
6219 
6220 	INIT_LIST_HEAD(&rif->nexthop_list);
6221 	INIT_LIST_HEAD(&rif->neigh_list);
6222 	ether_addr_copy(rif->addr, l3_dev->dev_addr);
6223 	rif->mtu = l3_dev->mtu;
6224 	rif->vr_id = vr_id;
6225 	rif->dev = l3_dev;
6226 	rif->rif_index = rif_index;
6227 
6228 	return rif;
6229 }
6230 
6231 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6232 					   u16 rif_index)
6233 {
6234 	return mlxsw_sp->router->rifs[rif_index];
6235 }
6236 
6237 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6238 {
6239 	return rif->rif_index;
6240 }
6241 
6242 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6243 {
6244 	return lb_rif->common.rif_index;
6245 }
6246 
6247 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6248 {
6249 	return lb_rif->ul_vr_id;
6250 }
6251 
6252 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6253 {
6254 	return rif->dev->ifindex;
6255 }
6256 
6257 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6258 {
6259 	return rif->dev;
6260 }
6261 
6262 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6263 {
6264 	return rif->fid;
6265 }
6266 
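/* Create a RIF for a netdevice: bind it to a virtual router, allocate
 * a free RIF index, take the backing FID (if any) and let the
 * type-specific ops program the device. The error path unwinds in
 * reverse order.
 */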
6267 static struct mlxsw_sp_rif *
6268 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6269 		    const struct mlxsw_sp_rif_params *params,
6270 		    struct netlink_ext_ack *extack)
6271 {
6272 	u32 tb_id = l3mdev_fib_table(params->dev);
6273 	const struct mlxsw_sp_rif_ops *ops;
6274 	struct mlxsw_sp_fid *fid = NULL;
6275 	enum mlxsw_sp_rif_type type;
6276 	struct mlxsw_sp_rif *rif;
6277 	struct mlxsw_sp_vr *vr;
6278 	u16 rif_index;
6279 	int i, err;
6280 
6281 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6282 	ops = mlxsw_sp->router->rif_ops_arr[type];
6283 
6284 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6285 	if (IS_ERR(vr))
6286 		return ERR_CAST(vr);
6287 	vr->rif_count++;
6288 
6289 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6290 	if (err) {
6291 		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6292 		goto err_rif_index_alloc;
6293 	}
6294 
6295 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6296 	if (!rif) {
6297 		err = -ENOMEM;
6298 		goto err_rif_alloc;
6299 	}
6300 	rif->mlxsw_sp = mlxsw_sp;
6301 	rif->ops = ops;
6302 
6303 	if (ops->fid_get) {
6304 		fid = ops->fid_get(rif, extack);
6305 		if (IS_ERR(fid)) {
6306 			err = PTR_ERR(fid);
6307 			goto err_fid_get;
6308 		}
6309 		rif->fid = fid;
6310 	}
6311 
6312 	if (ops->setup)
6313 		ops->setup(rif, params);
6314 
6315 	err = ops->configure(rif);
6316 	if (err)
6317 		goto err_configure;
6318 
6319 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6320 		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6321 		if (err)
6322 			goto err_mr_rif_add;
6323 	}
6324 
6325 	mlxsw_sp_rif_counters_alloc(rif);
6326 	mlxsw_sp->router->rifs[rif_index] = rif;
6327 
6328 	return rif;
6329 
6330 err_mr_rif_add:
6331 	for (i--; i >= 0; i--)
6332 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6333 	ops->deconfigure(rif);
6334 err_configure:
6335 	if (fid)
6336 		mlxsw_sp_fid_put(fid);
6337 err_fid_get:
6338 	kfree(rif);
6339 err_rif_alloc:
6340 err_rif_index_alloc:
6341 	vr->rif_count--;
6342 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6343 	return ERR_PTR(err);
6344 }
6345 
6346 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6347 {
6348 	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6349 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6350 	struct mlxsw_sp_fid *fid = rif->fid;
6351 	struct mlxsw_sp_vr *vr;
6352 	int i;
6353 
6354 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6355 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6356 
6357 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6358 	mlxsw_sp_rif_counters_free(rif);
6359 	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6360 		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6361 	ops->deconfigure(rif);
6362 	if (fid)
6363 		/* Loopback RIFs are not associated with a FID. */
6364 		mlxsw_sp_fid_put(fid);
6365 	kfree(rif);
6366 	vr->rif_count--;
6367 	mlxsw_sp_vr_put(mlxsw_sp, vr);
6368 }
6369 
6370 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6371 				 struct net_device *dev)
6372 {
6373 	struct mlxsw_sp_rif *rif;
6374 
6375 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6376 	if (!rif)
6377 		return;
6378 	mlxsw_sp_rif_destroy(rif);
6379 }
6380 
6381 static void
6382 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6383 				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6384 {
6385 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6386 
6387 	params->vid = mlxsw_sp_port_vlan->vid;
6388 	params->lag = mlxsw_sp_port->lagged;
6389 	if (params->lag)
6390 		params->lag_id = mlxsw_sp_port->lag_id;
6391 	else
6392 		params->system_port = mlxsw_sp_port->local_port;
6393 }
6394 
6395 static int
6396 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6397 			       struct net_device *l3_dev,
6398 			       struct netlink_ext_ack *extack)
6399 {
6400 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6401 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6402 	u16 vid = mlxsw_sp_port_vlan->vid;
6403 	struct mlxsw_sp_rif *rif;
6404 	struct mlxsw_sp_fid *fid;
6405 	int err;
6406 
6407 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6408 	if (!rif) {
6409 		struct mlxsw_sp_rif_params params = {
6410 			.dev = l3_dev,
6411 		};
6412 
6413 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6414 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6415 		if (IS_ERR(rif))
6416 			return PTR_ERR(rif);
6417 	}
6418 
6419 	/* FID was already created, just take a reference */
6420 	fid = rif->ops->fid_get(rif, extack);
6421 	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6422 	if (err)
6423 		goto err_fid_port_vid_map;
6424 
6425 	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6426 	if (err)
6427 		goto err_port_vid_learning_set;
6428 
6429 	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6430 					BR_STATE_FORWARDING);
6431 	if (err)
6432 		goto err_port_vid_stp_set;
6433 
6434 	mlxsw_sp_port_vlan->fid = fid;
6435 
6436 	return 0;
6437 
6438 err_port_vid_stp_set:
6439 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6440 err_port_vid_learning_set:
6441 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6442 err_fid_port_vid_map:
6443 	mlxsw_sp_fid_put(fid);
6444 	return err;
6445 }
6446 
6447 void
6448 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6449 {
6450 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6451 	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6452 	u16 vid = mlxsw_sp_port_vlan->vid;
6453 
6454 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6455 		return;
6456 
6457 	mlxsw_sp_port_vlan->fid = NULL;
6458 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6459 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6460 	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6461 	/* If router port holds the last reference on the rFID, then the
6462 	 * associated Sub-port RIF will be destroyed.
6463 	 */
6464 	mlxsw_sp_fid_put(fid);
6465 }
6466 
6467 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6468 					     struct net_device *port_dev,
6469 					     unsigned long event, u16 vid,
6470 					     struct netlink_ext_ack *extack)
6471 {
6472 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6473 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6474 
6475 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6476 	if (WARN_ON(!mlxsw_sp_port_vlan))
6477 		return -EINVAL;
6478 
6479 	switch (event) {
6480 	case NETDEV_UP:
6481 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6482 						      l3_dev, extack);
6483 	case NETDEV_DOWN:
6484 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6485 		break;
6486 	}
6487 
6488 	return 0;
6489 }
6490 
6491 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6492 					unsigned long event,
6493 					struct netlink_ext_ack *extack)
6494 {
6495 	if (netif_is_bridge_port(port_dev) ||
6496 	    netif_is_lag_port(port_dev) ||
6497 	    netif_is_ovs_port(port_dev))
6498 		return 0;
6499 
6500 	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6501 						 extack);
6502 }
6503 
6504 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6505 					 struct net_device *lag_dev,
6506 					 unsigned long event, u16 vid,
6507 					 struct netlink_ext_ack *extack)
6508 {
6509 	struct net_device *port_dev;
6510 	struct list_head *iter;
6511 	int err;
6512 
6513 	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6514 		if (mlxsw_sp_port_dev_check(port_dev)) {
6515 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6516 								port_dev,
6517 								event, vid,
6518 								extack);
6519 			if (err)
6520 				return err;
6521 		}
6522 	}
6523 
6524 	return 0;
6525 }
6526 
6527 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6528 				       unsigned long event,
6529 				       struct netlink_ext_ack *extack)
6530 {
6531 	if (netif_is_bridge_port(lag_dev))
6532 		return 0;
6533 
6534 	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6535 					     extack);
6536 }
6537 
6538 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6539 					  unsigned long event,
6540 					  struct netlink_ext_ack *extack)
6541 {
6542 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6543 	struct mlxsw_sp_rif_params params = {
6544 		.dev = l3_dev,
6545 	};
6546 	struct mlxsw_sp_rif *rif;
6547 
6548 	switch (event) {
6549 	case NETDEV_UP:
6550 		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6551 		if (IS_ERR(rif))
6552 			return PTR_ERR(rif);
6553 		break;
6554 	case NETDEV_DOWN:
6555 		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6556 		mlxsw_sp_rif_destroy(rif);
6557 		break;
6558 	}
6559 
6560 	return 0;
6561 }
6562 
6563 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6564 					unsigned long event,
6565 					struct netlink_ext_ack *extack)
6566 {
6567 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6568 	u16 vid = vlan_dev_vlan_id(vlan_dev);
6569 
6570 	if (netif_is_bridge_port(vlan_dev))
6571 		return 0;
6572 
6573 	if (mlxsw_sp_port_dev_check(real_dev))
6574 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6575 							 event, vid, extack);
6576 	else if (netif_is_lag_master(real_dev))
6577 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6578 						     vid, extack);
6579 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6580 		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6581 
6582 	return 0;
6583 }
6584 
6585 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6586 {
6587 	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6588 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6589 
6590 	return ether_addr_equal_masked(mac, vrrp4, mask);
6591 }
6592 
6593 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6594 {
6595 	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6596 	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6597 
6598 	return ether_addr_equal_masked(mac, vrrp6, mask);
6599 }
6600 
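/* VRRP virtual MACs are 00:00:5e:00:01:XX (IPv4) and 00:00:5e:00:02:XX
 * (IPv6), where the last byte is the virtual router ID. Program that
 * ID on the RIF so the router answers for the virtual MAC; an ID of
 * zero disables it.
 */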
6601 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6602 				const u8 *mac, bool adding)
6603 {
6604 	char ritr_pl[MLXSW_REG_RITR_LEN];
6605 	u8 vrrp_id = adding ? mac[5] : 0;
6606 	int err;
6607 
6608 	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6609 	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6610 		return 0;
6611 
6612 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6613 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6614 	if (err)
6615 		return err;
6616 
6617 	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6618 		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6619 	else
6620 		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6621 
6622 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6623 }
6624 
6625 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6626 				    const struct net_device *macvlan_dev,
6627 				    struct netlink_ext_ack *extack)
6628 {
6629 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6630 	struct mlxsw_sp_rif *rif;
6631 	int err;
6632 
6633 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6634 	if (!rif) {
6635 		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6636 		return -EOPNOTSUPP;
6637 	}
6638 
6639 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6640 				  mlxsw_sp_fid_index(rif->fid), true);
6641 	if (err)
6642 		return err;
6643 
6644 	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6645 				   macvlan_dev->dev_addr, true);
6646 	if (err)
6647 		goto err_rif_vrrp_add;
6648 
6649 	/* Make sure the bridge driver does not have this MAC pointing at
6650 	 * some other port.
6651 	 */
6652 	if (rif->ops->fdb_del)
6653 		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6654 
6655 	return 0;
6656 
6657 err_rif_vrrp_add:
6658 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6659 			    mlxsw_sp_fid_index(rif->fid), false);
6660 	return err;
6661 }
6662 
6663 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6664 			      const struct net_device *macvlan_dev)
6665 {
6666 	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6667 	struct mlxsw_sp_rif *rif;
6668 
6669 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6670 	/* If we do not have a RIF, then we already took care of
6671 	 * removing the macvlan's MAC during RIF deletion.
6672 	 */
6673 	if (!rif)
6674 		return;
6675 	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6676 			     false);
6677 	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6678 			    mlxsw_sp_fid_index(rif->fid), false);
6679 }
6680 
6681 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6682 					   unsigned long event,
6683 					   struct netlink_ext_ack *extack)
6684 {
6685 	struct mlxsw_sp *mlxsw_sp;
6686 
6687 	mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6688 	if (!mlxsw_sp)
6689 		return 0;
6690 
6691 	switch (event) {
6692 	case NETDEV_UP:
6693 		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6694 	case NETDEV_DOWN:
6695 		mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6696 		break;
6697 	}
6698 
6699 	return 0;
6700 }
6701 
6702 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6703 				     unsigned long event,
6704 				     struct netlink_ext_ack *extack)
6705 {
6706 	if (mlxsw_sp_port_dev_check(dev))
6707 		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6708 	else if (netif_is_lag_master(dev))
6709 		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6710 	else if (netif_is_bridge_master(dev))
6711 		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6712 	else if (is_vlan_dev(dev))
6713 		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6714 	else if (netif_is_macvlan(dev))
6715 		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
6716 	else
6717 		return 0;
6718 }
6719 
6720 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6721 			    unsigned long event, void *ptr)
6722 {
6723 	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6724 	struct net_device *dev = ifa->ifa_dev->dev;
6725 	struct mlxsw_sp *mlxsw_sp;
6726 	struct mlxsw_sp_rif *rif;
6727 	int err = 0;
6728 
6729 	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6730 	if (event == NETDEV_UP)
6731 		goto out;
6732 
6733 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6734 	if (!mlxsw_sp)
6735 		goto out;
6736 
6737 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6738 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6739 		goto out;
6740 
6741 	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6742 out:
6743 	return notifier_from_errno(err);
6744 }
6745 
6746 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6747 				  unsigned long event, void *ptr)
6748 {
6749 	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6750 	struct net_device *dev = ivi->ivi_dev->dev;
6751 	struct mlxsw_sp *mlxsw_sp;
6752 	struct mlxsw_sp_rif *rif;
6753 	int err = 0;
6754 
6755 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6756 	if (!mlxsw_sp)
6757 		goto out;
6758 
6759 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6760 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6761 		goto out;
6762 
6763 	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6764 out:
6765 	return notifier_from_errno(err);
6766 }
6767 
6768 struct mlxsw_sp_inet6addr_event_work {
6769 	struct work_struct work;
6770 	struct net_device *dev;
6771 	unsigned long event;
6772 };
6773 
6774 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6775 {
6776 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6777 		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6778 	struct net_device *dev = inet6addr_work->dev;
6779 	unsigned long event = inet6addr_work->event;
6780 	struct mlxsw_sp *mlxsw_sp;
6781 	struct mlxsw_sp_rif *rif;
6782 
6783 	rtnl_lock();
6784 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6785 	if (!mlxsw_sp)
6786 		goto out;
6787 
6788 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6789 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6790 		goto out;
6791 
6792 	__mlxsw_sp_inetaddr_event(dev, event, NULL);
6793 out:
6794 	rtnl_unlock();
6795 	dev_put(dev);
6796 	kfree(inet6addr_work);
6797 }
6798 
6799 /* Called with rcu_read_lock() */
6800 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6801 			     unsigned long event, void *ptr)
6802 {
6803 	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6804 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6805 	struct net_device *dev = if6->idev->dev;
6806 
6807 	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6808 	if (event == NETDEV_UP)
6809 		return NOTIFY_DONE;
6810 
6811 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6812 		return NOTIFY_DONE;
6813 
6814 	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6815 	if (!inet6addr_work)
6816 		return NOTIFY_BAD;
6817 
6818 	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6819 	inet6addr_work->dev = dev;
6820 	inet6addr_work->event = event;
6821 	dev_hold(dev);
6822 	mlxsw_core_schedule_work(&inet6addr_work->work);
6823 
6824 	return NOTIFY_DONE;
6825 }
6826 
6827 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6828 				   unsigned long event, void *ptr)
6829 {
6830 	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6831 	struct net_device *dev = i6vi->i6vi_dev->dev;
6832 	struct mlxsw_sp *mlxsw_sp;
6833 	struct mlxsw_sp_rif *rif;
6834 	int err = 0;
6835 
6836 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6837 	if (!mlxsw_sp)
6838 		goto out;
6839 
6840 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6841 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
6842 		goto out;
6843 
6844 	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6845 out:
6846 	return notifier_from_errno(err);
6847 }
6848 
6849 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6850 			     const char *mac, int mtu)
6851 {
6852 	char ritr_pl[MLXSW_REG_RITR_LEN];
6853 	int err;
6854 
6855 	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6856 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6857 	if (err)
6858 		return err;
6859 
6860 	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6861 	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6862 	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6863 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6864 }
6865 
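/* Called when the MAC address or MTU of a netdevice backing a RIF
 * changed. Re-program the RIF and move the FDB entry to the new MAC,
 * rolling back on failure.
 */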
6866 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6867 {
6868 	struct mlxsw_sp *mlxsw_sp;
6869 	struct mlxsw_sp_rif *rif;
6870 	u16 fid_index;
6871 	int err;
6872 
6873 	mlxsw_sp = mlxsw_sp_lower_get(dev);
6874 	if (!mlxsw_sp)
6875 		return 0;
6876 
6877 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6878 	if (!rif)
6879 		return 0;
6880 	fid_index = mlxsw_sp_fid_index(rif->fid);
6881 
6882 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6883 	if (err)
6884 		return err;
6885 
6886 	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6887 				dev->mtu);
6888 	if (err)
6889 		goto err_rif_edit;
6890 
6891 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6892 	if (err)
6893 		goto err_rif_fdb_op;
6894 
6895 	if (rif->mtu != dev->mtu) {
6896 		struct mlxsw_sp_vr *vr;
6897 		int i;
6898 
6899 		/* The RIF is relevant only to its mr_table instance since,
6900 		 * unlike in unicast routing, a RIF cannot be shared between
6901 		 * several multicast routing tables.
6902 		 */
6903 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
6904 		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6905 			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
6906 						   rif, dev->mtu);
6907 	}
6908 
6909 	ether_addr_copy(rif->addr, dev->dev_addr);
6910 	rif->mtu = dev->mtu;
6911 
6912 	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6913 
6914 	return 0;
6915 
6916 err_rif_fdb_op:
6917 	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6918 err_rif_edit:
6919 	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6920 	return err;
6921 }
6922 
6923 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6924 				  struct net_device *l3_dev,
6925 				  struct netlink_ext_ack *extack)
6926 {
6927 	struct mlxsw_sp_rif *rif;
6928 
6929 	/* If netdev is already associated with a RIF, then we need to
6930 	 * destroy it and create a new one with the new virtual router ID.
6931 	 */
6932 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6933 	if (rif)
6934 		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6935 
6936 	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6937 }
6938 
6939 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6940 				    struct net_device *l3_dev)
6941 {
6942 	struct mlxsw_sp_rif *rif;
6943 
6944 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6945 	if (!rif)
6946 		return;
6947 	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6948 }
6949 
6950 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6951 				 struct netdev_notifier_changeupper_info *info)
6952 {
6953 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6954 	int err = 0;
6955 
6956 	/* We do not create a RIF for a macvlan, but only use it to
6957 	 * direct more MAC addresses to the router.
6958 	 */
6959 	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6960 		return 0;
6961 
6962 	switch (event) {
6963 	case NETDEV_PRECHANGEUPPER:
6964 		return 0;
6965 	case NETDEV_CHANGEUPPER:
6966 		if (info->linking) {
6967 			struct netlink_ext_ack *extack;
6968 
6969 			extack = netdev_notifier_info_to_extack(&info->info);
6970 			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6971 		} else {
6972 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6973 		}
6974 		break;
6975 	}
6976 
6977 	return err;
6978 }
6979 
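/* Callback for netdev_walk_all_upper_dev_rcu(): remove the FDB entry
 * that directed the macvlan's MAC address to the router.
 */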
6980 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6981 {
6982 	struct mlxsw_sp_rif *rif = data;
6983 
6984 	if (!netif_is_macvlan(dev))
6985 		return 0;
6986 
6987 	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6988 				   mlxsw_sp_fid_index(rif->fid), false);
6989 }
6990 
6991 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6992 {
6993 	if (!netif_is_macvlan_port(rif->dev))
6994 		return 0;
6995 
6996 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
6997 	return netdev_walk_all_upper_dev_rcu(rif->dev,
6998 					     __mlxsw_sp_rif_macvlan_flush, rif);
6999 }
7000 
7001 static struct mlxsw_sp_rif_subport *
7002 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
7003 {
7004 	return container_of(rif, struct mlxsw_sp_rif_subport, common);
7005 }
7006 
7007 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7008 				       const struct mlxsw_sp_rif_params *params)
7009 {
7010 	struct mlxsw_sp_rif_subport *rif_subport;
7011 
7012 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7013 	rif_subport->vid = params->vid;
7014 	rif_subport->lag = params->lag;
7015 	if (params->lag)
7016 		rif_subport->lag_id = params->lag_id;
7017 	else
7018 		rif_subport->system_port = params->system_port;
7019 }
7020 
7021 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7022 {
7023 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7024 	struct mlxsw_sp_rif_subport *rif_subport;
7025 	char ritr_pl[MLXSW_REG_RITR_LEN];
7026 
7027 	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7028 	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7029 			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7030 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7031 	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7032 				  rif_subport->lag ? rif_subport->lag_id :
7033 						     rif_subport->system_port,
7034 				  rif_subport->vid);
7035 
7036 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7037 }
7038 
7039 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7040 {
7041 	int err;
7042 
7043 	err = mlxsw_sp_rif_subport_op(rif, true);
7044 	if (err)
7045 		return err;
7046 
7047 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7048 				  mlxsw_sp_fid_index(rif->fid), true);
7049 	if (err)
7050 		goto err_rif_fdb_op;
7051 
7052 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7053 	return 0;
7054 
7055 err_rif_fdb_op:
7056 	mlxsw_sp_rif_subport_op(rif, false);
7057 	return err;
7058 }
7059 
7060 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7061 {
7062 	struct mlxsw_sp_fid *fid = rif->fid;
7063 
7064 	mlxsw_sp_fid_rif_set(fid, NULL);
7065 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7066 			    mlxsw_sp_fid_index(fid), false);
7067 	mlxsw_sp_rif_macvlan_flush(rif);
7068 	mlxsw_sp_rif_subport_op(rif, false);
7069 }
7070 
7071 static struct mlxsw_sp_fid *
7072 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7073 			     struct netlink_ext_ack *extack)
7074 {
7075 	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7076 }
7077 
7078 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7079 	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7080 	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7081 	.setup			= mlxsw_sp_rif_subport_setup,
7082 	.configure		= mlxsw_sp_rif_subport_configure,
7083 	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7084 	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7085 };
7086 
7087 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7088 				    enum mlxsw_reg_ritr_if_type type,
7089 				    u16 vid_fid, bool enable)
7090 {
7091 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7092 	char ritr_pl[MLXSW_REG_RITR_LEN];
7093 
7094 	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7095 			    rif->dev->mtu);
7096 	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7097 	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7098 
7099 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7100 }
7101 
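/* The "router port" is a logical port one above the maximum physical
 * port number. It is used as the flood table destination through
 * which flooded packets reach the router.
 */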
7102 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7103 {
7104 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7105 }
7106 
7107 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7108 {
7109 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7110 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7111 	int err;
7112 
7113 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7114 	if (err)
7115 		return err;
7116 
7117 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7118 				     mlxsw_sp_router_port(mlxsw_sp), true);
7119 	if (err)
7120 		goto err_fid_mc_flood_set;
7121 
7122 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7123 				     mlxsw_sp_router_port(mlxsw_sp), true);
7124 	if (err)
7125 		goto err_fid_bc_flood_set;
7126 
7127 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7128 				  mlxsw_sp_fid_index(rif->fid), true);
7129 	if (err)
7130 		goto err_rif_fdb_op;
7131 
7132 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7133 	return 0;
7134 
7135 err_rif_fdb_op:
7136 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7137 			       mlxsw_sp_router_port(mlxsw_sp), false);
7138 err_fid_bc_flood_set:
7139 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7140 			       mlxsw_sp_router_port(mlxsw_sp), false);
7141 err_fid_mc_flood_set:
7142 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7143 	return err;
7144 }
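/* The error path above is the usual kernel unwind ladder: each err_* label
 * is named after the step that failed, and the labels fall through so every
 * previously-completed step is rolled back in reverse order (e.g. a failed
 * FDB write lands on err_rif_fdb_op, which unwinds the BC flood membership,
 * then the MC one, then the RITR VLAN interface).
 */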
7145 
7146 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7147 {
7148 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7149 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7150 	struct mlxsw_sp_fid *fid = rif->fid;
7151 
7152 	mlxsw_sp_fid_rif_set(fid, NULL);
7153 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7154 			    mlxsw_sp_fid_index(fid), false);
7155 	mlxsw_sp_rif_macvlan_flush(rif);
7156 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7157 			       mlxsw_sp_router_port(mlxsw_sp), false);
7158 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7159 			       mlxsw_sp_router_port(mlxsw_sp), false);
7160 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7161 }
7162 
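/* Two cases: for a VLAN upper (e.g. a hypothetical swp1.10) the VID comes
 * straight from the 8021q device; for a VLAN-aware bridge the PVID is used,
 * as traffic routed via the bridge device itself flows on that VLAN.
 */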
7163 static struct mlxsw_sp_fid *
7164 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7165 			  struct netlink_ext_ack *extack)
7166 {
7167 	u16 vid;
7168 	int err;
7169 
7170 	if (is_vlan_dev(rif->dev)) {
7171 		vid = vlan_dev_vlan_id(rif->dev);
7172 	} else {
7173 		err = br_vlan_get_pvid(rif->dev, &vid);
7174 		if (err < 0 || !vid) {
7175 			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7176 			return ERR_PTR(-EINVAL);
7177 		}
7178 	}
7179 
7180 	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7181 }
7182 
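/* The routed {MAC, VID} pair may still be present in the bridge's software
 * FDB; look up the bridge port currently holding it and ask the bridge to
 * delete its copy via a SWITCHDEV_FDB_DEL_TO_BRIDGE notification.
 */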
7183 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7184 {
7185 	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7186 	struct switchdev_notifier_fdb_info info;
7187 	struct net_device *br_dev;
7188 	struct net_device *dev;
7189 
7190 	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7191 	dev = br_fdb_find_port(br_dev, mac, vid);
7192 	if (!dev)
7193 		return;
7194 
7195 	info.addr = mac;
7196 	info.vid = vid;
7197 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7198 }
7199 
7200 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7201 	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7202 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7203 	.configure		= mlxsw_sp_rif_vlan_configure,
7204 	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
7205 	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7206 	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7207 };
7208 
7209 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7210 {
7211 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7212 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7213 	int err;
7214 
7215 	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7216 				       true);
7217 	if (err)
7218 		return err;
7219 
7220 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7221 				     mlxsw_sp_router_port(mlxsw_sp), true);
7222 	if (err)
7223 		goto err_fid_mc_flood_set;
7224 
7225 	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7226 				     mlxsw_sp_router_port(mlxsw_sp), true);
7227 	if (err)
7228 		goto err_fid_bc_flood_set;
7229 
7230 	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7231 				  mlxsw_sp_fid_index(rif->fid), true);
7232 	if (err)
7233 		goto err_rif_fdb_op;
7234 
7235 	mlxsw_sp_fid_rif_set(rif->fid, rif);
7236 	return 0;
7237 
7238 err_rif_fdb_op:
7239 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7240 			       mlxsw_sp_router_port(mlxsw_sp), false);
7241 err_fid_bc_flood_set:
7242 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7243 			       mlxsw_sp_router_port(mlxsw_sp), false);
7244 err_fid_mc_flood_set:
7245 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7246 	return err;
7247 }
7248 
7249 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7250 {
7251 	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7252 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7253 	struct mlxsw_sp_fid *fid = rif->fid;
7254 
7255 	mlxsw_sp_fid_rif_set(fid, NULL);
7256 	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7257 			    mlxsw_sp_fid_index(fid), false);
7258 	mlxsw_sp_rif_macvlan_flush(rif);
7259 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7260 			       mlxsw_sp_router_port(mlxsw_sp), false);
7261 	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7262 			       mlxsw_sp_router_port(mlxsw_sp), false);
7263 	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7264 }
7265 
7266 static struct mlxsw_sp_fid *
7267 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7268 			 struct netlink_ext_ack *extack)
7269 {
7270 	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7271 }
7272 
7273 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7274 {
7275 	struct switchdev_notifier_fdb_info info;
7276 	struct net_device *dev;
7277 
7278 	dev = br_fdb_find_port(rif->dev, mac, 0);
7279 	if (!dev)
7280 		return;
7281 
7282 	info.addr = mac;
7283 	info.vid = 0;
7284 	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7285 }
7286 
7287 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7288 	.type			= MLXSW_SP_RIF_TYPE_FID,
7289 	.rif_size		= sizeof(struct mlxsw_sp_rif),
7290 	.configure		= mlxsw_sp_rif_fid_configure,
7291 	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7292 	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7293 	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7294 };
7295 
7296 static struct mlxsw_sp_rif_ipip_lb *
7297 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7298 {
7299 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7300 }
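/* container_of() recovers the enclosing structure from a pointer to an
 * embedded member, so no back-pointer is needed:
 *
 *	struct mlxsw_sp_rif_ipip_lb {
 *		struct mlxsw_sp_rif common;	<- 'rif' points here
 *		...
 *	};
 *
 * The result is simply 'rif' minus offsetof(struct mlxsw_sp_rif_ipip_lb,
 * common).
 */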
7301 
7302 static void
7303 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7304 			   const struct mlxsw_sp_rif_params *params)
7305 {
7306 	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7307 	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7308 
7309 	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7310 				 common);
7311 	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7312 	rif_lb->lb_config = params_lb->lb_config;
7313 }
7314 
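/* A loopback RIF backing an IP-in-IP tunnel needs a virtual router for the
 * tunnel's underlay table: mlxsw_sp_vr_get() below looks it up, creating it
 * on first use, and ul_vr->rif_count keeps it pinned until
 * mlxsw_sp_rif_ipip_lb_deconfigure() drops the reference.
 */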
7315 static int
7316 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7317 {
7318 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7319 	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7320 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7321 	struct mlxsw_sp_vr *ul_vr;
7322 	int err;
7323 
7324 	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7325 	if (IS_ERR(ul_vr))
7326 		return PTR_ERR(ul_vr);
7327 
7328 	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7329 	if (err)
7330 		goto err_loopback_op;
7331 
7332 	lb_rif->ul_vr_id = ul_vr->id;
7333 	++ul_vr->rif_count;
7334 	return 0;
7335 
7336 err_loopback_op:
7337 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7338 	return err;
7339 }
7340 
7341 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7342 {
7343 	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7344 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7345 	struct mlxsw_sp_vr *ul_vr;
7346 
7347 	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7348 	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7349 
7350 	--ul_vr->rif_count;
7351 	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7352 }
7353 
7354 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7355 	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7356 	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7357 	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7358 	.configure		= mlxsw_sp_rif_ipip_lb_configure,
7359 	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
7360 };
7361 
7362 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7363 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7364 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
7365 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7366 	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
7367 };
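/* Illustrative sketch of how this table is consumed; the real RIF creation
 * path elsewhere in this file follows the same general shape:
 *
 *	const struct mlxsw_sp_rif_ops *ops =
 *		mlxsw_sp->router->rif_ops_arr[type];
 *	struct mlxsw_sp_rif *rif = kzalloc(ops->rif_size, GFP_KERNEL);
 *
 *	if (ops->setup)
 *		ops->setup(rif, params);
 *	err = ops->configure(rif);
 *	...
 *	ops->deconfigure(rif);
 */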
7368 
7369 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7370 {
7371 	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7372 
7373 	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7374 					 sizeof(struct mlxsw_sp_rif *),
7375 					 GFP_KERNEL);
7376 	if (!mlxsw_sp->router->rifs)
7377 		return -ENOMEM;
7378 
7379 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7380 
7381 	return 0;
7382 }
7383 
7384 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7385 {
7386 	int i;
7387 
7388 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7389 		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7390 
7391 	kfree(mlxsw_sp->router->rifs);
7392 }
7393 
7394 static int
7395 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7396 {
7397 	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7398 
7399 	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7400 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7401 }
7402 
7403 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7404 {
7405 	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7406 	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7407 	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7408 }
7409 
7410 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7411 {
7412 	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7413 }
7414 
7415 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7416 {
7417 	struct mlxsw_sp_router *router;
7418 
7419 	/* Flush pending FIB notifications and then flush the device's
7420 	 * table before requesting another dump. The FIB notification
7421 	 * block is unregistered, so no need to take RTNL.
7422 	 */
7423 	mlxsw_core_flush_owq();
7424 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7425 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7426 }
7427 
7428 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7429 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7430 {
7431 	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7432 }
7433 
7434 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7435 {
7436 	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7437 }
7438 
7439 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7440 {
7441 	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7442 
7443 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7444 				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7445 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7446 	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7447 	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7448 	if (only_l3)
7449 		return;
7450 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7451 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7452 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7453 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7454 }
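/* The hardware hash inputs above are derived from the kernel's own multipath
 * policy knob so that HW and SW ECMP select the same nexthop. Under the
 * default policy (0) only L3 fields are hashed; a 5-tuple (L4) hash is
 * enabled on the host with:
 *
 *	# sysctl -w net.ipv4.fib_multipath_hash_policy=1
 */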
7455 
7456 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7457 {
7458 	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7459 
7460 	mlxsw_sp_mp_hash_header_set(recr2_pl,
7461 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7462 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7463 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7464 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7465 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7466 	if (only_l3) {
7467 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7468 					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7469 	} else {
7470 		mlxsw_sp_mp_hash_header_set(recr2_pl,
7471 					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7472 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7473 					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7474 		mlxsw_sp_mp_hash_field_set(recr2_pl,
7475 					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7476 	}
7477 }
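/* IPv6 counterpart of the above, keyed on
 * net.ipv6.fib_multipath_hash_policy: the next header is always hashed, and
 * under the default L3 policy the flow label stands in for the L4 ports.
 */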
7478 
7479 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7480 {
7481 	char recr2_pl[MLXSW_REG_RECR2_LEN];
7482 	u32 seed;
7483 
7484 	get_random_bytes(&seed, sizeof(seed));
7485 	mlxsw_reg_recr2_pack(recr2_pl, seed);
7486 	mlxsw_sp_mp4_hash_init(recr2_pl);
7487 	mlxsw_sp_mp6_hash_init(recr2_pl);
7488 
7489 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7490 }
7491 #else
7492 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7493 {
7494 	return 0;
7495 }
7496 #endif
7497 
7498 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7499 {
7500 	char rdpm_pl[MLXSW_REG_RDPM_LEN];
7501 	unsigned int i;
7502 
7503 	MLXSW_REG_ZERO(rdpm, rdpm_pl);
7504 
7505 	/* HW determines the switch priority from the DSCP bits, while the
7506 	 * kernel still derives it from the full ToS byte. To bridge this
7507 	 * mismatch, program each entry with the priority the kernel would
7508 	 * compute for the matching ToS: the DSCP shifted past the two ECN bits.
7509 	 */
7510 	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7511 		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7512 
7513 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7514 }
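/* Worked example: entry 46 (DSCP 46, "EF") is programmed with
 * rt_tos2priority(46 << 2), i.e. the priority the kernel computes for a ToS
 * byte of 0xb8 -- the same DSCP value with the two ECN bits appended as
 * zeroes.
 */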
7515 
7516 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7517 {
7518 	bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7519 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7520 	u64 max_rifs;
7522 
7523 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7524 		return -EIO;
7525 	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7526 
7527 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7528 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7529 	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7530 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7534 }
7535 
7536 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7537 {
7538 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
7539 
7540 	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7541 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7542 }
7543 
7544 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7545 {
7546 	struct mlxsw_sp_router *router;
7547 	int err;
7548 
7549 	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7550 	if (!router)
7551 		return -ENOMEM;
7552 	mlxsw_sp->router = router;
7553 	router->mlxsw_sp = mlxsw_sp;
7554 
7555 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7556 	err = __mlxsw_sp_router_init(mlxsw_sp);
7557 	if (err)
7558 		goto err_router_init;
7559 
7560 	err = mlxsw_sp_rifs_init(mlxsw_sp);
7561 	if (err)
7562 		goto err_rifs_init;
7563 
7564 	err = mlxsw_sp_ipips_init(mlxsw_sp);
7565 	if (err)
7566 		goto err_ipips_init;
7567 
7568 	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7569 			      &mlxsw_sp_nexthop_ht_params);
7570 	if (err)
7571 		goto err_nexthop_ht_init;
7572 
7573 	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7574 			      &mlxsw_sp_nexthop_group_ht_params);
7575 	if (err)
7576 		goto err_nexthop_group_ht_init;
7577 
7578 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7579 	err = mlxsw_sp_lpm_init(mlxsw_sp);
7580 	if (err)
7581 		goto err_lpm_init;
7582 
7583 	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7584 	if (err)
7585 		goto err_mr_init;
7586 
7587 	err = mlxsw_sp_vrs_init(mlxsw_sp);
7588 	if (err)
7589 		goto err_vrs_init;
7590 
7591 	err = mlxsw_sp_neigh_init(mlxsw_sp);
7592 	if (err)
7593 		goto err_neigh_init;
7594 
7595 	mlxsw_sp->router->netevent_nb.notifier_call =
7596 		mlxsw_sp_router_netevent_event;
7597 	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7598 	if (err)
7599 		goto err_register_netevent_notifier;
7600 
7601 	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7602 	if (err)
7603 		goto err_mp_hash_init;
7604 
7605 	err = mlxsw_sp_dscp_init(mlxsw_sp);
7606 	if (err)
7607 		goto err_dscp_init;
7608 
7609 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7610 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7611 				    mlxsw_sp_router_fib_dump_flush);
7612 	if (err)
7613 		goto err_register_fib_notifier;
7614 
7615 	return 0;
7616 
7617 err_register_fib_notifier:
7618 err_dscp_init:
7619 err_mp_hash_init:
7620 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7621 err_register_netevent_notifier:
7622 	mlxsw_sp_neigh_fini(mlxsw_sp);
7623 err_neigh_init:
7624 	mlxsw_sp_vrs_fini(mlxsw_sp);
7625 err_vrs_init:
7626 	mlxsw_sp_mr_fini(mlxsw_sp);
7627 err_mr_init:
7628 	mlxsw_sp_lpm_fini(mlxsw_sp);
7629 err_lpm_init:
7630 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7631 err_nexthop_group_ht_init:
7632 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7633 err_nexthop_ht_init:
7634 	mlxsw_sp_ipips_fini(mlxsw_sp);
7635 err_ipips_init:
7636 	mlxsw_sp_rifs_fini(mlxsw_sp);
7637 err_rifs_init:
7638 	__mlxsw_sp_router_fini(mlxsw_sp);
7639 err_router_init:
7640 	kfree(mlxsw_sp->router);
7641 	return err;
7642 }
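/* The err_* ladder above unwinds in exact reverse order of setup;
 * mlxsw_sp_router_fini() below performs the same teardown for a fully
 * initialized router.
 */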
7643 
7644 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7645 {
7646 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7647 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7648 	mlxsw_sp_neigh_fini(mlxsw_sp);
7649 	mlxsw_sp_vrs_fini(mlxsw_sp);
7650 	mlxsw_sp_mr_fini(mlxsw_sp);
7651 	mlxsw_sp_lpm_fini(mlxsw_sp);
7652 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7653 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7654 	mlxsw_sp_ipips_fini(mlxsw_sp);
7655 	mlxsw_sp_rifs_fini(mlxsw_sp);
7656 	__mlxsw_sp_router_fini(mlxsw_sp);
7657 	kfree(mlxsw_sp->router);
7658 }
7659